//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    Mode(MF.getFunction()),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = getMaxWavesPerEU();
  limitOccupancy(MF);
  CallingConv::ID CC = F.getCallingConv();

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR33;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR34;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
    WorkGroupIDX = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
    WorkItemIDX = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    PrivateSegmentBuffer = true;

    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F.hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F.hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    auto hasNonSpillStackObjects = [&]() {
      // Avoid expensive checking if there are no stack objects.
      if (!HasStackObjects)
        return false;
      for (auto OI = FrameInfo.getObjectIndexBegin(),
                OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
        if (!FrameInfo.isSpillSlotObjectIndex(OI))
          return true;
      // All stack objects are spill slots.
      return false;
    };
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

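/// Cap the tracked occupancy at the subtarget's maximum waves per EU and at
/// what this function's LDS usage allows.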
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

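/// Reserve the next four user SGPRs for the private segment buffer resource
/// descriptor and record them in the argument info.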
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

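/// Reserve an SGPR pair for the 64-bit dispatch pointer.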
unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

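/// Reserve an SGPR pair for the 64-bit queue pointer.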
unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

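/// Reserve an SGPR pair for the 64-bit kernarg segment pointer.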
unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

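/// Reserve an SGPR pair for the 64-bit dispatch ID.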
unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

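/// Reserve an SGPR pair for the flat scratch initialization value.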
unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

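/// Reserve an SGPR pair for the 64-bit implicit buffer pointer.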
unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

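// Check whether \p Reg appears in the null-terminated list \p CSRegs.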
static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed lanes are available in the VGPRs already used
/// for SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized.
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either an AGPR is spilled to a VGPR or vice versa.
/// \returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

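/// Remove the stack objects whose contents were moved into registers by the
/// SGPR-to-VGPR and VGPR-to-AGPR spill allocation above.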
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP spill hasn't been inserted yet, so keep it around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

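/// \returns the first SGPR not yet claimed as a user SGPR argument.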
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

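/// \returns the first SGPR following both the user and system SGPRs.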
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

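// Render \p Reg's printable name into a YAML string value for serialization.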
static yaml::StringValue regToString(unsigned Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

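// Convert the argument usage info to its YAML form, returning None when no
// argument is set so the field is omitted from the output.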
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
  const llvm::SIMachineFunctionInfo& MFI,
  const TargetRegisterInfo &TRI)
  : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
    MaxKernArgAlign(MFI.getMaxKernArgAlign()),
    LDSSize(MFI.getLDSSize()),
    IsEntryFunction(MFI.isEntryFunction()),
    NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
    MemoryBound(MFI.isMemoryBound()),
    WaveLimiter(MFI.needsWaveLimiter()),
    ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
    ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
    FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
    StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
    ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
    Mode(MFI.getMode()) {}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

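// Restore the base MachineFunctionInfo fields from parsed MIR YAML.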
bool SIMachineFunctionInfo::initializeBaseYamlFields(
  const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  return false;
}