xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (revision c9539b89010900499a200cdd6c0265ea5d950875)
1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SIMachineFunctionInfo.h"
10 #include "AMDGPUTargetMachine.h"
11 #include "AMDGPUSubtarget.h"
12 #include "SIRegisterInfo.h"
13 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14 #include "Utils/AMDGPUBaseInfo.h"
15 #include "llvm/ADT/Optional.h"
16 #include "llvm/CodeGen/LiveIntervals.h"
17 #include "llvm/CodeGen/MachineBasicBlock.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/MIRParser/MIParser.h"
22 #include "llvm/IR/CallingConv.h"
23 #include "llvm/IR/DiagnosticInfo.h"
24 #include "llvm/IR/Function.h"
25 #include <cassert>
26 #include <vector>
27 
28 #define MAX_LANES 64
29 
30 using namespace llvm;
31 
/// Build the AMDGPU per-function info for \p MF.
///
/// Every "needs this preloaded input" flag starts out false and is switched on
/// below from the function's calling convention, its string attributes
/// ("amdgpu-no-*", "amdgpu-calls", "amdgpu-stack-objects", ...) and the
/// subtarget's capabilities. The constructor also picks the fixed frame /
/// stack / scratch registers for non-entry functions and computes the initial
/// occupancy estimate.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    BufferPSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
    ImagePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
    GWSResourcePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    LDSKernelId(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    // A kernel needs the kernarg segment pointer whenever it has explicit
    // arguments or the subtarget reports a non-zero implicit argument size.
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    // The X components are unconditionally enabled for kernels.
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  // Start from "subtarget has MAI instructions"; entry functions may clear
  // this again below.
  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    // All calling conventions except AMDGPU_Gfx start from the fixed-ABI
    // argument layout.
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now, other registers
      // required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    // Raise the kernarg alignment to at least what the implicit argument
    // pointer requires.
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    // On GFX90A-style subtargets, drop the AGPR requirement when the VGPR
    // budget fits in the VGPR_32 class and the IR scan in mayUseAGPRs() finds
    // nothing that could need AGPRs.
    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(MF))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  // Scratch access setup: either the private segment buffer (HSA/Mesa without
  // flat scratch) or, for Mesa graphics shaders, the implicit buffer pointer.
  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  // For compute (non-graphics) calling conventions each input is enabled
  // unless the matching "amdgpu-no-*" attribute is present on the function.
  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    // Y and Z work-item IDs are additionally skipped when the subtarget
    // reports a maximum ID of zero for that dimension.
    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;

    // The LDS kernel id is only ever requested for non-kernel functions.
    if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
      LDSKernelId = true;
  }

  // FIXME: This attribute is a hack, we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  // Optional integer-valued attributes. The result of consumeInteger() is not
  // checked here, so a malformed value is not diagnosed.
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve highest available VGPR. After
  // RA, shift it to the lowest available unused VGPR if the one exist.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}
201 
202 MachineFunctionInfo *SIMachineFunctionInfo::clone(
203     BumpPtrAllocator &Allocator, MachineFunction &DestMF,
204     const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
205     const {
206   return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
207 }
208 
209 void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
210   limitOccupancy(getMaxWavesPerEU());
211   const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
212   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
213                  MF.getFunction()));
214 }
215 
216 Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
217   const SIRegisterInfo &TRI) {
218   ArgInfo.PrivateSegmentBuffer =
219     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
220     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
221   NumUserSGPRs += 4;
222   return ArgInfo.PrivateSegmentBuffer.getRegister();
223 }
224 
225 Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
226   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
227     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
228   NumUserSGPRs += 2;
229   return ArgInfo.DispatchPtr.getRegister();
230 }
231 
232 Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
233   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
234     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
235   NumUserSGPRs += 2;
236   return ArgInfo.QueuePtr.getRegister();
237 }
238 
239 Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
240   ArgInfo.KernargSegmentPtr
241     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
242     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
243   NumUserSGPRs += 2;
244   return ArgInfo.KernargSegmentPtr.getRegister();
245 }
246 
247 Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
248   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
249     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
250   NumUserSGPRs += 2;
251   return ArgInfo.DispatchID.getRegister();
252 }
253 
254 Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
255   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
256     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
257   NumUserSGPRs += 2;
258   return ArgInfo.FlatScratchInit.getRegister();
259 }
260 
261 Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
262   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
263     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
264   NumUserSGPRs += 2;
265   return ArgInfo.ImplicitBufferPtr.getRegister();
266 }
267 
268 Register SIMachineFunctionInfo::addLDSKernelId() {
269   ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
270   NumUserSGPRs += 1;
271   return ArgInfo.LDSKernelId.getRegister();
272 }
273 
274 bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
275                                              MCPhysReg Reg) {
276   for (unsigned I = 0; CSRegs[I]; ++I) {
277     if (CSRegs[I] == Reg)
278       return true;
279   }
280 
281   return false;
282 }
283 
284 /// \p returns true if \p NumLanes slots are available in VGPRs already used for
285 /// SGPR spilling.
286 //
287 // FIXME: This only works after processFunctionBeforeFrameFinalized
288 bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
289                                                       unsigned NumNeed) const {
290   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
291   unsigned WaveSize = ST.getWavefrontSize();
292   return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
293 }
294 
295 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
296 bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
297                                                     int FI) {
298   std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
299 
300   // This has already been allocated.
301   if (!SpillLanes.empty())
302     return true;
303 
304   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
305   const SIRegisterInfo *TRI = ST.getRegisterInfo();
306   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
307   MachineRegisterInfo &MRI = MF.getRegInfo();
308   unsigned WaveSize = ST.getWavefrontSize();
309 
310   unsigned Size = FrameInfo.getObjectSize(FI);
311   unsigned NumLanes = Size / 4;
312 
313   if (NumLanes > WaveSize)
314     return false;
315 
316   assert(Size >= 4 && "invalid sgpr spill size");
317   assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
318 
319   // Make sure to handle the case where a wide SGPR spill may span between two
320   // VGPRs.
321   for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
322     Register LaneVGPR;
323     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
324 
325     if (VGPRIndex == 0) {
326       LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
327       if (LaneVGPR == AMDGPU::NoRegister) {
328         // We have no VGPRs left for spilling SGPRs. Reset because we will not
329         // partially spill the SGPR to VGPRs.
330         SGPRToVGPRSpills.erase(FI);
331         NumVGPRSpillLanes -= I;
332 
333         // FIXME: We can run out of free registers with split allocation if
334         // IPRA is enabled and a called function already uses every VGPR.
335 #if 0
336         DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
337                                                   "VGPRs for SGPR spilling",
338                                                   0, DS_Error);
339         MF.getFunction().getContext().diagnose(DiagOutOfRegs);
340 #endif
341         return false;
342       }
343 
344       Optional<int> SpillFI;
345       // We need to preserve inactive lanes, so always save, even caller-save
346       // registers.
347       if (!isEntryFunction()) {
348         SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
349       }
350 
351       SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));
352 
353       // Add this register as live-in to all blocks to avoid machine verifier
354       // complaining about use of an undefined physical register.
355       for (MachineBasicBlock &BB : MF)
356         BB.addLiveIn(LaneVGPR);
357     } else {
358       LaneVGPR = SpillVGPRs.back().VGPR;
359     }
360 
361     SpillLanes.push_back(SIRegisterInfo::SpilledReg(LaneVGPR, VGPRIndex));
362   }
363 
364   return true;
365 }
366 
367 /// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
368 /// Either AGPR is spilled to VGPR to vice versa.
369 /// Returns true if a \p FI can be eliminated completely.
370 bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
371                                                     int FI,
372                                                     bool isAGPRtoVGPR) {
373   MachineRegisterInfo &MRI = MF.getRegInfo();
374   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
375   const GCNSubtarget &ST =  MF.getSubtarget<GCNSubtarget>();
376 
377   assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));
378 
379   auto &Spill = VGPRToAGPRSpills[FI];
380 
381   // This has already been allocated.
382   if (!Spill.Lanes.empty())
383     return Spill.FullyAllocated;
384 
385   unsigned Size = FrameInfo.getObjectSize(FI);
386   unsigned NumLanes = Size / 4;
387   Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
388 
389   const TargetRegisterClass &RC =
390       isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
391   auto Regs = RC.getRegisters();
392 
393   auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
394   const SIRegisterInfo *TRI = ST.getRegisterInfo();
395   Spill.FullyAllocated = true;
396 
397   // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
398   // once.
399   BitVector OtherUsedRegs;
400   OtherUsedRegs.resize(TRI->getNumRegs());
401 
402   const uint32_t *CSRMask =
403       TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
404   if (CSRMask)
405     OtherUsedRegs.setBitsInMask(CSRMask);
406 
407   // TODO: Should include register tuples, but doesn't matter with current
408   // usage.
409   for (MCPhysReg Reg : SpillAGPR)
410     OtherUsedRegs.set(Reg);
411   for (MCPhysReg Reg : SpillVGPR)
412     OtherUsedRegs.set(Reg);
413 
414   SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
415   for (int I = NumLanes - 1; I >= 0; --I) {
416     NextSpillReg = std::find_if(
417         NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
418           return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
419                  !OtherUsedRegs[Reg];
420         });
421 
422     if (NextSpillReg == Regs.end()) { // Registers exhausted
423       Spill.FullyAllocated = false;
424       break;
425     }
426 
427     OtherUsedRegs.set(*NextSpillReg);
428     SpillRegs.push_back(*NextSpillReg);
429     Spill.Lanes[I] = *NextSpillReg++;
430   }
431 
432   return Spill.FullyAllocated;
433 }
434 
435 bool SIMachineFunctionInfo::removeDeadFrameIndices(
436     MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
437   // Remove dead frame indices from function frame, however keep FP & BP since
438   // spills for them haven't been inserted yet. And also make sure to remove the
439   // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could
440   // result in an unexpected side effect and bug, in case of any re-mapping of
441   // freed frame indices by later pass(es) like "stack slot coloring".
442   for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
443     if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
444       MFI.RemoveStackObject(R.first);
445       SGPRToVGPRSpills.erase(R.first);
446     }
447   }
448 
449   bool HaveSGPRToMemory = false;
450 
451   if (ResetSGPRSpillStackIDs) {
452     // All other SPGRs must be allocated on the default stack, so reset the
453     // stack ID.
454     for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
455          ++i) {
456       if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
457         if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
458           MFI.setStackID(i, TargetStackID::Default);
459           HaveSGPRToMemory = true;
460         }
461       }
462     }
463   }
464 
465   for (auto &R : VGPRToAGPRSpills) {
466     if (R.second.IsDead)
467       MFI.RemoveStackObject(R.first);
468   }
469 
470   return HaveSGPRToMemory;
471 }
472 
473 void SIMachineFunctionInfo::allocateWWMReservedSpillSlots(
474     MachineFrameInfo &MFI, const SIRegisterInfo &TRI) {
475   assert(WWMReservedFrameIndexes.empty());
476 
477   WWMReservedFrameIndexes.resize(WWMReservedRegs.size());
478 
479   int I = 0;
480   for (Register VGPR : WWMReservedRegs) {
481     const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR);
482     WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject(
483         TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC));
484   }
485 }
486 
487 int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
488                                          const SIRegisterInfo &TRI) {
489   if (ScavengeFI)
490     return *ScavengeFI;
491   if (isEntryFunction()) {
492     ScavengeFI = MFI.CreateFixedObject(
493         TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
494   } else {
495     ScavengeFI = MFI.CreateStackObject(
496         TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
497         TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
498   }
499   return *ScavengeFI;
500 }
501 
502 MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
503   assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
504   return AMDGPU::SGPR0 + NumUserSGPRs;
505 }
506 
507 MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
508   return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
509 }
510 
511 Register
512 SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
513   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
514   if (!ST.isAmdPalOS())
515     return Register();
516   Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
517   if (ST.hasMergedShaders()) {
518     switch (MF.getFunction().getCallingConv()) {
519     case CallingConv::AMDGPU_HS:
520     case CallingConv::AMDGPU_GS:
521       // Low GIT address is passed in s8 rather than s0 for an LS+HS or
522       // ES+GS merged shader on gfx9+.
523       GitPtrLo = AMDGPU::SGPR8;
524       return GitPtrLo;
525     default:
526       return GitPtrLo;
527     }
528   }
529   return GitPtrLo;
530 }
531 
532 static yaml::StringValue regToString(Register Reg,
533                                      const TargetRegisterInfo &TRI) {
534   yaml::StringValue Dest;
535   {
536     raw_string_ostream OS(Dest.Value);
537     OS << printReg(Reg, &TRI);
538   }
539   return Dest;
540 }
541 
542 static Optional<yaml::SIArgumentInfo>
543 convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
544                     const TargetRegisterInfo &TRI) {
545   yaml::SIArgumentInfo AI;
546 
547   auto convertArg = [&](Optional<yaml::SIArgument> &A,
548                         const ArgDescriptor &Arg) {
549     if (!Arg)
550       return false;
551 
552     // Create a register or stack argument.
553     yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
554     if (Arg.isRegister()) {
555       raw_string_ostream OS(SA.RegisterName.Value);
556       OS << printReg(Arg.getRegister(), &TRI);
557     } else
558       SA.StackOffset = Arg.getStackOffset();
559     // Check and update the optional mask.
560     if (Arg.isMasked())
561       SA.Mask = Arg.getMask();
562 
563     A = SA;
564     return true;
565   };
566 
567   bool Any = false;
568   Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
569   Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
570   Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
571   Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
572   Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
573   Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
574   Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
575   Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
576   Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
577   Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
578   Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
579   Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
580   Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
581                     ArgInfo.PrivateSegmentWaveByteOffset);
582   Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
583   Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
584   Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
585   Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
586   Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
587 
588   if (Any)
589     return AI;
590 
591   return None;
592 }
593 
594 yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
595     const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
596     const llvm::MachineFunction &MF)
597     : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
598       MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
599       GDSSize(MFI.getGDSSize()),
600       DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
601       NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
602       MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
603       HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
604       HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
605       HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
606       Occupancy(MFI.getOccupancy()),
607       ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
608       FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
609       StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
610       BytesInStackArgArea(MFI.getBytesInStackArgArea()),
611       ReturnsVoid(MFI.returnsVoid()),
612       ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
613   for (Register Reg : MFI.WWMReservedRegs)
614     WWMReservedRegs.push_back(regToString(Reg, TRI));
615 
616   if (MFI.getVGPRForAGPRCopy())
617     VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);
618   auto SFI = MFI.getOptionalScavengeFI();
619   if (SFI)
620     ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
621 }
622 
623 void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
624   MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
625 }
626 
627 bool SIMachineFunctionInfo::initializeBaseYamlFields(
628     const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
629     PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
630   ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
631   MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
632   LDSSize = YamlMFI.LDSSize;
633   GDSSize = YamlMFI.GDSSize;
634   DynLDSAlign = YamlMFI.DynLDSAlign;
635   HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
636   Occupancy = YamlMFI.Occupancy;
637   IsEntryFunction = YamlMFI.IsEntryFunction;
638   NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
639   MemoryBound = YamlMFI.MemoryBound;
640   WaveLimiter = YamlMFI.WaveLimiter;
641   HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
642   HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
643   BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
644   ReturnsVoid = YamlMFI.ReturnsVoid;
645 
646   if (YamlMFI.ScavengeFI) {
647     auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
648     if (!FIOrErr) {
649       // Create a diagnostic for a the frame index.
650       const MemoryBuffer &Buffer =
651           *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
652 
653       Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
654                            SourceMgr::DK_Error, toString(FIOrErr.takeError()),
655                            "", None, None);
656       SourceRange = YamlMFI.ScavengeFI->SourceRange;
657       return true;
658     }
659     ScavengeFI = *FIOrErr;
660   } else {
661     ScavengeFI = None;
662   }
663   return false;
664 }
665 
666 bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
667   for (const BasicBlock &BB : MF.getFunction()) {
668     for (const Instruction &I : BB) {
669       const auto *CB = dyn_cast<CallBase>(&I);
670       if (!CB)
671         continue;
672 
673       if (CB->isInlineAsm()) {
674         const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
675         for (const auto &CI : IA->ParseConstraints()) {
676           for (StringRef Code : CI.Codes) {
677             Code.consume_front("{");
678             if (Code.startswith("a"))
679               return true;
680           }
681         }
682         continue;
683       }
684 
685       const Function *Callee =
686           dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
687       if (!Callee)
688         return true;
689 
690       if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
691         return true;
692     }
693   }
694 
695   return false;
696 }
697 
698 bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
699   if (UsesAGPRs)
700     return *UsesAGPRs;
701 
702   if (!mayNeedAGPRs()) {
703     UsesAGPRs = false;
704     return false;
705   }
706 
707   if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
708       MF.getFrameInfo().hasCalls()) {
709     UsesAGPRs = true;
710     return true;
711   }
712 
713   const MachineRegisterInfo &MRI = MF.getRegInfo();
714 
715   for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
716     const Register Reg = Register::index2VirtReg(I);
717     const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
718     if (RC && SIRegisterInfo::isAGPRClass(RC)) {
719       UsesAGPRs = true;
720       return true;
721     } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
722       // Defer caching UsesAGPRs, function might not yet been regbank selected.
723       return true;
724     }
725   }
726 
727   for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
728     if (MRI.isPhysRegUsed(Reg)) {
729       UsesAGPRs = true;
730       return true;
731     }
732   }
733 
734   UsesAGPRs = false;
735   return false;
736 }
737