xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (revision ae7e8a02e6e93455e026036132c4d053b2c12ad9)
1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
/// \file
/// AMDGPU/SI per-function machine state: argument and spill-register
/// bookkeeping, pseudo source values, and MIR YAML serialization support.
//
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
14 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 
16 #include "AMDGPUArgumentUsageInfo.h"
17 #include "AMDGPUMachineFunction.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIInstrInfo.h"
20 #include "llvm/CodeGen/MIRYamlMapping.h"
21 #include "llvm/CodeGen/PseudoSourceValue.h"
22 #include "llvm/Support/raw_ostream.h"
23 
24 namespace llvm {
25 
26 class MachineFrameInfo;
27 class MachineFunction;
28 class TargetRegisterClass;
29 class SIMachineFunctionInfo;
30 class SIRegisterInfo;
31 
32 class AMDGPUPseudoSourceValue : public PseudoSourceValue {
33 public:
34   enum AMDGPUPSVKind : unsigned {
35     PSVBuffer = PseudoSourceValue::TargetCustom,
36     PSVImage,
37     GWSResource
38   };
39 
40 protected:
41   AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
42       : PseudoSourceValue(Kind, TII) {}
43 
44 public:
45   bool isConstant(const MachineFrameInfo *) const override {
46     // This should probably be true for most images, but we will start by being
47     // conservative.
48     return false;
49   }
50 
51   bool isAliased(const MachineFrameInfo *) const override {
52     return true;
53   }
54 
55   bool mayAlias(const MachineFrameInfo *) const override {
56     return true;
57   }
58 };
59 
60 class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
61 public:
62   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
63       : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
64 
65   static bool classof(const PseudoSourceValue *V) {
66     return V->kind() == PSVBuffer;
67   }
68 
69   void printCustom(raw_ostream &OS) const override { OS << "BufferResource"; }
70 };
71 
72 class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
73 public:
74   // TODO: Is the img rsrc useful?
75   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
76       : AMDGPUPseudoSourceValue(PSVImage, TII) {}
77 
78   static bool classof(const PseudoSourceValue *V) {
79     return V->kind() == PSVImage;
80   }
81 
82   void printCustom(raw_ostream &OS) const override { OS << "ImageResource"; }
83 };
84 
85 class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
86 public:
87   explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
88       : AMDGPUPseudoSourceValue(GWSResource, TII) {}
89 
90   static bool classof(const PseudoSourceValue *V) {
91     return V->kind() == GWSResource;
92   }
93 
94   // These are inaccessible memory from IR.
95   bool isAliased(const MachineFrameInfo *) const override {
96     return false;
97   }
98 
99   // These are inaccessible memory from IR.
100   bool mayAlias(const MachineFrameInfo *) const override {
101     return false;
102   }
103 
104   void printCustom(raw_ostream &OS) const override {
105     OS << "GWSResource";
106   }
107 };
108 
109 namespace yaml {
110 
111 struct SIArgument {
112   bool IsRegister;
113   union {
114     StringValue RegisterName;
115     unsigned StackOffset;
116   };
117   Optional<unsigned> Mask;
118 
119   // Default constructor, which creates a stack argument.
120   SIArgument() : IsRegister(false), StackOffset(0) {}
121   SIArgument(const SIArgument &Other) {
122     IsRegister = Other.IsRegister;
123     if (IsRegister) {
124       ::new ((void *)std::addressof(RegisterName))
125           StringValue(Other.RegisterName);
126     } else
127       StackOffset = Other.StackOffset;
128     Mask = Other.Mask;
129   }
130   SIArgument &operator=(const SIArgument &Other) {
131     IsRegister = Other.IsRegister;
132     if (IsRegister) {
133       ::new ((void *)std::addressof(RegisterName))
134           StringValue(Other.RegisterName);
135     } else
136       StackOffset = Other.StackOffset;
137     Mask = Other.Mask;
138     return *this;
139   }
140   ~SIArgument() {
141     if (IsRegister)
142       RegisterName.~StringValue();
143   }
144 
145   // Helper to create a register or stack argument.
146   static inline SIArgument createArgument(bool IsReg) {
147     if (IsReg)
148       return SIArgument(IsReg);
149     return SIArgument();
150   }
151 
152 private:
153   // Construct a register argument.
154   SIArgument(bool) : IsRegister(true), RegisterName() {}
155 };
156 
/// YAML mapping for SIArgument: serialized as either {reg: ...} or
/// {offset: ...}, with an optional "mask" key in both forms.
template <> struct MappingTraits<SIArgument> {
  static void mapping(IO &YamlIO, SIArgument &A) {
    if (YamlIO.outputting()) {
      // Output: emit whichever union member is currently active.
      if (A.IsRegister)
        YamlIO.mapRequired("reg", A.RegisterName);
      else
        YamlIO.mapRequired("offset", A.StackOffset);
    } else {
      // Input: the key present in the document decides which union member to
      // activate. A is default-constructed (stack form) before we get here.
      auto Keys = YamlIO.keys();
      if (is_contained(Keys, "reg")) {
        A = SIArgument::createArgument(true);
        YamlIO.mapRequired("reg", A.RegisterName);
      } else if (is_contained(Keys, "offset"))
        YamlIO.mapRequired("offset", A.StackOffset);
      else
        YamlIO.setError("missing required key 'reg' or 'offset'");
    }
    YamlIO.mapOptional("mask", A.Mask);
  }
  // Render as a compact single-line flow mapping.
  static const bool flow = true;
};
178 
/// MIR-serialized counterpart of the in-memory function argument info: one
/// optional SIArgument per preloaded value the function may receive.
struct SIArgumentInfo {
  // Arguments passed in user SGPRs.
  Optional<SIArgument> PrivateSegmentBuffer;
  Optional<SIArgument> DispatchPtr;
  Optional<SIArgument> QueuePtr;
  Optional<SIArgument> KernargSegmentPtr;
  Optional<SIArgument> DispatchID;
  Optional<SIArgument> FlatScratchInit;
  Optional<SIArgument> PrivateSegmentSize;

  // Arguments passed in system SGPRs.
  Optional<SIArgument> WorkGroupIDX;
  Optional<SIArgument> WorkGroupIDY;
  Optional<SIArgument> WorkGroupIDZ;
  Optional<SIArgument> WorkGroupInfo;
  Optional<SIArgument> PrivateSegmentWaveByteOffset;

  Optional<SIArgument> ImplicitArgPtr;
  Optional<SIArgument> ImplicitBufferPtr;

  // Work item IDs, passed in VGPRs.
  Optional<SIArgument> WorkItemIDX;
  Optional<SIArgument> WorkItemIDY;
  Optional<SIArgument> WorkItemIDZ;
};
201 
/// YAML mapping for SIArgumentInfo; every field is optional and absent
/// entries are simply omitted from the serialized MIR.
template <> struct MappingTraits<SIArgumentInfo> {
  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
    YamlIO.mapOptional("dispatchID", AI.DispatchID);
    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);

    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
    YamlIO.mapOptional("privateSegmentWaveByteOffset",
                       AI.PrivateSegmentWaveByteOffset);

    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);

    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
  }
};
227 
228 // Default to default mode for default calling convention.
229 struct SIMode {
230   bool IEEE = true;
231   bool DX10Clamp = true;
232   bool FP32InputDenormals = true;
233   bool FP32OutputDenormals = true;
234   bool FP64FP16InputDenormals = true;
235   bool FP64FP16OutputDenormals = true;
236 
237   SIMode() = default;
238 
239   SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
240     IEEE = Mode.IEEE;
241     DX10Clamp = Mode.DX10Clamp;
242     FP32InputDenormals = Mode.FP32InputDenormals;
243     FP32OutputDenormals = Mode.FP32OutputDenormals;
244     FP64FP16InputDenormals = Mode.FP64FP16InputDenormals;
245     FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals;
246   }
247 
248   bool operator ==(const SIMode Other) const {
249     return IEEE == Other.IEEE &&
250            DX10Clamp == Other.DX10Clamp &&
251            FP32InputDenormals == Other.FP32InputDenormals &&
252            FP32OutputDenormals == Other.FP32OutputDenormals &&
253            FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
254            FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
255   }
256 };
257 
/// YAML mapping for SIMode; every flag defaults to true, so only flags that
/// differ from the default calling-convention mode are serialized.
template <> struct MappingTraits<SIMode> {
  static void mapping(IO &YamlIO, SIMode &Mode) {
    YamlIO.mapOptional("ieee", Mode.IEEE, true);
    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
    YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
    YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
  }
};
268 
/// MIR-serialized form of llvm::SIMachineFunctionInfo: the subset of the
/// in-memory per-function state that is round-tripped through MIR YAML.
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  unsigned MaxKernArgAlign = 0;
  unsigned LDSSize = 0;
  Align DynLDSAlign;
  bool IsEntryFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;
  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  uint32_t HighBitsOf32BitAddress = 0;

  // TODO: 10 may be a better default since it's the maximum.
  unsigned Occupancy = 0;

  // Register names serialized as strings; the defaults mirror the in-memory
  // "unset" placeholder registers.
  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  Optional<SIArgumentInfo> ArgInfo;
  SIMode Mode;

  SIMachineFunctionInfo() = default;
  // Build the serialized form from the in-memory function info.
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};
299 
300 template <> struct MappingTraits<SIMachineFunctionInfo> {
301   static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
302     YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
303                        UINT64_C(0));
304     YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
305     YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
306     YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
307     YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
308     YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
309     YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
310     YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
311     YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
312     YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
313     YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
314                        StringValue("$private_rsrc_reg"));
315     YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
316                        StringValue("$fp_reg"));
317     YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
318                        StringValue("$sp_reg"));
319     YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
320     YamlIO.mapOptional("mode", MFI.Mode, SIMode());
321     YamlIO.mapOptional("highBitsOf32BitAddress",
322                        MFI.HighBitsOf32BitAddress, 0u);
323     YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
324   }
325 };
326 
327 } // end namespace yaml
328 
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
///
/// It also carries the rest of the AMDGPU-specific per-MachineFunction state:
/// preloaded-argument registers, scratch/frame/stack pointer registers,
/// SGPR-to-VGPR and VGPR-to-AGPR spill assignments, and occupancy tracking.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  friend class GCNTargetMachine;

  Register TIDReg = AMDGPU::NoRegister;

  // Registers that may be reserved for spilling purposes. These may be the same
  // as the input registers.
  Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;

  // This is the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the current function's frame.
  Register FrameOffsetReg = AMDGPU::FP_REG;

  // This is an ABI register used in the non-entry calling convention to
  // communicate the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the new function's frame.
  Register StackPtrOffsetReg = AMDGPU::SP_REG;

  AMDGPUFunctionArgInfo ArgInfo;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the callee
  /// is expected to restore the argument stack this should be a multiple of 16,
  /// all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  // Lazily created pseudo source values (see getBufferPSV/getImagePSV/
  // getGWSPSV below).
  std::unique_ptr<const AMDGPUBufferPseudoSourceValue> BufferPSV;
  std::unique_ptr<const AMDGPUImagePseudoSourceValue> ImagePSV;
  std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;

private:
  unsigned LDSWaveSpillSize = 0;
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Feature bits required for inputs passed in user SGPRs.
  bool PrivateSegmentBuffer : 1;
  bool DispatchPtr : 1;
  bool QueuePtr : 1;
  bool KernargSegmentPtr : 1;
  bool DispatchID : 1;
  bool FlatScratchInit : 1;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from the
  // user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;
  unsigned GDSSize;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  // Next free SGPR in the user/system preloaded-argument sequence; defined in
  // the .cpp file.
  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

public:
  /// A (VGPR, lane) location holding one 32-bit piece of a spilled SGPR.
  struct SpilledReg {
    Register VGPR;
    int Lane = -1;

    SpilledReg() = default;
    SpilledReg(Register R, int L) : VGPR (R), Lane (L) {}

    bool hasLane() { return Lane != -1;}
    bool hasReg() { return VGPR != 0;}
  };

  struct SGPRSpillVGPRCSR {
    // VGPR used for SGPR spills
    Register VGPR;

    // If the VGPR is a CSR, the stack slot used to save/restore it in the
    // prolog/epilog.
    Optional<int> FI;

    SGPRSpillVGPRCSR(Register V, Optional<int> F) : VGPR(V), FI(F) {}
  };

  /// Per-frame-index record of the AGPR lanes a VGPR spill was assigned to.
  struct VGPRSpillToAGPR {
    SmallVector<MCPhysReg, 32> Lanes;
    bool FullyAllocated = false;
  };

  SparseBitVector<> WWMReservedRegs;

  void ReserveWWMRegister(Register Reg) { WWMReservedRegs.set(Reg); }

private:
  // Track VGPR + wave index for each subregister of the SGPR spilled to
  // frameindex key.
  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
  unsigned NumVGPRSpillLanes = 0;
  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;

  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;

  // AGPRs used for VGPR spills.
  SmallVector<MCPhysReg, 32> SpillAGPR;

  // VGPRs used for AGPR spills.
  SmallVector<MCPhysReg, 32> SpillVGPR;

public: // FIXME
  /// If this is set, an SGPR used for save/restore of the register used for the
  /// frame pointer.
  Register SGPRForFPSaveRestoreCopy;
  Optional<int> FramePointerSaveIndex;

  /// If this is set, an SGPR used for save/restore of the register used for the
  /// base pointer.
  Register SGPRForBPSaveRestoreCopy;
  Optional<int> BasePointerSaveIndex;

  Register VGPRReservedForSGPRSpill;
  bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);

public:
  SIMachineFunctionInfo(const MachineFunction &MF);

  /// Restore state from the serialized MIR representation.
  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);

  /// \returns the (VGPR, lane) locations \p FrameIndex was spilled to, or an
  /// empty array if no spill was recorded for it.
  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    auto I = SGPRToVGPRSpills.find(FrameIndex);
    return (I == SGPRToVGPRSpills.end()) ?
      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
  }

  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
    return SpillVGPRs;
  }

  // Replace the spill VGPR at \p Index and remember it as the VGPR reserved
  // for SGPR spills.
  void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) {
    SpillVGPRs[Index].VGPR = NewVGPR;
    SpillVGPRs[Index].FI = newFI;
    VGPRReservedForSGPRSpill = NewVGPR;
  }

  bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF);

  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
    return SpillAGPR;
  }

  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
    return SpillVGPR;
  }

  /// \returns the AGPR assigned for lane \p Lane of the VGPR spilled at
  /// \p FrameIndex, or NoRegister if none was recorded.
  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
                                         : I->second.Lanes[Lane];
  }

  bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                 unsigned NumLane) const;
  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
  bool reserveVGPRforSGPRSpills(MachineFunction &MF);
  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
  void removeDeadFrameIndices(MachineFrameInfo &MFI);

  bool hasCalculatedTID() const { return TIDReg != 0; };
  Register getTIDReg() const { return TIDReg; };
  void setTIDReg(Register Reg) { TIDReg = Reg; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs.
  Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  Register addDispatchPtr(const SIRegisterInfo &TRI);
  Register addQueuePtr(const SIRegisterInfo &TRI);
  Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
  Register addDispatchID(const SIRegisterInfo &TRI);
  Register addFlatScratchInit(const SIRegisterInfo &TRI);
  Register addImplicitBufferPtr(const SIRegisterInfo &TRI);

  // Add system SGPRs. Each allocates the next free system SGPR for the given
  // argument and returns it.
  Register addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  Register addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  Register addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  Register addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  // Add special VGPR inputs
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  Register addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
      = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(Register Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  // Queries for the input feature bits set at construction.
  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  /// \returns the register holding \p Value, or an invalid MCRegister if the
  /// value has no register descriptor.
  MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
    return Arg ? Arg->getRegister() : MCRegister();
  }

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  Register getGITPtrLoReg(const MachineFunction &MF) const;

  uint32_t get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getGDSSize() const {
    return GDSSize;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  Register getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  Register getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setFrameOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    FrameOffsetReg = Reg;
  }

  void setStackPtrOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in serialized
  // MIR.
  Register getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  Register getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  Register getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  // PSInputAddr/PSInputEnable are bit masks indexed by PS input number.
  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group sizes
  /// for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  /// \returns SGPR used for \p Dim's work group ID.
  Register getWorkGroupIDSGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkGroupIDX());
      return ArgInfo.WorkGroupIDX.getRegister();
    case 1:
      assert(hasWorkGroupIDY());
      return ArgInfo.WorkGroupIDY.getRegister();
    case 2:
      assert(hasWorkGroupIDZ());
      return ArgInfo.WorkGroupIDZ.getRegister();
    }
    llvm_unreachable("unexpected dimension");
  }

  unsigned getLDSWaveSpillSize() const {
    return LDSWaveSpillSize;
  }

  // The PSV accessors below create the pseudo source value on first use and
  // cache it for the lifetime of this function info.
  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII) {
    if (!BufferPSV)
      BufferPSV = std::make_unique<AMDGPUBufferPseudoSourceValue>(TII);

    return BufferPSV.get();
  }

  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII) {
    if (!ImagePSV)
      ImagePSV = std::make_unique<AMDGPUImagePseudoSourceValue>(TII);

    return ImagePSV.get();
  }

  const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
    if (!GWSResourcePSV) {
      GWSResourcePSV =
          std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
    }

    return GWSResourcePSV.get();
  }

  unsigned getOccupancy() const {
    return Occupancy;
  }

  // Memory-bound or wave-limited functions are capped at 4 waves.
  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }

  void limitOccupancy(const MachineFunction &MF);

  // Clamp the recorded occupancy down to \p Limit.
  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  // Raise the recorded occupancy to \p Limit, then re-clamp against the
  // function's own limits.
  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }
};
934 
935 } // end namespace llvm
936 
937 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
938