xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 
12 #include "AMDGPUSubtarget.h"
13 #include "SIDefines.h"
14 #include "llvm/IR/CallingConv.h"
15 #include "llvm/IR/InstrTypes.h"
16 #include "llvm/IR/Module.h"
17 #include "llvm/Support/Alignment.h"
18 #include <array>
19 #include <functional>
20 #include <utility>
21 
22 // Pull in OpName enum definition and getNamedOperandIdx() declaration.
23 #define GET_INSTRINFO_OPERAND_ENUM
24 #include "AMDGPUGenInstrInfo.inc"
25 
26 struct amd_kernel_code_t;
27 
28 namespace llvm {
29 
30 struct Align;
31 class Argument;
32 class Function;
33 class GlobalValue;
34 class MCInstrInfo;
35 class MCRegisterClass;
36 class MCRegisterInfo;
37 class MCSubtargetInfo;
38 class StringRef;
39 class Triple;
40 class raw_ostream;
41 
42 namespace AMDGPU {
43 
44 struct AMDGPUMCKernelCodeT;
45 struct IsaVersion;
46 
47 /// Generic target versions emitted by this version of LLVM.
48 ///
49 /// These numbers are incremented every time a codegen breaking change occurs
50 /// within a generic family.
namespace GenericVersion {
// All generic families are currently at version 1.
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX9_4 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX12 = 1;
} // namespace GenericVersion
59 
// Supported amdhsa code object (ABI) versions.
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
61 
// Classification of small floating-point formats (4-bit / 8-bit), or None.
enum class FPType { None, FP4, FP8 };
63 
64 /// \returns True if \p STI is AMDHSA.
65 bool isHsaAbi(const MCSubtargetInfo &STI);
66 
67 /// \returns Code object version from the IR module flag.
68 unsigned getAMDHSACodeObjectVersion(const Module &M);
69 
70 /// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
71 unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
72 
73 /// \returns The default HSA code object version. This should only be used when
74 /// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
75 /// flag or a .amdhsa_code_object_version directive)
76 unsigned getDefaultAMDHSACodeObjectVersion();
77 
78 /// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
79 /// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
80 uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
81 
82 /// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
83 unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
84 
85 /// \returns The offset of the hostcall pointer argument from implicitarg_ptr
86 unsigned getHostcallImplicitArgPosition(unsigned COV);
87 
/// \returns The offset of the default_queue argument from implicitarg_ptr.
unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
/// \returns The offset of the completion_action argument from implicitarg_ptr.
unsigned getCompletionActionImplicitArgPosition(unsigned COV);
90 
// Describes one GCN buffer format: the combined format encoding together with
// its per-component size, component count, and numeric/data format fields.
// Looked up via the getGcnBufferFormatInfo() helpers below.
struct GcnBufferFormatInfo {
  unsigned Format;        // Combined buffer-format encoding.
  unsigned BitsPerComp;   // Bits per component.
  unsigned NumComponents; // Number of components.
  unsigned NumFormat;     // Numeric-format field of the encoding.
  unsigned DataFormat;    // Data-format field of the encoding.
};
98 
// Per-opcode properties of MAI (matrix) instructions; queried through
// getMAIIsDGEMM() / getMAIIsGFX940XDL() below.
struct MAIInstInfo {
  uint16_t Opcode;    // Instruction opcode.
  bool is_dgemm;      // True if the operation is a double precision GEMM.
  bool is_gfx940_xdl; // True if the operation is a gfx940 XDL op.
};
104 
// Entry of the isMFMA_F8F6F4 searchable table: an MFMA opcode, its F8F8
// counterpart, and the register footprint of its sources (see
// getMFMA_F8F6F4_WithFormatArgs()).
struct MFMA_F8F6F4_Info {
  unsigned Opcode;     // MFMA opcode.
  unsigned F8F8Opcode; // Matching F8F8-format opcode.
  uint8_t NumRegsSrcA; // Registers occupied by source A.
  uint8_t NumRegsSrcB; // Registers occupied by source B.
};
111 
// Entry of the generated isCvtScaleF32_F32F16ToF8F4 searchable table.
struct CvtScaleF32_F32F16ToF8F4_Info {
  unsigned Opcode;
};
115 
// Maps a True16 opcode to its D16 hi/lo opcode variants (entry of the
// generated True16D16 searchable table).
struct True16D16Info {
  unsigned T16Op; // True16 opcode.
  unsigned HiOp;  // D16 opcode writing the high half.
  unsigned LoOp;  // D16 opcode writing the low half.
};
121 
122 #define GET_MIMGBaseOpcode_DECL
123 #define GET_MIMGDim_DECL
124 #define GET_MIMGEncoding_DECL
125 #define GET_MIMGLZMapping_DECL
126 #define GET_MIMGMIPMapping_DECL
127 #define GET_MIMGBiASMapping_DECL
128 #define GET_MAIInstInfoTable_DECL
129 #define GET_isMFMA_F8F6F4Table_DECL
130 #define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
131 #define GET_True16D16Table_DECL
132 #include "AMDGPUGenSearchableTables.inc"
133 
134 namespace IsaInfo {
135 
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // Number of SGPRs set aside for the trap handler.
  TRAP_NUM_SGPRS = 16
};
142 
143 enum class TargetIDSetting { Unsupported, Any, Off, On };
144 
145 class AMDGPUTargetID {
146 private:
147   const MCSubtargetInfo &STI;
148   TargetIDSetting XnackSetting;
149   TargetIDSetting SramEccSetting;
150 
151 public:
152   explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
153   ~AMDGPUTargetID() = default;
154 
155   /// \return True if the current xnack setting is not "Unsupported".
isXnackSupported()156   bool isXnackSupported() const {
157     return XnackSetting != TargetIDSetting::Unsupported;
158   }
159 
160   /// \returns True if the current xnack setting is "On" or "Any".
isXnackOnOrAny()161   bool isXnackOnOrAny() const {
162     return XnackSetting == TargetIDSetting::On ||
163            XnackSetting == TargetIDSetting::Any;
164   }
165 
166   /// \returns True if current xnack setting is "On" or "Off",
167   /// false otherwise.
isXnackOnOrOff()168   bool isXnackOnOrOff() const {
169     return getXnackSetting() == TargetIDSetting::On ||
170            getXnackSetting() == TargetIDSetting::Off;
171   }
172 
173   /// \returns The current xnack TargetIDSetting, possible options are
174   /// "Unsupported", "Any", "Off", and "On".
getXnackSetting()175   TargetIDSetting getXnackSetting() const { return XnackSetting; }
176 
177   /// Sets xnack setting to \p NewXnackSetting.
setXnackSetting(TargetIDSetting NewXnackSetting)178   void setXnackSetting(TargetIDSetting NewXnackSetting) {
179     XnackSetting = NewXnackSetting;
180   }
181 
182   /// \return True if the current sramecc setting is not "Unsupported".
isSramEccSupported()183   bool isSramEccSupported() const {
184     return SramEccSetting != TargetIDSetting::Unsupported;
185   }
186 
187   /// \returns True if the current sramecc setting is "On" or "Any".
isSramEccOnOrAny()188   bool isSramEccOnOrAny() const {
189     return SramEccSetting == TargetIDSetting::On ||
190            SramEccSetting == TargetIDSetting::Any;
191   }
192 
193   /// \returns True if current sramecc setting is "On" or "Off",
194   /// false otherwise.
isSramEccOnOrOff()195   bool isSramEccOnOrOff() const {
196     return getSramEccSetting() == TargetIDSetting::On ||
197            getSramEccSetting() == TargetIDSetting::Off;
198   }
199 
200   /// \returns The current sramecc TargetIDSetting, possible options are
201   /// "Unsupported", "Any", "Off", and "On".
getSramEccSetting()202   TargetIDSetting getSramEccSetting() const { return SramEccSetting; }
203 
204   /// Sets sramecc setting to \p NewSramEccSetting.
setSramEccSetting(TargetIDSetting NewSramEccSetting)205   void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
206     SramEccSetting = NewSramEccSetting;
207   }
208 
209   void setTargetIDFromFeaturesString(StringRef FS);
210   void setTargetIDFromTargetIDStream(StringRef TargetID);
211 
212   /// \returns String representation of an object.
213   std::string toString() const;
214 };
215 
216 /// \returns Wavefront size for given subtarget \p STI.
217 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
218 
219 /// \returns Local memory size in bytes for given subtarget \p STI.
220 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
221 
222 /// \returns Maximum addressable local memory size in bytes for given subtarget
223 /// \p STI.
224 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
225 
226 /// \returns Number of execution units per compute unit for given subtarget \p
227 /// STI.
228 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
229 
230 /// \returns Maximum number of work groups per compute unit for given subtarget
231 /// \p STI and limited by given \p FlatWorkGroupSize.
232 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
233                                unsigned FlatWorkGroupSize);
234 
235 /// \returns Minimum number of waves per execution unit for given subtarget \p
236 /// STI.
237 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
238 
239 /// \returns Maximum number of waves per execution unit for given subtarget \p
240 /// STI without any kind of limitation.
241 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
242 
243 /// \returns Number of waves per execution unit required to support the given \p
244 /// FlatWorkGroupSize.
245 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
246                                    unsigned FlatWorkGroupSize);
247 
248 /// \returns Minimum flat work group size for given subtarget \p STI.
249 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
250 
251 /// \returns Maximum flat work group size for given subtarget \p STI.
252 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
253 
254 /// \returns Number of waves per work group for given subtarget \p STI and
255 /// \p FlatWorkGroupSize.
256 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
257                               unsigned FlatWorkGroupSize);
258 
259 /// \returns SGPR allocation granularity for given subtarget \p STI.
260 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
261 
262 /// \returns SGPR encoding granularity for given subtarget \p STI.
263 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
264 
265 /// \returns Total number of SGPRs for given subtarget \p STI.
266 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
267 
268 /// \returns Addressable number of SGPRs for given subtarget \p STI.
269 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
270 
271 /// \returns Minimum number of SGPRs that meets the given number of waves per
272 /// execution unit requirement for given subtarget \p STI.
273 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
274 
275 /// \returns Maximum number of SGPRs that meets the given number of waves per
276 /// execution unit requirement for given subtarget \p STI.
277 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
278                         bool Addressable);
279 
280 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
281 /// STI when the given special registers are used.
282 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
283                           bool FlatScrUsed, bool XNACKUsed);
284 
285 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
286 /// STI when the given special registers are used. XNACK is inferred from
287 /// \p STI.
288 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
289                           bool FlatScrUsed);
290 
291 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
292 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
293 /// register counts.
294 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
295 
296 /// \returns VGPR allocation granularity for given subtarget \p STI.
297 ///
298 /// For subtargets which support it, \p EnableWavefrontSize32 should match
299 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
300 unsigned
301 getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize,
302                     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
303 
304 /// \returns VGPR encoding granularity for given subtarget \p STI.
305 ///
306 /// For subtargets which support it, \p EnableWavefrontSize32 should match
307 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
308 unsigned getVGPREncodingGranule(
309     const MCSubtargetInfo *STI,
310     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
311 
312 /// For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage,
313 /// returns the allocation granule for ArchVGPRs.
314 unsigned getArchVGPRAllocGranule();
315 
316 /// \returns Total number of VGPRs for given subtarget \p STI.
317 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
318 
319 /// \returns Addressable number of architectural VGPRs for a given subtarget \p
320 /// STI.
321 unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
322 
323 /// \returns Addressable number of VGPRs for given subtarget \p STI.
324 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
325                                 unsigned DynamicVGPRBlockSize);
326 
327 /// \returns Minimum number of VGPRs that meets given number of waves per
328 /// execution unit requirement for given subtarget \p STI.
329 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
330                         unsigned DynamicVGPRBlockSize);
331 
332 /// \returns Maximum number of VGPRs that meets given number of waves per
333 /// execution unit requirement for given subtarget \p STI.
334 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
335                         unsigned DynamicVGPRBlockSize);
336 
337 /// \returns Number of waves reachable for a given \p NumVGPRs usage for given
338 /// subtarget \p STI.
339 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
340                                       unsigned NumVGPRs,
341                                       unsigned DynamicVGPRBlockSize);
342 
343 /// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
344 /// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
345 unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
346                                       unsigned MaxWaves,
347                                       unsigned TotalNumVGPRs);
348 
349 /// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
350 /// Gen.
351 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
352                                   AMDGPUSubtarget::Generation Gen);
353 
354 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
355 /// \p NumVGPRs are used. We actually return the number of blocks -1, since
356 /// that's what we encode.
357 ///
358 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
359 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
360 unsigned getEncodedNumVGPRBlocks(
361     const MCSubtargetInfo *STI, unsigned NumVGPRs,
362     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
363 
364 /// \returns Number of VGPR blocks that need to be allocated for the given
365 /// subtarget \p STI when \p NumVGPRs are used.
366 unsigned getAllocatedNumVGPRBlocks(
367     const MCSubtargetInfo *STI, unsigned NumVGPRs,
368     unsigned DynamicVGPRBlockSize,
369     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
370 
371 } // end namespace IsaInfo
372 
// Represents a field in an encoded value: a contiguous bit range
// [LowBit, HighBit] with an optional default value D.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");

  // Bit position of the field's least significant bit.
  static constexpr unsigned Offset = LowBit;
  // Number of bits the field occupies.
  static constexpr unsigned Width = HighBit - LowBit + 1;

  using ValueType = unsigned;
  // Value used when the field is not explicitly specified.
  static constexpr ValueType Default = D;

  ValueType Value;
  constexpr EncodingField(ValueType V) : Value(V) {}

  // Return the raw (unshifted) field value; EncodingFields applies Offset.
  constexpr uint64_t encode() const { return Value; }
  // Recover the field value from an already shifted-and-masked encoding.
  static ValueType decode(uint64_t Encoded) { return Encoded; }
};
389 
// Represents a single bit in an encoded value: a one-bit-wide EncodingField.
template <unsigned Bit, unsigned D = 0>
using EncodingBit = EncodingField<Bit, Bit, D>;
393 
// A helper for encoding and decoding multiple fields.
template <typename... Fields> struct EncodingFields {
  // OR together each field's value shifted into place; assumes the fields'
  // bit ranges do not overlap.
  static constexpr uint64_t encode(Fields... Values) {
    return ((Values.encode() << Values.Offset) | ...);
  }

  // Split \p Encoded into one value per field, returned in the same order
  // as the Fields template parameters.
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};
405 
406 LLVM_READONLY
hasNamedOperand(uint64_t Opcode,OpName NamedIdx)407 inline bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx) {
408   return getNamedOperandIdx(Opcode, NamedIdx) != -1;
409 }
410 
411 LLVM_READONLY
412 int getSOPPWithRelaxation(uint16_t Opcode);
413 
// Properties of an MIMG (image) base opcode, generated from the
// MIMGBaseOpcode searchable table and queried via getMIMGBaseOpcode() /
// getMIMGBaseOpcodeInfo() below.
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
  bool A16;
  bool NoReturn;
  bool PointSampleAccel;
};
434 
435 LLVM_READONLY
436 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
437 
438 LLVM_READONLY
439 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
440 
// Properties of an image dimension: coordinate/gradient operand counts,
// its hardware encoding, and its assembly suffix. Looked up via the
// getMIMGDimInfo*() helpers below.
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;    // Number of coordinate operands.
  uint8_t NumGradients; // Number of gradient (derivative) operands.
  bool MSAA;
  bool DA;
  uint8_t Encoding;       // Key for getMIMGDimInfoByEncoding().
  const char *AsmSuffix;  // Key for getMIMGDimInfoByAsmSuffix().
};
450 
451 LLVM_READONLY
452 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
453 
454 LLVM_READONLY
455 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
456 
457 LLVM_READONLY
458 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
459 
// Pairs an L opcode with its LZ counterpart (see getMIMGLZMappingInfo()).
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

// Pairs a MIP opcode with its non-MIP counterpart (getMIMGMIPMappingInfo()).
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

// Pairs a Bias opcode with its no-bias counterpart (getMIMGBiasMappingInfo()).
struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};

// Pairs an Offset opcode with its no-offset counterpart
// (getMIMGOffsetMappingInfo()).
struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};

// Pairs a G opcode with its G16 counterpart (see getMIMGG16MappingInfo()).
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};
484 
485 LLVM_READONLY
486 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
487 
// Maps between the 2-address and 3-address forms of a WMMA opcode.
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
492 
493 LLVM_READONLY
494 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
495 
496 LLVM_READONLY
497 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
498 
499 LLVM_READONLY
500 const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
501 
502 LLVM_READONLY
503 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
504 
505 LLVM_READONLY
506 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
507                   unsigned VDataDwords, unsigned VAddrDwords);
508 
509 LLVM_READONLY
510 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
511 
512 LLVM_READONLY
513 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
514                            const MIMGDimInfo *Dim, bool IsA16,
515                            bool IsG16Supported);
516 
// Encoding-level information for a specific MIMG instruction, keyed by
// opcode (see getMIMGInfo()).
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;   // Corresponding MIMGBaseOpcode.
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;   // Dwords in the vdata operand.
  uint8_t VAddrDwords;   // Dwords across the vaddr operand(s).
  uint8_t VAddrOperands; // Number of vaddr operands.
};
525 
526 LLVM_READONLY
527 const MIMGInfo *getMIMGInfo(unsigned Opc);
528 
529 LLVM_READONLY
530 int getMTBUFBaseOpcode(unsigned Opc);
531 
532 LLVM_READONLY
533 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
534 
535 LLVM_READONLY
536 int getMTBUFElements(unsigned Opc);
537 
538 LLVM_READONLY
539 bool getMTBUFHasVAddr(unsigned Opc);
540 
541 LLVM_READONLY
542 bool getMTBUFHasSrsrc(unsigned Opc);
543 
544 LLVM_READONLY
545 bool getMTBUFHasSoffset(unsigned Opc);
546 
547 LLVM_READONLY
548 int getMUBUFBaseOpcode(unsigned Opc);
549 
550 LLVM_READONLY
551 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
552 
553 LLVM_READONLY
554 int getMUBUFElements(unsigned Opc);
555 
556 LLVM_READONLY
557 bool getMUBUFHasVAddr(unsigned Opc);
558 
559 LLVM_READONLY
560 bool getMUBUFHasSrsrc(unsigned Opc);
561 
562 LLVM_READONLY
563 bool getMUBUFHasSoffset(unsigned Opc);
564 
565 LLVM_READONLY
566 bool getMUBUFIsBufferInv(unsigned Opc);
567 
568 LLVM_READONLY
569 bool getMUBUFTfe(unsigned Opc);
570 
571 LLVM_READONLY
572 bool getSMEMIsBuffer(unsigned Opc);
573 
574 LLVM_READONLY
575 bool getVOP1IsSingle(unsigned Opc);
576 
577 LLVM_READONLY
578 bool getVOP2IsSingle(unsigned Opc);
579 
580 LLVM_READONLY
581 bool getVOP3IsSingle(unsigned Opc);
582 
583 LLVM_READONLY
584 bool isVOPC64DPP(unsigned Opc);
585 
586 LLVM_READONLY
587 bool isVOPCAsmOnly(unsigned Opc);
588 
589 /// Returns true if MAI operation is a double precision GEMM.
590 LLVM_READONLY
591 bool getMAIIsDGEMM(unsigned Opc);
592 
593 LLVM_READONLY
594 bool getMAIIsGFX940XDL(unsigned Opc);
595 
596 // Get an equivalent BitOp3 for a binary logical \p Opc.
597 // \returns BitOp3 modifier for the logical operation or zero.
598 // Used in VOPD3 conversion.
599 unsigned getBitOp2(unsigned Opc);
600 
// Whether an opcode may be used as the X and/or Y component of a VOPD
// instruction (see getCanBeVOPD()).
struct CanBeVOPD {
  bool X; // Usable as component X.
  bool Y; // Usable as component Y.
};
605 
606 /// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
607 LLVM_READONLY
608 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
609 
610 LLVM_READONLY
611 CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3);
612 
613 LLVM_READNONE
614 uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);
615 
616 LLVM_READONLY
617 const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
618                                                       unsigned BLGP,
619                                                       unsigned F8F8Opcode);
620 
621 LLVM_READONLY
622 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
623                                                   uint8_t NumComponents,
624                                                   uint8_t NumFormat,
625                                                   const MCSubtargetInfo &STI);
626 LLVM_READONLY
627 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
628                                                   const MCSubtargetInfo &STI);
629 
630 LLVM_READONLY
631 int getMCOpcode(uint16_t Opcode, unsigned Gen);
632 
633 LLVM_READONLY
634 unsigned getVOPDOpcode(unsigned Opc, bool VOPD3);
635 
636 LLVM_READONLY
637 int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
638                 bool VOPD3);
639 
640 LLVM_READONLY
641 bool isVOPD(unsigned Opc);
642 
643 LLVM_READNONE
644 bool isMAC(unsigned Opc);
645 
646 LLVM_READNONE
647 bool isPermlane16(unsigned Opc);
648 
649 LLVM_READNONE
650 bool isGenericAtomic(unsigned Opc);
651 
652 LLVM_READNONE
653 bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
654 
655 namespace VOPD {
656 
// Operand slots of one VOPD component: a single dst followed by up to
// three src operands.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,     // Number of dst operands per component.
  MAX_SRC_NUM = 3, // Maximum number of src operands per component.
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};

// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
// Indexed by Component (DST, SRC0, SRC1, SRC2).
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
constexpr unsigned VOPD3_VGPR_BANK_MASKS[] = {1, 3, 3, 3};

// The two components of a VOPD instruction.
enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;
676 
677 // Properties of VOPD components.
678 class ComponentProps {
679 private:
680   unsigned SrcOperandsNum = 0;
681   unsigned MandatoryLiteralIdx = ~0u;
682   bool HasSrc2Acc = false;
683   unsigned NumVOPD3Mods = 0;
684   unsigned Opcode = 0;
685   bool IsVOP3 = false;
686 
687 public:
688   ComponentProps() = default;
689   ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout = false);
690 
691   // Return the total number of src operands this component has.
getCompSrcOperandsNum()692   unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
693 
694   // Return the number of src operands of this component visible to the parser.
getCompParsedSrcOperandsNum()695   unsigned getCompParsedSrcOperandsNum() const {
696     return SrcOperandsNum - HasSrc2Acc;
697   }
698 
699   // Return true iif this component has a mandatory literal.
hasMandatoryLiteral()700   bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
701 
702   // If this component has a mandatory literal, return component operand
703   // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
getMandatoryLiteralCompOperandIndex()704   unsigned getMandatoryLiteralCompOperandIndex() const {
705     assert(hasMandatoryLiteral());
706     return MandatoryLiteralIdx;
707   }
708 
709   // Return true iif this component has operand
710   // with component index CompSrcIdx and this operand may be a register.
hasRegSrcOperand(unsigned CompSrcIdx)711   bool hasRegSrcOperand(unsigned CompSrcIdx) const {
712     assert(CompSrcIdx < Component::MAX_SRC_NUM);
713     return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
714   }
715 
716   // Return true iif this component has tied src2.
hasSrc2Acc()717   bool hasSrc2Acc() const { return HasSrc2Acc; }
718 
719   // Return a number of source modifiers if instruction is used in VOPD3.
getCompVOPD3ModsNum()720   unsigned getCompVOPD3ModsNum() const { return NumVOPD3Mods; }
721 
722   // Return opcode of the component.
getOpcode()723   unsigned getOpcode() const { return Opcode; }
724 
725   // Returns if component opcode is in VOP3 encoding.
isVOP3()726   unsigned isVOP3() const { return IsVOP3; }
727 
728   // Return index of BitOp3 operand or -1.
729   int getBitOp3OperandIdx() const;
730 
731 private:
hasMandatoryLiteralAt(unsigned CompSrcIdx)732   bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
733     assert(CompSrcIdx < Component::MAX_SRC_NUM);
734     return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
735   }
736 };
737 
// How an opcode participates in a VOPD instruction; also used to index the
// operand-layout tables in ComponentLayout below.
enum ComponentKind : unsigned {
  SINGLE = 0,  // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};
744 
745 // Interface functions of this class map VOPD component operand indices
746 // to indices of operands in MachineInstr/MCInst or parsed operands array.
747 //
748 // Note that this class operates with 3 kinds of indices:
749 // - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
750 // - MC operand indices (they refer operands in a MachineInstr/MCInst);
751 // - parsed operand indices (they refer operands in parsed operands array).
752 //
753 // For SINGLE components mapping between these indices is trivial.
754 // But things get more complicated for COMPONENT_X and
755 // COMPONENT_Y because these components share the same
756 // MachineInstr/MCInst and the same parsed operands array.
757 // Below is an example of component operand to parsed operand
758 // mapping for the following instruction:
759 //
760 //   v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
761 //
762 //                          PARSED        COMPONENT         PARSED
763 // COMPONENT               OPERANDS     OPERAND INDEX    OPERAND INDEX
764 // -------------------------------------------------------------------
765 //                     "v_dual_add_f32"                        0
766 // v_dual_add_f32            v255          0 (DST)    -->      1
767 //                           v4            1 (SRC0)   -->      2
768 //                           v5            2 (SRC1)   -->      3
769 //                          "::"                               4
770 //                     "v_dual_mov_b32"                        5
771 // v_dual_mov_b32            v6            0 (DST)    -->      6
772 //                           v1            1 (SRC0)   -->      7
773 // -------------------------------------------------------------------
774 //
class ComponentLayout {
private:
  // Regular MachineInstr/MCInst operands are ordered as follows:
  //   dst, src0 [, other src operands]
  // VOPD MachineInstr/MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
  // Each ComponentKind has operand indices defined below.
  // Indexed by ComponentKind (SINGLE, COMPONENT_X, COMPONENT_Y).
  static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};

  // VOPD3 instructions may have 2 or 3 source modifiers, src2 modifier is not
  // used if there is tied accumulator. Indexing of this array:
  // MC_SRC_IDX[VOPD3ModsNum][SrcNo]. This returns an index for a SINGLE
  // instruction layout, add 1 for COMPONENT_X or COMPONENT_Y. For the second
  // component add OpX.MCSrcNum + OpX.VOPD3ModsNum.
  // For VOPD1/VOPD2 use column with zero modifiers.
  static constexpr unsigned SINGLE_MC_SRC_IDX[4][3] = {
      {1, 2, 3}, {2, 3, 4}, {2, 4, 5}, {2, 4, 6}};

  // Parsed operands of regular instructions are ordered as follows:
  //   Mnemo dst src0 [vsrc1 ...]
  // Parsed VOPD operands are ordered as follows:
  //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
  //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
                                                4 /* + OpX.ParsedSrcNum */};
  static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
      2, 2, 5 /* + OpX.ParsedSrcNum */};

private:
  const ComponentKind Kind;      // Which component this layout describes.
  const ComponentProps PrevComp; // COMPONENT_X props for COMPONENT_Y;
                                 // default-constructed (all zero) otherwise.
  const unsigned VOPD3ModsNum;   // Source-modifier count for VOPD3 layout.
  const int BitOp3Idx; // Index of bitop3 operand or -1

public:
  // Create layout for COMPONENT_X or SINGLE component.
  ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
      : Kind(Kind), VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {
    assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
  }

  // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum,
                  int BitOp3Idx)
      : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps),
        VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {}

public:
  // Return the index of dst operand in MCInst operands.
  unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }

  // Return the index of the specified src operand in MCInst operands.
  unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);

    // For a SINGLE instruction, the src2 slot may actually be the bitop3
    // operand; use its recorded index directly.
    if (Kind == SINGLE && CompSrcIdx == 2 && BitOp3Idx != -1)
      return BitOp3Idx;

    if (VOPD3) {
      // See SINGLE_MC_SRC_IDX comment: shift past the previous component's
      // srcs and modifiers, plus one for the extra dst of a VOPD pair.
      return SINGLE_MC_SRC_IDX[VOPD3ModsNum][CompSrcIdx] + getPrevCompSrcNum() +
             getPrevCompVOPD3ModsNum() + (Kind != SINGLE ? 1 : 0);
    }

    // VOPD1/VOPD2: column with zero modifiers.
    return SINGLE_MC_SRC_IDX[0][CompSrcIdx] + getPrevCompSrcNum() +
           (Kind != SINGLE ? 1 : 0);
  }

  // Return the index of dst operand in the parsed operands array.
  unsigned getIndexOfDstInParsedOperands() const {
    return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
  }

  // Return the index of the specified src operand in the parsed operands array.
  unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
  }

private:
  // The helpers below return COMPONENT_X counts; all are zero for
  // SINGLE/COMPONENT_X layouts because PrevComp is default-constructed.
  unsigned getPrevCompSrcNum() const {
    return PrevComp.getCompSrcOperandsNum();
  }
  unsigned getPrevCompParsedSrcNum() const {
    return PrevComp.getCompParsedSrcOperandsNum();
  }
  unsigned getPrevCompVOPD3ModsNum() const {
    return PrevComp.getCompVOPD3ModsNum();
  }
};
865 
866 // Layout and properties of VOPD components.
867 class ComponentInfo : public ComponentProps, public ComponentLayout {
868 public:
869   // Create ComponentInfo for COMPONENT_X or SINGLE component.
870   ComponentInfo(const MCInstrDesc &OpDesc,
871                 ComponentKind Kind = ComponentKind::SINGLE,
872                 bool VOP3Layout = false)
ComponentProps(OpDesc,VOP3Layout)873       : ComponentProps(OpDesc, VOP3Layout),
874         ComponentLayout(Kind, getCompVOPD3ModsNum(), getBitOp3OperandIdx()) {}
875 
876   // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
877   ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps,
878                 bool VOP3Layout = false)
ComponentProps(OpDesc,VOP3Layout)879       : ComponentProps(OpDesc, VOP3Layout),
880         ComponentLayout(OpXProps, getCompVOPD3ModsNum(),
881                         getBitOp3OperandIdx()) {}
882 
883   // Map component operand index to parsed operand index.
884   // Return 0 if the specified operand does not exist.
885   unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
886 };
887 
888 // Properties of VOPD instructions.
889 class InstInfo {
890 private:
891   const ComponentInfo CompInfo[COMPONENTS_NUM];
892 
893 public:
894   using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;
895 
InstInfo(const MCInstrDesc & OpX,const MCInstrDesc & OpY)896   InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
897       : CompInfo{OpX, OpY} {}
898 
InstInfo(const ComponentInfo & OprInfoX,const ComponentInfo & OprInfoY)899   InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
900       : CompInfo{OprInfoX, OprInfoY} {}
901 
902   const ComponentInfo &operator[](size_t ComponentIdx) const {
903     assert(ComponentIdx < COMPONENTS_NUM);
904     return CompInfo[ComponentIdx];
905   }
906 
907   // Check VOPD operands constraints.
908   // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
909   // for the specified component and MC operand. The callback must return 0
910   // if the operand is not a register or not a VGPR.
911   // If \p SkipSrc is set to true then constraints for source operands are not
912   // checked.
913   // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
914   // even though it violates requirement to be from different banks.
915   // If \p VOPD3 is set to true both dst registers allowed to be either odd
916   // or even and instruction may have real src2 as opposed to tied accumulator.
917   bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
918                          const MCRegisterInfo &MRI, bool SkipSrc = false,
919                          bool AllowSameVGPR = false, bool VOPD3 = false) const {
920     return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR,
921                                       VOPD3)
922         .has_value();
923   }
924 
925   // Check VOPD operands constraints.
926   // Return the index of an invalid component operand, if any.
927   // If \p SkipSrc is set to true then constraints for source operands are not
928   // checked except for being from the same halves of VGPR file on gfx1250.
929   // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
930   // even though it violates requirement to be from different banks.
931   // If \p VOPD3 is set to true both dst registers allowed to be either odd
932   // or even and instruction may have real src2 as opposed to tied accumulator.
933   std::optional<unsigned> getInvalidCompOperandIndex(
934       std::function<unsigned(unsigned, unsigned)> GetRegIdx,
935       const MCRegisterInfo &MRI, bool SkipSrc = false,
936       bool AllowSameVGPR = false, bool VOPD3 = false) const;
937 
938 private:
939   RegIndices
940   getRegIndices(unsigned ComponentIdx,
941                 std::function<unsigned(unsigned, unsigned)> GetRegIdx,
942                 bool VOPD3) const;
943 };
944 
945 } // namespace VOPD
946 
947 LLVM_READONLY
948 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
949 
950 LLVM_READONLY
951 // Get properties of 2 single VOP1/VOP2 instructions
952 // used as components to create a VOPD instruction.
953 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
954 
955 LLVM_READONLY
956 // Get properties of VOPD X and Y components.
957 VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
958                                const MCInstrInfo *InstrInfo);
959 
960 LLVM_READONLY
961 bool isAsyncStore(unsigned Opc);
962 LLVM_READONLY
963 bool isTensorStore(unsigned Opc);
964 LLVM_READONLY
965 unsigned getTemporalHintType(const MCInstrDesc TID);
966 
967 LLVM_READONLY
968 bool isTrue16Inst(unsigned Opc);
969 
970 LLVM_READONLY
971 FPType getFPDstSelType(unsigned Opc);
972 
973 LLVM_READONLY
974 bool isInvalidSingleUseConsumerInst(unsigned Opc);
975 
976 LLVM_READONLY
977 bool isInvalidSingleUseProducerInst(unsigned Opc);
978 
979 bool isDPMACCInstruction(unsigned Opc);
980 
981 LLVM_READONLY
982 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
983 
984 LLVM_READONLY
985 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
986 
987 void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
988                                const MCSubtargetInfo *STI);
989 
990 bool isGroupSegment(const GlobalValue *GV);
991 bool isGlobalSegment(const GlobalValue *GV);
992 bool isReadOnlySegment(const GlobalValue *GV);
993 
994 /// \returns True if constants should be emitted to .text section for given
995 /// target triple \p TT, false otherwise.
996 bool shouldEmitConstantsToTextSection(const Triple &TT);
997 
998 /// \returns Integer value requested using \p F's \p Name attribute.
999 ///
1000 /// \returns \p Default if attribute is not present.
1001 ///
1002 /// \returns \p Default and emits error if requested value cannot be converted
1003 /// to integer.
1004 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
1005 
1006 /// \returns A pair of integer values requested using \p F's \p Name attribute
1007 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1008 /// is false).
1009 ///
1010 /// \returns \p Default if attribute is not present.
1011 ///
1012 /// \returns \p Default and emits error if one of the requested values cannot be
1013 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
1014 /// not present.
1015 std::pair<unsigned, unsigned>
1016 getIntegerPairAttribute(const Function &F, StringRef Name,
1017                         std::pair<unsigned, unsigned> Default,
1018                         bool OnlyFirstRequired = false);
1019 
1020 /// \returns A pair of integer values requested using \p F's \p Name attribute
1021 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1022 /// is false).
1023 ///
1024 /// \returns \p std::nullopt if attribute is not present.
1025 ///
1026 /// \returns \p std::nullopt and emits error if one of the requested values
1027 /// cannot be converted to integer, or \p OnlyFirstRequired is false and
1028 /// "second" value is not present.
1029 std::optional<std::pair<unsigned, std::optional<unsigned>>>
1030 getIntegerPairAttribute(const Function &F, StringRef Name,
1031                         bool OnlyFirstRequired = false);
1032 
1033 /// \returns Generate a vector of integer values requested using \p F's \p Name
1034 /// attribute.
1035 /// \returns A vector of size \p Size, with all elements set to \p DefaultVal,
1036 /// if any error occurs. The corresponding error will also be emitted.
1037 SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1038                                              unsigned Size,
1039                                              unsigned DefaultVal);
1040 /// Similar to the function above, but returns std::nullopt if any error occurs.
1041 std::optional<SmallVector<unsigned>>
1042 getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
1043 
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
  unsigned ExpCnt = ~0u;
  unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
  unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
  unsigned SampleCnt = ~0u; // gfx12+ only.
  unsigned BvhCnt = ~0u;    // gfx12+ only.
  unsigned KmCnt = ~0u;     // gfx12+ only.
  unsigned XCnt = ~0u;      // gfx1250.

  // All counters default to "don't care".
  Waitcnt() = default;

  // Pre-gfx12 constructor.
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}

  // gfx12+ constructor.
  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt)
      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {}

  // True if any counter (including StoreCnt) requests a wait.
  bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }

  // True if any counter other than StoreCnt requests a wait.
  bool hasWaitExceptStoreCnt() const {
    return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
           SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u;
  }

  bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }

  // Component-wise minimum, i.e. the strictest combination of both waits.
  // Does the right thing provided self and Other are either both pre-gfx12
  // or both gfx12+.
  Waitcnt combined(const Waitcnt &Other) const {
    Waitcnt Result;
    Result.LoadCnt = std::min(LoadCnt, Other.LoadCnt);
    Result.ExpCnt = std::min(ExpCnt, Other.ExpCnt);
    Result.DsCnt = std::min(DsCnt, Other.DsCnt);
    Result.StoreCnt = std::min(StoreCnt, Other.StoreCnt);
    Result.SampleCnt = std::min(SampleCnt, Other.SampleCnt);
    Result.BvhCnt = std::min(BvhCnt, Other.BvhCnt);
    Result.KmCnt = std::min(KmCnt, Other.KmCnt);
    Result.XCnt = std::min(XCnt, Other.XCnt);
    return Result;
  }
};
1088 
1089 // The following methods are only meaningful on targets that support
1090 // S_WAITCNT.
1091 
1092 /// \returns Vmcnt bit mask for given isa \p Version.
1093 unsigned getVmcntBitMask(const IsaVersion &Version);
1094 
1095 /// \returns Expcnt bit mask for given isa \p Version.
1096 unsigned getExpcntBitMask(const IsaVersion &Version);
1097 
1098 /// \returns Lgkmcnt bit mask for given isa \p Version.
1099 unsigned getLgkmcntBitMask(const IsaVersion &Version);
1100 
1101 /// \returns Waitcnt bit mask for given isa \p Version.
1102 unsigned getWaitcntBitMask(const IsaVersion &Version);
1103 
1104 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
1105 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
1106 
1107 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
1108 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
1109 
1110 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
1111 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
1112 
1113 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
1114 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
1115 /// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
1116 /// which needs it is deprecated
1117 ///
1118 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
1119 ///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
1120 ///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
1121 ///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
1122 ///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
1123 ///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
1124 ///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
1125 ///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
1126 ///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
1127 ///
1128 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1129                    unsigned &Expcnt, unsigned &Lgkmcnt);
1130 
1131 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
1132 
1133 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
1134 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1135                      unsigned Vmcnt);
1136 
1137 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
1138 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1139                       unsigned Expcnt);
1140 
1141 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
1142 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1143                        unsigned Lgkmcnt);
1144 
1145 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
1146 /// \p Version. Should not be used on gfx12+, the instruction which needs
1147 /// it is deprecated
1148 ///
1149 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
1150 ///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
1151 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
1152 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
1153 ///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
1154 ///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11)
1155 ///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
1156 ///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
1157 ///     Waitcnt[15:10] = \p Vmcnt       (gfx11)
1158 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
1159 ///
1160 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
1161 /// isa \p Version.
1162 ///
1163 unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1164                        unsigned Expcnt, unsigned Lgkmcnt);
1165 
1166 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
1167 
1168 // The following methods are only meaningful on targets that support
1169 // S_WAIT_*CNT, introduced with gfx12.
1170 
1171 /// \returns Loadcnt bit mask for given isa \p Version.
1172 /// Returns 0 for versions that do not support LOADcnt
1173 unsigned getLoadcntBitMask(const IsaVersion &Version);
1174 
1175 /// \returns Samplecnt bit mask for given isa \p Version.
1176 /// Returns 0 for versions that do not support SAMPLEcnt
1177 unsigned getSamplecntBitMask(const IsaVersion &Version);
1178 
1179 /// \returns Bvhcnt bit mask for given isa \p Version.
1180 /// Returns 0 for versions that do not support BVHcnt
1181 unsigned getBvhcntBitMask(const IsaVersion &Version);
1182 
1183 /// \returns Dscnt bit mask for given isa \p Version.
1184 /// Returns 0 for versions that do not support DScnt
1185 unsigned getDscntBitMask(const IsaVersion &Version);
1186 
1187 /// \returns Kmcnt bit mask for given isa \p Version.
1188 /// Returns 0 for versions that do not support KMcnt
1189 unsigned getKmcntBitMask(const IsaVersion &Version);
1190 
1191 /// \returns Xcnt bit mask for given isa \p Version.
1192 /// Returns 0 for versions that do not support Xcnt.
1193 unsigned getXcntBitMask(const IsaVersion &Version);
1194 
1195 /// \return STOREcnt or VScnt bit mask for given isa \p Version.
1196 /// returns 0 for versions that do not support STOREcnt or VScnt.
1197 /// STOREcnt and VScnt are the same counter, the name used
1198 /// depends on the ISA version.
1199 unsigned getStorecntBitMask(const IsaVersion &Version);
1200 
1201 // The following are only meaningful on targets that support
1202 // S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1203 
1204 /// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1205 /// isa \p Version.
1206 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1207 
1208 /// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1209 /// isa \p Version.
1210 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1211 
1212 /// \returns \p Loadcnt and \p Dscnt components of \p Decoded  encoded as an
1213 /// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1214 /// \p Version.
1215 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1216 
1217 /// \returns \p Storecnt and \p Dscnt components of \p Decoded  encoded as an
1218 /// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1219 /// \p Version.
1220 unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1221 
1222 namespace Hwreg {
1223 
1224 using HwregId = EncodingField<5, 0>;
1225 using HwregOffset = EncodingField<10, 6>;
1226 
1227 struct HwregSize : EncodingField<15, 11, 32> {
1228   using EncodingField::EncodingField;
encodeHwregSize1229   constexpr uint64_t encode() const { return Value - 1; }
decodeHwregSize1230   static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
1231 };
1232 
1233 using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;
1234 
1235 } // namespace Hwreg
1236 
1237 namespace DepCtr {
1238 
1239 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
1240 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1241                  const MCSubtargetInfo &STI);
1242 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1243                               const MCSubtargetInfo &STI);
1244 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1245                   bool &IsDefault, const MCSubtargetInfo &STI);
1246 
1247 /// \returns Decoded VaVdst from given immediate \p Encoded.
1248 unsigned decodeFieldVaVdst(unsigned Encoded);
1249 
1250 /// \returns Decoded VmVsrc from given immediate \p Encoded.
1251 unsigned decodeFieldVmVsrc(unsigned Encoded);
1252 
1253 /// \returns Decoded SaSdst from given immediate \p Encoded.
1254 unsigned decodeFieldSaSdst(unsigned Encoded);
1255 
1256 /// \returns Decoded VaSdst from given immediate \p Encoded.
1257 unsigned decodeFieldVaSdst(unsigned Encoded);
1258 
1259 /// \returns Decoded VaVcc from given immediate \p Encoded.
1260 unsigned decodeFieldVaVcc(unsigned Encoded);
1261 
1262 /// \returns Decoded VaSsrc from given immediate \p Encoded.
1263 unsigned decodeFieldVaSsrc(unsigned Encoded);
1264 
1265 /// \returns Decoded HoldCnt from given immediate \p Encoded.
1266 unsigned decodeFieldHoldCnt(unsigned Encoded);
1267 
1268 /// \returns \p VmVsrc as an encoded Depctr immediate.
1269 unsigned encodeFieldVmVsrc(unsigned VmVsrc);
1270 
1271 /// \returns \p Encoded combined with encoded \p VmVsrc.
1272 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
1273 
1274 /// \returns \p VaVdst as an encoded Depctr immediate.
1275 unsigned encodeFieldVaVdst(unsigned VaVdst);
1276 
1277 /// \returns \p Encoded combined with encoded \p VaVdst.
1278 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1279 
1280 /// \returns \p SaSdst as an encoded Depctr immediate.
1281 unsigned encodeFieldSaSdst(unsigned SaSdst);
1282 
1283 /// \returns \p Encoded combined with encoded \p SaSdst.
1284 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
1285 
1286 /// \returns \p VaSdst as an encoded Depctr immediate.
1287 unsigned encodeFieldVaSdst(unsigned VaSdst);
1288 
1289 /// \returns \p Encoded combined with encoded \p VaSdst.
1290 unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst);
1291 
1292 /// \returns \p VaVcc as an encoded Depctr immediate.
1293 unsigned encodeFieldVaVcc(unsigned VaVcc);
1294 
1295 /// \returns \p Encoded combined with encoded \p VaVcc.
1296 unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);
1297 
1298 /// \returns \p HoldCnt as an encoded Depctr immediate.
1299 unsigned encodeFieldHoldCnt(unsigned HoldCnt);
1300 
1301 /// \returns \p Encoded combined with encoded \p HoldCnt.
1302 unsigned encodeFieldHoldCnt(unsigned HoldCnt, unsigned Encoded);
1303 
1304 /// \returns \p VaSsrc as an encoded Depctr immediate.
1305 unsigned encodeFieldVaSsrc(unsigned VaSsrc);
1306 
1307 /// \returns \p Encoded combined with encoded \p VaSsrc.
1308 unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
1309 
1310 } // namespace DepCtr
1311 
1312 namespace Exp {
1313 
1314 bool getTgtName(unsigned Id, StringRef &Name, int &Index);
1315 
1316 LLVM_READONLY
1317 unsigned getTgtId(const StringRef Name);
1318 
1319 LLVM_READNONE
1320 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
1321 
1322 } // namespace Exp
1323 
1324 namespace MTBUFFormat {
1325 
1326 LLVM_READNONE
1327 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
1328 
1329 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
1330 
1331 int64_t getDfmt(const StringRef Name);
1332 
1333 StringRef getDfmtName(unsigned Id);
1334 
1335 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
1336 
1337 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
1338 
1339 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
1340 
1341 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
1342 
1343 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
1344 
1345 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
1346 
1347 bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
1348 
1349 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1350                              const MCSubtargetInfo &STI);
1351 
1352 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
1353 
1354 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
1355 
1356 } // namespace MTBUFFormat
1357 
1358 namespace SendMsg {
1359 
1360 LLVM_READNONE
1361 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
1362 
1363 LLVM_READNONE
1364 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1365                   bool Strict = true);
1366 
1367 LLVM_READNONE
1368 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1369                       const MCSubtargetInfo &STI, bool Strict = true);
1370 
1371 LLVM_READNONE
1372 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
1373 
1374 LLVM_READNONE
1375 bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1376 
1377 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1378                uint16_t &StreamId, const MCSubtargetInfo &STI);
1379 
1380 LLVM_READNONE
1381 uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);
1382 
1383 } // namespace SendMsg
1384 
1385 unsigned getInitialPSInputAddr(const Function &F);
1386 
1387 bool getHasColorExport(const Function &F);
1388 
1389 bool getHasDepthExport(const Function &F);
1390 
1391 bool hasDynamicVGPR(const Function &F);
1392 
1393 // Returns the value of the "amdgpu-dynamic-vgpr-block-size" attribute, or 0 if
1394 // the attribute is missing or its value is invalid.
1395 unsigned getDynamicVGPRBlockSize(const Function &F);
1396 
1397 LLVM_READNONE
isShader(CallingConv::ID CC)1398 constexpr bool isShader(CallingConv::ID CC) {
1399   switch (CC) {
1400   case CallingConv::AMDGPU_VS:
1401   case CallingConv::AMDGPU_LS:
1402   case CallingConv::AMDGPU_HS:
1403   case CallingConv::AMDGPU_ES:
1404   case CallingConv::AMDGPU_GS:
1405   case CallingConv::AMDGPU_PS:
1406   case CallingConv::AMDGPU_CS_Chain:
1407   case CallingConv::AMDGPU_CS_ChainPreserve:
1408   case CallingConv::AMDGPU_CS:
1409     return true;
1410   default:
1411     return false;
1412   }
1413 }
1414 
1415 LLVM_READNONE
isGraphics(CallingConv::ID CC)1416 constexpr bool isGraphics(CallingConv::ID CC) {
1417   return isShader(CC) || CC == CallingConv::AMDGPU_Gfx;
1418 }
1419 
1420 LLVM_READNONE
isCompute(CallingConv::ID CC)1421 constexpr bool isCompute(CallingConv::ID CC) {
1422   return !isGraphics(CC) || CC == CallingConv::AMDGPU_CS;
1423 }
1424 
1425 LLVM_READNONE
isEntryFunctionCC(CallingConv::ID CC)1426 constexpr bool isEntryFunctionCC(CallingConv::ID CC) {
1427   switch (CC) {
1428   case CallingConv::AMDGPU_KERNEL:
1429   case CallingConv::SPIR_KERNEL:
1430   case CallingConv::AMDGPU_VS:
1431   case CallingConv::AMDGPU_GS:
1432   case CallingConv::AMDGPU_PS:
1433   case CallingConv::AMDGPU_CS:
1434   case CallingConv::AMDGPU_ES:
1435   case CallingConv::AMDGPU_HS:
1436   case CallingConv::AMDGPU_LS:
1437     return true;
1438   default:
1439     return false;
1440   }
1441 }
1442 
1443 LLVM_READNONE
isChainCC(CallingConv::ID CC)1444 constexpr bool isChainCC(CallingConv::ID CC) {
1445   switch (CC) {
1446   case CallingConv::AMDGPU_CS_Chain:
1447   case CallingConv::AMDGPU_CS_ChainPreserve:
1448     return true;
1449   default:
1450     return false;
1451   }
1452 }
1453 
1454 // These functions are considered entrypoints into the current module, i.e. they
1455 // are allowed to be called from outside the current module. This is different
1456 // from isEntryFunctionCC, which is only true for functions that are entered by
1457 // the hardware. Module entry points include all entry functions but also
1458 // include functions that can be called from other functions inside or outside
1459 // the current module. Module entry functions are allowed to allocate LDS.
1460 LLVM_READNONE
isModuleEntryFunctionCC(CallingConv::ID CC)1461 constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC) {
1462   switch (CC) {
1463   case CallingConv::AMDGPU_Gfx:
1464     return true;
1465   default:
1466     return isEntryFunctionCC(CC) || isChainCC(CC);
1467   }
1468 }
1469 
1470 LLVM_READNONE
isKernel(CallingConv::ID CC)1471 constexpr inline bool isKernel(CallingConv::ID CC) {
1472   switch (CC) {
1473   case CallingConv::AMDGPU_KERNEL:
1474   case CallingConv::SPIR_KERNEL:
1475     return true;
1476   default:
1477     return false;
1478   }
1479 }
1480 
1481 LLVM_READNONE
canGuaranteeTCO(CallingConv::ID CC)1482 constexpr bool canGuaranteeTCO(CallingConv::ID CC) {
1483   return CC == CallingConv::Fast;
1484 }
1485 
1486 /// Return true if we might ever do TCO for calls with this calling convention.
1487 LLVM_READNONE
mayTailCallThisCC(CallingConv::ID CC)1488 constexpr bool mayTailCallThisCC(CallingConv::ID CC) {
1489   switch (CC) {
1490   case CallingConv::C:
1491   case CallingConv::AMDGPU_Gfx:
1492     return true;
1493   default:
1494     return canGuaranteeTCO(CC);
1495   }
1496 }
1497 
1498 bool hasXNACK(const MCSubtargetInfo &STI);
1499 bool hasSRAMECC(const MCSubtargetInfo &STI);
1500 bool hasMIMG_R128(const MCSubtargetInfo &STI);
1501 bool hasA16(const MCSubtargetInfo &STI);
1502 bool hasG16(const MCSubtargetInfo &STI);
1503 bool hasPackedD16(const MCSubtargetInfo &STI);
1504 bool hasGDS(const MCSubtargetInfo &STI);
1505 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1506 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1507 
1508 bool isSI(const MCSubtargetInfo &STI);
1509 bool isCI(const MCSubtargetInfo &STI);
1510 bool isVI(const MCSubtargetInfo &STI);
1511 bool isGFX9(const MCSubtargetInfo &STI);
1512 bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1513 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1514 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1515 bool isGFX8Plus(const MCSubtargetInfo &STI);
1516 bool isGFX9Plus(const MCSubtargetInfo &STI);
1517 bool isNotGFX9Plus(const MCSubtargetInfo &STI);
1518 bool isGFX10(const MCSubtargetInfo &STI);
1519 bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1520 bool isGFX10Plus(const MCSubtargetInfo &STI);
1521 bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1522 bool isGFX10Before1030(const MCSubtargetInfo &STI);
1523 bool isGFX11(const MCSubtargetInfo &STI);
1524 bool isGFX11Plus(const MCSubtargetInfo &STI);
1525 bool isGFX12(const MCSubtargetInfo &STI);
1526 bool isGFX12Plus(const MCSubtargetInfo &STI);
1527 bool isGFX1250(const MCSubtargetInfo &STI);
1528 bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1529 bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1530 bool isGCN3Encoding(const MCSubtargetInfo &STI);
1531 bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1532 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1533 bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1534 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1535 bool isGFX90A(const MCSubtargetInfo &STI);
1536 bool isGFX940(const MCSubtargetInfo &STI);
1537 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
1538 bool hasMAIInsts(const MCSubtargetInfo &STI);
1539 bool hasVOPD(const MCSubtargetInfo &STI);
1540 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1541 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1542 unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1543 bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
1544 
1545 /// Is Reg - scalar register
1546 bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI);
1547 
1548 /// \returns if \p Reg occupies the high 16-bits of a 32-bit register.
1549 bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI);
1550 
1551 /// If \p Reg is a pseudo reg, return the correct hardware register given
1552 /// \p STI otherwise return \p Reg.
1553 MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI);
1554 
1555 /// Convert hardware register \p Reg to a pseudo register
1556 LLVM_READNONE
1557 MCRegister mc2PseudoReg(MCRegister Reg);
1558 
1559 LLVM_READNONE
1560 bool isInlineValue(unsigned Reg);
1561 
1562 /// Is this an AMDGPU specific source operand? These include registers,
1563 /// inline constants, literals and mandatory literals (KImm).
1564 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
1565 
1566 /// Is this a KImm operand?
1567 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1568 
1569 /// Is this floating-point operand?
1570 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1571 
1572 /// Does this operand support only inlinable literals?
1573 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1574 
1575 /// Get the size in bits of a register from the register class \p RC.
1576 unsigned getRegBitWidth(unsigned RCID);
1577 
1578 /// Get the size in bits of a register from the register class \p RC.
1579 unsigned getRegBitWidth(const MCRegisterClass &RC);
1580 
1581 /// Get size of register operand
1582 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
1583                            unsigned OpNo);
1584 
1585 LLVM_READNONE
getOperandSize(const MCOperandInfo & OpInfo)1586 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1587   switch (OpInfo.OperandType) {
1588   case AMDGPU::OPERAND_REG_IMM_INT32:
1589   case AMDGPU::OPERAND_REG_IMM_FP32:
1590   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1591   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1592   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1593   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1594   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1595   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1596   case AMDGPU::OPERAND_KIMM32:
1597   case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1598   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1599     return 4;
1600 
1601   case AMDGPU::OPERAND_REG_IMM_INT64:
1602   case AMDGPU::OPERAND_REG_IMM_FP64:
1603   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1604   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1605   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1606   case AMDGPU::OPERAND_KIMM64:
1607     return 8;
1608 
1609   case AMDGPU::OPERAND_REG_IMM_INT16:
1610   case AMDGPU::OPERAND_REG_IMM_BF16:
1611   case AMDGPU::OPERAND_REG_IMM_FP16:
1612   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1613   case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1614   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1615   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1616   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1617   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1618   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1619   case AMDGPU::OPERAND_REG_IMM_V2BF16:
1620   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1621     return 2;
1622 
1623   default:
1624     llvm_unreachable("unhandled operand type");
1625   }
1626 }
1627 
1628 LLVM_READNONE
getOperandSize(const MCInstrDesc & Desc,unsigned OpNo)1629 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1630   return getOperandSize(Desc.operands()[OpNo]);
1631 }
1632 
1633 /// Is this literal inlinable, and not one of the values intended for floating
1634 /// point values.
1635 LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // Integer inline constants cover the closed range [-16, 64]; values outside
  // it require a real literal.
  return -16 <= Literal && Literal <= 64;
}
1639 
1640 /// Is this literal inlinable
1641 LLVM_READNONE
1642 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1643 
1644 LLVM_READNONE
1645 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1646 
1647 LLVM_READNONE
1648 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1649 
1650 LLVM_READNONE
1651 bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1652 
1653 LLVM_READNONE
1654 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1655 
1656 LLVM_READNONE
1657 bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
1658 
/// \returns the inline-constant encoding for the packed v2i16 value
/// \p Literal, or std::nullopt if there is none.
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

/// \returns the inline-constant encoding for the packed v2bf16 value
/// \p Literal, or std::nullopt if there is none.
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);

/// \returns the inline-constant encoding for the packed v2f16 value
/// \p Literal, or std::nullopt if there is none.
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);

/// \returns true if \p Literal is inlinable for the packed 16-bit operand
/// type \p OpType.
LLVM_READNONE
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);

LLVM_READNONE
bool isInlinableLiteralV2I16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV2BF16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV2F16(uint32_t Literal);

/// \returns true if \p Val can be encoded as a 32-bit literal
/// (\p IsFP64 selects the 64-bit floating-point interpretation).
LLVM_READNONE
bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1682 
/// \returns true if kernel argument \p Arg is passed in an SGPR.
bool isArgPassedInSGPR(const Argument *Arg);

/// \returns true if argument \p ArgNo of call \p CB is passed in an SGPR.
bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);

/// \returns true if \p EncodedOffset is a legal unsigned SMRD immediate
/// offset encoding for subtarget \p ST.
LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

/// \returns true if \p EncodedOffset is a legal signed SMRD immediate
/// offset encoding for subtarget \p ST.
LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset, bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset = false);

/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset);

/// For pre-GFX12 FLAT instructions the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the signed offset field in flat
/// instructions. Note that some forms of the instruction disallow negative
/// offsets.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field.  \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1724 
1725 LLVM_READNONE
isLegalDPALU_DPPControl(unsigned DC)1726 inline bool isLegalDPALU_DPPControl(unsigned DC) {
1727   return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1728 }
1729 
/// \returns true if an instruction may have a 64-bit VGPR operand.
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);

/// \returns true if an instruction is a DP ALU DPP.
bool isDPALU_DPP(const MCInstrDesc &OpDesc);

/// \returns true if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

/// \returns true if the intrinsic is uniform.
bool isIntrinsicAlwaysUniform(unsigned IntrID);

/// \returns lds block size in terms of dwords.
/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
/// must be defined in terms of bytes.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1746 
1747 } // end namespace AMDGPU
1748 
/// Print a textual representation of \p S to \p OS.
raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);
1751 
1752 } // end namespace llvm
1753 
1754 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1755