xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 
12 #include "AMDGPUSubtarget.h"
13 #include "SIDefines.h"
14 #include "llvm/IR/CallingConv.h"
15 #include "llvm/IR/InstrTypes.h"
16 #include "llvm/IR/Module.h"
17 #include "llvm/Support/Alignment.h"
18 #include <array>
19 #include <functional>
20 #include <utility>
21 
22 struct amd_kernel_code_t;
23 
24 namespace llvm {
25 
26 struct Align;
27 class Argument;
28 class Function;
29 class GlobalValue;
30 class MCInstrInfo;
31 class MCRegisterClass;
32 class MCRegisterInfo;
33 class MCSubtargetInfo;
34 class StringRef;
35 class Triple;
36 class raw_ostream;
37 
38 namespace AMDGPU {
39 
40 struct AMDGPUMCKernelCodeT;
41 struct IsaVersion;
42 
/// Version numbers of the generic targets, as emitted by this version of LLVM.
///
/// A family's number is bumped each time a codegen breaking change lands
/// within that generic family.
namespace GenericVersion {
// Namespace-scope constexpr variables already have internal linkage, so no
// storage-class specifier is spelled out.
constexpr unsigned GFX9 = 1;
constexpr unsigned GFX10_1 = 1;
constexpr unsigned GFX10_3 = 1;
constexpr unsigned GFX11 = 1;
constexpr unsigned GFX12 = 1;
} // namespace GenericVersion
54 
/// Code object version numbers; each enumerator equals the version it names.
enum {
  AMDHSA_COV4 = 4,
  AMDHSA_COV5 = 5,
  AMDHSA_COV6 = 6
};
56 
/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI);

/// \returns Code object version from the IR module flag.
unsigned getAMDHSACodeObjectVersion(const Module &M);

/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);

/// \returns The default HSA code object version. This should only be used when
/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
/// flag or a .amdhsa_code_object_version directive).
unsigned getDefaultAMDHSACodeObjectVersion();

/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION].
///
/// \param CodeObjectVersion is a value returned by
/// getAMDHSACodeObjectVersion().
uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);

/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);

/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
unsigned getHostcallImplicitArgPosition(unsigned COV);

// NOTE(review): the two declarations below are undocumented in this header.
// Going by their names and the two siblings above they presumably return the
// offset of the default queue / completion action argument from
// implicitarg_ptr for code object version \p COV -- confirm against the
// definitions.
unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
unsigned getCompletionActionImplicitArgPosition(unsigned COV);
83 
/// Description of one buffer format; this is the type handed back by the
/// getGcnBufferFormatInfo() overloads declared later in this header.
///
/// NOTE(review): the member comments below are inferred from the member and
/// parameter names only; confirm against the table definition. Do not reorder
/// members without checking how the rows are initialized.
struct GcnBufferFormatInfo {
  unsigned Format;        // Presumably the combined format value.
  unsigned BitsPerComp;   // Presumably bits per component.
  unsigned NumComponents; // Presumably the number of components.
  unsigned NumFormat;     // Presumably the numeric-format part.
  unsigned DataFormat;    // Presumably the data-format part.
};
91 
/// Per-opcode flags for MAI instructions. The matching table declarations are
/// requested from AMDGPUGenSearchableTables.inc via GET_MAIInstInfoTable_DECL
/// right below.
///
/// NOTE(review): presumably these flags back getMAIIsDGEMM() and
/// getMAIIsGFX940XDL() declared later in this header -- confirm in the .cpp.
struct MAIInstInfo {
  uint16_t Opcode;    // Opcode this row describes.
  bool is_dgemm;      // Presumably: double precision GEMM operation.
  bool is_gfx940_xdl; // Presumably: XDL operation on gfx940 -- confirm.
};
97 
98 #define GET_MIMGBaseOpcode_DECL
99 #define GET_MIMGDim_DECL
100 #define GET_MIMGEncoding_DECL
101 #define GET_MIMGLZMapping_DECL
102 #define GET_MIMGMIPMapping_DECL
103 #define GET_MIMGBiASMapping_DECL
104 #define GET_MAIInstInfoTable_DECL
105 #include "AMDGPUGenSearchableTables.inc"
106 
107 namespace IsaInfo {
108 
enum {
  TRAP_NUM_SGPRS = 16,
  // 96 is the value the closed Vulkan driver uses: it caps the wave count at
  // 8, but causes less SGPR spilling than a setting of 80 would.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
};
115 
/// The possible states of a single target ID setting.
enum class TargetIDSetting { Unsupported = 0, Any = 1, Off = 2, On = 3 };
122 
123 class AMDGPUTargetID {
124 private:
125   const MCSubtargetInfo &STI;
126   TargetIDSetting XnackSetting;
127   TargetIDSetting SramEccSetting;
128 
129 public:
130   explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
131   ~AMDGPUTargetID() = default;
132 
133   /// \return True if the current xnack setting is not "Unsupported".
isXnackSupported()134   bool isXnackSupported() const {
135     return XnackSetting != TargetIDSetting::Unsupported;
136   }
137 
138   /// \returns True if the current xnack setting is "On" or "Any".
isXnackOnOrAny()139   bool isXnackOnOrAny() const {
140     return XnackSetting == TargetIDSetting::On ||
141         XnackSetting == TargetIDSetting::Any;
142   }
143 
144   /// \returns True if current xnack setting is "On" or "Off",
145   /// false otherwise.
isXnackOnOrOff()146   bool isXnackOnOrOff() const {
147     return getXnackSetting() == TargetIDSetting::On ||
148         getXnackSetting() == TargetIDSetting::Off;
149   }
150 
151   /// \returns The current xnack TargetIDSetting, possible options are
152   /// "Unsupported", "Any", "Off", and "On".
getXnackSetting()153   TargetIDSetting getXnackSetting() const {
154     return XnackSetting;
155   }
156 
157   /// Sets xnack setting to \p NewXnackSetting.
setXnackSetting(TargetIDSetting NewXnackSetting)158   void setXnackSetting(TargetIDSetting NewXnackSetting) {
159     XnackSetting = NewXnackSetting;
160   }
161 
162   /// \return True if the current sramecc setting is not "Unsupported".
isSramEccSupported()163   bool isSramEccSupported() const {
164     return SramEccSetting != TargetIDSetting::Unsupported;
165   }
166 
167   /// \returns True if the current sramecc setting is "On" or "Any".
isSramEccOnOrAny()168   bool isSramEccOnOrAny() const {
169   return SramEccSetting == TargetIDSetting::On ||
170       SramEccSetting == TargetIDSetting::Any;
171   }
172 
173   /// \returns True if current sramecc setting is "On" or "Off",
174   /// false otherwise.
isSramEccOnOrOff()175   bool isSramEccOnOrOff() const {
176     return getSramEccSetting() == TargetIDSetting::On ||
177         getSramEccSetting() == TargetIDSetting::Off;
178   }
179 
180   /// \returns The current sramecc TargetIDSetting, possible options are
181   /// "Unsupported", "Any", "Off", and "On".
getSramEccSetting()182   TargetIDSetting getSramEccSetting() const {
183     return SramEccSetting;
184   }
185 
186   /// Sets sramecc setting to \p NewSramEccSetting.
setSramEccSetting(TargetIDSetting NewSramEccSetting)187   void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
188     SramEccSetting = NewSramEccSetting;
189   }
190 
191   void setTargetIDFromFeaturesString(StringRef FS);
192   void setTargetIDFromTargetIDStream(StringRef TargetID);
193 
194   /// \returns String representation of an object.
195   std::string toString() const;
196 };
197 
198 /// \returns Wavefront size for given subtarget \p STI.
199 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
200 
201 /// \returns Local memory size in bytes for given subtarget \p STI.
202 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
203 
204 /// \returns Maximum addressable local memory size in bytes for given subtarget
205 /// \p STI.
206 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
207 
208 /// \returns Number of execution units per compute unit for given subtarget \p
209 /// STI.
210 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
211 
212 /// \returns Maximum number of work groups per compute unit for given subtarget
213 /// \p STI and limited by given \p FlatWorkGroupSize.
214 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
215                                unsigned FlatWorkGroupSize);
216 
217 /// \returns Minimum number of waves per execution unit for given subtarget \p
218 /// STI.
219 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
220 
221 /// \returns Maximum number of waves per execution unit for given subtarget \p
222 /// STI without any kind of limitation.
223 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
224 
225 /// \returns Number of waves per execution unit required to support the given \p
226 /// FlatWorkGroupSize.
227 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
228                                    unsigned FlatWorkGroupSize);
229 
230 /// \returns Minimum flat work group size for given subtarget \p STI.
231 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
232 
233 /// \returns Maximum flat work group size for given subtarget \p STI.
234 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
235 
236 /// \returns Number of waves per work group for given subtarget \p STI and
237 /// \p FlatWorkGroupSize.
238 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
239                               unsigned FlatWorkGroupSize);
240 
241 /// \returns SGPR allocation granularity for given subtarget \p STI.
242 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
243 
244 /// \returns SGPR encoding granularity for given subtarget \p STI.
245 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
246 
247 /// \returns Total number of SGPRs for given subtarget \p STI.
248 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
249 
250 /// \returns Addressable number of SGPRs for given subtarget \p STI.
251 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
252 
253 /// \returns Minimum number of SGPRs that meets the given number of waves per
254 /// execution unit requirement for given subtarget \p STI.
255 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
256 
257 /// \returns Maximum number of SGPRs that meets the given number of waves per
258 /// execution unit requirement for given subtarget \p STI.
259 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
260                         bool Addressable);
261 
262 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
263 /// STI when the given special registers are used.
264 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
265                           bool FlatScrUsed, bool XNACKUsed);
266 
267 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
268 /// STI when the given special registers are used. XNACK is inferred from
269 /// \p STI.
270 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
271                           bool FlatScrUsed);
272 
273 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
274 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
275 /// register counts.
276 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
277 
278 /// \returns VGPR allocation granularity for given subtarget \p STI.
279 ///
280 /// For subtargets which support it, \p EnableWavefrontSize32 should match
281 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
282 unsigned
283 getVGPRAllocGranule(const MCSubtargetInfo *STI,
284                     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
285 
286 /// \returns VGPR encoding granularity for given subtarget \p STI.
287 ///
288 /// For subtargets which support it, \p EnableWavefrontSize32 should match
289 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
290 unsigned getVGPREncodingGranule(
291     const MCSubtargetInfo *STI,
292     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
293 
294 /// \returns Total number of VGPRs for given subtarget \p STI.
295 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
296 
297 /// \returns Addressable number of architectural VGPRs for a given subtarget \p
298 /// STI.
299 unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
300 
301 /// \returns Addressable number of VGPRs for given subtarget \p STI.
302 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
303 
304 /// \returns Minimum number of VGPRs that meets given number of waves per
305 /// execution unit requirement for given subtarget \p STI.
306 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
307 
308 /// \returns Maximum number of VGPRs that meets given number of waves per
309 /// execution unit requirement for given subtarget \p STI.
310 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
311 
312 /// \returns Number of waves reachable for a given \p NumVGPRs usage for given
313 /// subtarget \p STI.
314 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
315                                       unsigned NumVGPRs);
316 
317 /// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
318 /// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
319 unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
320                                       unsigned MaxWaves,
321                                       unsigned TotalNumVGPRs);
322 
323 /// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
324 /// Gen.
325 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
326                                   AMDGPUSubtarget::Generation Gen);
327 
328 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
329 /// \p NumVGPRs are used. We actually return the number of blocks -1, since
330 /// that's what we encode.
331 ///
332 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
333 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
334 unsigned getEncodedNumVGPRBlocks(
335     const MCSubtargetInfo *STI, unsigned NumVGPRs,
336     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
337 
338 /// \returns Number of VGPR blocks that need to be allocated for the given
339 /// subtarget \p STI when \p NumVGPRs are used.
340 unsigned getAllocatedNumVGPRBlocks(
341     const MCSubtargetInfo *STI, unsigned NumVGPRs,
342     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
343 
344 } // end namespace IsaInfo
345 
// Represents a field in an encoded value.
//
// The field occupies bits [HighBit:LowBit] of the encoded value; D is exposed
// as the field's Default. encode() and decode() deal with the field's own,
// unshifted value only: neither of them applies Offset or masks to Width.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");
  static constexpr unsigned Offset = LowBit;
  static constexpr unsigned Width = HighBit - LowBit + 1;

  using ValueType = unsigned;
  static constexpr ValueType Default = D;

  ValueType Value;
  // Deliberately left implicit so that a plain ValueType converts to the
  // field.
  constexpr EncodingField(ValueType Value) : Value(Value) {}

  // Returns the stored value widened to 64 bits.
  constexpr uint64_t encode() const { return Value; }

  // Converts an already extracted field value to ValueType. Usable in constant
  // expressions, like encode(). The conversion to the narrower ValueType is
  // spelled out; bits above ValueType's width are dropped.
  static constexpr ValueType decode(uint64_t Encoded) {
    return static_cast<ValueType>(Encoded);
  }
};

// Represents a single bit in an encoded value.
template <unsigned Bit, unsigned D = 0>
using EncodingBit = EncodingField<Bit, Bit, D>;
366 
// Packs several fields into one 64-bit value and unpacks them again.
template <typename... Fields> struct EncodingFields {
  // ORs together every field's encoded value, each shifted to its own Offset.
  static constexpr uint64_t encode(Fields... Values) {
    return (... | (Values.encode() << Values.Offset));
  }

  // For every field, shifts Encoded down by the field's Offset, masks the
  // result to the field's Width and hands it to the field's decode(). The
  // results come back in the order of Fields.
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    using Decoded = std::tuple<typename Fields::ValueType...>;
    return Decoded{Fields::decode((Encoded >> Fields::Offset) &
                                  maxUIntN(Fields::Width))...};
  }
};
378 
379 LLVM_READONLY
380 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
381 
382 LLVM_READONLY
hasNamedOperand(uint64_t Opcode,uint64_t NamedIdx)383 inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) {
384   return getNamedOperandIdx(Opcode, NamedIdx) != -1;
385 }
386 
387 LLVM_READONLY
388 int getSOPPWithRelaxation(uint16_t Opcode);
389 
// Properties of one MIMG base opcode. Pointers to this type are returned by
// getMIMGBaseOpcode() and getMIMGBaseOpcodeInfo(), declared right after the
// struct.
//
// NOTE(review): the table declarations are pulled in from
// AMDGPUGenSearchableTables.inc (see GET_MIMGBaseOpcode_DECL above), so the
// member order presumably has to match the generated rows -- confirm before
// reordering. The meaning of the individual flags is not visible in this
// header.
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
  bool A16;
  bool NoReturn;
};
409 
410 LLVM_READONLY
411 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
412 
413 LLVM_READONLY
414 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
415 
// Properties of one MIMG dimension. Pointers to this type are returned by
// getMIMGDimInfo(), getMIMGDimInfoByEncoding() and
// getMIMGDimInfoByAsmSuffix(), declared right after the struct.
//
// NOTE(review): member order presumably has to match the generated table rows
// (see GET_MIMGDim_DECL above) -- confirm before reordering.
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  uint8_t Encoding;      // Presumably the key of getMIMGDimInfoByEncoding().
  const char *AsmSuffix; // Presumably the key of getMIMGDimInfoByAsmSuffix().
};
425 
426 LLVM_READONLY
427 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
428 
429 LLVM_READONLY
430 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
431 
432 LLVM_READONLY
433 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
434 
// The five structs below each pair two MIMG base opcodes. Each one is the
// result type of the like-named getMIMG*MappingInfo() lookup declared further
// down in this header.
//
// NOTE(review): judging by the member names, the first member is the variant
// the lookup is keyed on and the second is its counterpart -- confirm against
// the table definitions.

// Returned by getMIMGLZMappingInfo().
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

// Returned by getMIMGMIPMappingInfo().
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

// Returned by getMIMGBiasMappingInfo().
struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};

// Returned by getMIMGOffsetMappingInfo().
struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};

// Returned by getMIMGG16MappingInfo().
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};
459 
460 LLVM_READONLY
461 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
462 
// Pairs two opcodes of a WMMA instruction.
//
// NOTE(review): presumably this is the row type behind
// mapWMMA2AddrTo3AddrOpcode() / mapWMMA3AddrTo2AddrOpcode() declared later in
// this header -- confirm in the .cpp.
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
467 
468 LLVM_READONLY
469 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
470 
471 LLVM_READONLY
472 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
473 
474 LLVM_READONLY
475 const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
476 
477 LLVM_READONLY
478 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
479 
480 LLVM_READONLY
481 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
482                   unsigned VDataDwords, unsigned VAddrDwords);
483 
484 LLVM_READONLY
485 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
486 
487 LLVM_READONLY
488 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
489                            const MIMGDimInfo *Dim, bool IsA16,
490                            bool IsG16Supported);
491 
// Properties of one MIMG opcode; getMIMGInfo(), declared right after the
// struct, returns a pointer to this type.
//
// NOTE(review): BaseOpcode, MIMGEncoding, VDataDwords and VAddrDwords carry
// the same names as the parameters of getMIMGOpcode() above, so they are
// presumably the values that lookup matches on -- confirm in the .cpp.
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};
500 
501 LLVM_READONLY
502 const MIMGInfo *getMIMGInfo(unsigned Opc);
503 
504 LLVM_READONLY
505 int getMTBUFBaseOpcode(unsigned Opc);
506 
507 LLVM_READONLY
508 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
509 
510 LLVM_READONLY
511 int getMTBUFElements(unsigned Opc);
512 
513 LLVM_READONLY
514 bool getMTBUFHasVAddr(unsigned Opc);
515 
516 LLVM_READONLY
517 bool getMTBUFHasSrsrc(unsigned Opc);
518 
519 LLVM_READONLY
520 bool getMTBUFHasSoffset(unsigned Opc);
521 
522 LLVM_READONLY
523 int getMUBUFBaseOpcode(unsigned Opc);
524 
525 LLVM_READONLY
526 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
527 
528 LLVM_READONLY
529 int getMUBUFElements(unsigned Opc);
530 
531 LLVM_READONLY
532 bool getMUBUFHasVAddr(unsigned Opc);
533 
534 LLVM_READONLY
535 bool getMUBUFHasSrsrc(unsigned Opc);
536 
537 LLVM_READONLY
538 bool getMUBUFHasSoffset(unsigned Opc);
539 
540 LLVM_READONLY
541 bool getMUBUFIsBufferInv(unsigned Opc);
542 
543 LLVM_READONLY
544 bool getMUBUFTfe(unsigned Opc);
545 
546 LLVM_READONLY
547 bool getSMEMIsBuffer(unsigned Opc);
548 
549 LLVM_READONLY
550 bool getVOP1IsSingle(unsigned Opc);
551 
552 LLVM_READONLY
553 bool getVOP2IsSingle(unsigned Opc);
554 
555 LLVM_READONLY
556 bool getVOP3IsSingle(unsigned Opc);
557 
558 LLVM_READONLY
559 bool isVOPC64DPP(unsigned Opc);
560 
561 LLVM_READONLY
562 bool isVOPCAsmOnly(unsigned Opc);
563 
564 /// Returns true if MAI operation is a double precision GEMM.
565 LLVM_READONLY
566 bool getMAIIsDGEMM(unsigned Opc);
567 
568 LLVM_READONLY
569 bool getMAIIsGFX940XDL(unsigned Opc);
570 
// Result type of getCanBeVOPD(), declared below.
//
// NOTE(review): presumably X / Y say whether the queried opcode may be used
// as the X / Y component of a VOPD instruction -- confirm in the .cpp.
struct CanBeVOPD {
  bool X;
  bool Y;
};
575 
576 /// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
577 LLVM_READONLY
578 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
579 
580 LLVM_READONLY
581 CanBeVOPD getCanBeVOPD(unsigned Opc);
582 
583 LLVM_READONLY
584 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
585                                                   uint8_t NumComponents,
586                                                   uint8_t NumFormat,
587                                                   const MCSubtargetInfo &STI);
588 LLVM_READONLY
589 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
590                                                   const MCSubtargetInfo &STI);
591 
592 LLVM_READONLY
593 int getMCOpcode(uint16_t Opcode, unsigned Gen);
594 
595 LLVM_READONLY
596 unsigned getVOPDOpcode(unsigned Opc);
597 
598 LLVM_READONLY
599 int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily);
600 
601 LLVM_READONLY
602 bool isVOPD(unsigned Opc);
603 
604 LLVM_READNONE
605 bool isMAC(unsigned Opc);
606 
607 LLVM_READNONE
608 bool isPermlane16(unsigned Opc);
609 
610 LLVM_READNONE
611 bool isGenericAtomic(unsigned Opc);
612 
613 LLVM_READNONE
614 bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
615 
616 namespace VOPD {
617 
// Indices of the operands of a VOPD component, followed by operand counts.
enum Component : unsigned {
  // Component operand indices.
  DST = 0,
  SRC0 = 1,
  SRC1 = 2,
  SRC2 = 3,

  // Operand counts.
  DST_NUM = 1,
  MAX_SRC_NUM = 3,
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};
628 
// For each VOPD component operand, the LSB mask selecting its VGPR bank.
// A mask of 3 sets the 2 lower bits, which corresponds to 4 banks.
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};

// Indices of the two components of a VOPD instruction.
enum ComponentIndex : unsigned {
  X = 0,
  Y = 1
};
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
// Number of entries in COMPONENTS.
constexpr unsigned COMPONENTS_NUM = sizeof(COMPONENTS) / sizeof(COMPONENTS[0]);
636 
637 // Properties of VOPD components.
638 class ComponentProps {
639 private:
640   unsigned SrcOperandsNum = 0;
641   unsigned MandatoryLiteralIdx = ~0u;
642   bool HasSrc2Acc = false;
643 
644 public:
645   ComponentProps() = default;
646   ComponentProps(const MCInstrDesc &OpDesc);
647 
648   // Return the total number of src operands this component has.
getCompSrcOperandsNum()649   unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
650 
651   // Return the number of src operands of this component visible to the parser.
getCompParsedSrcOperandsNum()652   unsigned getCompParsedSrcOperandsNum() const {
653     return SrcOperandsNum - HasSrc2Acc;
654   }
655 
656   // Return true iif this component has a mandatory literal.
hasMandatoryLiteral()657   bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
658 
659   // If this component has a mandatory literal, return component operand
660   // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
getMandatoryLiteralCompOperandIndex()661   unsigned getMandatoryLiteralCompOperandIndex() const {
662     assert(hasMandatoryLiteral());
663     return MandatoryLiteralIdx;
664   }
665 
666   // Return true iif this component has operand
667   // with component index CompSrcIdx and this operand may be a register.
hasRegSrcOperand(unsigned CompSrcIdx)668   bool hasRegSrcOperand(unsigned CompSrcIdx) const {
669     assert(CompSrcIdx < Component::MAX_SRC_NUM);
670     return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
671   }
672 
673   // Return true iif this component has tied src2.
hasSrc2Acc()674   bool hasSrc2Acc() const { return HasSrc2Acc; }
675 
676 private:
hasMandatoryLiteralAt(unsigned CompSrcIdx)677   bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
678     assert(CompSrcIdx < Component::MAX_SRC_NUM);
679     return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
680   }
681 };
682 
// The role an instruction plays with respect to VOPD.
enum ComponentKind : unsigned {
  // A single VOP1 or VOP2 instruction which may be used in VOPD.
  SINGLE = 0,
  // A VOPD instruction, X component.
  COMPONENT_X = 1,
  // A VOPD instruction, Y component.
  COMPONENT_Y = 2,
  MAX = COMPONENT_Y
};
689 
690 // Interface functions of this class map VOPD component operand indices
691 // to indices of operands in MachineInstr/MCInst or parsed operands array.
692 //
693 // Note that this class operates with 3 kinds of indices:
694 // - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
695 // - MC operand indices (they refer operands in a MachineInstr/MCInst);
696 // - parsed operand indices (they refer operands in parsed operands array).
697 //
698 // For SINGLE components mapping between these indices is trivial.
699 // But things get more complicated for COMPONENT_X and
700 // COMPONENT_Y because these components share the same
701 // MachineInstr/MCInst and the same parsed operands array.
702 // Below is an example of component operand to parsed operand
703 // mapping for the following instruction:
704 //
705 //   v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
706 //
707 //                          PARSED        COMPONENT         PARSED
708 // COMPONENT               OPERANDS     OPERAND INDEX    OPERAND INDEX
709 // -------------------------------------------------------------------
710 //                     "v_dual_add_f32"                        0
711 // v_dual_add_f32            v255          0 (DST)    -->      1
712 //                           v4            1 (SRC0)   -->      2
713 //                           v5            2 (SRC1)   -->      3
714 //                          "::"                               4
715 //                     "v_dual_mov_b32"                        5
716 // v_dual_mov_b32            v6            0 (DST)    -->      6
717 //                           v1            1 (SRC0)   -->      7
718 // -------------------------------------------------------------------
719 //
720 class ComponentLayout {
721 private:
722   // Regular MachineInstr/MCInst operands are ordered as follows:
723   //   dst, src0 [, other src operands]
724   // VOPD MachineInstr/MCInst operands are ordered as follows:
725   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
726   // Each ComponentKind has operand indices defined below.
727   static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};
728   static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */};
729 
730   // Parsed operands of regular instructions are ordered as follows:
731   //   Mnemo dst src0 [vsrc1 ...]
732   // Parsed VOPD operands are ordered as follows:
733   //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
734   //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
735   // Each ComponentKind has operand indices defined below.
736   static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
737                                                 4 /* + OpX.ParsedSrcNum */};
738   static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
739       2, 2, 5 /* + OpX.ParsedSrcNum */};
740 
741 private:
742   const ComponentKind Kind;
743   const ComponentProps PrevComp;
744 
745 public:
746   // Create layout for COMPONENT_X or SINGLE component.
ComponentLayout(ComponentKind Kind)747   ComponentLayout(ComponentKind Kind) : Kind(Kind) {
748     assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
749   }
750 
751   // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
ComponentLayout(const ComponentProps & OpXProps)752   ComponentLayout(const ComponentProps &OpXProps)
753       : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {}
754 
755 public:
756   // Return the index of dst operand in MCInst operands.
getIndexOfDstInMCOperands()757   unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
758 
759   // Return the index of the specified src operand in MCInst operands.
getIndexOfSrcInMCOperands(unsigned CompSrcIdx)760   unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const {
761     assert(CompSrcIdx < Component::MAX_SRC_NUM);
762     return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx;
763   }
764 
765   // Return the index of dst operand in the parsed operands array.
getIndexOfDstInParsedOperands()766   unsigned getIndexOfDstInParsedOperands() const {
767     return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
768   }
769 
770   // Return the index of the specified src operand in the parsed operands array.
getIndexOfSrcInParsedOperands(unsigned CompSrcIdx)771   unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
772     assert(CompSrcIdx < Component::MAX_SRC_NUM);
773     return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
774   }
775 
776 private:
getPrevCompSrcNum()777   unsigned getPrevCompSrcNum() const {
778     return PrevComp.getCompSrcOperandsNum();
779   }
getPrevCompParsedSrcNum()780   unsigned getPrevCompParsedSrcNum() const {
781     return PrevComp.getCompParsedSrcOperandsNum();
782   }
783 };
784 
// Layout and properties of VOPD components.
//
// Combines the operand index mapping of ComponentLayout with the operand
// properties of ComponentProps for one component.
class ComponentInfo : public ComponentLayout, public ComponentProps {
public:
  // Create ComponentInfo for COMPONENT_X or SINGLE component.
  // This constructor is not explicit: InstInfo list-initializes its
  // ComponentInfo array directly from MCInstrDesc references, which needs the
  // implicit conversion (with Kind defaulting to SINGLE).
  ComponentInfo(const MCInstrDesc &OpDesc,
                ComponentKind Kind = ComponentKind::SINGLE)
      : ComponentLayout(Kind), ComponentProps(OpDesc) {}

  // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
  // \p OpXProps are the properties of the X component; they are forwarded to
  // the layout, while \p OpDesc describes this (Y) component itself.
  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps)
      : ComponentLayout(OpXProps), ComponentProps(OpDesc) {}

  // Map component operand index to parsed operand index.
  // Return 0 if the specified operand does not exist.
  unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
};
801 
802 // Properties of VOPD instructions.
803 class InstInfo {
804 private:
805   const ComponentInfo CompInfo[COMPONENTS_NUM];
806 
807 public:
808   using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;
809 
InstInfo(const MCInstrDesc & OpX,const MCInstrDesc & OpY)810   InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
811       : CompInfo{OpX, OpY} {}
812 
InstInfo(const ComponentInfo & OprInfoX,const ComponentInfo & OprInfoY)813   InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
814       : CompInfo{OprInfoX, OprInfoY} {}
815 
816   const ComponentInfo &operator[](size_t ComponentIdx) const {
817     assert(ComponentIdx < COMPONENTS_NUM);
818     return CompInfo[ComponentIdx];
819   }
820 
821   // Check VOPD operands constraints.
822   // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
823   // for the specified component and MC operand. The callback must return 0
824   // if the operand is not a register or not a VGPR.
825   // If \p SkipSrc is set to true then constraints for source operands are not
826   // checked.
827   bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
828                          bool SkipSrc = false) const {
829     return getInvalidCompOperandIndex(GetRegIdx, SkipSrc).has_value();
830   }
831 
832   // Check VOPD operands constraints.
833   // Return the index of an invalid component operand, if any.
834   // If \p SkipSrc is set to true then constraints for source operands are not
835   // checked.
836   std::optional<unsigned> getInvalidCompOperandIndex(
837       std::function<unsigned(unsigned, unsigned)> GetRegIdx,
838       bool SkipSrc = false) const;
839 
840 private:
841   RegIndices
842   getRegIndices(unsigned ComponentIdx,
843                 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
844 };
845 
846 } // namespace VOPD
847 
848 LLVM_READONLY
849 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
850 
851 LLVM_READONLY
852 // Get properties of 2 single VOP1/VOP2 instructions
853 // used as components to create a VOPD instruction.
854 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
855 
856 LLVM_READONLY
857 // Get properties of VOPD X and Y components.
858 VOPD::InstInfo
859 getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo);
860 
861 LLVM_READONLY
862 bool isTrue16Inst(unsigned Opc);
863 
864 LLVM_READONLY
865 bool isInvalidSingleUseConsumerInst(unsigned Opc);
866 
867 LLVM_READONLY
868 bool isInvalidSingleUseProducerInst(unsigned Opc);
869 
870 LLVM_READONLY
871 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
872 
873 LLVM_READONLY
874 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
875 
876 void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
877                                const MCSubtargetInfo *STI);
878 
879 bool isGroupSegment(const GlobalValue *GV);
880 bool isGlobalSegment(const GlobalValue *GV);
881 bool isReadOnlySegment(const GlobalValue *GV);
882 
883 /// \returns True if constants should be emitted to .text section for given
884 /// target triple \p TT, false otherwise.
885 bool shouldEmitConstantsToTextSection(const Triple &TT);
886 
887 /// \returns Integer value requested using \p F's \p Name attribute.
888 ///
889 /// \returns \p Default if attribute is not present.
890 ///
891 /// \returns \p Default and emits error if requested value cannot be converted
892 /// to integer.
893 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
894 
895 /// \returns A pair of integer values requested using \p F's \p Name attribute
896 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
897 /// is false).
898 ///
899 /// \returns \p Default if attribute is not present.
900 ///
901 /// \returns \p Default and emits error if one of the requested values cannot be
902 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
903 /// not present.
904 std::pair<unsigned, unsigned>
905 getIntegerPairAttribute(const Function &F, StringRef Name,
906                         std::pair<unsigned, unsigned> Default,
907                         bool OnlyFirstRequired = false);
908 
/// \returns A vector of integer values requested using \p F's \p Name
/// attribute. The attribute is expected to hold exactly \p Size (>2)
/// integers.
///
/// NOTE: The result is a vector, not a boolean. What is returned when the
/// attribute is missing or malformed is determined by the implementation.
916 SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
917                                              unsigned Size);
918 
/// Set of counter values that an s_waitcnt instruction should wait for.
///
/// A counter holding a large value (the all-ones default in particular)
/// stands for a "don't care" wait on that counter.
struct Waitcnt {
  unsigned LoadCnt = ~0u;   // Named Vmcnt prior to gfx12.
  unsigned ExpCnt = ~0u;
  unsigned DsCnt = ~0u;     // Named LGKMcnt prior to gfx12.
  unsigned StoreCnt = ~0u;  // Named VScnt on gfx10/gfx11.
  unsigned SampleCnt = ~0u; // gfx12+ only.
  unsigned BvhCnt = ~0u;    // gfx12+ only.
  unsigned KmCnt = ~0u;     // gfx12+ only.

  Waitcnt() = default;

  // Pre-gfx12 constructor: only the four legacy counters are supplied, the
  // gfx12+ counters are left at "don't care".
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : Waitcnt(VmCnt, ExpCnt, LgkmCnt, VsCnt, ~0u, ~0u, ~0u) {}

  // gfx12+ constructor: every counter is supplied explicitly.
  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt)
      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt) {}

  // True if any counter differs from the all-ones "don't care" value.
  bool hasWait() const { return hasWaitStoreCnt() || hasWaitExceptStoreCnt(); }

  // Same as hasWait(), but StoreCnt is left out of the check.
  bool hasWaitExceptStoreCnt() const {
    const bool NothingRequested = LoadCnt == ~0u && ExpCnt == ~0u &&
                                  DsCnt == ~0u && SampleCnt == ~0u &&
                                  BvhCnt == ~0u && KmCnt == ~0u;
    return !NothingRequested;
  }

  // True if StoreCnt differs from the all-ones "don't care" value.
  bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }

  // Merge two waits by taking, for every counter, the smaller of the two
  // values.
  Waitcnt combined(const Waitcnt &Other) const {
    // Does the right thing provided self and Other are either both pre-gfx12
    // or both gfx12+.
    Waitcnt Merged;
    Merged.LoadCnt = std::min(LoadCnt, Other.LoadCnt);
    Merged.ExpCnt = std::min(ExpCnt, Other.ExpCnt);
    Merged.DsCnt = std::min(DsCnt, Other.DsCnt);
    Merged.StoreCnt = std::min(StoreCnt, Other.StoreCnt);
    Merged.SampleCnt = std::min(SampleCnt, Other.SampleCnt);
    Merged.BvhCnt = std::min(BvhCnt, Other.BvhCnt);
    Merged.KmCnt = std::min(KmCnt, Other.KmCnt);
    return Merged;
  }
};
963 
964 // The following methods are only meaningful on targets that support
965 // S_WAITCNT.
966 
967 /// \returns Vmcnt bit mask for given isa \p Version.
968 unsigned getVmcntBitMask(const IsaVersion &Version);
969 
970 /// \returns Expcnt bit mask for given isa \p Version.
971 unsigned getExpcntBitMask(const IsaVersion &Version);
972 
973 /// \returns Lgkmcnt bit mask for given isa \p Version.
974 unsigned getLgkmcntBitMask(const IsaVersion &Version);
975 
976 /// \returns Waitcnt bit mask for given isa \p Version.
977 unsigned getWaitcntBitMask(const IsaVersion &Version);
978 
979 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
980 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
981 
982 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
983 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
984 
985 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
986 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
987 
988 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
989 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
990 /// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
991 /// which needs it is deprecated
992 ///
993 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
994 ///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
995 ///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
996 ///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
997 ///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
998 ///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
999 ///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
1000 ///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
1001 ///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
1002 ///
1003 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
1004                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
1005 
1006 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
1007 
1008 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
1009 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1010                      unsigned Vmcnt);
1011 
1012 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
1013 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1014                       unsigned Expcnt);
1015 
1016 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
1017 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1018                        unsigned Lgkmcnt);
1019 
1020 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
1021 /// \p Version. Should not be used on gfx12+, the instruction which needs
1022 /// it is deprecated
1023 ///
1024 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
1025 ///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
1026 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
1027 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
1028 ///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
1029 ///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11)
1030 ///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
1031 ///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
1032 ///     Waitcnt[15:10] = \p Vmcnt       (gfx11)
1033 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
1034 ///
1035 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
1036 /// isa \p Version.
1037 ///
1038 unsigned encodeWaitcnt(const IsaVersion &Version,
1039                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
1040 
1041 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
1042 
1043 // The following methods are only meaningful on targets that support
1044 // S_WAIT_*CNT, introduced with gfx12.
1045 
1046 /// \returns Loadcnt bit mask for given isa \p Version.
1047 /// Returns 0 for versions that do not support LOADcnt
1048 unsigned getLoadcntBitMask(const IsaVersion &Version);
1049 
1050 /// \returns Samplecnt bit mask for given isa \p Version.
1051 /// Returns 0 for versions that do not support SAMPLEcnt
1052 unsigned getSamplecntBitMask(const IsaVersion &Version);
1053 
1054 /// \returns Bvhcnt bit mask for given isa \p Version.
1055 /// Returns 0 for versions that do not support BVHcnt
1056 unsigned getBvhcntBitMask(const IsaVersion &Version);
1057 
1058 /// \returns Dscnt bit mask for given isa \p Version.
1059 /// Returns 0 for versions that do not support DScnt
1060 unsigned getDscntBitMask(const IsaVersion &Version);
1061 
/// \returns Kmcnt bit mask for given isa \p Version.
1063 /// Returns 0 for versions that do not support KMcnt
1064 unsigned getKmcntBitMask(const IsaVersion &Version);
1065 
1066 /// \return STOREcnt or VScnt bit mask for given isa \p Version.
1067 /// returns 0 for versions that do not support STOREcnt or VScnt.
1068 /// STOREcnt and VScnt are the same counter, the name used
1069 /// depends on the ISA version.
1070 unsigned getStorecntBitMask(const IsaVersion &Version);
1071 
1072 // The following are only meaningful on targets that support
1073 // S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1074 
1075 /// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1076 /// isa \p Version.
1077 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1078 
1079 /// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1080 /// isa \p Version.
1081 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1082 
1083 /// \returns \p Loadcnt and \p Dscnt components of \p Decoded  encoded as an
1084 /// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1085 /// \p Version.
1086 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1087 
1088 /// \returns \p Storecnt and \p Dscnt components of \p Decoded  encoded as an
1089 /// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1090 /// \p Version.
1091 unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1092 
1093 namespace Hwreg {
1094 
1095 using HwregId = EncodingField<5, 0>;
1096 using HwregOffset = EncodingField<10, 6>;
1097 
1098 struct HwregSize : EncodingField<15, 11, 32> {
1099   using EncodingField::EncodingField;
encodeHwregSize1100   constexpr uint64_t encode() const { return Value - 1; }
decodeHwregSize1101   static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
1102 };
1103 
1104 using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;
1105 
1106 } // namespace Hwreg
1107 
1108 namespace DepCtr {
1109 
1110 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
1111 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1112                  const MCSubtargetInfo &STI);
1113 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1114                               const MCSubtargetInfo &STI);
1115 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1116                   bool &IsDefault, const MCSubtargetInfo &STI);
1117 
1118 /// \returns Decoded VaVdst from given immediate \p Encoded.
1119 unsigned decodeFieldVaVdst(unsigned Encoded);
1120 
1121 /// \returns Decoded VmVsrc from given immediate \p Encoded.
1122 unsigned decodeFieldVmVsrc(unsigned Encoded);
1123 
1124 /// \returns Decoded SaSdst from given immediate \p Encoded.
1125 unsigned decodeFieldSaSdst(unsigned Encoded);
1126 
1127 /// \returns \p VmVsrc as an encoded Depctr immediate.
1128 unsigned encodeFieldVmVsrc(unsigned VmVsrc);
1129 
1130 /// \returns \p Encoded combined with encoded \p VmVsrc.
1131 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
1132 
1133 /// \returns \p VaVdst as an encoded Depctr immediate.
1134 unsigned encodeFieldVaVdst(unsigned VaVdst);
1135 
1136 /// \returns \p Encoded combined with encoded \p VaVdst.
1137 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1138 
1139 /// \returns \p SaSdst as an encoded Depctr immediate.
1140 unsigned encodeFieldSaSdst(unsigned SaSdst);
1141 
1142 /// \returns \p Encoded combined with encoded \p SaSdst.
1143 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
1144 
1145 } // namespace DepCtr
1146 
1147 namespace Exp {
1148 
1149 bool getTgtName(unsigned Id, StringRef &Name, int &Index);
1150 
1151 LLVM_READONLY
1152 unsigned getTgtId(const StringRef Name);
1153 
1154 LLVM_READNONE
1155 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
1156 
1157 } // namespace Exp
1158 
1159 namespace MTBUFFormat {
1160 
1161 LLVM_READNONE
1162 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
1163 
1164 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
1165 
1166 int64_t getDfmt(const StringRef Name);
1167 
1168 StringRef getDfmtName(unsigned Id);
1169 
1170 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
1171 
1172 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
1173 
1174 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
1175 
1176 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
1177 
1178 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
1179 
1180 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
1181 
1182 bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
1183 
1184 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1185                              const MCSubtargetInfo &STI);
1186 
1187 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
1188 
1189 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
1190 
1191 } // namespace MTBUFFormat
1192 
1193 namespace SendMsg {
1194 
1195 LLVM_READNONE
1196 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
1197 
1198 LLVM_READNONE
1199 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1200                   bool Strict = true);
1201 
1202 LLVM_READNONE
1203 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1204                       const MCSubtargetInfo &STI, bool Strict = true);
1205 
1206 LLVM_READNONE
1207 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
1208 
1209 LLVM_READNONE
1210 bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1211 
1212 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1213                uint16_t &StreamId, const MCSubtargetInfo &STI);
1214 
1215 LLVM_READNONE
1216 uint64_t encodeMsg(uint64_t MsgId,
1217                    uint64_t OpId,
1218                    uint64_t StreamId);
1219 
1220 } // namespace SendMsg
1221 
1222 
1223 unsigned getInitialPSInputAddr(const Function &F);
1224 
1225 bool getHasColorExport(const Function &F);
1226 
1227 bool getHasDepthExport(const Function &F);
1228 
1229 LLVM_READNONE
1230 bool isShader(CallingConv::ID CC);
1231 
1232 LLVM_READNONE
1233 bool isGraphics(CallingConv::ID CC);
1234 
1235 LLVM_READNONE
1236 bool isCompute(CallingConv::ID CC);
1237 
1238 LLVM_READNONE
1239 bool isEntryFunctionCC(CallingConv::ID CC);
1240 
1241 // These functions are considered entrypoints into the current module, i.e. they
1242 // are allowed to be called from outside the current module. This is different
1243 // from isEntryFunctionCC, which is only true for functions that are entered by
1244 // the hardware. Module entry points include all entry functions but also
1245 // include functions that can be called from other functions inside or outside
1246 // the current module. Module entry functions are allowed to allocate LDS.
1247 LLVM_READNONE
1248 bool isModuleEntryFunctionCC(CallingConv::ID CC);
1249 
1250 LLVM_READNONE
1251 bool isChainCC(CallingConv::ID CC);
1252 
1253 bool isKernelCC(const Function *Func);
1254 
1255 // FIXME: Remove this when calling conventions cleaned up
1256 LLVM_READNONE
isKernel(CallingConv::ID CC)1257 inline bool isKernel(CallingConv::ID CC) {
1258   switch (CC) {
1259   case CallingConv::AMDGPU_KERNEL:
1260   case CallingConv::SPIR_KERNEL:
1261     return true;
1262   default:
1263     return false;
1264   }
1265 }
1266 
1267 bool hasXNACK(const MCSubtargetInfo &STI);
1268 bool hasSRAMECC(const MCSubtargetInfo &STI);
1269 bool hasMIMG_R128(const MCSubtargetInfo &STI);
1270 bool hasA16(const MCSubtargetInfo &STI);
1271 bool hasG16(const MCSubtargetInfo &STI);
1272 bool hasPackedD16(const MCSubtargetInfo &STI);
1273 bool hasGDS(const MCSubtargetInfo &STI);
1274 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1275 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1276 
1277 bool isSI(const MCSubtargetInfo &STI);
1278 bool isCI(const MCSubtargetInfo &STI);
1279 bool isVI(const MCSubtargetInfo &STI);
1280 bool isGFX9(const MCSubtargetInfo &STI);
1281 bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1282 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1283 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1284 bool isGFX8Plus(const MCSubtargetInfo &STI);
1285 bool isGFX9Plus(const MCSubtargetInfo &STI);
1286 bool isNotGFX9Plus(const MCSubtargetInfo &STI);
1287 bool isGFX10(const MCSubtargetInfo &STI);
1288 bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1289 bool isGFX10Plus(const MCSubtargetInfo &STI);
1290 bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1291 bool isGFX10Before1030(const MCSubtargetInfo &STI);
1292 bool isGFX11(const MCSubtargetInfo &STI);
1293 bool isGFX11Plus(const MCSubtargetInfo &STI);
1294 bool isGFX12(const MCSubtargetInfo &STI);
1295 bool isGFX12Plus(const MCSubtargetInfo &STI);
1296 bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1297 bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1298 bool isGCN3Encoding(const MCSubtargetInfo &STI);
1299 bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1300 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1301 bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1302 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1303 bool isGFX90A(const MCSubtargetInfo &STI);
1304 bool isGFX940(const MCSubtargetInfo &STI);
1305 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
1306 bool hasMAIInsts(const MCSubtargetInfo &STI);
1307 bool hasVOPD(const MCSubtargetInfo &STI);
1308 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1309 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1310 unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1311 bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
1312 
/// \returns true if \p Reg is a scalar register.
1314 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
1315 
/// \returns true if \p Reg occupies the high 16-bits of a 32-bit register.
/// The bit indicating isHi is the LSB of the encoding.
1318 bool isHi(unsigned Reg, const MCRegisterInfo &MRI);
1319 
1320 /// If \p Reg is a pseudo reg, return the correct hardware register given
1321 /// \p STI otherwise return \p Reg.
1322 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
1323 
1324 /// Convert hardware register \p Reg to a pseudo register
1325 LLVM_READNONE
1326 unsigned mc2PseudoReg(unsigned Reg);
1327 
1328 LLVM_READNONE
1329 bool isInlineValue(unsigned Reg);
1330 
1331 /// Is this an AMDGPU specific source operand? These include registers,
1332 /// inline constants, literals and mandatory literals (KImm).
1333 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
1334 
1335 /// Is this a KImm operand?
1336 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1337 
/// Is this a floating-point operand?
1339 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1340 
1341 /// Does this operand support only inlinable literals?
1342 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1343 
1344 /// Get the size in bits of a register from the register class \p RC.
1345 unsigned getRegBitWidth(unsigned RCID);
1346 
1347 /// Get the size in bits of a register from the register class \p RC.
1348 unsigned getRegBitWidth(const MCRegisterClass &RC);
1349 
1350 /// Get size of register operand
1351 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
1352                            unsigned OpNo);
1353 
1354 LLVM_READNONE
getOperandSize(const MCOperandInfo & OpInfo)1355 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1356   switch (OpInfo.OperandType) {
1357   case AMDGPU::OPERAND_REG_IMM_INT32:
1358   case AMDGPU::OPERAND_REG_IMM_FP32:
1359   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1360   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1361   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1362   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1363   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1364   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1365   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1366   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1367   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1368   case AMDGPU::OPERAND_KIMM32:
1369   case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1370   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1371     return 4;
1372 
1373   case AMDGPU::OPERAND_REG_IMM_INT64:
1374   case AMDGPU::OPERAND_REG_IMM_FP64:
1375   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1376   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1377   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1378     return 8;
1379 
1380   case AMDGPU::OPERAND_REG_IMM_INT16:
1381   case AMDGPU::OPERAND_REG_IMM_BF16:
1382   case AMDGPU::OPERAND_REG_IMM_FP16:
1383   case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
1384   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1385   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1386   case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1387   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1388   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1389   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1390   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1391   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1392   case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
1393   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1394   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1395   case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
1396   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1397   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1398   case AMDGPU::OPERAND_REG_IMM_V2BF16:
1399   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1400     return 2;
1401 
1402   default:
1403     llvm_unreachable("unhandled operand type");
1404   }
1405 }
1406 
1407 LLVM_READNONE
getOperandSize(const MCInstrDesc & Desc,unsigned OpNo)1408 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1409   return getOperandSize(Desc.operands()[OpNo]);
1410 }
1411 
1412 /// Is this literal inlinable, and not one of the values intended for floating
1413 /// point values.
1414 LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // Inlinable integer literals form the closed range [-16, 64].
  if (Literal < -16)
    return false;
  return Literal <= 64;
}
1418 
1419 /// Is this literal inlinable
1420 LLVM_READNONE
1421 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1422 
1423 LLVM_READNONE
1424 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1425 
1426 LLVM_READNONE
1427 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1428 
1429 LLVM_READNONE
1430 bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1431 
1432 LLVM_READNONE
1433 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1434 
1435 LLVM_READNONE
1436 bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
1437 
1438 LLVM_READNONE
1439 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
1440 
1441 LLVM_READNONE
1442 std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
1443 
1444 LLVM_READNONE
1445 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
1446 
1447 LLVM_READNONE
1448 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
1449 
1450 LLVM_READNONE
1451 bool isInlinableLiteralV2I16(uint32_t Literal);
1452 
1453 LLVM_READNONE
1454 bool isInlinableLiteralV2BF16(uint32_t Literal);
1455 
1456 LLVM_READNONE
1457 bool isInlinableLiteralV2F16(uint32_t Literal);
1458 
1459 LLVM_READNONE
1460 bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1461 
1462 bool isArgPassedInSGPR(const Argument *Arg);
1463 
1464 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
1465 
1466 LLVM_READONLY
1467 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
1468                                       int64_t EncodedOffset);
1469 
1470 LLVM_READONLY
1471 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
1472                                     int64_t EncodedOffset,
1473                                     bool IsBuffer);
1474 
1475 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
1476 /// offsets.
1477 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
1478 
1479 /// \returns The encoding that will be used for \p ByteOffset in the
1480 /// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
1481 /// S_LOAD instructions have a signed offset, on other subtargets it is
1482 /// unsigned. S_BUFFER has an unsigned offset for all subtargets.
1483 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1484                                             int64_t ByteOffset, bool IsBuffer,
1485                                             bool HasSOffset = false);
1486 
/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
1489 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1490                                                      int64_t ByteOffset);
1491 
1492 /// For pre-GFX12 FLAT instructions the offset must be positive;
1493 /// MSB is ignored and forced to zero.
1494 ///
1495 /// \return The number of bits available for the signed offset field in flat
1496 /// instructions. Note that some forms of the instruction disallow negative
1497 /// offsets.
1498 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
1499 
1500 /// \returns true if this offset is small enough to fit in the SMRD
1501 /// offset field.  \p ByteOffset should be the offset in bytes and
1502 /// not the encoded offset.
1503 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1504 
1505 LLVM_READNONE
isLegalDPALU_DPPControl(unsigned DC)1506 inline bool isLegalDPALU_DPPControl(unsigned DC) {
1507   return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1508 }
1509 
1510 /// \returns true if an instruction may have a 64-bit VGPR operand.
1511 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);
1512 
1513 /// \returns true if an instruction is a DP ALU DPP.
1514 bool isDPALU_DPP(const MCInstrDesc &OpDesc);
1515 
1516 /// \returns true if the intrinsic is divergent
1517 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1518 
1519 /// \returns true if the intrinsic is uniform
1520 bool isIntrinsicAlwaysUniform(unsigned IntrID);
1521 
/// \returns lds block size in terms of dwords.
/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
/// must be defined in terms of bytes.
1525 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1526 
1527 } // end namespace AMDGPU
1528 
1529 raw_ostream &operator<<(raw_ostream &OS,
1530                         const AMDGPU::IsaInfo::TargetIDSetting S);
1531 
1532 } // end namespace llvm
1533 
1534 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1535