1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12 #include "AMDGPUSubtarget.h"
13 #include "SIDefines.h"
14 #include "llvm/IR/CallingConv.h"
15 #include "llvm/IR/InstrTypes.h"
16 #include "llvm/IR/Module.h"
17 #include "llvm/Support/Alignment.h"
18 #include <array>
19 #include <functional>
20 #include <utility>
21
22 // Pull in OpName enum definition and getNamedOperandIdx() declaration.
23 #define GET_INSTRINFO_OPERAND_ENUM
24 #include "AMDGPUGenInstrInfo.inc"
25
26 struct amd_kernel_code_t;
27
28 namespace llvm {
29
30 struct Align;
31 class Argument;
32 class Function;
33 class GlobalValue;
34 class MCInstrInfo;
35 class MCRegisterClass;
36 class MCRegisterInfo;
37 class MCSubtargetInfo;
38 class StringRef;
39 class Triple;
40 class raw_ostream;
41
42 namespace AMDGPU {
43
44 struct AMDGPUMCKernelCodeT;
45 struct IsaVersion;
46
47 /// Generic target versions emitted by this version of LLVM.
48 ///
49 /// These numbers are incremented every time a codegen breaking change occurs
50 /// within a generic family.
namespace GenericVersion {
// Every generic family is currently at its first (and only) version.
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX9_4 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX12 = 1;
} // namespace GenericVersion
59
// Supported AMDHSA code object versions; enumerator values equal the version.
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
61
// Small floating-point format classification: none, 4-bit, or 8-bit FP.
enum class FPType { None, FP4, FP8 };
63
64 /// \returns True if \p STI is AMDHSA.
65 bool isHsaAbi(const MCSubtargetInfo &STI);
66
67 /// \returns Code object version from the IR module flag.
68 unsigned getAMDHSACodeObjectVersion(const Module &M);
69
70 /// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
71 unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
72
73 /// \returns The default HSA code object version. This should only be used when
74 /// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
75 /// flag or a .amdhsa_code_object_version directive)
76 unsigned getDefaultAMDHSACodeObjectVersion();
77
78 /// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
79 /// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
80 uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
81
82 /// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
83 unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
84
85 /// \returns The offset of the hostcall pointer argument from implicitarg_ptr
86 unsigned getHostcallImplicitArgPosition(unsigned COV);
87
/// \returns The offset of the default queue argument from implicitarg_ptr.
unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
/// \returns The offset of the completion action argument from implicitarg_ptr.
unsigned getCompletionActionImplicitArgPosition(unsigned COV);
90
// Searchable-table entry describing one GCN buffer (tbuffer) data format.
struct GcnBufferFormatInfo {
  unsigned Format;        // Format code used for lookup (see getGcnBufferFormatInfo).
  unsigned BitsPerComp;   // Bits per component.
  unsigned NumComponents; // Number of components.
  unsigned NumFormat;     // Hardware numeric-format encoding.
  unsigned DataFormat;    // Hardware data-format encoding.
};
98
// Searchable-table entry describing an MAI (matrix) instruction.
struct MAIInstInfo {
  uint16_t Opcode;
  bool is_dgemm;      // True for double-precision GEMM ops (see getMAIIsDGEMM).
  bool is_gfx940_xdl; // True for gfx940 XDL instructions.
};
104
// Relates an MFMA F8F6F4 opcode to its F8F8 counterpart, along with the
// register footprint of each scaled source operand.
struct MFMA_F8F6F4_Info {
  unsigned Opcode;
  unsigned F8F8Opcode;
  uint8_t NumRegsSrcA; // Registers occupied by source A.
  uint8_t NumRegsSrcB; // Registers occupied by source B.
};
111
// Searchable-table entry for scaled F32/F16 -> F8/F4 conversion opcodes.
struct CvtScaleF32_F32F16ToF8F4_Info {
  unsigned Opcode;
};
115
// Relates a True16 opcode to its Hi/Lo D16 counterparts
// (see the True16D16Table searchable table).
struct True16D16Info {
  unsigned T16Op;
  unsigned HiOp;
  unsigned LoOp;
};
121
122 #define GET_MIMGBaseOpcode_DECL
123 #define GET_MIMGDim_DECL
124 #define GET_MIMGEncoding_DECL
125 #define GET_MIMGLZMapping_DECL
126 #define GET_MIMGMIPMapping_DECL
127 #define GET_MIMGBiASMapping_DECL
128 #define GET_MAIInstInfoTable_DECL
129 #define GET_isMFMA_F8F6F4Table_DECL
130 #define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
131 #define GET_True16D16Table_DECL
132 #include "AMDGPUGenSearchableTables.inc"
133
134 namespace IsaInfo {
135
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // SGPRs reserved for trap handling (per the name; usage is not visible in
  // this header).
  TRAP_NUM_SGPRS = 16
};
142
// State of a target-ID feature (xnack / sramecc) as used by AMDGPUTargetID.
enum class TargetIDSetting { Unsupported, Any, Off, On };
144
145 class AMDGPUTargetID {
146 private:
147 const MCSubtargetInfo &STI;
148 TargetIDSetting XnackSetting;
149 TargetIDSetting SramEccSetting;
150
151 public:
152 explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
153 ~AMDGPUTargetID() = default;
154
155 /// \return True if the current xnack setting is not "Unsupported".
isXnackSupported()156 bool isXnackSupported() const {
157 return XnackSetting != TargetIDSetting::Unsupported;
158 }
159
160 /// \returns True if the current xnack setting is "On" or "Any".
isXnackOnOrAny()161 bool isXnackOnOrAny() const {
162 return XnackSetting == TargetIDSetting::On ||
163 XnackSetting == TargetIDSetting::Any;
164 }
165
166 /// \returns True if current xnack setting is "On" or "Off",
167 /// false otherwise.
isXnackOnOrOff()168 bool isXnackOnOrOff() const {
169 return getXnackSetting() == TargetIDSetting::On ||
170 getXnackSetting() == TargetIDSetting::Off;
171 }
172
173 /// \returns The current xnack TargetIDSetting, possible options are
174 /// "Unsupported", "Any", "Off", and "On".
getXnackSetting()175 TargetIDSetting getXnackSetting() const { return XnackSetting; }
176
177 /// Sets xnack setting to \p NewXnackSetting.
setXnackSetting(TargetIDSetting NewXnackSetting)178 void setXnackSetting(TargetIDSetting NewXnackSetting) {
179 XnackSetting = NewXnackSetting;
180 }
181
182 /// \return True if the current sramecc setting is not "Unsupported".
isSramEccSupported()183 bool isSramEccSupported() const {
184 return SramEccSetting != TargetIDSetting::Unsupported;
185 }
186
187 /// \returns True if the current sramecc setting is "On" or "Any".
isSramEccOnOrAny()188 bool isSramEccOnOrAny() const {
189 return SramEccSetting == TargetIDSetting::On ||
190 SramEccSetting == TargetIDSetting::Any;
191 }
192
193 /// \returns True if current sramecc setting is "On" or "Off",
194 /// false otherwise.
isSramEccOnOrOff()195 bool isSramEccOnOrOff() const {
196 return getSramEccSetting() == TargetIDSetting::On ||
197 getSramEccSetting() == TargetIDSetting::Off;
198 }
199
200 /// \returns The current sramecc TargetIDSetting, possible options are
201 /// "Unsupported", "Any", "Off", and "On".
getSramEccSetting()202 TargetIDSetting getSramEccSetting() const { return SramEccSetting; }
203
204 /// Sets sramecc setting to \p NewSramEccSetting.
setSramEccSetting(TargetIDSetting NewSramEccSetting)205 void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
206 SramEccSetting = NewSramEccSetting;
207 }
208
209 void setTargetIDFromFeaturesString(StringRef FS);
210 void setTargetIDFromTargetIDStream(StringRef TargetID);
211
212 /// \returns String representation of an object.
213 std::string toString() const;
214 };
215
216 /// \returns Wavefront size for given subtarget \p STI.
217 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
218
219 /// \returns Local memory size in bytes for given subtarget \p STI.
220 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
221
222 /// \returns Maximum addressable local memory size in bytes for given subtarget
223 /// \p STI.
224 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
225
226 /// \returns Number of execution units per compute unit for given subtarget \p
227 /// STI.
228 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
229
230 /// \returns Maximum number of work groups per compute unit for given subtarget
231 /// \p STI and limited by given \p FlatWorkGroupSize.
232 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
233 unsigned FlatWorkGroupSize);
234
235 /// \returns Minimum number of waves per execution unit for given subtarget \p
236 /// STI.
237 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
238
239 /// \returns Maximum number of waves per execution unit for given subtarget \p
240 /// STI without any kind of limitation.
241 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
242
243 /// \returns Number of waves per execution unit required to support the given \p
244 /// FlatWorkGroupSize.
245 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
246 unsigned FlatWorkGroupSize);
247
248 /// \returns Minimum flat work group size for given subtarget \p STI.
249 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
250
251 /// \returns Maximum flat work group size for given subtarget \p STI.
252 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
253
254 /// \returns Number of waves per work group for given subtarget \p STI and
255 /// \p FlatWorkGroupSize.
256 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
257 unsigned FlatWorkGroupSize);
258
259 /// \returns SGPR allocation granularity for given subtarget \p STI.
260 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
261
262 /// \returns SGPR encoding granularity for given subtarget \p STI.
263 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
264
265 /// \returns Total number of SGPRs for given subtarget \p STI.
266 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
267
268 /// \returns Addressable number of SGPRs for given subtarget \p STI.
269 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
270
271 /// \returns Minimum number of SGPRs that meets the given number of waves per
272 /// execution unit requirement for given subtarget \p STI.
273 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
274
275 /// \returns Maximum number of SGPRs that meets the given number of waves per
276 /// execution unit requirement for given subtarget \p STI.
277 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
278 bool Addressable);
279
280 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
281 /// STI when the given special registers are used.
282 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
283 bool FlatScrUsed, bool XNACKUsed);
284
285 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
286 /// STI when the given special registers are used. XNACK is inferred from
287 /// \p STI.
288 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
289 bool FlatScrUsed);
290
291 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
292 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
293 /// register counts.
294 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
295
296 /// \returns VGPR allocation granularity for given subtarget \p STI.
297 ///
298 /// For subtargets which support it, \p EnableWavefrontSize32 should match
299 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
300 unsigned
301 getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize,
302 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
303
304 /// \returns VGPR encoding granularity for given subtarget \p STI.
305 ///
306 /// For subtargets which support it, \p EnableWavefrontSize32 should match
307 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
308 unsigned getVGPREncodingGranule(
309 const MCSubtargetInfo *STI,
310 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
311
312 /// For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage,
313 /// returns the allocation granule for ArchVGPRs.
314 unsigned getArchVGPRAllocGranule();
315
316 /// \returns Total number of VGPRs for given subtarget \p STI.
317 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
318
319 /// \returns Addressable number of architectural VGPRs for a given subtarget \p
320 /// STI.
321 unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
322
323 /// \returns Addressable number of VGPRs for given subtarget \p STI.
324 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
325 unsigned DynamicVGPRBlockSize);
326
327 /// \returns Minimum number of VGPRs that meets given number of waves per
328 /// execution unit requirement for given subtarget \p STI.
329 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
330 unsigned DynamicVGPRBlockSize);
331
332 /// \returns Maximum number of VGPRs that meets given number of waves per
333 /// execution unit requirement for given subtarget \p STI.
334 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
335 unsigned DynamicVGPRBlockSize);
336
337 /// \returns Number of waves reachable for a given \p NumVGPRs usage for given
338 /// subtarget \p STI.
339 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
340 unsigned NumVGPRs,
341 unsigned DynamicVGPRBlockSize);
342
343 /// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
344 /// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
345 unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
346 unsigned MaxWaves,
347 unsigned TotalNumVGPRs);
348
349 /// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
350 /// Gen.
351 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
352 AMDGPUSubtarget::Generation Gen);
353
354 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
355 /// \p NumVGPRs are used. We actually return the number of blocks -1, since
356 /// that's what we encode.
357 ///
358 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
359 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
360 unsigned getEncodedNumVGPRBlocks(
361 const MCSubtargetInfo *STI, unsigned NumVGPRs,
362 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
363
364 /// \returns Number of VGPR blocks that need to be allocated for the given
365 /// subtarget \p STI when \p NumVGPRs are used.
366 unsigned getAllocatedNumVGPRBlocks(
367 const MCSubtargetInfo *STI, unsigned NumVGPRs,
368 unsigned DynamicVGPRBlockSize,
369 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
370
371 } // end namespace IsaInfo
372
373 // Represents a field in an encoded value.
// Represents a field in an encoded value.
//
// \tparam HighBit / LowBit  inclusive bit range the field occupies.
// \tparam D                 default raw value of the field.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");
  // Bit position of the field's least significant bit.
  static constexpr unsigned Offset = LowBit;
  // Number of bits the field occupies.
  static constexpr unsigned Width = HighBit - LowBit + 1;

  using ValueType = unsigned;
  static constexpr ValueType Default = D;

  ValueType Value;
  constexpr EncodingField(ValueType Value) : Value(Value) {}

  // \returns the raw field value; shifting into position is performed by the
  // EncodingFields helper.
  constexpr uint64_t encode() const { return Value; }
  // Made constexpr for symmetry with encode() so field decoding can also be
  // evaluated at compile time.
  static constexpr ValueType decode(uint64_t Encoded) { return Encoded; }
};
389
390 // Represents a single bit in an encoded value.
391 template <unsigned Bit, unsigned D = 0>
392 using EncodingBit = EncodingField<Bit, Bit, D>;
393
394 // A helper for encoding and decoding multiple fields.
// A helper for encoding and decoding multiple fields.
template <typename... Fields> struct EncodingFields {
  // OR together every field's raw value shifted into its bit position.
  static constexpr uint64_t encode(Fields... Values) {
    return ((Values.encode() << Values.Offset) | ...);
  }

  // Extract each field: shift its bits down to position 0 and mask to the
  // field's width, returning one tuple element per field.
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};
405
/// \returns True iff instruction \p Opcode has an operand identified by
/// \p NamedIdx, i.e. getNamedOperandIdx() yields a valid index rather than -1.
LLVM_READONLY
inline bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx) {
  return getNamedOperandIdx(Opcode, NamedIdx) != -1;
}
410
411 LLVM_READONLY
412 int getSOPPWithRelaxation(uint16_t Opcode);
413
// Searchable-table entry with properties shared by all variants of a MIMG
// base opcode.
// NOTE(review): field semantics below are inferred from the names; confirm
// against the MIMG TableGen definitions.
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;    // Image store operation.
  bool Atomic;   // Image atomic operation.
  bool AtomicX2; // Presumably a two-dword atomic — confirm.
  bool Sampler;  // Uses a sampler descriptor.
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;       // Takes explicit gradient (derivative) operands.
  bool G16;             // 16-bit gradients.
  bool Coordinates;     // Takes coordinate operands.
  bool LodOrClampOrMip; // Takes an LOD / clamp / mip operand.
  bool HasD16;
  bool MSAA;            // Multi-sampled image operation.
  bool BVH;             // BVH (ray-tracing) intersection operation.
  bool A16;             // 16-bit addressing.
  bool NoReturn;
  bool PointSampleAccel;
};
434
435 LLVM_READONLY
436 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
437
438 LLVM_READONLY
439 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
440
// Searchable-table entry describing one image dimension.
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;     // Coordinate operand count for this dimension.
  uint8_t NumGradients;  // Gradient operand count for this dimension.
  bool MSAA;             // Multi-sampled dimension.
  bool DA;               // NOTE(review): likely the "array" flag — confirm.
  uint8_t Encoding;      // Hardware dim encoding (see ...ByEncoding lookup).
  const char *AsmSuffix; // Assembly suffix (see ...ByAsmSuffix lookup).
};
450
451 LLVM_READONLY
452 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
453
454 LLVM_READONLY
455 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
456
457 LLVM_READONLY
458 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
459
// Pairs an L-variant MIMG base opcode with its LZ variant
// (see getMIMGLZMappingInfo).
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};
464
// Pairs a MIP-variant MIMG base opcode with its non-MIP variant
// (see getMIMGMIPMappingInfo).
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};
469
// Pairs a Bias-variant MIMG base opcode with its no-bias variant
// (see getMIMGBiasMappingInfo).
struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};
474
// Pairs an Offset-variant MIMG base opcode with its no-offset variant
// (see getMIMGOffsetMappingInfo).
struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};
479
// Pairs a G-variant MIMG base opcode with its G16 (16-bit gradient) variant
// (see getMIMGG16MappingInfo).
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};
484
485 LLVM_READONLY
486 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
487
// Pairs the 2-address and 3-address encodings of a WMMA opcode.
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
492
493 LLVM_READONLY
494 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
495
496 LLVM_READONLY
497 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
498
499 LLVM_READONLY
500 const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
501
502 LLVM_READONLY
503 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
504
505 LLVM_READONLY
506 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
507 unsigned VDataDwords, unsigned VAddrDwords);
508
509 LLVM_READONLY
510 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
511
512 LLVM_READONLY
513 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
514 const MIMGDimInfo *Dim, bool IsA16,
515 bool IsG16Supported);
516
// Searchable-table entry with per-instruction MIMG encoding details
// (keyed by the lookup parameters of getMIMGOpcode).
struct MIMGInfo {
  uint16_t Opcode;       // Target instruction opcode.
  uint16_t BaseOpcode;   // Shared base opcode (see MIMGBaseOpcodeInfo).
  uint8_t MIMGEncoding;  // Encoding family.
  uint8_t VDataDwords;   // Dwords of vdata.
  uint8_t VAddrDwords;   // Total dwords across address operands.
  uint8_t VAddrOperands; // Number of address operands.
};
525
526 LLVM_READONLY
527 const MIMGInfo *getMIMGInfo(unsigned Opc);
528
529 LLVM_READONLY
530 int getMTBUFBaseOpcode(unsigned Opc);
531
532 LLVM_READONLY
533 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
534
535 LLVM_READONLY
536 int getMTBUFElements(unsigned Opc);
537
538 LLVM_READONLY
539 bool getMTBUFHasVAddr(unsigned Opc);
540
541 LLVM_READONLY
542 bool getMTBUFHasSrsrc(unsigned Opc);
543
544 LLVM_READONLY
545 bool getMTBUFHasSoffset(unsigned Opc);
546
547 LLVM_READONLY
548 int getMUBUFBaseOpcode(unsigned Opc);
549
550 LLVM_READONLY
551 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
552
553 LLVM_READONLY
554 int getMUBUFElements(unsigned Opc);
555
556 LLVM_READONLY
557 bool getMUBUFHasVAddr(unsigned Opc);
558
559 LLVM_READONLY
560 bool getMUBUFHasSrsrc(unsigned Opc);
561
562 LLVM_READONLY
563 bool getMUBUFHasSoffset(unsigned Opc);
564
565 LLVM_READONLY
566 bool getMUBUFIsBufferInv(unsigned Opc);
567
568 LLVM_READONLY
569 bool getMUBUFTfe(unsigned Opc);
570
571 LLVM_READONLY
572 bool getSMEMIsBuffer(unsigned Opc);
573
574 LLVM_READONLY
575 bool getVOP1IsSingle(unsigned Opc);
576
577 LLVM_READONLY
578 bool getVOP2IsSingle(unsigned Opc);
579
580 LLVM_READONLY
581 bool getVOP3IsSingle(unsigned Opc);
582
583 LLVM_READONLY
584 bool isVOPC64DPP(unsigned Opc);
585
586 LLVM_READONLY
587 bool isVOPCAsmOnly(unsigned Opc);
588
589 /// Returns true if MAI operation is a double precision GEMM.
590 LLVM_READONLY
591 bool getMAIIsDGEMM(unsigned Opc);
592
593 LLVM_READONLY
594 bool getMAIIsGFX940XDL(unsigned Opc);
595
596 // Get an equivalent BitOp3 for a binary logical \p Opc.
597 // \returns BitOp3 modifier for the logical operation or zero.
598 // Used in VOPD3 conversion.
599 unsigned getBitOp2(unsigned Opc);
600
// Whether an opcode may serve as the X and/or Y component of a VOPD
// instruction (see getCanBeVOPD).
struct CanBeVOPD {
  bool X; // Usable as the X component.
  bool Y; // Usable as the Y component.
};
605
606 /// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
607 LLVM_READONLY
608 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
609
610 LLVM_READONLY
611 CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3);
612
613 LLVM_READNONE
614 uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);
615
616 LLVM_READONLY
617 const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
618 unsigned BLGP,
619 unsigned F8F8Opcode);
620
621 LLVM_READONLY
622 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
623 uint8_t NumComponents,
624 uint8_t NumFormat,
625 const MCSubtargetInfo &STI);
626 LLVM_READONLY
627 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
628 const MCSubtargetInfo &STI);
629
630 LLVM_READONLY
631 int getMCOpcode(uint16_t Opcode, unsigned Gen);
632
633 LLVM_READONLY
634 unsigned getVOPDOpcode(unsigned Opc, bool VOPD3);
635
636 LLVM_READONLY
637 int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
638 bool VOPD3);
639
640 LLVM_READONLY
641 bool isVOPD(unsigned Opc);
642
643 LLVM_READNONE
644 bool isMAC(unsigned Opc);
645
646 LLVM_READNONE
647 bool isPermlane16(unsigned Opc);
648
649 LLVM_READNONE
650 bool isGenericAtomic(unsigned Opc);
651
652 LLVM_READNONE
653 bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
654
655 namespace VOPD {
656
// Operand slots of one VOPD component: the destination followed by up to
// three sources.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,
  MAX_SRC_NUM = 3,
  // Maximum total operand slots per component (1 dst + 3 srcs).
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};
667
// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
// VOPD3 variant: differs only in the SRC2 slot, which uses mask 3 here.
constexpr unsigned VOPD3_VGPR_BANK_MASKS[] = {1, 3, 3, 3};

// Indices naming the two components of a VOPD instruction.
enum ComponentIndex : unsigned { X = 0, Y = 1 };
// Convenience list of both component indices, for iteration.
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;
676
677 // Properties of VOPD components.
678 class ComponentProps {
679 private:
680 unsigned SrcOperandsNum = 0;
681 unsigned MandatoryLiteralIdx = ~0u;
682 bool HasSrc2Acc = false;
683 unsigned NumVOPD3Mods = 0;
684 unsigned Opcode = 0;
685 bool IsVOP3 = false;
686
687 public:
688 ComponentProps() = default;
689 ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout = false);
690
691 // Return the total number of src operands this component has.
getCompSrcOperandsNum()692 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
693
694 // Return the number of src operands of this component visible to the parser.
getCompParsedSrcOperandsNum()695 unsigned getCompParsedSrcOperandsNum() const {
696 return SrcOperandsNum - HasSrc2Acc;
697 }
698
699 // Return true iif this component has a mandatory literal.
hasMandatoryLiteral()700 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
701
702 // If this component has a mandatory literal, return component operand
703 // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
getMandatoryLiteralCompOperandIndex()704 unsigned getMandatoryLiteralCompOperandIndex() const {
705 assert(hasMandatoryLiteral());
706 return MandatoryLiteralIdx;
707 }
708
709 // Return true iif this component has operand
710 // with component index CompSrcIdx and this operand may be a register.
hasRegSrcOperand(unsigned CompSrcIdx)711 bool hasRegSrcOperand(unsigned CompSrcIdx) const {
712 assert(CompSrcIdx < Component::MAX_SRC_NUM);
713 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
714 }
715
716 // Return true iif this component has tied src2.
hasSrc2Acc()717 bool hasSrc2Acc() const { return HasSrc2Acc; }
718
719 // Return a number of source modifiers if instruction is used in VOPD3.
getCompVOPD3ModsNum()720 unsigned getCompVOPD3ModsNum() const { return NumVOPD3Mods; }
721
722 // Return opcode of the component.
getOpcode()723 unsigned getOpcode() const { return Opcode; }
724
725 // Returns if component opcode is in VOP3 encoding.
isVOP3()726 unsigned isVOP3() const { return IsVOP3; }
727
728 // Return index of BitOp3 operand or -1.
729 int getBitOp3OperandIdx() const;
730
731 private:
hasMandatoryLiteralAt(unsigned CompSrcIdx)732 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
733 assert(CompSrcIdx < Component::MAX_SRC_NUM);
734 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
735 }
736 };
737
// Kinds of VOPD components; used to index the layout tables below.
enum ComponentKind : unsigned {
  SINGLE = 0, // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};
744
745 // Interface functions of this class map VOPD component operand indices
746 // to indices of operands in MachineInstr/MCInst or parsed operands array.
747 //
748 // Note that this class operates with 3 kinds of indices:
749 // - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
750 // - MC operand indices (they refer operands in a MachineInstr/MCInst);
751 // - parsed operand indices (they refer operands in parsed operands array).
752 //
753 // For SINGLE components mapping between these indices is trivial.
754 // But things get more complicated for COMPONENT_X and
755 // COMPONENT_Y because these components share the same
756 // MachineInstr/MCInst and the same parsed operands array.
757 // Below is an example of component operand to parsed operand
758 // mapping for the following instruction:
759 //
760 // v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
761 //
762 // PARSED COMPONENT PARSED
763 // COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX
764 // -------------------------------------------------------------------
765 // "v_dual_add_f32" 0
766 // v_dual_add_f32 v255 0 (DST) --> 1
767 // v4 1 (SRC0) --> 2
768 // v5 2 (SRC1) --> 3
769 // "::" 4
770 // "v_dual_mov_b32" 5
771 // v_dual_mov_b32 v6 0 (DST) --> 6
772 // v1 1 (SRC0) --> 7
773 // -------------------------------------------------------------------
774 //
// Maps VOPD component operand indices to MCInst / parsed operand indices;
// see the mapping example in the comment block above.
class ComponentLayout {
private:
  // Regular MachineInstr/MCInst operands are ordered as follows:
  //   dst, src0 [, other src operands]
  // VOPD MachineInstr/MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};

  // VOPD3 instructions may have 2 or 3 source modifiers, src2 modifier is not
  // used if there is tied accumulator. Indexing of this array:
  // MC_SRC_IDX[VOPD3ModsNum][SrcNo]. This returns an index for a SINGLE
  // instruction layout, add 1 for COMPONENT_X or COMPONENT_Y. For the second
  // component add OpX.MCSrcNum + OpX.VOPD3ModsNum.
  // For VOPD1/VOPD2 use column with zero modifiers.
  static constexpr unsigned SINGLE_MC_SRC_IDX[4][3] = {
      {1, 2, 3}, {2, 3, 4}, {2, 4, 5}, {2, 4, 6}};

  // Parsed operands of regular instructions are ordered as follows:
  //   Mnemo dst src0 [vsrc1 ...]
  // Parsed VOPD operands are ordered as follows:
  //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
  //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
                                                4 /* + OpX.ParsedSrcNum */};
  static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
      2, 2, 5 /* + OpX.ParsedSrcNum */};

private:
  const ComponentKind Kind;
  // For COMPONENT_Y this holds the X component's properties. For SINGLE and
  // COMPONENT_X it is default-constructed (all counts zero), so the
  // getPrevComp*() helpers below contribute nothing in those cases.
  const ComponentProps PrevComp;
  const unsigned VOPD3ModsNum;
  const int BitOp3Idx; // Index of bitop3 operand or -1

public:
  // Create layout for COMPONENT_X or SINGLE component.
  ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
      : Kind(Kind), VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {
    assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
  }

  // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum,
                  int BitOp3Idx)
      : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps),
        VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {}

public:
  // Return the index of dst operand in MCInst operands.
  unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }

  // Return the index of the specified src operand in MCInst operands.
  unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);

    // A SINGLE instruction with a separate bitop3 operand reports that
    // operand's index in place of src2.
    if (Kind == SINGLE && CompSrcIdx == 2 && BitOp3Idx != -1)
      return BitOp3Idx;

    if (VOPD3) {
      return SINGLE_MC_SRC_IDX[VOPD3ModsNum][CompSrcIdx] + getPrevCompSrcNum() +
             getPrevCompVOPD3ModsNum() + (Kind != SINGLE ? 1 : 0);
    }

    return SINGLE_MC_SRC_IDX[0][CompSrcIdx] + getPrevCompSrcNum() +
           (Kind != SINGLE ? 1 : 0);
  }

  // Return the index of dst operand in the parsed operands array.
  unsigned getIndexOfDstInParsedOperands() const {
    return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
  }

  // Return the index of the specified src operand in the parsed operands array.
  unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
  }

private:
  // Counts from the preceding (X) component; all zero for SINGLE/COMPONENT_X.
  unsigned getPrevCompSrcNum() const {
    return PrevComp.getCompSrcOperandsNum();
  }
  unsigned getPrevCompParsedSrcNum() const {
    return PrevComp.getCompParsedSrcOperandsNum();
  }
  unsigned getPrevCompVOPD3ModsNum() const {
    return PrevComp.getCompVOPD3ModsNum();
  }
};
865
866 // Layout and properties of VOPD components.
// Layout and properties of VOPD components.
// Note: ComponentProps appears first in the base list, so it is fully
// constructed before ComponentLayout; the calls to getCompVOPD3ModsNum()
// and getBitOp3OperandIdx() in the mem-initializers below are therefore
// well-defined.
class ComponentInfo : public ComponentProps, public ComponentLayout {
public:
  // Create ComponentInfo for COMPONENT_X or SINGLE component.
  ComponentInfo(const MCInstrDesc &OpDesc,
                ComponentKind Kind = ComponentKind::SINGLE,
                bool VOP3Layout = false)
      : ComponentProps(OpDesc, VOP3Layout),
        ComponentLayout(Kind, getCompVOPD3ModsNum(), getBitOp3OperandIdx()) {}

  // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps,
                bool VOP3Layout = false)
      : ComponentProps(OpDesc, VOP3Layout),
        ComponentLayout(OpXProps, getCompVOPD3ModsNum(),
                        getBitOp3OperandIdx()) {}

  // Map component operand index to parsed operand index.
  // Return 0 if the specified operand does not exist.
  unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
};
887
// Properties of VOPD instructions.
class InstInfo {
private:
  // Per-component info, indexed by component (COMPONENT_X, COMPONENT_Y).
  const ComponentInfo CompInfo[COMPONENTS_NUM];

public:
  // One VGPR register index per component operand.
  using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;

  // Construct from the MC descriptors of the X and Y components.
  InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
      : CompInfo{OpX, OpY} {}

  // Construct from already-built component infos.
  InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
      : CompInfo{OprInfoX, OprInfoY} {}

  // Access info for one component; \p ComponentIdx must be in range.
  const ComponentInfo &operator[](size_t ComponentIdx) const {
    assert(ComponentIdx < COMPONENTS_NUM);
    return CompInfo[ComponentIdx];
  }

  // Check VOPD operands constraints.
  // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
  // for the specified component and MC operand. The callback must return 0
  // if the operand is not a register or not a VGPR.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked.
  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
  // even though it violates requirement to be from different banks.
  // If \p VOPD3 is set to true both dst registers allowed to be either odd
  // or even and instruction may have real src2 as opposed to tied accumulator.
  bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
                         const MCRegisterInfo &MRI, bool SkipSrc = false,
                         bool AllowSameVGPR = false, bool VOPD3 = false) const {
    // Convenience wrapper: true iff getInvalidCompOperandIndex reports a
    // violating operand.
    return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR,
                                      VOPD3)
        .has_value();
  }

  // Check VOPD operands constraints.
  // Return the index of an invalid component operand, if any.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked except for being from the same halves of VGPR file on gfx1250.
  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
  // even though it violates requirement to be from different banks.
  // If \p VOPD3 is set to true both dst registers allowed to be either odd
  // or even and instruction may have real src2 as opposed to tied accumulator.
  std::optional<unsigned> getInvalidCompOperandIndex(
      std::function<unsigned(unsigned, unsigned)> GetRegIdx,
      const MCRegisterInfo &MRI, bool SkipSrc = false,
      bool AllowSameVGPR = false, bool VOPD3 = false) const;

private:
  // Gather the per-operand register indices of one component via
  // \p GetRegIdx.
  RegIndices
  getRegIndices(unsigned ComponentIdx,
                std::function<unsigned(unsigned, unsigned)> GetRegIdx,
                bool VOPD3) const;
};
944
945 } // namespace VOPD
946
947 LLVM_READONLY
948 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
949
950 LLVM_READONLY
951 // Get properties of 2 single VOP1/VOP2 instructions
952 // used as components to create a VOPD instruction.
953 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
954
955 LLVM_READONLY
956 // Get properties of VOPD X and Y components.
957 VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
958 const MCInstrInfo *InstrInfo);
959
960 LLVM_READONLY
961 bool isAsyncStore(unsigned Opc);
962 LLVM_READONLY
963 bool isTensorStore(unsigned Opc);
964 LLVM_READONLY
965 unsigned getTemporalHintType(const MCInstrDesc TID);
966
967 LLVM_READONLY
968 bool isTrue16Inst(unsigned Opc);
969
970 LLVM_READONLY
971 FPType getFPDstSelType(unsigned Opc);
972
973 LLVM_READONLY
974 bool isInvalidSingleUseConsumerInst(unsigned Opc);
975
976 LLVM_READONLY
977 bool isInvalidSingleUseProducerInst(unsigned Opc);
978
979 bool isDPMACCInstruction(unsigned Opc);
980
981 LLVM_READONLY
982 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
983
984 LLVM_READONLY
985 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
986
987 void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
988 const MCSubtargetInfo *STI);
989
990 bool isGroupSegment(const GlobalValue *GV);
991 bool isGlobalSegment(const GlobalValue *GV);
992 bool isReadOnlySegment(const GlobalValue *GV);
993
994 /// \returns True if constants should be emitted to .text section for given
995 /// target triple \p TT, false otherwise.
996 bool shouldEmitConstantsToTextSection(const Triple &TT);
997
998 /// \returns Integer value requested using \p F's \p Name attribute.
999 ///
1000 /// \returns \p Default if attribute is not present.
1001 ///
1002 /// \returns \p Default and emits error if requested value cannot be converted
1003 /// to integer.
1004 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
1005
1006 /// \returns A pair of integer values requested using \p F's \p Name attribute
1007 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1008 /// is false).
1009 ///
1010 /// \returns \p Default if attribute is not present.
1011 ///
1012 /// \returns \p Default and emits error if one of the requested values cannot be
1013 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
1014 /// not present.
1015 std::pair<unsigned, unsigned>
1016 getIntegerPairAttribute(const Function &F, StringRef Name,
1017 std::pair<unsigned, unsigned> Default,
1018 bool OnlyFirstRequired = false);
1019
1020 /// \returns A pair of integer values requested using \p F's \p Name attribute
1021 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1022 /// is false).
1023 ///
1024 /// \returns \p std::nullopt if attribute is not present.
1025 ///
1026 /// \returns \p std::nullopt and emits error if one of the requested values
1027 /// cannot be converted to integer, or \p OnlyFirstRequired is false and
1028 /// "second" value is not present.
1029 std::optional<std::pair<unsigned, std::optional<unsigned>>>
1030 getIntegerPairAttribute(const Function &F, StringRef Name,
1031 bool OnlyFirstRequired = false);
1032
1033 /// \returns Generate a vector of integer values requested using \p F's \p Name
1034 /// attribute.
1035 /// \returns A vector of size \p Size, with all elements set to \p DefaultVal,
1036 /// if any error occurs. The corresponding error will also be emitted.
1037 SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1038 unsigned Size,
1039 unsigned DefaultVal);
1040 /// Similar to the function above, but returns std::nullopt if any error occurs.
1041 std::optional<SmallVector<unsigned>>
1042 getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
1043
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits; a counter equal to ~0u is treated as "no
/// wait requested" by the hasWait* queries below.
struct Waitcnt {
  unsigned LoadCnt = ~0u;   // Corresponds to Vmcnt prior to gfx12.
  unsigned ExpCnt = ~0u;
  unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
  unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
  unsigned SampleCnt = ~0u; // gfx12+ only.
  unsigned BvhCnt = ~0u;    // gfx12+ only.
  unsigned KmCnt = ~0u;     // gfx12+ only.
  unsigned XCnt = ~0u;      // gfx1250.

  Waitcnt() = default;

  // Pre-gfx12 constructor.
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}

  // gfx12+ constructor.
  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt)
      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {}

  /// \returns true if any counter (StoreCnt included) requests a wait.
  bool hasWait() const { return hasWaitStoreCnt() || hasWaitExceptStoreCnt(); }

  /// \returns true if any counter other than StoreCnt requests a wait.
  bool hasWaitExceptStoreCnt() const {
    return !(LoadCnt == ~0u && ExpCnt == ~0u && DsCnt == ~0u &&
             SampleCnt == ~0u && BvhCnt == ~0u && KmCnt == ~0u &&
             XCnt == ~0u);
  }

  /// \returns true if StoreCnt requests a wait.
  bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }

  /// \returns the component-wise minimum of this and \p Other.
  Waitcnt combined(const Waitcnt &Other) const {
    // Does the right thing provided self and Other are either both pre-gfx12
    // or both gfx12+.
    Waitcnt Result;
    Result.LoadCnt = std::min(LoadCnt, Other.LoadCnt);
    Result.ExpCnt = std::min(ExpCnt, Other.ExpCnt);
    Result.DsCnt = std::min(DsCnt, Other.DsCnt);
    Result.StoreCnt = std::min(StoreCnt, Other.StoreCnt);
    Result.SampleCnt = std::min(SampleCnt, Other.SampleCnt);
    Result.BvhCnt = std::min(BvhCnt, Other.BvhCnt);
    Result.KmCnt = std::min(KmCnt, Other.KmCnt);
    Result.XCnt = std::min(XCnt, Other.XCnt);
    return Result;
  }
};
1088
1089 // The following methods are only meaningful on targets that support
1090 // S_WAITCNT.
1091
1092 /// \returns Vmcnt bit mask for given isa \p Version.
1093 unsigned getVmcntBitMask(const IsaVersion &Version);
1094
1095 /// \returns Expcnt bit mask for given isa \p Version.
1096 unsigned getExpcntBitMask(const IsaVersion &Version);
1097
1098 /// \returns Lgkmcnt bit mask for given isa \p Version.
1099 unsigned getLgkmcntBitMask(const IsaVersion &Version);
1100
1101 /// \returns Waitcnt bit mask for given isa \p Version.
1102 unsigned getWaitcntBitMask(const IsaVersion &Version);
1103
1104 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
1105 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
1106
1107 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
1108 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
1109
1110 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
1111 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
1112
1113 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
1114 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
1115 /// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
1116 /// which needs it is deprecated
1117 ///
1118 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
1119 /// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
1120 /// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
1121 /// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
1122 /// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
1123 /// \p Expcnt = \p Waitcnt[2:0] (gfx11)
1124 /// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
1125 /// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
1126 /// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
1127 ///
1128 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1129 unsigned &Expcnt, unsigned &Lgkmcnt);
1130
1131 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
1132
1133 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
1134 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1135 unsigned Vmcnt);
1136
1137 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
1138 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1139 unsigned Expcnt);
1140
1141 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
1142 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1143 unsigned Lgkmcnt);
1144
1145 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
1146 /// \p Version. Should not be used on gfx12+, the instruction which needs
1147 /// it is deprecated
1148 ///
1149 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
1150 /// Waitcnt[2:0] = \p Expcnt (gfx11+)
1151 /// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
1152 /// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
1153 /// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
1154 /// Waitcnt[9:4] = \p Lgkmcnt (gfx11)
1155 /// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
1156 /// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
1157 /// Waitcnt[15:10] = \p Vmcnt (gfx11)
1158 /// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
1159 ///
1160 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
1161 /// isa \p Version.
1162 ///
1163 unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1164 unsigned Expcnt, unsigned Lgkmcnt);
1165
1166 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
1167
1168 // The following methods are only meaningful on targets that support
1169 // S_WAIT_*CNT, introduced with gfx12.
1170
1171 /// \returns Loadcnt bit mask for given isa \p Version.
1172 /// Returns 0 for versions that do not support LOADcnt
1173 unsigned getLoadcntBitMask(const IsaVersion &Version);
1174
1175 /// \returns Samplecnt bit mask for given isa \p Version.
1176 /// Returns 0 for versions that do not support SAMPLEcnt
1177 unsigned getSamplecntBitMask(const IsaVersion &Version);
1178
1179 /// \returns Bvhcnt bit mask for given isa \p Version.
1180 /// Returns 0 for versions that do not support BVHcnt
1181 unsigned getBvhcntBitMask(const IsaVersion &Version);
1182
1183 /// \returns Dscnt bit mask for given isa \p Version.
1184 /// Returns 0 for versions that do not support DScnt
1185 unsigned getDscntBitMask(const IsaVersion &Version);
1186
/// \returns Kmcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support KMcnt
1189 unsigned getKmcntBitMask(const IsaVersion &Version);
1190
1191 /// \returns Xcnt bit mask for given isa \p Version.
1192 /// Returns 0 for versions that do not support Xcnt.
1193 unsigned getXcntBitMask(const IsaVersion &Version);
1194
1195 /// \return STOREcnt or VScnt bit mask for given isa \p Version.
1196 /// returns 0 for versions that do not support STOREcnt or VScnt.
1197 /// STOREcnt and VScnt are the same counter, the name used
1198 /// depends on the ISA version.
1199 unsigned getStorecntBitMask(const IsaVersion &Version);
1200
1201 // The following are only meaningful on targets that support
1202 // S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1203
1204 /// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1205 /// isa \p Version.
1206 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1207
1208 /// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1209 /// isa \p Version.
1210 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1211
1212 /// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
1213 /// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1214 /// \p Version.
1215 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1216
1217 /// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
1218 /// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1219 /// \p Version.
1220 unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1221
1222 namespace Hwreg {
1223
1224 using HwregId = EncodingField<5, 0>;
1225 using HwregOffset = EncodingField<10, 6>;
1226
struct HwregSize : EncodingField<15, 11, 32> {
  using EncodingField::EncodingField;
  // The size field is stored biased by one: a width of N is encoded as N-1
  // (so the default of 32 from the template argument encodes as 31), and
  // decoding adds the bias back.
  constexpr uint64_t encode() const { return Value - 1; }
  static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
};
1232
1233 using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;
1234
1235 } // namespace Hwreg
1236
1237 namespace DepCtr {
1238
1239 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
1240 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1241 const MCSubtargetInfo &STI);
1242 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1243 const MCSubtargetInfo &STI);
1244 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1245 bool &IsDefault, const MCSubtargetInfo &STI);
1246
1247 /// \returns Decoded VaVdst from given immediate \p Encoded.
1248 unsigned decodeFieldVaVdst(unsigned Encoded);
1249
1250 /// \returns Decoded VmVsrc from given immediate \p Encoded.
1251 unsigned decodeFieldVmVsrc(unsigned Encoded);
1252
1253 /// \returns Decoded SaSdst from given immediate \p Encoded.
1254 unsigned decodeFieldSaSdst(unsigned Encoded);
1255
1256 /// \returns Decoded VaSdst from given immediate \p Encoded.
1257 unsigned decodeFieldVaSdst(unsigned Encoded);
1258
1259 /// \returns Decoded VaVcc from given immediate \p Encoded.
1260 unsigned decodeFieldVaVcc(unsigned Encoded);
1261
/// \returns Decoded VaSsrc from given immediate \p Encoded.
1263 unsigned decodeFieldVaSsrc(unsigned Encoded);
1264
1265 /// \returns Decoded HoldCnt from given immediate \p Encoded.
1266 unsigned decodeFieldHoldCnt(unsigned Encoded);
1267
1268 /// \returns \p VmVsrc as an encoded Depctr immediate.
1269 unsigned encodeFieldVmVsrc(unsigned VmVsrc);
1270
1271 /// \returns \p Encoded combined with encoded \p VmVsrc.
1272 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
1273
1274 /// \returns \p VaVdst as an encoded Depctr immediate.
1275 unsigned encodeFieldVaVdst(unsigned VaVdst);
1276
1277 /// \returns \p Encoded combined with encoded \p VaVdst.
1278 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1279
1280 /// \returns \p SaSdst as an encoded Depctr immediate.
1281 unsigned encodeFieldSaSdst(unsigned SaSdst);
1282
1283 /// \returns \p Encoded combined with encoded \p SaSdst.
1284 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
1285
1286 /// \returns \p VaSdst as an encoded Depctr immediate.
1287 unsigned encodeFieldVaSdst(unsigned VaSdst);
1288
1289 /// \returns \p Encoded combined with encoded \p VaSdst.
1290 unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst);
1291
1292 /// \returns \p VaVcc as an encoded Depctr immediate.
1293 unsigned encodeFieldVaVcc(unsigned VaVcc);
1294
1295 /// \returns \p Encoded combined with encoded \p VaVcc.
1296 unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);
1297
1298 /// \returns \p HoldCnt as an encoded Depctr immediate.
1299 unsigned encodeFieldHoldCnt(unsigned HoldCnt);
1300
1301 /// \returns \p Encoded combined with encoded \p HoldCnt.
1302 unsigned encodeFieldHoldCnt(unsigned HoldCnt, unsigned Encoded);
1303
1304 /// \returns \p VaSsrc as an encoded Depctr immediate.
1305 unsigned encodeFieldVaSsrc(unsigned VaSsrc);
1306
1307 /// \returns \p Encoded combined with encoded \p VaSsrc.
1308 unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
1309
1310 } // namespace DepCtr
1311
1312 namespace Exp {
1313
1314 bool getTgtName(unsigned Id, StringRef &Name, int &Index);
1315
1316 LLVM_READONLY
1317 unsigned getTgtId(const StringRef Name);
1318
1319 LLVM_READNONE
1320 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
1321
1322 } // namespace Exp
1323
1324 namespace MTBUFFormat {
1325
1326 LLVM_READNONE
1327 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
1328
1329 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
1330
1331 int64_t getDfmt(const StringRef Name);
1332
1333 StringRef getDfmtName(unsigned Id);
1334
1335 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
1336
1337 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
1338
1339 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
1340
1341 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
1342
1343 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
1344
1345 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
1346
1347 bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
1348
1349 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1350 const MCSubtargetInfo &STI);
1351
1352 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
1353
1354 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
1355
1356 } // namespace MTBUFFormat
1357
1358 namespace SendMsg {
1359
1360 LLVM_READNONE
1361 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
1362
1363 LLVM_READNONE
1364 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1365 bool Strict = true);
1366
1367 LLVM_READNONE
1368 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1369 const MCSubtargetInfo &STI, bool Strict = true);
1370
1371 LLVM_READNONE
1372 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
1373
1374 LLVM_READNONE
1375 bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1376
1377 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1378 uint16_t &StreamId, const MCSubtargetInfo &STI);
1379
1380 LLVM_READNONE
1381 uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);
1382
1383 } // namespace SendMsg
1384
1385 unsigned getInitialPSInputAddr(const Function &F);
1386
1387 bool getHasColorExport(const Function &F);
1388
1389 bool getHasDepthExport(const Function &F);
1390
1391 bool hasDynamicVGPR(const Function &F);
1392
1393 // Returns the value of the "amdgpu-dynamic-vgpr-block-size" attribute, or 0 if
1394 // the attribute is missing or its value is invalid.
1395 unsigned getDynamicVGPRBlockSize(const Function &F);
1396
1397 LLVM_READNONE
isShader(CallingConv::ID CC)1398 constexpr bool isShader(CallingConv::ID CC) {
1399 switch (CC) {
1400 case CallingConv::AMDGPU_VS:
1401 case CallingConv::AMDGPU_LS:
1402 case CallingConv::AMDGPU_HS:
1403 case CallingConv::AMDGPU_ES:
1404 case CallingConv::AMDGPU_GS:
1405 case CallingConv::AMDGPU_PS:
1406 case CallingConv::AMDGPU_CS_Chain:
1407 case CallingConv::AMDGPU_CS_ChainPreserve:
1408 case CallingConv::AMDGPU_CS:
1409 return true;
1410 default:
1411 return false;
1412 }
1413 }
1414
1415 LLVM_READNONE
isGraphics(CallingConv::ID CC)1416 constexpr bool isGraphics(CallingConv::ID CC) {
1417 return isShader(CC) || CC == CallingConv::AMDGPU_Gfx;
1418 }
1419
1420 LLVM_READNONE
isCompute(CallingConv::ID CC)1421 constexpr bool isCompute(CallingConv::ID CC) {
1422 return !isGraphics(CC) || CC == CallingConv::AMDGPU_CS;
1423 }
1424
1425 LLVM_READNONE
isEntryFunctionCC(CallingConv::ID CC)1426 constexpr bool isEntryFunctionCC(CallingConv::ID CC) {
1427 switch (CC) {
1428 case CallingConv::AMDGPU_KERNEL:
1429 case CallingConv::SPIR_KERNEL:
1430 case CallingConv::AMDGPU_VS:
1431 case CallingConv::AMDGPU_GS:
1432 case CallingConv::AMDGPU_PS:
1433 case CallingConv::AMDGPU_CS:
1434 case CallingConv::AMDGPU_ES:
1435 case CallingConv::AMDGPU_HS:
1436 case CallingConv::AMDGPU_LS:
1437 return true;
1438 default:
1439 return false;
1440 }
1441 }
1442
1443 LLVM_READNONE
isChainCC(CallingConv::ID CC)1444 constexpr bool isChainCC(CallingConv::ID CC) {
1445 switch (CC) {
1446 case CallingConv::AMDGPU_CS_Chain:
1447 case CallingConv::AMDGPU_CS_ChainPreserve:
1448 return true;
1449 default:
1450 return false;
1451 }
1452 }
1453
1454 // These functions are considered entrypoints into the current module, i.e. they
1455 // are allowed to be called from outside the current module. This is different
1456 // from isEntryFunctionCC, which is only true for functions that are entered by
1457 // the hardware. Module entry points include all entry functions but also
1458 // include functions that can be called from other functions inside or outside
1459 // the current module. Module entry functions are allowed to allocate LDS.
1460 LLVM_READNONE
isModuleEntryFunctionCC(CallingConv::ID CC)1461 constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC) {
1462 switch (CC) {
1463 case CallingConv::AMDGPU_Gfx:
1464 return true;
1465 default:
1466 return isEntryFunctionCC(CC) || isChainCC(CC);
1467 }
1468 }
1469
1470 LLVM_READNONE
isKernel(CallingConv::ID CC)1471 constexpr inline bool isKernel(CallingConv::ID CC) {
1472 switch (CC) {
1473 case CallingConv::AMDGPU_KERNEL:
1474 case CallingConv::SPIR_KERNEL:
1475 return true;
1476 default:
1477 return false;
1478 }
1479 }
1480
1481 LLVM_READNONE
canGuaranteeTCO(CallingConv::ID CC)1482 constexpr bool canGuaranteeTCO(CallingConv::ID CC) {
1483 return CC == CallingConv::Fast;
1484 }
1485
1486 /// Return true if we might ever do TCO for calls with this calling convention.
1487 LLVM_READNONE
mayTailCallThisCC(CallingConv::ID CC)1488 constexpr bool mayTailCallThisCC(CallingConv::ID CC) {
1489 switch (CC) {
1490 case CallingConv::C:
1491 case CallingConv::AMDGPU_Gfx:
1492 return true;
1493 default:
1494 return canGuaranteeTCO(CC);
1495 }
1496 }
1497
1498 bool hasXNACK(const MCSubtargetInfo &STI);
1499 bool hasSRAMECC(const MCSubtargetInfo &STI);
1500 bool hasMIMG_R128(const MCSubtargetInfo &STI);
1501 bool hasA16(const MCSubtargetInfo &STI);
1502 bool hasG16(const MCSubtargetInfo &STI);
1503 bool hasPackedD16(const MCSubtargetInfo &STI);
1504 bool hasGDS(const MCSubtargetInfo &STI);
1505 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1506 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1507
1508 bool isSI(const MCSubtargetInfo &STI);
1509 bool isCI(const MCSubtargetInfo &STI);
1510 bool isVI(const MCSubtargetInfo &STI);
1511 bool isGFX9(const MCSubtargetInfo &STI);
1512 bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1513 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1514 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1515 bool isGFX8Plus(const MCSubtargetInfo &STI);
1516 bool isGFX9Plus(const MCSubtargetInfo &STI);
1517 bool isNotGFX9Plus(const MCSubtargetInfo &STI);
1518 bool isGFX10(const MCSubtargetInfo &STI);
1519 bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1520 bool isGFX10Plus(const MCSubtargetInfo &STI);
1521 bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1522 bool isGFX10Before1030(const MCSubtargetInfo &STI);
1523 bool isGFX11(const MCSubtargetInfo &STI);
1524 bool isGFX11Plus(const MCSubtargetInfo &STI);
1525 bool isGFX12(const MCSubtargetInfo &STI);
1526 bool isGFX12Plus(const MCSubtargetInfo &STI);
1527 bool isGFX1250(const MCSubtargetInfo &STI);
1528 bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1529 bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1530 bool isGCN3Encoding(const MCSubtargetInfo &STI);
1531 bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1532 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1533 bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1534 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1535 bool isGFX90A(const MCSubtargetInfo &STI);
1536 bool isGFX940(const MCSubtargetInfo &STI);
1537 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
1538 bool hasMAIInsts(const MCSubtargetInfo &STI);
1539 bool hasVOPD(const MCSubtargetInfo &STI);
1540 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1541 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1542 unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1543 bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
1544
1545 /// Is Reg - scalar register
1546 bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI);
1547
1548 /// \returns if \p Reg occupies the high 16-bits of a 32-bit register.
1549 bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI);
1550
1551 /// If \p Reg is a pseudo reg, return the correct hardware register given
1552 /// \p STI otherwise return \p Reg.
1553 MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI);
1554
1555 /// Convert hardware register \p Reg to a pseudo register
1556 LLVM_READNONE
1557 MCRegister mc2PseudoReg(MCRegister Reg);
1558
1559 LLVM_READNONE
1560 bool isInlineValue(unsigned Reg);
1561
1562 /// Is this an AMDGPU specific source operand? These include registers,
1563 /// inline constants, literals and mandatory literals (KImm).
1564 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
1565
1566 /// Is this a KImm operand?
1567 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1568
1569 /// Is this floating-point operand?
1570 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1571
1572 /// Does this operand support only inlinable literals?
1573 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1574
1575 /// Get the size in bits of a register from the register class \p RC.
1576 unsigned getRegBitWidth(unsigned RCID);
1577
1578 /// Get the size in bits of a register from the register class \p RC.
1579 unsigned getRegBitWidth(const MCRegisterClass &RC);
1580
1581 /// Get size of register operand
1582 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
1583 unsigned OpNo);
1584
1585 LLVM_READNONE
/// \returns the size in bytes (2, 4 or 8) of the immediate/literal value
/// implied by \p OpInfo's operand type. Unhandled operand types are a
/// programming error (llvm_unreachable).
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  // 32-bit operand types.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    return 4;

  // 64-bit operand types.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
  case AMDGPU::OPERAND_KIMM64:
    return 8;

  // 16-bit operand types (including packed 2x16 vector forms).
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}
1627
1628 LLVM_READNONE
getOperandSize(const MCInstrDesc & Desc,unsigned OpNo)1629 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1630 return getOperandSize(Desc.operands()[OpNo]);
1631 }
1632
1633 /// Is this literal inlinable, and not one of the values intended for floating
1634 /// point values.
1635 LLVM_READNONE
// Inlinable integer literals are exactly those in the closed range [-16, 64].
inline bool isInlinableIntLiteral(int64_t Literal) {
  if (Literal < -16)
    return false;
  return Literal <= 64;
}
1639
1640 /// Is this literal inlinable
1641 LLVM_READNONE
1642 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1643
1644 LLVM_READNONE
1645 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1646
1647 LLVM_READNONE
1648 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1649
1650 LLVM_READNONE
1651 bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1652
1653 LLVM_READNONE
1654 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1655
1656 LLVM_READNONE
1657 bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
1658
1659 LLVM_READNONE
1660 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
1661
1662 LLVM_READNONE
1663 std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
1664
1665 LLVM_READNONE
1666 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
1667
1668 LLVM_READNONE
1669 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
1670
1671 LLVM_READNONE
1672 bool isInlinableLiteralV2I16(uint32_t Literal);
1673
1674 LLVM_READNONE
1675 bool isInlinableLiteralV2BF16(uint32_t Literal);
1676
1677 LLVM_READNONE
1678 bool isInlinableLiteralV2F16(uint32_t Literal);
1679
1680 LLVM_READNONE
1681 bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1682
1683 bool isArgPassedInSGPR(const Argument *Arg);
1684
1685 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
1686
1687 LLVM_READONLY
1688 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
1689 int64_t EncodedOffset);
1690
1691 LLVM_READONLY
1692 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
1693 int64_t EncodedOffset, bool IsBuffer);
1694
1695 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
1696 /// offsets.
1697 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
1698
1699 /// \returns The encoding that will be used for \p ByteOffset in the
1700 /// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
1701 /// S_LOAD instructions have a signed offset, on other subtargets it is
1702 /// unsigned. S_BUFFER has an unsigned offset for all subtargets.
1703 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1704 int64_t ByteOffset, bool IsBuffer,
1705 bool HasSOffset = false);
1706
/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
1709 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1710 int64_t ByteOffset);
1711
1712 /// For pre-GFX12 FLAT instructions the offset must be positive;
1713 /// MSB is ignored and forced to zero.
1714 ///
1715 /// \return The number of bits available for the signed offset field in flat
1716 /// instructions. Note that some forms of the instruction disallow negative
1717 /// offsets.
1718 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
1719
1720 /// \returns true if this offset is small enough to fit in the SMRD
1721 /// offset field. \p ByteOffset should be the offset in bytes and
1722 /// not the encoded offset.
1723 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1724
1725 LLVM_READNONE
isLegalDPALU_DPPControl(unsigned DC)1726 inline bool isLegalDPALU_DPPControl(unsigned DC) {
1727 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1728 }
1729
1730 /// \returns true if an instruction may have a 64-bit VGPR operand.
1731 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);
1732
1733 /// \returns true if an instruction is a DP ALU DPP.
1734 bool isDPALU_DPP(const MCInstrDesc &OpDesc);
1735
1736 /// \returns true if the intrinsic is divergent
1737 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1738
1739 /// \returns true if the intrinsic is uniform
1740 bool isIntrinsicAlwaysUniform(unsigned IntrID);
1741
/// \returns lds block size in terms of dwords.
/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
/// must be defined in terms of bytes.
1745 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1746
1747 } // end namespace AMDGPU
1748
1749 raw_ostream &operator<<(raw_ostream &OS,
1750 const AMDGPU::IsaInfo::TargetIDSetting S);
1751
1752 } // end namespace llvm
1753
1754 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1755