//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still takes priority if present)"));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
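
// For example, getBitMask(4, 6) == 0x3F0, so packBits(0x2A, 0, 4, 6) == 0x2A0
// and unpackBits(0x2A0, 4, 6) == 0x2A; packBits and unpackBits round-trip any
// value that fits in the field.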

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}
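
// Taken together, the helpers above describe the pre-gfx11 S_WAITCNT SIMM16
// layout: vmcnt lo in bits [3:0], expcnt in [6:4], lgkmcnt starting at bit 8
// (4 bits before gfx10, 6 bits on gfx10), and on gfx9/gfx10 two extra vmcnt
// bits in [15:14].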

/// \returns Loadcnt bit width.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns Xcnt bit width.
unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VaSdst bit width.
inline unsigned getVaSdstBitWidth() { return 3; }

/// \returns VaSdst bit shift.
inline unsigned getVaSdstBitShift() { return 9; }

/// \returns VmVsrc bit width.
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift.
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width.
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift.
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns VaVcc bit width.
inline unsigned getVaVccBitWidth() { return 1; }

/// \returns VaVcc bit shift.
inline unsigned getVaVccBitShift() { return 1; }

/// \returns SaSdst bit width.
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift.
inline unsigned getSaSdstBitShift() { return 0; }
/// \returns VaSsrc bit width.
inline unsigned getVaSsrcBitWidth() { return 1; }

/// \returns VaSsrc bit shift.
inline unsigned getVaSsrcBitShift() { return 8; }

/// \returns HoldCnt bit width.
inline unsigned getHoldCntWidth() { return 1; }

/// \returns HoldCnt bit shift.
inline unsigned getHoldCntBitShift() { return 7; }

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

/// \returns true if the target supports signed immediate offset for SMRD
/// instructions.
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

unsigned getAMDHSACodeObjectVersion(const Module &M) {
  if (auto *Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdhsa_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  return getDefaultAMDHSACodeObjectVersion();
}
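
// The module flag stores the version scaled by 100: for example, a module
// flag "amdhsa_code_object_version" with value 500 yields code object
// version 5.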

unsigned getDefaultAMDHSACodeObjectVersion() {
  return DefaultAMDHSACodeObjectVersion;
}

unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
  switch (ABIVersion) {
  case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    return 4;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
    return 5;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
    return 6;
  default:
    return getDefaultAMDHSACodeObjectVersion();
  }
}

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
  if (T.getOS() != Triple::AMDHSA)
    return 0;

  switch (CodeObjectVersion) {
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  case 6:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
  default:
    report_fatal_error("Unsupported AMDHSA Code Object Version " +
                       Twine(CodeObjectVersion));
  }
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info =
      getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
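
// Worked example for getAddrSizeMIMGOp: a basic 2D sample (two coordinates,
// no LOD/clamp/mip, no gradients, no extra args) needs two address words, or
// one when IsA16 packs both 16-bit coordinates into a single dword.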

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
  bool tfe;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPCDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOP3CDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
  bool CanBeVOPD3X;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
  bool VOPD3;
};

struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

#define GET_FP4FP8DstByteSelTable_DECL
#define GET_FP4FP8DstByteSelTable_IMPL

struct DPMACCInstructionInfo {
  uint16_t Opcode;
  bool IsDPMACCInstruction;
};

struct FP4FP8DstByteSelInfo {
  uint16_t Opcode;
  bool HasFP8DstByteSel;
  bool HasFP4DstByteSel;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_True16D16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#define GET_getMFMA_F8F6F4_WithSize_DECL
#define GET_getMFMA_F8F6F4_WithSize_IMPL
#define GET_isMFMA_F8F6F4Table_IMPL
#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL

#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info =
      getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_vaddr;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_srsrc;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_soffset;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info =
      getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_vaddr;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_srsrc;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_soffset;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->IsBufferInv;
}

bool getMUBUFTfe(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->tfe;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info && Info->IsBuffer;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info && Info->is_dgemm;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info && Info->is_gfx940_xdl;
}

uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {
  switch (EncodingVal) {
  case MFMAScaleFormats::FP6_E2M3:
  case MFMAScaleFormats::FP6_E3M2:
    return 6;
  case MFMAScaleFormats::FP4_E2M1:
    return 4;
  case MFMAScaleFormats::FP8_E4M3:
  case MFMAScaleFormats::FP8_E5M2:
  default:
    return 8;
  }

  llvm_unreachable("covered switch over mfma scale formats");
}

const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
                                                      unsigned BLGP,
                                                      unsigned F8F8Opcode) {
  uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ);
  uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP);
  return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
}

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
  if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
    return SIEncodingFamily::GFX1250;
  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
    return SIEncodingFamily::GFX12;
  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
    return SIEncodingFamily::GFX11;
  llvm_unreachable("Subtarget generation does not support VOPD!");
}

CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info) {
    // Check that Opc can be used as VOPDY for this encoding. V_MOV_B32 as a
    // VOPDX is just a placeholder here, it is supported on all encodings.
    // TODO: This can be optimized by creating tables of supported VOPDY
    // opcodes per encoding.
    unsigned VOPDMov = AMDGPU::getVOPDOpcode(AMDGPU::V_MOV_B32_e32, VOPD3);
    bool CanBeVOPDY = getVOPDFull(VOPDMov, AMDGPU::getVOPDOpcode(Opc, VOPD3),
                                  EncodingFamily, VOPD3) != -1;
    return {VOPD3 ? Info->CanBeVOPD3X : Info->CanBeVOPDX, CanBeVOPDY};
  }

  return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isAsyncStore(unsigned Opc) {
  return false; // placeholder before async store implementation.
}

bool isTensorStore(unsigned Opc) {
  return Opc == TENSOR_STORE_FROM_LDS_gfx1250 ||
         Opc == TENSOR_STORE_FROM_LDS_D2_gfx1250;
}

unsigned getTemporalHintType(const MCInstrDesc TID) {
  if (TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))
    return CPol::TH_TYPE_ATOMIC;
  unsigned Opc = TID.getOpcode();
  // Async and tensor stores should have the temporal hint type TH_TYPE_STORE.
  if (TID.mayStore() &&
      (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
    return CPol::TH_TYPE_STORE;

  // This defaults to returning TH_TYPE_LOAD when neither the MayStore nor the
  // MayLoad flag is present, which is the case for instructions like
  // image_get_resinfo.
  return CPol::TH_TYPE_LOAD;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info && Info->IsTrue16;
}

FPType getFPDstSelType(unsigned Opc) {
  const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
  if (!Info)
    return FPType::None;
  if (Info->HasFP8DstByteSel)
    return FPType::FP8;
  if (Info->HasFP4DstByteSel)
    return FPType::FP4;

  return FPType::None;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

unsigned getBitOp2(unsigned Opc) {
  switch (Opc) {
  default:
    return 0;
  case AMDGPU::V_AND_B32_e32:
    return 0x40;
  case AMDGPU::V_OR_B32_e32:
    return 0x54;
  case AMDGPU::V_XOR_B32_e32:
    return 0x14;
  case AMDGPU::V_XNOR_B32_e32:
    return 0x41;
  }
}
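
// A nonzero result from getBitOp2 marks Opc as convertible to V_BITOP3_B32:
// getCanBeVOPD, getVOPDOpcode and getVOPDFull substitute V_BITOP3_B32_e64 for
// such opcodes when forming VOPD3 pairs.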

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
                bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
  OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
  const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;
  Opcode = OpDesc.getOpcode();

  IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
  SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2)   ? 3
                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)  ? 3
                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1) ? 2
                                                                           : 1;
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
      Opcode == AMDGPU::V_CNDMASK_B32_e64) {
    // CNDMASK is an awkward exception: it has FP modifiers but no FP
    // operands.
    NumVOPD3Mods = 2;
    if (IsVOP3)
      SrcOperandsNum = 3;
  } else if (isSISrcFPOperand(OpDesc,
                              getNamedOperandIdx(Opcode, OpName::src0))) {
    // All FP VOPD instructions have Neg modifiers for all operands except
    // for tied src2.
    NumVOPD3Mods = SrcOperandsNum;
    if (HasSrc2Acc)
      --NumVOPD3Mods;
  }

  if (OpDesc.TSFlags & SIInstrFlags::VOP3)
    return;

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

int ComponentProps::getBitOp3OperandIdx() const {
  return getNamedOperandIdx(Opcode, OpName::bitop3);
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx,
    const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
    bool VOPD3) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx,
                               CompInfo[ComponentIndex::X].isVOP3());
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx,
                               CompInfo[ComponentIndex::Y].isVOP3());

  const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
                                   unsigned BanksMask) -> bool {
    MCRegister BaseX = MRI.getSubReg(X, AMDGPU::sub0);
    MCRegister BaseY = MRI.getSubReg(Y, AMDGPU::sub0);
    if (!BaseX)
      BaseX = X;
    if (!BaseY)
      BaseY = Y;
    if ((BaseX & BanksMask) == (BaseY & BanksMask))
      return true;
    if (BaseX != X /* This is 64-bit register */ &&
        ((BaseX + 1) & BanksMask) == (BaseY & BanksMask))
      return true;
    if (BaseY != Y && (BaseX & BanksMask) == ((BaseY + 1) & BanksMask))
      return true;

    // If both registers are 64-bit, a bank conflict will already be detected
    // while checking the first subreg.
    return false;
  };

  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
    unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
                                : VOPD_VGPR_BANK_MASKS[CompOprIdx];
    if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
      continue;

    if (SkipSrc && CompOprIdx >= Component::DST_NUM)
      continue;

    if (CompOprIdx < Component::DST_NUM) {
      // Even if we do not check vdst parity, vdst operands still shall not
      // overlap.
      if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
        return CompOprIdx;
      if (VOPD3) // No need to check dst parity.
        continue;
    }

    if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
        (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
         OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices
InstInfo::getRegIndices(unsigned CompIdx,
                        std::function<unsigned(unsigned, unsigned)> GetRegIdx,
                        bool VOPD3) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx,
                        Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
            : 0;
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  bool VOPD3 = InstrInfo->get(VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X, VOPD3);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled.  In the
  // absence of the target features we assume we must generate code that can run
  // in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting will remain "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting will remain set to
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.ends_with("-"))
    return TargetIDSetting::Off;
  if (FeatureString.ends_with("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.starts_with("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.starts_with("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName()
            << '-' << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: The following else branch exists because we used various alias
  // names for GPUs up until GFX9 (e.g. 'fiji' is the same as 'gfx803').
  // Remove it once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
    // sramecc.
    if (getSramEccSetting() == TargetIDSetting::Off)
      Features += ":sramecc-";
    else if (getSramEccSetting() == TargetIDSetting::On)
      Features += ":sramecc+";
    // xnack.
    if (getXnackSetting() == TargetIDSetting::Off)
      Features += ":xnack-";
    else if (getXnackSetting() == TargetIDSetting::On)
      Features += ":xnack+";
  }

  StreamRep << Processor << Features;

  return StringRep;
}
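
// A typical result for an HSA target looks like
// "amdgcn-amd-amdhsa--gfx906:sramecc+:xnack-": the environment component is
// empty, and sramecc/xnack are appended only when explicitly On or Off.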

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = getAddressableLocalMemorySize(STI);

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
    return 65536;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
    return 163840;
  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (!STI->getTargetTriple().isAMDGCN())
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}
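
// Worked example for getMaxWorkGroupsPerCU: on a pre-gfx10 wave64 target, a
// flat workgroup size of 256 gives N = 4 waves per group and
// MaxWaves = 10 * 4 = 40, so the result is min(40 / 4, 16) = 10 workgroups.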

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; }

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; }

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; }

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}
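
// Worked example for getMinNumSGPRs: on a gfx8-class target without the trap
// handler, WavesPerEU = 8 gives 800 / 9 = 88, aligned down to the 16-SGPR
// granule plus one = 81, well under the addressable limit of 102.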

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
                                               unsigned Granule) {
  return divideCeil(std::max(1u, NumRegs), Granule);
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
         1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             unsigned DynamicVGPRBlockSize,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  if (DynamicVGPRBlockSize != 0)
    return DynamicVGPRBlockSize;

  // Temporarily check the subtarget feature, until we fully switch to using
  // attributes.
  if (STI->getFeatureBits().test(FeatureDynamicVGPR))
    return STI->getFeatureBits().test(FeatureDynamicVGPRBlockSize32) ? 32 : 16;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getArchVGPRAllocGranule() { return 4; }

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
                                unsigned DynamicVGPRBlockSize) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;

  // Temporarily check the subtarget feature, until we fully switch to using
  // attributes.
  if (DynamicVGPRBlockSize != 0 ||
      STI->getFeatureBits().test(FeatureDynamicVGPR))
    // On GFX12 we can allocate at most 8 blocks of VGPRs.
    return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  return getAddressableNumArchVGPRs(STI);
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs,
                                      unsigned DynamicVGPRBlockSize) {
  return getNumWavesPerEUWithNumVGPRs(
      NumVGPRs, getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
      getMaxWavesPerEU(STI), getTotalNumVGPRs(STI));
}

unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs) {
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
}
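
// Worked example: with Granule = 8, TotalNumVGPRs = 512 and MaxWaves = 10,
// NumVGPRs = 84 rounds up to 88 registers, giving
// min(max(512 / 88, 1), 10) = 5 waves per EU.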

unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
                                  AMDGPUSubtarget::Generation Gen) {
  if (Gen >= AMDGPUSubtarget::GFX10)
    return MaxWaves;

  if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddressableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(
      STI, AddressableNumVGPRs, DynamicVGPRBlockSize);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddressableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs =
      alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
  unsigned AddressableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                 std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
             NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
         1;
}

unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
                                   unsigned NumVGPRs,
                                   unsigned DynamicVGPRBlockSize,
                                   std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
      NumVGPRs,
      getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
}
} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  KernelCode.amd_kernel_code_version_major = 1;
  KernelCode.amd_kernel_code_version_minor = 2;
  KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  KernelCode.amd_machine_version_major = Version.Major;
  KernelCode.amd_machine_version_minor = Version.Minor;
  KernelCode.amd_machine_version_stepping = Version.Stepping;
  KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
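  // wavefront_size is encoded as a log2 value: 5 for wave32, 6 for wave64.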
1473   if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1474     KernelCode.wavefront_size = 5;
1475     KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1476   } else {
1477     KernelCode.wavefront_size = 6;
1478   }
1479 
1480   // If the code object does not support indirect functions, then the value must
1481   // be 0xffffffff.
1482   KernelCode.call_convention = -1;
1483 
1484   // These alignment values are specified in powers of two, so alignment =
1485   // 2^n.  The minimum alignment is 2^4 = 16.
1486   KernelCode.kernarg_segment_alignment = 4;
1487   KernelCode.group_segment_alignment = 4;
1488   KernelCode.private_segment_alignment = 4;
1489 
1490   if (Version.Major >= 10) {
1491     KernelCode.compute_pgm_resource_registers |=
1492         S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1493         S_00B848_MEM_ORDERED(1) | S_00B848_FWD_PROGRESS(1);
1494   }
1495 }
1496 
1497 bool isGroupSegment(const GlobalValue *GV) {
1498   return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
1499 }
1500 
1501 bool isGlobalSegment(const GlobalValue *GV) {
1502   return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
1503 }
1504 
1505 bool isReadOnlySegment(const GlobalValue *GV) {
1506   unsigned AS = GV->getAddressSpace();
1507   return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1508          AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
1509 }
1510 
1511 bool shouldEmitConstantsToTextSection(const Triple &TT) {
1512   return TT.getArch() == Triple::r600;
1513 }
1514 
1515 std::pair<unsigned, unsigned>
1516 getIntegerPairAttribute(const Function &F, StringRef Name,
1517                         std::pair<unsigned, unsigned> Default,
1518                         bool OnlyFirstRequired) {
1519   if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
1520     return {Attr->first, Attr->second.value_or(Default.second)};
1521   return Default;
1522 }
1523 
1524 std::optional<std::pair<unsigned, std::optional<unsigned>>>
1525 getIntegerPairAttribute(const Function &F, StringRef Name,
1526                         bool OnlyFirstRequired) {
1527   Attribute A = F.getFnAttribute(Name);
1528   if (!A.isStringAttribute())
1529     return std::nullopt;
1530 
1531   LLVMContext &Ctx = F.getContext();
1532   std::pair<unsigned, std::optional<unsigned>> Ints;
1533   std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1534   if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1535     Ctx.emitError("can't parse first integer attribute " + Name);
1536     return std::nullopt;
1537   }
1538   unsigned Second = 0;
1539   if (Strs.second.trim().getAsInteger(0, Second)) {
1540     if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1541       Ctx.emitError("can't parse second integer attribute " + Name);
1542       return std::nullopt;
1543     }
1544   } else {
1545     Ints.second = Second;
1546   }
1547 
1548   return Ints;
1549 }
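
     // Example (illustrative, not part of the upstream file): for a function
     // carrying the string attribute "amdgpu-flat-work-group-size"="1,256",
     // the defaulted overload above returns {1, 256}; if the value were just
     // "1" and OnlyFirstRequired were true, the optional-returning overload
     // would yield {1, std::nullopt} and the defaulted one {1, Default.second}.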
1550 
1551 SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1552                                              unsigned Size,
1553                                              unsigned DefaultVal) {
1554   std::optional<SmallVector<unsigned>> R =
1555       getIntegerVecAttribute(F, Name, Size);
1556   return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
1557 }
1558 
1559 std::optional<SmallVector<unsigned>>
1560 getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size) {
1561   assert(Size > 2);
1562   LLVMContext &Ctx = F.getContext();
1563 
1564   Attribute A = F.getFnAttribute(Name);
1565   if (!A.isValid())
1566     return std::nullopt;
1567   if (!A.isStringAttribute()) {
1568     Ctx.emitError(Name + " is not a string attribute");
1569     return std::nullopt;
1570   }
1571 
1572   SmallVector<unsigned> Vals(Size);
1573 
1574   StringRef S = A.getValueAsString();
1575   unsigned i = 0;
1576   for (; !S.empty() && i < Size; i++) {
1577     std::pair<StringRef, StringRef> Strs = S.split(',');
1578     unsigned IntVal;
1579     if (Strs.first.trim().getAsInteger(0, IntVal)) {
1580       Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
1581                     Name);
1582       return std::nullopt;
1583     }
1584     Vals[i] = IntVal;
1585     S = Strs.second;
1586   }
1587 
1588   if (!S.empty() || i < Size) {
1589     Ctx.emitError("attribute " + Name +
1590                   " has incorrect number of integers; expected " +
1591                   llvm::utostr(Size));
1592     return std::nullopt;
1593   }
1594   return Vals;
1595 }
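
     // Example (illustrative): with Size == 3, the attribute value must hold
     // exactly three integers, e.g. "16,8,1" parses to {16, 8, 1}, while both
     // "16,8" (too few) and "16,8,1,1" (too many) emit the "incorrect number
     // of integers" diagnostic and return std::nullopt.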
1596 
1597 unsigned getVmcntBitMask(const IsaVersion &Version) {
1598   return (1 << (getVmcntBitWidthLo(Version.Major) +
1599                 getVmcntBitWidthHi(Version.Major))) -
1600          1;
1601 }
1602 
1603 unsigned getLoadcntBitMask(const IsaVersion &Version) {
1604   return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1605 }
1606 
1607 unsigned getSamplecntBitMask(const IsaVersion &Version) {
1608   return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1609 }
1610 
1611 unsigned getBvhcntBitMask(const IsaVersion &Version) {
1612   return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1613 }
1614 
1615 unsigned getExpcntBitMask(const IsaVersion &Version) {
1616   return (1 << getExpcntBitWidth(Version.Major)) - 1;
1617 }
1618 
1619 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1620   return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1621 }
1622 
1623 unsigned getDscntBitMask(const IsaVersion &Version) {
1624   return (1 << getDscntBitWidth(Version.Major)) - 1;
1625 }
1626 
1627 unsigned getKmcntBitMask(const IsaVersion &Version) {
1628   return (1 << getKmcntBitWidth(Version.Major)) - 1;
1629 }
1630 
1631 unsigned getXcntBitMask(const IsaVersion &Version) {
1632   return (1 << getXcntBitWidth(Version.Major, Version.Minor)) - 1;
1633 }
1634 
1635 unsigned getStorecntBitMask(const IsaVersion &Version) {
1636   return (1 << getStorecntBitWidth(Version.Major)) - 1;
1637 }
1638 
1639 unsigned getWaitcntBitMask(const IsaVersion &Version) {
1640   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1641                                 getVmcntBitWidthLo(Version.Major));
1642   unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1643                                getExpcntBitWidth(Version.Major));
1644   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1645                                 getLgkmcntBitWidth(Version.Major));
1646   unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1647                                 getVmcntBitWidthHi(Version.Major));
1648   return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1649 }
1650 
1651 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1652   unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1653                                 getVmcntBitWidthLo(Version.Major));
1654   unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1655                                 getVmcntBitWidthHi(Version.Major));
1656   return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1657 }
1658 
1659 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1660   return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1661                     getExpcntBitWidth(Version.Major));
1662 }
1663 
1664 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1665   return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1666                     getLgkmcntBitWidth(Version.Major));
1667 }
1668 
1669 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1670                    unsigned &Expcnt, unsigned &Lgkmcnt) {
1671   Vmcnt = decodeVmcnt(Version, Waitcnt);
1672   Expcnt = decodeExpcnt(Version, Waitcnt);
1673   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1674 }
1675 
1676 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1677   Waitcnt Decoded;
1678   Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
1679   Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
1680   Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
1681   return Decoded;
1682 }
1683 
1684 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1685                      unsigned Vmcnt) {
1686   Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1687                      getVmcntBitWidthLo(Version.Major));
1688   return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1689                   getVmcntBitShiftHi(Version.Major),
1690                   getVmcntBitWidthHi(Version.Major));
1691 }
1692 
1693 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1694                       unsigned Expcnt) {
1695   return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1696                   getExpcntBitWidth(Version.Major));
1697 }
1698 
1699 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1700                        unsigned Lgkmcnt) {
1701   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1702                   getLgkmcntBitWidth(Version.Major));
1703 }
1704 
1705 unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1706                        unsigned Expcnt, unsigned Lgkmcnt) {
1707   unsigned Waitcnt = getWaitcntBitMask(Version);
1708   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1709   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1710   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1711   return Waitcnt;
1712 }
1713 
1714 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1715   return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
1716 }
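
     // Worked example (illustrative): for a Major == 9 IsaVersion, Vmcnt lives
     // in bits [3:0] plus the split-off high bits [15:14], Expcnt in bits
     // [6:4] and Lgkmcnt in bits [11:8], so with an assumed variable V9:
     //
     //   unsigned Enc = encodeWaitcnt(V9, /*Vmcnt=*/63, /*Expcnt=*/7,
     //                                /*Lgkmcnt=*/15); // Enc == 0xCF7F
     //   decodeVmcnt(V9, Enc) == 63 && decodeExpcnt(V9, Enc) == 7 &&
     //   decodeLgkmcnt(V9, Enc) == 15; // encode/decode round-trip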
1717 
1718 static unsigned getCombinedCountBitMask(const IsaVersion &Version,
1719                                         bool IsStore) {
1720   unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1721                               getDscntBitWidth(Version.Major));
1722   if (IsStore) {
1723     unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1724                                    getStorecntBitWidth(Version.Major));
1725     return Dscnt | Storecnt;
1726   }
1727   unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1728                                 getLoadcntBitWidth(Version.Major));
1729   return Dscnt | Loadcnt;
1730 }
1731 
1732 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
1733   Waitcnt Decoded;
1734   Decoded.LoadCnt =
1735       unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
1736                  getLoadcntBitWidth(Version.Major));
1737   Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
1738                              getDscntBitWidth(Version.Major));
1739   return Decoded;
1740 }
1741 
1742 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
1743   Waitcnt Decoded;
1744   Decoded.StoreCnt =
1745       unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
1746                  getStorecntBitWidth(Version.Major));
1747   Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
1748                              getDscntBitWidth(Version.Major));
1749   return Decoded;
1750 }
1751 
1752 static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1753                               unsigned Loadcnt) {
1754   return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1755                   getLoadcntBitWidth(Version.Major));
1756 }
1757 
1758 static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1759                                unsigned Storecnt) {
1760   return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1761                   getStorecntBitWidth(Version.Major));
1762 }
1763 
1764 static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1765                             unsigned Dscnt) {
1766   return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
1767                   getDscntBitWidth(Version.Major));
1768 }
1769 
1770 static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1771                                    unsigned Dscnt) {
1772   unsigned Waitcnt = getCombinedCountBitMask(Version, false);
1773   Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1774   Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1775   return Waitcnt;
1776 }
1777 
1778 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1779   return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
1780 }
1781 
1782 static unsigned encodeStorecntDscnt(const IsaVersion &Version,
1783                                     unsigned Storecnt, unsigned Dscnt) {
1784   unsigned Waitcnt = getCombinedCountBitMask(Version, true);
1785   Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1786   Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1787   return Waitcnt;
1788 }
1789 
1790 unsigned encodeStorecntDscnt(const IsaVersion &Version,
1791                              const Waitcnt &Decoded) {
1792   return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
1793 }
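
     // Example (illustrative): on targets with separate counters, a combined
     // loadcnt/dscnt operand is built and taken apart with the pair of
     // helpers above; with an assumed GFX12 IsaVersion V12:
     //
     //   Waitcnt W;
     //   W.LoadCnt = 2; // allow up to 2 outstanding loads
     //   W.DsCnt = 0;   // wait for all LDS operations
     //   unsigned Enc = encodeLoadcntDscnt(V12, W);
     //   Waitcnt Back = decodeLoadcntDscnt(V12, Enc); // Back.LoadCnt == 2,
     //                                                // Back.DsCnt == 0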
1794 
1795 //===----------------------------------------------------------------------===//
1796 // Custom Operand Values
1797 //===----------------------------------------------------------------------===//
1798 
1799 static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
1800                                                 int Size,
1801                                                 const MCSubtargetInfo &STI) {
1802   unsigned Enc = 0;
1803   for (int Idx = 0; Idx < Size; ++Idx) {
1804     const auto &Op = Opr[Idx];
1805     if (Op.isSupported(STI))
1806       Enc |= Op.encode(Op.Default);
1807   }
1808   return Enc;
1809 }
1810 
1811 static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
1812                                             int Size, unsigned Code,
1813                                             bool &HasNonDefaultVal,
1814                                             const MCSubtargetInfo &STI) {
1815   unsigned UsedOprMask = 0;
1816   HasNonDefaultVal = false;
1817   for (int Idx = 0; Idx < Size; ++Idx) {
1818     const auto &Op = Opr[Idx];
1819     if (!Op.isSupported(STI))
1820       continue;
1821     UsedOprMask |= Op.getMask();
1822     unsigned Val = Op.decode(Code);
1823     if (!Op.isValid(Val))
1824       return false;
1825     HasNonDefaultVal |= (Val != Op.Default);
1826   }
1827   return (Code & ~UsedOprMask) == 0;
1828 }
1829 
1830 static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1831                                 unsigned Code, int &Idx, StringRef &Name,
1832                                 unsigned &Val, bool &IsDefault,
1833                                 const MCSubtargetInfo &STI) {
1834   while (Idx < Size) {
1835     const auto &Op = Opr[Idx++];
1836     if (Op.isSupported(STI)) {
1837       Name = Op.Name;
1838       Val = Op.decode(Code);
1839       IsDefault = (Val == Op.Default);
1840       return true;
1841     }
1842   }
1843 
1844   return false;
1845 }
1846 
1847 static int encodeCustomOperandVal(const CustomOperandVal &Op,
1848                                   int64_t InputVal) {
1849   if (InputVal < 0 || InputVal > Op.Max)
1850     return OPR_VAL_INVALID;
1851   return Op.encode(InputVal);
1852 }
1853 
1854 static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
1855                                const StringRef Name, int64_t InputVal,
1856                                unsigned &UsedOprMask,
1857                                const MCSubtargetInfo &STI) {
1858   int InvalidId = OPR_ID_UNKNOWN;
1859   for (int Idx = 0; Idx < Size; ++Idx) {
1860     const auto &Op = Opr[Idx];
1861     if (Op.Name == Name) {
1862       if (!Op.isSupported(STI)) {
1863         InvalidId = OPR_ID_UNSUPPORTED;
1864         continue;
1865       }
1866       auto OprMask = Op.getMask();
1867       if (OprMask & UsedOprMask)
1868         return OPR_ID_DUPLICATE;
1869       UsedOprMask |= OprMask;
1870       return encodeCustomOperandVal(Op, InputVal);
1871     }
1872   }
1873   return InvalidId;
1874 }
1875 
1876 //===----------------------------------------------------------------------===//
1877 // DepCtr
1878 //===----------------------------------------------------------------------===//
1879 
1880 namespace DepCtr {
1881 
1882 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
1883   static int Default = -1;
1884   if (Default == -1)
1885     Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
1886   return Default;
1887 }
1888 
1889 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1890                               const MCSubtargetInfo &STI) {
1891   return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
1892                                          HasNonDefaultVal, STI);
1893 }
1894 
1895 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1896                   bool &IsDefault, const MCSubtargetInfo &STI) {
1897   return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
1898                              IsDefault, STI);
1899 }
1900 
1901 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1902                  const MCSubtargetInfo &STI) {
1903   return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
1904                              STI);
1905 }
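
     // Example (illustrative): when the assembler parses a field such as
     // "depctr_va_vdst(0)", it calls encodeDepCtr("depctr_va_vdst", 0,
     // UsedOprMask, STI). The return value is that field's encoded
     // contribution, or OPR_ID_UNKNOWN / OPR_ID_UNSUPPORTED /
     // OPR_ID_DUPLICATE on error; UsedOprMask accumulates the consumed bit
     // ranges so a field repeated within one operand is rejected.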
1906 
1907 unsigned decodeFieldVmVsrc(unsigned Encoded) {
1908   return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1909 }
1910 
1911 unsigned decodeFieldVaVdst(unsigned Encoded) {
1912   return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1913 }
1914 
1915 unsigned decodeFieldSaSdst(unsigned Encoded) {
1916   return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1917 }
1918 
1919 unsigned decodeFieldVaSdst(unsigned Encoded) {
1920   return unpackBits(Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
1921 }
1922 
1923 unsigned decodeFieldVaVcc(unsigned Encoded) {
1924   return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
1925 }
1926 
1927 unsigned decodeFieldVaSsrc(unsigned Encoded) {
1928   return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
1929 }
1930 
1931 unsigned decodeFieldHoldCnt(unsigned Encoded) {
1932   return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth());
1933 }
1934 
1935 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
1936   return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1937 }
1938 
1939 unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
1940   return encodeFieldVmVsrc(0xffff, VmVsrc);
1941 }
1942 
1943 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
1944   return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1945 }
1946 
1947 unsigned encodeFieldVaVdst(unsigned VaVdst) {
1948   return encodeFieldVaVdst(0xffff, VaVdst);
1949 }
1950 
1951 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
1952   return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1953 }
1954 
1955 unsigned encodeFieldSaSdst(unsigned SaSdst) {
1956   return encodeFieldSaSdst(0xffff, SaSdst);
1957 }
1958 
1959 unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) {
1960   return packBits(VaSdst, Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
1961 }
1962 
1963 unsigned encodeFieldVaSdst(unsigned VaSdst) {
1964   return encodeFieldVaSdst(0xffff, VaSdst);
1965 }
1966 
1967 unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) {
1968   return packBits(VaVcc, Encoded, getVaVccBitShift(), getVaVccBitWidth());
1969 }
1970 
1971 unsigned encodeFieldVaVcc(unsigned VaVcc) {
1972   return encodeFieldVaVcc(0xffff, VaVcc);
1973 }
1974 
1975 unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
1976   return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
1977 }
1978 
1979 unsigned encodeFieldVaSsrc(unsigned VaSsrc) {
1980   return encodeFieldVaSsrc(0xffff, VaSsrc);
1981 }
1982 
1983 unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) {
1984   return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth());
1985 }
1986 
1987 unsigned encodeFieldHoldCnt(unsigned HoldCnt) {
1988   return encodeFieldHoldCnt(0xffff, HoldCnt);
1989 }
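
     // Example (illustrative): the single-argument encodeField* helpers start
     // from 0xffff, i.e. every DepCtr field at its most permissive (no-wait)
     // value, and overwrite just one field:
     //
     //   unsigned Enc = encodeFieldVaVdst(0); // wait on all VALU vdst writes,
     //                                        // other fields left untouched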
1990 
1991 } // namespace DepCtr
1992 
1993 //===----------------------------------------------------------------------===//
1994 // exp tgt
1995 //===----------------------------------------------------------------------===//
1996 
1997 namespace Exp {
1998 
1999 struct ExpTgt {
2000   StringLiteral Name;
2001   unsigned Tgt;
2002   unsigned MaxIndex;
2003 };
2004 
2005 // clang-format off
2006 static constexpr ExpTgt ExpTgtInfo[] = {
2007     {{"null"},          ET_NULL,            ET_NULL_MAX_IDX},
2008     {{"mrtz"},          ET_MRTZ,            ET_MRTZ_MAX_IDX},
2009     {{"prim"},          ET_PRIM,            ET_PRIM_MAX_IDX},
2010     {{"mrt"},           ET_MRT0,            ET_MRT_MAX_IDX},
2011     {{"pos"},           ET_POS0,            ET_POS_MAX_IDX},
2012     {{"dual_src_blend"},ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
2013     {{"param"},         ET_PARAM0,          ET_PARAM_MAX_IDX},
2014 };
2015 // clang-format on
2016 
2017 bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
2018   for (const ExpTgt &Val : ExpTgtInfo) {
2019     if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2020       Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2021       Name = Val.Name;
2022       return true;
2023     }
2024   }
2025   return false;
2026 }
2027 
2028 unsigned getTgtId(const StringRef Name) {
2029 
2030   for (const ExpTgt &Val : ExpTgtInfo) {
2031     if (Val.MaxIndex == 0 && Name == Val.Name)
2032       return Val.Tgt;
2033 
2034     if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
2035       StringRef Suffix = Name.drop_front(Val.Name.size());
2036 
2037       unsigned Id;
2038       if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
2039         return ET_INVALID;
2040 
2041       // Disallow leading zeroes.
2042       if (Suffix.size() > 1 && Suffix[0] == '0')
2043         return ET_INVALID;
2044 
2045       return Val.Tgt + Id;
2046     }
2047   }
2048   return ET_INVALID;
2049 }
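
     // Example (illustrative): getTgtId("pos3") returns ET_POS0 + 3, and
     // getTgtName then recovers Name == "pos", Index == 3; "pos03" (leading
     // zero) and an index beyond the target's MaxIndex both yield ET_INVALID.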
2050 
2051 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
2052   switch (Id) {
2053   case ET_NULL:
2054     return !isGFX11Plus(STI);
2055   case ET_POS4:
2056   case ET_PRIM:
2057     return isGFX10Plus(STI);
2058   case ET_DUAL_SRC_BLEND0:
2059   case ET_DUAL_SRC_BLEND1:
2060     return isGFX11Plus(STI);
2061   default:
2062     if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
2063       return !isGFX11Plus(STI);
2064     return true;
2065   }
2066 }
2067 
2068 } // namespace Exp
2069 
2070 //===----------------------------------------------------------------------===//
2071 // MTBUF Format
2072 //===----------------------------------------------------------------------===//
2073 
2074 namespace MTBUFFormat {
2075 
2076 int64_t getDfmt(const StringRef Name) {
2077   for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
2078     if (Name == DfmtSymbolic[Id])
2079       return Id;
2080   }
2081   return DFMT_UNDEF;
2082 }
2083 
2084 StringRef getDfmtName(unsigned Id) {
2085   assert(Id <= DFMT_MAX);
2086   return DfmtSymbolic[Id];
2087 }
2088 
2089 static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
2090   if (isSI(STI) || isCI(STI))
2091     return NfmtSymbolicSICI;
2092   if (isVI(STI) || isGFX9(STI))
2093     return NfmtSymbolicVI;
2094   return NfmtSymbolicGFX10;
2095 }
2096 
2097 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
2098   const auto *lookupTable = getNfmtLookupTable(STI);
2099   for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
2100     if (Name == lookupTable[Id])
2101       return Id;
2102   }
2103   return NFMT_UNDEF;
2104 }
2105 
2106 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
2107   assert(Id <= NFMT_MAX);
2108   return getNfmtLookupTable(STI)[Id];
2109 }
2110 
2111 bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2112   unsigned Dfmt;
2113   unsigned Nfmt;
2114   decodeDfmtNfmt(Id, Dfmt, Nfmt);
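       // Dfmt is decoded but not checked; all dfmt encodings are accepted as
       // valid here.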
2115   return isValidNfmt(Nfmt, STI);
2116 }
2117 
2118 bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2119   return !getNfmtName(Id, STI).empty();
2120 }
2121 
2122 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
2123   return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
2124 }
2125 
2126 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
2127   Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
2128   Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
2129 }
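
     // Example (illustrative): encodeDfmtNfmt and decodeDfmtNfmt are inverses
     // for in-range inputs:
     //
     //   unsigned D, N;
     //   decodeDfmtNfmt(encodeDfmtNfmt(Dfmt, Nfmt), D, N); // D == Dfmt,
     //                                                     // N == Nfmt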
2130 
2131 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
2132   if (isGFX11Plus(STI)) {
2133     for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2134       if (Name == UfmtSymbolicGFX11[Id])
2135         return Id;
2136     }
2137   } else {
2138     for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2139       if (Name == UfmtSymbolicGFX10[Id])
2140         return Id;
2141     }
2142   }
2143   return UFMT_UNDEF;
2144 }
2145 
2146 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
2147   if (isValidUnifiedFormat(Id, STI))
2148     return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
2149   return "";
2150 }
2151 
2152 bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
2153   return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
2154 }
2155 
2156 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
2157                              const MCSubtargetInfo &STI) {
2158   int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
2159   if (isGFX11Plus(STI)) {
2160     for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2161       if (Fmt == DfmtNfmt2UFmtGFX11[Id])
2162         return Id;
2163     }
2164   } else {
2165     for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2166       if (Fmt == DfmtNfmt2UFmtGFX10[Id])
2167         return Id;
2168     }
2169   }
2170   return UFMT_UNDEF;
2171 }
2172 
2173 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
2174   return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
2175 }
2176 
2177 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
2178   if (isGFX10Plus(STI))
2179     return UFMT_DEFAULT;
2180   return DFMT_NFMT_DEFAULT;
2181 }
2182 
2183 } // namespace MTBUFFormat
2184 
2185 //===----------------------------------------------------------------------===//
2186 // SendMsg
2187 //===----------------------------------------------------------------------===//
2188 
2189 namespace SendMsg {
2190 
2191 static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
2192   return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
2193 }
2194 
2195 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
2196   return (MsgId & ~(getMsgIdMask(STI))) == 0;
2197 }
2198 
2199 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
2200                   bool Strict) {
2201   assert(isValidMsgId(MsgId, STI));
2202 
2203   if (!Strict)
2204     return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
2205 
2206   if (msgRequiresOp(MsgId, STI)) {
2207     if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
2208       return false;
2209 
2210     return !getMsgOpName(MsgId, OpId, STI).empty();
2211   }
2212 
2213   return OpId == OP_NONE_;
2214 }
2215 
2216 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
2217                       const MCSubtargetInfo &STI, bool Strict) {
2218   assert(isValidMsgOp(MsgId, OpId, STI, Strict));
2219 
2220   if (!Strict)
2221     return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
2222 
2223   if (!isGFX11Plus(STI)) {
2224     switch (MsgId) {
2225     case ID_GS_PreGFX11:
2226       return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
2227     case ID_GS_DONE_PreGFX11:
2228       return (OpId == OP_GS_NOP)
2229                  ? (StreamId == STREAM_ID_NONE_)
2230                  : (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
2231     }
2232   }
2233   return StreamId == STREAM_ID_NONE_;
2234 }
2235 
2236 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2237   return MsgId == ID_SYSMSG ||
2238          (!isGFX11Plus(STI) &&
2239           (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2240 }
2241 
2242 bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2243                        const MCSubtargetInfo &STI) {
2244   return !isGFX11Plus(STI) &&
2245          (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2246          OpId != OP_GS_NOP;
2247 }
2248 
2249 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2250                uint16_t &StreamId, const MCSubtargetInfo &STI) {
2251   MsgId = Val & getMsgIdMask(STI);
2252   if (isGFX11Plus(STI)) {
2253     OpId = 0;
2254     StreamId = 0;
2255   } else {
2256     OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2257     StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
2258   }
2259 }
2260 
2261 uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) {
2262   return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2263 }
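
     // Example (illustrative, pre-GFX11 layout): an operand written as
     // sendmsg(MSG_GS, GS_OP_EMIT, 1) is packed with encodeMsg(MsgId, OpId,
     // StreamId) and recovered by decodeMsg; on GFX11+ the op and stream
     // fields no longer exist, so decodeMsg forces both to 0.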
2264 
2265 } // namespace SendMsg
2266 
2267 //===----------------------------------------------------------------------===//
2268 // Attribute, subtarget, register and literal queries
2269 //===----------------------------------------------------------------------===//
2270 
2271 unsigned getInitialPSInputAddr(const Function &F) {
2272   return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2273 }
2274 
2275 bool getHasColorExport(const Function &F) {
2276   // As a safe default always respond as if PS has color exports.
2277   return F.getFnAttributeAsParsedInteger(
2278              "amdgpu-color-export",
2279              F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2280 }
2281 
2282 bool getHasDepthExport(const Function &F) {
2283   return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2284 }
2285 
2286 unsigned getDynamicVGPRBlockSize(const Function &F) {
2287   unsigned BlockSize =
2288       F.getFnAttributeAsParsedInteger("amdgpu-dynamic-vgpr-block-size", 0);
2289 
2290   if (BlockSize == 16 || BlockSize == 32)
2291     return BlockSize;
2292 
2293   return 0;
2294 }
2295 
2296 bool hasXNACK(const MCSubtargetInfo &STI) {
2297   return STI.hasFeature(AMDGPU::FeatureXNACK);
2298 }
2299 
2300 bool hasSRAMECC(const MCSubtargetInfo &STI) {
2301   return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2302 }
2303 
2304 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2305   return STI.hasFeature(AMDGPU::FeatureMIMG_R128) &&
2306          !STI.hasFeature(AMDGPU::FeatureR128A16);
2307 }
2308 
2309 bool hasA16(const MCSubtargetInfo &STI) {
2310   return STI.hasFeature(AMDGPU::FeatureA16);
2311 }
2312 
2313 bool hasG16(const MCSubtargetInfo &STI) {
2314   return STI.hasFeature(AMDGPU::FeatureG16);
2315 }
2316 
2317 bool hasPackedD16(const MCSubtargetInfo &STI) {
2318   return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2319          !isSI(STI);
2320 }
2321 
2322 bool hasGDS(const MCSubtargetInfo &STI) {
2323   return STI.hasFeature(AMDGPU::FeatureGDS);
2324 }
2325 
2326 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2327   auto Version = getIsaVersion(STI.getCPU());
2328   if (Version.Major == 10)
2329     return Version.Minor >= 3 ? 13 : 5;
2330   if (Version.Major == 11)
2331     return 5;
2332   if (Version.Major >= 12)
2333     return HasSampler ? 4 : 5;
2334   return 0;
2335 }
2336 
2337 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2338 
2339 bool isSI(const MCSubtargetInfo &STI) {
2340   return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2341 }
2342 
2343 bool isCI(const MCSubtargetInfo &STI) {
2344   return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2345 }
2346 
2347 bool isVI(const MCSubtargetInfo &STI) {
2348   return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2349 }
2350 
2351 bool isGFX9(const MCSubtargetInfo &STI) {
2352   return STI.hasFeature(AMDGPU::FeatureGFX9);
2353 }
2354 
2355 bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2356   return isGFX9(STI) || isGFX10(STI);
2357 }
2358 
2359 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2360   return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2361 }
2362 
2363 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2364   return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2365 }
2366 
2367 bool isGFX8Plus(const MCSubtargetInfo &STI) {
2368   return isVI(STI) || isGFX9Plus(STI);
2369 }
2370 
2371 bool isGFX9Plus(const MCSubtargetInfo &STI) {
2372   return isGFX9(STI) || isGFX10Plus(STI);
2373 }
2374 
2375 bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2376 
2377 bool isGFX10(const MCSubtargetInfo &STI) {
2378   return STI.hasFeature(AMDGPU::FeatureGFX10);
2379 }
2380 
2381 bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2382   return isGFX10(STI) || isGFX11(STI);
2383 }
2384 
2385 bool isGFX10Plus(const MCSubtargetInfo &STI) {
2386   return isGFX10(STI) || isGFX11Plus(STI);
2387 }
2388 
2389 bool isGFX11(const MCSubtargetInfo &STI) {
2390   return STI.hasFeature(AMDGPU::FeatureGFX11);
2391 }
2392 
2393 bool isGFX11Plus(const MCSubtargetInfo &STI) {
2394   return isGFX11(STI) || isGFX12Plus(STI);
2395 }
2396 
2397 bool isGFX12(const MCSubtargetInfo &STI) {
2398   return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2399 }
2400 
2401 bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
2402 
2403 bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2404 
2405 bool isGFX1250(const MCSubtargetInfo &STI) {
2406   return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
2407 }
2408 
2409 bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); }
2410 
2411 bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2412   return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2413 }
2414 
2415 bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2416   return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2417 }
2418 
2419 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2420   return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2421 }
2422 
2423 bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2424   return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2425 }
2426 
2427 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2428   return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2429 }
2430 
2431 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2432   return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2433 }
2434 
2435 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2436   return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2437 }
2438 
2439 bool isGFX90A(const MCSubtargetInfo &STI) {
2440   return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2441 }
2442 
2443 bool isGFX940(const MCSubtargetInfo &STI) {
2444   return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2445 }
2446 
2447 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2448   return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2449 }
2450 
2451 bool hasMAIInsts(const MCSubtargetInfo &STI) {
2452   return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2453 }
2454 
2455 bool hasVOPD(const MCSubtargetInfo &STI) {
2456   return STI.hasFeature(AMDGPU::FeatureVOPD);
2457 }
2458 
2459 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2460   return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2461 }
2462 
2463 unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2464   return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2465 }
2466 
2467 int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2468                          int32_t ArgNumVGPR) {
2469   if (has90AInsts && ArgNumAGPR)
2470     return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2471   return std::max(ArgNumVGPR, ArgNumAGPR);
2472 }
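
     // Worked example (illustrative): with has90AInsts == true, ArgNumVGPR ==
     // 5 and ArgNumAGPR == 3 give alignTo(5, 4) + 3 == 8 + 3 == 11, because
     // AGPRs are placed after the VGPR count rounded up to a 4-register
     // boundary; without 90A instructions the two files are separate and the
     // result is max(5, 3) == 5.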
2473 
2474 bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI) {
2475   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2476   const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2477   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2478          Reg == AMDGPU::SCC;
2479 }
2480 
2481 bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI) {
2482   return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI16;
2483 }
2484 
2485 #define MAP_REG2REG                                                                                            \
2486   using namespace AMDGPU;                                                                                      \
2487   switch (Reg.id()) {                                                                                          \
2488   default:                                                                                                     \
2489     return Reg;                                                                                                \
2490     CASE_CI_VI(FLAT_SCR)                                                                                       \
2491     CASE_CI_VI(FLAT_SCR_LO)                                                                                    \
2492     CASE_CI_VI(FLAT_SCR_HI)                                                                                    \
2493     CASE_VI_GFX9PLUS(TTMP0)                                                                                    \
2494     CASE_VI_GFX9PLUS(TTMP1)                                                                                    \
2495     CASE_VI_GFX9PLUS(TTMP2)                                                                                    \
2496     CASE_VI_GFX9PLUS(TTMP3)                                                                                    \
2497     CASE_VI_GFX9PLUS(TTMP4)                                                                                    \
2498     CASE_VI_GFX9PLUS(TTMP5)                                                                                    \
2499     CASE_VI_GFX9PLUS(TTMP6)                                                                                    \
2500     CASE_VI_GFX9PLUS(TTMP7)                                                                                    \
2501     CASE_VI_GFX9PLUS(TTMP8)                                                                                    \
2502     CASE_VI_GFX9PLUS(TTMP9)                                                                                    \
2503     CASE_VI_GFX9PLUS(TTMP10)                                                                                   \
2504     CASE_VI_GFX9PLUS(TTMP11)                                                                                   \
2505     CASE_VI_GFX9PLUS(TTMP12)                                                                                   \
2506     CASE_VI_GFX9PLUS(TTMP13)                                                                                   \
2507     CASE_VI_GFX9PLUS(TTMP14)                                                                                   \
2508     CASE_VI_GFX9PLUS(TTMP15)                                                                                   \
2509     CASE_VI_GFX9PLUS(TTMP0_TTMP1)                                                                              \
2510     CASE_VI_GFX9PLUS(TTMP2_TTMP3)                                                                              \
2511     CASE_VI_GFX9PLUS(TTMP4_TTMP5)                                                                              \
2512     CASE_VI_GFX9PLUS(TTMP6_TTMP7)                                                                              \
2513     CASE_VI_GFX9PLUS(TTMP8_TTMP9)                                                                              \
2514     CASE_VI_GFX9PLUS(TTMP10_TTMP11)                                                                            \
2515     CASE_VI_GFX9PLUS(TTMP12_TTMP13)                                                                            \
2516     CASE_VI_GFX9PLUS(TTMP14_TTMP15)                                                                            \
2517     CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3)                                                                  \
2518     CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7)                                                                  \
2519     CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11)                                                                \
2520     CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15)                                                              \
2521     CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7)                                          \
2522     CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11)                                        \
2523     CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15)                                    \
2524     CASE_VI_GFX9PLUS(                                                                                          \
2525         TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2526     CASE_GFXPRE11_GFX11PLUS(M0)                                                                                \
2527     CASE_GFXPRE11_GFX11PLUS(SGPR_NULL)                                                                         \
2528     CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL)                                                         \
2529   }
2530 
2531 #define CASE_CI_VI(node)                                                       \
2532   assert(!isSI(STI));                                                          \
2533   case node:                                                                   \
2534     return isCI(STI) ? node##_ci : node##_vi;
2535 
2536 #define CASE_VI_GFX9PLUS(node)                                                 \
2537   case node:                                                                   \
2538     return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2539 
2540 #define CASE_GFXPRE11_GFX11PLUS(node)                                          \
2541   case node:                                                                   \
2542     return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2543 
2544 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result)                               \
2545   case node:                                                                   \
2546     return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2547 
2548 MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) {
2549   if (STI.getTargetTriple().getArch() == Triple::r600)
2550     return Reg;
2551   MAP_REG2REG
2552 }
2553 
2554 #undef CASE_CI_VI
2555 #undef CASE_VI_GFX9PLUS
2556 #undef CASE_GFXPRE11_GFX11PLUS
2557 #undef CASE_GFXPRE11_GFX11PLUS_TO
2558 
2559 #define CASE_CI_VI(node)                                                       \
2560   case node##_ci:                                                              \
2561   case node##_vi:                                                              \
2562     return node;
2563 #define CASE_VI_GFX9PLUS(node)                                                 \
2564   case node##_vi:                                                              \
2565   case node##_gfx9plus:                                                        \
2566     return node;
2567 #define CASE_GFXPRE11_GFX11PLUS(node)                                          \
2568   case node##_gfx11plus:                                                       \
2569   case node##_gfxpre11:                                                        \
2570     return node;
2571 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2572 
2573 MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG }
2574 
2575 bool isInlineValue(unsigned Reg) {
2576   switch (Reg) {
2577   case AMDGPU::SRC_SHARED_BASE_LO:
2578   case AMDGPU::SRC_SHARED_BASE:
2579   case AMDGPU::SRC_SHARED_LIMIT_LO:
2580   case AMDGPU::SRC_SHARED_LIMIT:
2581   case AMDGPU::SRC_PRIVATE_BASE_LO:
2582   case AMDGPU::SRC_PRIVATE_BASE:
2583   case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2584   case AMDGPU::SRC_PRIVATE_LIMIT:
2585   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2586     return true;
2587   case AMDGPU::SRC_VCCZ:
2588   case AMDGPU::SRC_EXECZ:
2589   case AMDGPU::SRC_SCC:
2590     return true;
2591   case AMDGPU::SGPR_NULL:
2592     return true;
2593   default:
2594     return false;
2595   }
2596 }
2597 
2598 #undef CASE_CI_VI
2599 #undef CASE_VI_GFX9PLUS
2600 #undef CASE_GFXPRE11_GFX11PLUS
2601 #undef CASE_GFXPRE11_GFX11PLUS_TO
2602 #undef MAP_REG2REG
2603 
2604 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2605   assert(OpNo < Desc.NumOperands);
2606   unsigned OpType = Desc.operands()[OpNo].OperandType;
2607   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2608          OpType <= AMDGPU::OPERAND_SRC_LAST;
2609 }
2610 
2611 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2612   assert(OpNo < Desc.NumOperands);
2613   unsigned OpType = Desc.operands()[OpNo].OperandType;
2614   return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2615          OpType <= AMDGPU::OPERAND_KIMM_LAST;
2616 }
2617 
2618 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2619   assert(OpNo < Desc.NumOperands);
2620   unsigned OpType = Desc.operands()[OpNo].OperandType;
2621   switch (OpType) {
2622   case AMDGPU::OPERAND_REG_IMM_FP32:
2623   case AMDGPU::OPERAND_REG_IMM_FP64:
2624   case AMDGPU::OPERAND_REG_IMM_FP16:
2625   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2626   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2627   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2628   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2629   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2630   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2631   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2632   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2633     return true;
2634   default:
2635     return false;
2636   }
2637 }
2638 
2639 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2640   assert(OpNo < Desc.NumOperands);
2641   unsigned OpType = Desc.operands()[OpNo].OperandType;
2642   return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2643           OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2644          (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2645           OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2646 }
2647 
2648 // Avoid using MCRegisterClass::getSize, since that function will go away
2649 // (move from MC* level to Target* level). Return size in bits.
2650 unsigned getRegBitWidth(unsigned RCID) {
2651   switch (RCID) {
2652   case AMDGPU::VGPR_16RegClassID:
2653   case AMDGPU::VGPR_16_Lo128RegClassID:
2654   case AMDGPU::SGPR_LO16RegClassID:
2655   case AMDGPU::AGPR_LO16RegClassID:
2656     return 16;
2657   case AMDGPU::SGPR_32RegClassID:
2658   case AMDGPU::VGPR_32RegClassID:
2659   case AMDGPU::VRegOrLds_32RegClassID:
2660   case AMDGPU::AGPR_32RegClassID:
2661   case AMDGPU::VS_32RegClassID:
2662   case AMDGPU::AV_32RegClassID:
2663   case AMDGPU::SReg_32RegClassID:
2664   case AMDGPU::SReg_32_XM0RegClassID:
2665   case AMDGPU::SRegOrLds_32RegClassID:
2666     return 32;
2667   case AMDGPU::SGPR_64RegClassID:
2668   case AMDGPU::VS_64RegClassID:
2669   case AMDGPU::SReg_64RegClassID:
2670   case AMDGPU::VReg_64RegClassID:
2671   case AMDGPU::AReg_64RegClassID:
2672   case AMDGPU::SReg_64_XEXECRegClassID:
2673   case AMDGPU::VReg_64_Align2RegClassID:
2674   case AMDGPU::AReg_64_Align2RegClassID:
2675   case AMDGPU::AV_64RegClassID:
2676   case AMDGPU::AV_64_Align2RegClassID:
2677     return 64;
2678   case AMDGPU::SGPR_96RegClassID:
2679   case AMDGPU::SReg_96RegClassID:
2680   case AMDGPU::VReg_96RegClassID:
2681   case AMDGPU::AReg_96RegClassID:
2682   case AMDGPU::VReg_96_Align2RegClassID:
2683   case AMDGPU::AReg_96_Align2RegClassID:
2684   case AMDGPU::AV_96RegClassID:
2685   case AMDGPU::AV_96_Align2RegClassID:
2686     return 96;
2687   case AMDGPU::SGPR_128RegClassID:
2688   case AMDGPU::SReg_128RegClassID:
2689   case AMDGPU::VReg_128RegClassID:
2690   case AMDGPU::AReg_128RegClassID:
2691   case AMDGPU::VReg_128_Align2RegClassID:
2692   case AMDGPU::AReg_128_Align2RegClassID:
2693   case AMDGPU::AV_128RegClassID:
2694   case AMDGPU::AV_128_Align2RegClassID:
2695   case AMDGPU::SReg_128_XNULLRegClassID:
2696     return 128;
2697   case AMDGPU::SGPR_160RegClassID:
2698   case AMDGPU::SReg_160RegClassID:
2699   case AMDGPU::VReg_160RegClassID:
2700   case AMDGPU::AReg_160RegClassID:
2701   case AMDGPU::VReg_160_Align2RegClassID:
2702   case AMDGPU::AReg_160_Align2RegClassID:
2703   case AMDGPU::AV_160RegClassID:
2704   case AMDGPU::AV_160_Align2RegClassID:
2705     return 160;
2706   case AMDGPU::SGPR_192RegClassID:
2707   case AMDGPU::SReg_192RegClassID:
2708   case AMDGPU::VReg_192RegClassID:
2709   case AMDGPU::AReg_192RegClassID:
2710   case AMDGPU::VReg_192_Align2RegClassID:
2711   case AMDGPU::AReg_192_Align2RegClassID:
2712   case AMDGPU::AV_192RegClassID:
2713   case AMDGPU::AV_192_Align2RegClassID:
2714     return 192;
2715   case AMDGPU::SGPR_224RegClassID:
2716   case AMDGPU::SReg_224RegClassID:
2717   case AMDGPU::VReg_224RegClassID:
2718   case AMDGPU::AReg_224RegClassID:
2719   case AMDGPU::VReg_224_Align2RegClassID:
2720   case AMDGPU::AReg_224_Align2RegClassID:
2721   case AMDGPU::AV_224RegClassID:
2722   case AMDGPU::AV_224_Align2RegClassID:
2723     return 224;
2724   case AMDGPU::SGPR_256RegClassID:
2725   case AMDGPU::SReg_256RegClassID:
2726   case AMDGPU::VReg_256RegClassID:
2727   case AMDGPU::AReg_256RegClassID:
2728   case AMDGPU::VReg_256_Align2RegClassID:
2729   case AMDGPU::AReg_256_Align2RegClassID:
2730   case AMDGPU::AV_256RegClassID:
2731   case AMDGPU::AV_256_Align2RegClassID:
2732   case AMDGPU::SReg_256_XNULLRegClassID:
2733     return 256;
2734   case AMDGPU::SGPR_288RegClassID:
2735   case AMDGPU::SReg_288RegClassID:
2736   case AMDGPU::VReg_288RegClassID:
2737   case AMDGPU::AReg_288RegClassID:
2738   case AMDGPU::VReg_288_Align2RegClassID:
2739   case AMDGPU::AReg_288_Align2RegClassID:
2740   case AMDGPU::AV_288RegClassID:
2741   case AMDGPU::AV_288_Align2RegClassID:
2742     return 288;
2743   case AMDGPU::SGPR_320RegClassID:
2744   case AMDGPU::SReg_320RegClassID:
2745   case AMDGPU::VReg_320RegClassID:
2746   case AMDGPU::AReg_320RegClassID:
2747   case AMDGPU::VReg_320_Align2RegClassID:
2748   case AMDGPU::AReg_320_Align2RegClassID:
2749   case AMDGPU::AV_320RegClassID:
2750   case AMDGPU::AV_320_Align2RegClassID:
2751     return 320;
2752   case AMDGPU::SGPR_352RegClassID:
2753   case AMDGPU::SReg_352RegClassID:
2754   case AMDGPU::VReg_352RegClassID:
2755   case AMDGPU::AReg_352RegClassID:
2756   case AMDGPU::VReg_352_Align2RegClassID:
2757   case AMDGPU::AReg_352_Align2RegClassID:
2758   case AMDGPU::AV_352RegClassID:
2759   case AMDGPU::AV_352_Align2RegClassID:
2760     return 352;
2761   case AMDGPU::SGPR_384RegClassID:
2762   case AMDGPU::SReg_384RegClassID:
2763   case AMDGPU::VReg_384RegClassID:
2764   case AMDGPU::AReg_384RegClassID:
2765   case AMDGPU::VReg_384_Align2RegClassID:
2766   case AMDGPU::AReg_384_Align2RegClassID:
2767   case AMDGPU::AV_384RegClassID:
2768   case AMDGPU::AV_384_Align2RegClassID:
2769     return 384;
2770   case AMDGPU::SGPR_512RegClassID:
2771   case AMDGPU::SReg_512RegClassID:
2772   case AMDGPU::VReg_512RegClassID:
2773   case AMDGPU::AReg_512RegClassID:
2774   case AMDGPU::VReg_512_Align2RegClassID:
2775   case AMDGPU::AReg_512_Align2RegClassID:
2776   case AMDGPU::AV_512RegClassID:
2777   case AMDGPU::AV_512_Align2RegClassID:
2778     return 512;
2779   case AMDGPU::SGPR_1024RegClassID:
2780   case AMDGPU::SReg_1024RegClassID:
2781   case AMDGPU::VReg_1024RegClassID:
2782   case AMDGPU::AReg_1024RegClassID:
2783   case AMDGPU::VReg_1024_Align2RegClassID:
2784   case AMDGPU::AReg_1024_Align2RegClassID:
2785   case AMDGPU::AV_1024RegClassID:
2786   case AMDGPU::AV_1024_Align2RegClassID:
2787     return 1024;
2788   default:
2789     llvm_unreachable("Unexpected register class");
2790   }
2791 }
2792 
2793 unsigned getRegBitWidth(const MCRegisterClass &RC) {
2794   return getRegBitWidth(RC.getID());
2795 }
2796 
2797 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2798                            unsigned OpNo) {
2799   assert(OpNo < Desc.NumOperands);
2800   unsigned RCID = Desc.operands()[OpNo].RegClass;
2801   return getRegBitWidth(RCID) / 8;
2802 }
2803 
2804 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2805   if (isInlinableIntLiteral(Literal))
2806     return true;
2807 
2808   uint64_t Val = static_cast<uint64_t>(Literal);
2809   return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2810          (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2811          (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2812          (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2813          (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2814          (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2815          (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2816          (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2817          (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2818          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2819 }
2820 
2821 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2822   if (isInlinableIntLiteral(Literal))
2823     return true;
2824 
2825   // The actual type of the operand does not seem to matter as long
2826   // as the bits match one of the inline immediate values.  For example:
2827   //
2828   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2829   // so it is a legal inline immediate.
2830   //
2831   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2832   // floating-point, so it is a legal inline immediate.
2833 
2834   uint32_t Val = static_cast<uint32_t>(Literal);
2835   return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2836          (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2837          (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2838          (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2839          (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2840          (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2841          (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2842          (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2843          (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2844          (Val == 0x3e22f983 && HasInv2Pi);
2845 }
2846 
2847 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
2848   if (!HasInv2Pi)
2849     return false;
2850   if (isInlinableIntLiteral(Literal))
2851     return true;
2852   uint16_t Val = static_cast<uint16_t>(Literal);
2853   return Val == 0x3F00 || // 0.5
2854          Val == 0xBF00 || // -0.5
2855          Val == 0x3F80 || // 1.0
2856          Val == 0xBF80 || // -1.0
2857          Val == 0x4000 || // 2.0
2858          Val == 0xC000 || // -2.0
2859          Val == 0x4080 || // 4.0
2860          Val == 0xC080 || // -4.0
2861          Val == 0x3E22;   // 1.0 / (2.0 * pi)
2862 }
2863 
2864 bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
2865   return isInlinableLiteral32(Literal, HasInv2Pi);
2866 }
2867 
2868 bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
2869   if (!HasInv2Pi)
2870     return false;
2871   if (isInlinableIntLiteral(Literal))
2872     return true;
2873   uint16_t Val = static_cast<uint16_t>(Literal);
2874   return Val == 0x3C00 || // 1.0
2875          Val == 0xBC00 || // -1.0
2876          Val == 0x3800 || // 0.5
2877          Val == 0xB800 || // -0.5
2878          Val == 0x4000 || // 2.0
2879          Val == 0xC000 || // -2.0
2880          Val == 0x4400 || // 4.0
2881          Val == 0xC400 || // -4.0
2882          Val == 0x3118;   // 1/2pi
2883 }
2884 
2885 std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
2886   // Unfortunately, the Instruction Set Architecture Reference Guide is
2887   // misleading about how the inline operands work for (packed) 16-bit
2888   // instructions. In a nutshell, the actual HW behavior is:
2889   //
2890   //  - integer encodings (-16 .. 64) are always produced as sign-extended
2891   //    32-bit values
2892   //  - float encodings are produced as:
2893   //    - for F16 instructions: corresponding half-precision float values in
2894   //      the LSBs, 0 in the MSBs
2895   //    - for UI16 instructions: corresponding single-precision float value
2896   int32_t Signed = static_cast<int32_t>(Literal);
2897   if (Signed >= 0 && Signed <= 64)
2898     return 128 + Signed;
2899 
2900   if (Signed >= -16 && Signed <= -1)
2901     return 192 + std::abs(Signed);
2902 
2903   if (IsFloat) {
2904     // clang-format off
2905     switch (Literal) {
2906     case 0x3800: return 240; // 0.5
2907     case 0xB800: return 241; // -0.5
2908     case 0x3C00: return 242; // 1.0
2909     case 0xBC00: return 243; // -1.0
2910     case 0x4000: return 244; // 2.0
2911     case 0xC000: return 245; // -2.0
2912     case 0x4400: return 246; // 4.0
2913     case 0xC400: return 247; // -4.0
2914     case 0x3118: return 248; // 1.0 / (2.0 * pi)
2915     default: break;
2916     }
2917     // clang-format on
2918   } else {
2919     // clang-format off
2920     switch (Literal) {
2921     case 0x3F000000: return 240; // 0.5
2922     case 0xBF000000: return 241; // -0.5
2923     case 0x3F800000: return 242; // 1.0
2924     case 0xBF800000: return 243; // -1.0
2925     case 0x40000000: return 244; // 2.0
2926     case 0xC0000000: return 245; // -2.0
2927     case 0x40800000: return 246; // 4.0
2928     case 0xC0800000: return 247; // -4.0
2929     case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
2930     default: break;
2931     }
2932     // clang-format on
2933   }
2934 
2935   return {};
2936 }
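
     // Worked examples (illustrative): getInlineEncodingV216(true, 0x3C00) ==
     // 242 (half-precision 1.0); getInlineEncodingV216(false, 5) == 133
     // (128 + 5, small-integer range); a literal of 0xFFFFFFF0 (-16 as
     // int32_t) maps to 192 + 16 == 208; anything else returns std::nullopt
     // and must be emitted as a real literal.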
2937 
2938 // Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
2939 // or nullopt.
2940 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
2941   return getInlineEncodingV216(false, Literal);
2942 }
2943 
2944 // Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
2945 // or nullopt.
2946 std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
2947   int32_t Signed = static_cast<int32_t>(Literal);
2948   if (Signed >= 0 && Signed <= 64)
2949     return 128 + Signed;
2950 
2951   if (Signed >= -16 && Signed <= -1)
2952     return 192 + std::abs(Signed);
2953 
2954   // clang-format off
2955   switch (Literal) {
2956   case 0x3F00: return 240; // 0.5
2957   case 0xBF00: return 241; // -0.5
2958   case 0x3F80: return 242; // 1.0
2959   case 0xBF80: return 243; // -1.0
2960   case 0x4000: return 244; // 2.0
2961   case 0xC000: return 245; // -2.0
2962   case 0x4080: return 246; // 4.0
2963   case 0xC080: return 247; // -4.0
2964   case 0x3E22: return 248; // 1.0 / (2.0 * pi)
2965   default: break;
2966   }
2967   // clang-format on
2968 
2969   return std::nullopt;
2970 }
2971 
2972 // Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
2973 // or nullopt.
2974 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
2975   return getInlineEncodingV216(true, Literal);
2976 }
2977 
2978 // Whether the given literal can be inlined for a V_PK_* instruction.
2979 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
2980   switch (OpType) {
2981   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2982   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2983     return getInlineEncodingV216(false, Literal).has_value();
2984   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2985   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2986     return getInlineEncodingV216(true, Literal).has_value();
2987   case AMDGPU::OPERAND_REG_IMM_V2BF16:
2988   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2989     return isInlinableLiteralV2BF16(Literal);
2990   default:
2991     llvm_unreachable("bad packed operand type");
2992   }
2993 }
2994 
2995 // Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
2996 bool isInlinableLiteralV2I16(uint32_t Literal) {
2997   return getInlineEncodingV2I16(Literal).has_value();
2998 }
2999 
3000 // Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
3001 bool isInlinableLiteralV2BF16(uint32_t Literal) {
3002   return getInlineEncodingV2BF16(Literal).has_value();
3003 }
3004 
3005 // Whether the given literal can be inlined for a V_PK_*_F16 instruction.
3006 bool isInlinableLiteralV2F16(uint32_t Literal) {
3007   return getInlineEncodingV2F16(Literal).has_value();
3008 }
3009 
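/// \returns true if \p Val can be encoded as a 32-bit literal operand. For
/// FP64 operands the hardware places the 32-bit literal in the high half of
/// the 64-bit value, so the low 32 bits of \p Val must be zero.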
3010 bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
3011   if (IsFP64)
3012     return !Lo_32(Val);
3013 
3014   return isUInt<32>(Val) || isInt<32>(Val);
3015 }
3016 
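/// \returns true if argument \p A of its parent function is passed in an
/// SGPR, and is therefore uniform across the wavefront.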
3017 bool isArgPassedInSGPR(const Argument *A) {
3018   const Function *F = A->getParent();
3019 
3020   // Arguments to compute shaders are never a source of divergence.
3021   CallingConv::ID CC = F->getCallingConv();
3022   switch (CC) {
3023   case CallingConv::AMDGPU_KERNEL:
3024   case CallingConv::SPIR_KERNEL:
3025     return true;
3026   case CallingConv::AMDGPU_VS:
3027   case CallingConv::AMDGPU_LS:
3028   case CallingConv::AMDGPU_HS:
3029   case CallingConv::AMDGPU_ES:
3030   case CallingConv::AMDGPU_GS:
3031   case CallingConv::AMDGPU_PS:
3032   case CallingConv::AMDGPU_CS:
3033   case CallingConv::AMDGPU_Gfx:
3034   case CallingConv::AMDGPU_CS_Chain:
3035   case CallingConv::AMDGPU_CS_ChainPreserve:
3036     // For non-compute shaders, SGPR inputs are marked with either inreg or
3037     // byval. Everything else is in VGPRs.
3038     return A->hasAttribute(Attribute::InReg) ||
3039            A->hasAttribute(Attribute::ByVal);
3040   default:
3041     // TODO: treat i1 as divergent?
3042     return A->hasAttribute(Attribute::InReg);
3043   }
3044 }
3045 
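/// Call-site overload: \returns true if operand \p ArgNo of \p CB is passed
/// in an SGPR.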
3046 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
3047   // Arguments to compute shaders are never a source of divergence.
3048   CallingConv::ID CC = CB->getCallingConv();
3049   switch (CC) {
3050   case CallingConv::AMDGPU_KERNEL:
3051   case CallingConv::SPIR_KERNEL:
3052     return true;
3053   case CallingConv::AMDGPU_VS:
3054   case CallingConv::AMDGPU_LS:
3055   case CallingConv::AMDGPU_HS:
3056   case CallingConv::AMDGPU_ES:
3057   case CallingConv::AMDGPU_GS:
3058   case CallingConv::AMDGPU_PS:
3059   case CallingConv::AMDGPU_CS:
3060   case CallingConv::AMDGPU_Gfx:
3061   case CallingConv::AMDGPU_CS_Chain:
3062   case CallingConv::AMDGPU_CS_ChainPreserve:
3063     // For non-compute shaders, SGPR inputs are marked with either inreg or
3064     // byval. Everything else is in VGPRs.
3065     return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
3066            CB->paramHasAttr(ArgNo, Attribute::ByVal);
3067   default:
3068     return CB->paramHasAttr(ArgNo, Attribute::InReg);
3069   }
3070 }
3071 
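/// \returns true if the subtarget encodes SMEM immediate offsets in bytes
/// rather than dwords (GCN3 encoding and GFX10+).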
3072 static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
3073   return isGCN3Encoding(ST) || isGFX10Plus(ST);
3074 }
3075 
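/// \returns true if \p EncodedOffset fits the unsigned SMRD/SMEM offset
/// field: 23 bits on GFX12+, 20 bits with byte offsets, and 8 bits otherwise.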
3076 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
3077                                       int64_t EncodedOffset) {
3078   if (isGFX12Plus(ST))
3079     return isUInt<23>(EncodedOffset);
3080 
3081   return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
3082                                : isUInt<8>(EncodedOffset);
3083 }
3084 
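/// \returns true if \p EncodedOffset fits the signed SMEM offset field:
/// 24 bits on GFX12+, otherwise 21 bits on subtargets with signed immediate
/// offsets (and never for buffer accesses there).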
3085 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
3086                                     int64_t EncodedOffset, bool IsBuffer) {
3087   if (isGFX12Plus(ST))
3088     return isInt<24>(EncodedOffset);
3089 
3090   return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
3091 }
3092 
3093 static bool isDwordAligned(uint64_t ByteOffset) {
3094   return (ByteOffset & 3) == 0;
3095 }
3096 
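/// Converts \p ByteOffset into the units used by the SMRD offset field:
/// bytes on subtargets with byte offsets, dwords otherwise.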
3097 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
3098                                 uint64_t ByteOffset) {
3099   if (hasSMEMByteOffset(ST))
3100     return ByteOffset;
3101 
3102   assert(isDwordAligned(ByteOffset));
3103   return ByteOffset >> 2;
3104 }
3105 
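/// Attempts to encode \p ByteOffset for an SMRD/SMEM access. \returns the
/// value to place in the instruction's offset field, or std::nullopt if the
/// offset cannot be encoded on this subtarget.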
3106 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
3107                                             int64_t ByteOffset, bool IsBuffer,
3108                                             bool HasSOffset) {
3109   // For unbuffered smem loads, it is illegal for the Immediate Offset to be
3110   // negative if the resulting (Offset + (M0 or SOffset or zero)) is negative.
3111   // Handle case where SOffset is not present.
3112   if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
3113     return std::nullopt;
3114 
3115   if (isGFX12Plus(ST)) // 24-bit signed offsets
3116     return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3117                                  : std::nullopt;
3118 
3119   // The signed version is always a byte offset.
3120   if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
3121     assert(hasSMEMByteOffset(ST));
3122     return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3123                                  : std::nullopt;
3124   }
3125 
3126   if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
3127     return std::nullopt;
3128 
3129   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3130   return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
3131              ? std::optional<int64_t>(EncodedOffset)
3132              : std::nullopt;
3133 }
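
// For example, on a pre-GFX9 subtarget with dword-granular unsigned SMRD
// offsets (e.g. SI), getSMRDEncodedOffset(ST, 16, false, false) yields 4
// (16 bytes = 4 dwords), while a byte offset of 17 is rejected because it is
// not dword-aligned.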
3134 
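/// CI-only variant using the 32-bit literal offset encoding. \returns the
/// dword-unit offset, or std::nullopt if the subtarget is not CI or the
/// offset is not dword-aligned.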
3135 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
3136                                                      int64_t ByteOffset) {
3137   if (!isCI(ST) || !isDwordAligned(ByteOffset))
3138     return std::nullopt;
3139 
3140   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3141   return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
3142                                    : std::nullopt;
3143 }
3144 
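/// \returns the number of bits available for the immediate offset of FLAT
/// instructions: 12 on GFX10, 24 on GFX12, and 13 on everything else.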
3145 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
3146   if (AMDGPU::isGFX10(ST))
3147     return 12;
3148 
3149   if (AMDGPU::isGFX12(ST))
3150     return 24;
3151   return 13;
3152 }
3153 
3154 namespace {
3155 
3156 struct SourceOfDivergence {
3157   unsigned Intr;
3158 };
3159 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
3160 
3161 struct AlwaysUniform {
3162   unsigned Intr;
3163 };
3164 const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
3165 
3166 #define GET_SourcesOfDivergence_IMPL
3167 #define GET_UniformIntrinsics_IMPL
3168 #define GET_Gfx9BufferFormat_IMPL
3169 #define GET_Gfx10BufferFormat_IMPL
3170 #define GET_Gfx11PlusBufferFormat_IMPL
3171 
3172 #include "AMDGPUGenSearchableTables.inc"
3173 
3174 } // end anonymous namespace
3175 
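/// \returns true if intrinsic \p IntrID appears in the TableGen-generated
/// SourcesOfDivergence table.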
3176 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
3177   return lookupSourceOfDivergence(IntrID);
3178 }
3179 
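/// \returns true if intrinsic \p IntrID appears in the TableGen-generated
/// UniformIntrinsics table.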
3180 bool isIntrinsicAlwaysUniform(unsigned IntrID) {
3181   return lookupAlwaysUniform(IntrID);
3182 }
3183 
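/// \returns the buffer format table entry matching \p BitsPerComp,
/// \p NumComponents and \p NumFormat, selecting the GFX9, GFX10 or GFX11+
/// table based on \p STI.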
3184 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
3185                                                   uint8_t NumComponents,
3186                                                   uint8_t NumFormat,
3187                                                   const MCSubtargetInfo &STI) {
3188   return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
3189                                 BitsPerComp, NumComponents, NumFormat)
3190          : isGFX10(STI)
3191              ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
3192              : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
3193 }
3194 
3195 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
3196                                                   const MCSubtargetInfo &STI) {
3197   return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
3198          : isGFX10(STI)   ? getGfx10BufferFormatInfo(Format)
3199                           : getGfx9BufferFormatInfo(Format);
3200 }
3201 
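/// \returns true if any of the vdst, src0, src1 or src2 operands of
/// \p OpDesc uses a 64-bit VGPR register class.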
3202 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
3203   for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
3204     int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3205     if (Idx == -1)
3206       continue;
3207 
3208     if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
3209         OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
3210       return true;
3211   }
3212 
3213   return false;
3214 }
3215 
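/// \returns true if \p OpDesc must be handled as a DPALU DPP instruction,
/// which currently means it has at least one 64-bit VGPR operand.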
3216 bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
3217   return hasAny64BitVGPROperands(OpDesc);
3218 }
3219 
3220 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
3221   // Currently this is 128 dwords for all subtargets.
3222   return 128;
3223 }
3224 
3225 } // namespace AMDGPU
3226 
3227 raw_ostream &operator<<(raw_ostream &OS,
3228                         const AMDGPU::IsaInfo::TargetIDSetting S) {
3229   switch (S) {
3230   case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
3231     OS << "Unsupported";
3232     break;
3233   case AMDGPU::IsaInfo::TargetIDSetting::Any:
3234     OS << "Any";
3235     break;
3236   case AMDGPU::IsaInfo::TargetIDSetting::Off:
3237     OS << "Off";
3238     break;
3239   case AMDGPU::IsaInfo::TargetIDSetting::On:
3240     OS << "On";
3241     break;
3242   }
3243   return OS;
3244 }
3245 
3246 } // namespace llvm
3247