xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUBaseInfo.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUAsmUtils.h"
12 #include "AMDKernelCodeT.h"
13 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14 #include "llvm/BinaryFormat/ELF.h"
15 #include "llvm/IR/Attributes.h"
16 #include "llvm/IR/Constants.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/GlobalValue.h"
19 #include "llvm/IR/IntrinsicsAMDGPU.h"
20 #include "llvm/IR/IntrinsicsR600.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/Support/AMDHSAKernelDescriptor.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/TargetParser/TargetParser.h"
28 #include <optional>
29 
30 #define GET_INSTRINFO_NAMED_OPS
31 #define GET_INSTRMAP_INFO
32 #include "AMDGPUGenInstrInfo.inc"
33 
34 static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
35     "amdhsa-code-object-version", llvm::cl::Hidden,
36     llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
37     llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
38                    "or asm directive still take priority if present)"));
39 
namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
///
/// The mask is built entirely in unsigned arithmetic so that wide fields are
/// well defined: a \p Width of 31 no longer overflows a signed int, a \p Width
/// at or beyond the width of unsigned selects every bit from \p Shift upwards,
/// and a \p Shift at or beyond the width of unsigned yields an empty mask.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  constexpr unsigned NumBits = sizeof(unsigned) * 8;
  if (Shift >= NumBits)
    return 0;
  const unsigned LowBits = Width >= NumBits ? ~0u : (1u << Width) - 1u;
  return LowBits << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// Bits of \p Dst outside the field are preserved; bits of \p Src that do not
/// fit into \p Width bits are dropped.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits, shifted down so the field starts at bit 0.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width (the same value for every \p VersionMajor).
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits); the same value for every
/// \p VersionMajor.
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits); non-zero only for major versions 9
/// and 10.
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns Loadcnt bit width (0 before major version 12).
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width (0 before major version 12).
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width (0 before major version 12).
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width (0 before major version 12).
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width (0 before major version 12).
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VmVsrc bit width
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns SaSdst bit width
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }

} // end namespace anonymous
155 
156 namespace llvm {
157 
158 namespace AMDGPU {
159 
160 /// \returns True if \p STI is AMDHSA.
161 bool isHsaAbi(const MCSubtargetInfo &STI) {
162   return STI.getTargetTriple().getOS() == Triple::AMDHSA;
163 }
164 
165 unsigned getAMDHSACodeObjectVersion(const Module &M) {
166   if (auto Ver = mdconst::extract_or_null<ConstantInt>(
167           M.getModuleFlag("amdgpu_code_object_version"))) {
168     return (unsigned)Ver->getZExtValue() / 100;
169   }
170 
171   return getDefaultAMDHSACodeObjectVersion();
172 }
173 
174 unsigned getDefaultAMDHSACodeObjectVersion() {
175   return DefaultAMDHSACodeObjectVersion;
176 }
177 
178 uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
179   if (T.getOS() != Triple::AMDHSA)
180     return 0;
181 
182   switch (CodeObjectVersion) {
183   case 4:
184     return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
185   case 5:
186     return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
187   default:
188     report_fatal_error("Unsupported AMDHSA Code Object Version " +
189                        Twine(CodeObjectVersion));
190   }
191 }
192 
193 unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
194   switch (CodeObjectVersion) {
195   case AMDHSA_COV4:
196     return 48;
197   case AMDHSA_COV5:
198   default:
199     return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
200   }
201 }
202 
203 
204 // FIXME: All such magic numbers about the ABI should be in a
205 // central TD file.
206 unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
207   switch (CodeObjectVersion) {
208   case AMDHSA_COV4:
209     return 24;
210   case AMDHSA_COV5:
211   default:
212     return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
213   }
214 }
215 
216 unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
217   switch (CodeObjectVersion) {
218   case AMDHSA_COV4:
219     return 32;
220   case AMDHSA_COV5:
221   default:
222     return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
223   }
224 }
225 
226 unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
227   switch (CodeObjectVersion) {
228   case AMDHSA_COV4:
229     return 40;
230   case AMDHSA_COV5:
231   default:
232     return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
233   }
234 }
235 
// Define the GET_*_IMPL selectors for the MIMG mapping tables and the MAI
// instruction info table before including AMDGPUGenSearchableTables.inc.
// NOTE(review): presumably each macro enables the matching table
// implementation inside the (generated) .inc file - confirm against the
// TableGen output.
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
246 
247 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
248                   unsigned VDataDwords, unsigned VAddrDwords) {
249   const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
250                                              VDataDwords, VAddrDwords);
251   return Info ? Info->Opcode : -1;
252 }
253 
254 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
255   const MIMGInfo *Info = getMIMGInfo(Opc);
256   return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
257 }
258 
259 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
260   const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
261   const MIMGInfo *NewInfo =
262       getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
263                           NewChannels, OrigInfo->VAddrDwords);
264   return NewInfo ? NewInfo->Opcode : -1;
265 }
266 
267 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
268                            const MIMGDimInfo *Dim, bool IsA16,
269                            bool IsG16Supported) {
270   unsigned AddrWords = BaseOpcode->NumExtraArgs;
271   unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
272                             (BaseOpcode->LodOrClampOrMip ? 1 : 0);
273   if (IsA16)
274     AddrWords += divideCeil(AddrComponents, 2);
275   else
276     AddrWords += AddrComponents;
277 
278   // Note: For subtargets that support A16 but not G16, enabling A16 also
279   // enables 16 bit gradients.
280   // For subtargets that support A16 (operand) and G16 (done with a different
281   // instruction encoding), they are independent.
282 
283   if (BaseOpcode->Gradients) {
284     if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
285       // There are two gradients per coordinate, we pack them separately.
286       // For the 3d case,
287       // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
288       AddrWords += alignTo<2>(Dim->NumGradients / 2);
289     else
290       AddrWords += Dim->NumGradients;
291   }
292   return AddrWords;
293 }
294 
// Row type read by the getMUBUF* accessors in this file.
// NOTE(review): presumably field order and types must match the rows of the
// table enabled by GET_MUBUFInfoTable_IMPL - confirm before changing.
struct MUBUFInfo {
  uint16_t Opcode; // Value returned by getMUBUFOpcode().
  uint16_t BaseOpcode; // Value returned by getMUBUFBaseOpcode().
  uint8_t elements; // Value returned by getMUBUFElements().
  bool has_vaddr; // Value returned by getMUBUFHasVAddr().
  bool has_srsrc; // Value returned by getMUBUFHasSrsrc().
  bool has_soffset; // Value returned by getMUBUFHasSoffset().
  bool IsBufferInv; // Value returned by getMUBUFIsBufferInv().
};
304 
// Row type read by the getMTBUF* accessors in this file.
// NOTE(review): presumably field order and types must match the rows of the
// table enabled by GET_MTBUFInfoTable_IMPL - confirm before changing.
struct MTBUFInfo {
  uint16_t Opcode; // Value returned by getMTBUFOpcode().
  uint16_t BaseOpcode; // Value returned by getMTBUFBaseOpcode().
  uint8_t elements; // Value returned by getMTBUFElements().
  bool has_vaddr; // Value returned by getMTBUFHasVAddr().
  bool has_srsrc; // Value returned by getMTBUFHasSrsrc().
  bool has_soffset; // Value returned by getMTBUFHasSoffset().
};
313 
// Row type read by getSMEMIsBuffer().
// NOTE(review): presumably mirrors the rows of the table enabled by
// GET_SMInfoTable_IMPL - confirm.
struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer; // Value returned by getSMEMIsBuffer().
};
318 
// Row type read by getVOP1IsSingle(), getVOP2IsSingle() and getVOP3IsSingle().
// NOTE(review): presumably shared by the VOP1/VOP2/VOP3 info tables enabled
// below - confirm.
struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle; // Value returned by the getVOP*IsSingle() accessors.
};
323 
// Row type holding a single opcode.
// NOTE(review): presumably the row type behind the VOPC64DPP / VOPC64DPP8
// tables queried by isVOPC64DPP() - confirm.
struct VOPC64DPPInfo {
  uint16_t Opcode;
};
327 
// Row type read by getCanBeVOPD(), getVOPDOpcode() and getVOPDComponents().
// NOTE(review): presumably mirrors the rows of the table enabled by
// GET_VOPDComponentTable_IMPL - confirm.
struct VOPDComponentInfo {
  uint16_t BaseVOP; // Returned (per component) by getVOPDComponents().
  uint16_t VOPDOp; // Value returned by getVOPDOpcode().
  bool CanBeVOPDX; // First member of the result of getCanBeVOPD().
};
333 
// Row type read by getVOPDFull() and getVOPDComponents().
// NOTE(review): presumably mirrors the rows of the table enabled by
// GET_VOPDPairs_IMPL - confirm.
struct VOPDInfo {
  uint16_t Opcode; // Value returned by getVOPDFull().
  uint16_t OpX; // Passed to getVOPDBaseFromComponent() in getVOPDComponents().
  uint16_t OpY; // Passed to getVOPDBaseFromComponent() in getVOPDComponents().
  uint16_t Subtarget; // NOTE(review): presumably the encoding family key - confirm.
};
340 
// Row type read by isTrue16Inst().
// NOTE(review): presumably mirrors the rows of the table enabled by
// GET_VOPTrue16Table_IMPL - confirm.
struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16; // Value returned by isTrue16Inst().
};
345 
// Define the GET_*_DECL / GET_*_IMPL selectors for the buffer, scalar-memory,
// VOP, VOPD, True16 and WMMA tables before including
// AMDGPUGenSearchableTables.inc a second time.
// NOTE(review): presumably the .inc relies on the struct definitions above
// being visible at this point - confirm against the TableGen output.
#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
373 
374 int getMTBUFBaseOpcode(unsigned Opc) {
375   const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
376   return Info ? Info->BaseOpcode : -1;
377 }
378 
379 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
380   const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
381   return Info ? Info->Opcode : -1;
382 }
383 
384 int getMTBUFElements(unsigned Opc) {
385   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
386   return Info ? Info->elements : 0;
387 }
388 
389 bool getMTBUFHasVAddr(unsigned Opc) {
390   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
391   return Info ? Info->has_vaddr : false;
392 }
393 
394 bool getMTBUFHasSrsrc(unsigned Opc) {
395   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
396   return Info ? Info->has_srsrc : false;
397 }
398 
399 bool getMTBUFHasSoffset(unsigned Opc) {
400   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
401   return Info ? Info->has_soffset : false;
402 }
403 
404 int getMUBUFBaseOpcode(unsigned Opc) {
405   const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
406   return Info ? Info->BaseOpcode : -1;
407 }
408 
409 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
410   const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
411   return Info ? Info->Opcode : -1;
412 }
413 
414 int getMUBUFElements(unsigned Opc) {
415   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
416   return Info ? Info->elements : 0;
417 }
418 
419 bool getMUBUFHasVAddr(unsigned Opc) {
420   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
421   return Info ? Info->has_vaddr : false;
422 }
423 
424 bool getMUBUFHasSrsrc(unsigned Opc) {
425   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
426   return Info ? Info->has_srsrc : false;
427 }
428 
429 bool getMUBUFHasSoffset(unsigned Opc) {
430   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
431   return Info ? Info->has_soffset : false;
432 }
433 
434 bool getMUBUFIsBufferInv(unsigned Opc) {
435   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
436   return Info ? Info->IsBufferInv : false;
437 }
438 
439 bool getSMEMIsBuffer(unsigned Opc) {
440   const SMInfo *Info = getSMEMOpcodeHelper(Opc);
441   return Info ? Info->IsBuffer : false;
442 }
443 
444 bool getVOP1IsSingle(unsigned Opc) {
445   const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
446   return Info ? Info->IsSingle : false;
447 }
448 
449 bool getVOP2IsSingle(unsigned Opc) {
450   const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
451   return Info ? Info->IsSingle : false;
452 }
453 
454 bool getVOP3IsSingle(unsigned Opc) {
455   const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
456   return Info ? Info->IsSingle : false;
457 }
458 
459 bool isVOPC64DPP(unsigned Opc) {
460   return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
461 }
462 
463 bool getMAIIsDGEMM(unsigned Opc) {
464   const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
465   return Info ? Info->is_dgemm : false;
466 }
467 
468 bool getMAIIsGFX940XDL(unsigned Opc) {
469   const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
470   return Info ? Info->is_gfx940_xdl : false;
471 }
472 
473 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
474   if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
475     return SIEncodingFamily::GFX12;
476   if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
477     return SIEncodingFamily::GFX11;
478   llvm_unreachable("Subtarget generation does not support VOPD!");
479 }
480 
481 CanBeVOPD getCanBeVOPD(unsigned Opc) {
482   const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
483   if (Info)
484     return {Info->CanBeVOPDX, true};
485   else
486     return {false, false};
487 }
488 
489 unsigned getVOPDOpcode(unsigned Opc) {
490   const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
491   return Info ? Info->VOPDOp : ~0u;
492 }
493 
494 bool isVOPD(unsigned Opc) {
495   return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
496 }
497 
498 bool isMAC(unsigned Opc) {
499   return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
500          Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
501          Opc == AMDGPU::V_MAC_F32_e64_vi ||
502          Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
503          Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
504          Opc == AMDGPU::V_MAC_F16_e64_vi ||
505          Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
506          Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
507          Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
508          Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
509          Opc == AMDGPU::V_FMAC_F32_e64_vi ||
510          Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
511          Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
512          Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
513          Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
514          Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
515          Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
516          Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
517          Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
518          Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
519 }
520 
521 bool isPermlane16(unsigned Opc) {
522   return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
523          Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
524          Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
525          Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
526          Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
527          Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
528          Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
529          Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
530 }
531 
532 bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
533   return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
534          Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
535          Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
536          Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
537          Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
538          Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
539          Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
540          Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
541 }
542 
543 bool isGenericAtomic(unsigned Opc) {
544   return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
545          Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
546          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
547          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
548          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
549          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
550          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
551          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
552          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
553          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
554          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
555          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
556          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
557          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
558          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
559          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
560          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
561          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
562          Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
563 }
564 
565 bool isTrue16Inst(unsigned Opc) {
566   const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
567   return Info ? Info->IsTrue16 : false;
568 }
569 
570 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
571   const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
572   return Info ? Info->Opcode3Addr : ~0u;
573 }
574 
575 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
576   const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
577   return Info ? Info->Opcode2Addr : ~0u;
578 }
579 
580 // Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
581 // header files, so we need to wrap it in a function that takes unsigned
582 // instead.
583 int getMCOpcode(uint16_t Opcode, unsigned Gen) {
584   return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
585 }
586 
587 int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
588   const VOPDInfo *Info =
589       getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
590   return Info ? Info->Opcode : -1;
591 }
592 
593 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
594   const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
595   assert(Info);
596   auto OpX = getVOPDBaseFromComponent(Info->OpX);
597   auto OpY = getVOPDBaseFromComponent(Info->OpY);
598   assert(OpX && OpY);
599   return {OpX->BaseVOP, OpY->BaseVOP};
600 }
601 
602 namespace VOPD {
603 
604 ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
605   assert(OpDesc.getNumDefs() == Component::DST_NUM);
606 
607   assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
608   assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
609   auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
610   assert(TiedIdx == -1 || TiedIdx == Component::DST);
611   HasSrc2Acc = TiedIdx != -1;
612 
613   SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
614   assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
615 
616   auto OperandsNum = OpDesc.getNumOperands();
617   unsigned CompOprIdx;
618   for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
619     if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
620       MandatoryLiteralIdx = CompOprIdx;
621       break;
622     }
623   }
624 }
625 
626 unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
627   assert(CompOprIdx < Component::MAX_OPR_NUM);
628 
629   if (CompOprIdx == Component::DST)
630     return getIndexOfDstInParsedOperands();
631 
632   auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
633   if (CompSrcIdx < getCompParsedSrcOperandsNum())
634     return getIndexOfSrcInParsedOperands(CompSrcIdx);
635 
636   // The specified operand does not exist.
637   return 0;
638 }
639 
640 std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
641     std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {
642 
643   auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
644   auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);
645 
646   const unsigned CompOprNum =
647       SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
648   unsigned CompOprIdx;
649   for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
650     unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
651     if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
652         ((OpXRegs[CompOprIdx] & BanksMasks) ==
653          (OpYRegs[CompOprIdx] & BanksMasks)))
654       return CompOprIdx;
655   }
656 
657   return {};
658 }
659 
660 // Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
661 // by the specified component. If an operand is unused
662 // or is not a VGPR, the corresponding value is 0.
663 //
664 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
665 // for the specified component and MC operand. The callback must return 0
666 // if the operand is not a register or not a VGPR.
667 InstInfo::RegIndices InstInfo::getRegIndices(
668     unsigned CompIdx,
669     std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
670   assert(CompIdx < COMPONENTS_NUM);
671 
672   const auto &Comp = CompInfo[CompIdx];
673   InstInfo::RegIndices RegIndices;
674 
675   RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
676 
677   for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
678     unsigned CompSrcIdx = CompOprIdx - DST_NUM;
679     RegIndices[CompOprIdx] =
680         Comp.hasRegSrcOperand(CompSrcIdx)
681             ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
682             : 0;
683   }
684   return RegIndices;
685 }
686 
687 } // namespace VOPD
688 
689 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
690   return VOPD::InstInfo(OpX, OpY);
691 }
692 
693 VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
694                                const MCInstrInfo *InstrInfo) {
695   auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
696   const auto &OpXDesc = InstrInfo->get(OpX);
697   const auto &OpYDesc = InstrInfo->get(OpY);
698   VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
699   VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
700   return VOPD::InstInfo(OpXInfo, OpYInfo);
701 }
702 
703 namespace IsaInfo {
704 
705 AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
706     : STI(STI), XnackSetting(TargetIDSetting::Any),
707       SramEccSetting(TargetIDSetting::Any) {
708   if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
709     XnackSetting = TargetIDSetting::Unsupported;
710   if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
711     SramEccSetting = TargetIDSetting::Unsupported;
712 }
713 
714 void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
715   // Check if xnack or sramecc is explicitly enabled or disabled.  In the
716   // absence of the target features we assume we must generate code that can run
717   // in any environment.
718   SubtargetFeatures Features(FS);
719   std::optional<bool> XnackRequested;
720   std::optional<bool> SramEccRequested;
721 
722   for (const std::string &Feature : Features.getFeatures()) {
723     if (Feature == "+xnack")
724       XnackRequested = true;
725     else if (Feature == "-xnack")
726       XnackRequested = false;
727     else if (Feature == "+sramecc")
728       SramEccRequested = true;
729     else if (Feature == "-sramecc")
730       SramEccRequested = false;
731   }
732 
733   bool XnackSupported = isXnackSupported();
734   bool SramEccSupported = isSramEccSupported();
735 
736   if (XnackRequested) {
737     if (XnackSupported) {
738       XnackSetting =
739           *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
740     } else {
741       // If a specific xnack setting was requested and this GPU does not support
742       // xnack emit a warning. Setting will remain set to "Unsupported".
743       if (*XnackRequested) {
744         errs() << "warning: xnack 'On' was requested for a processor that does "
745                   "not support it!\n";
746       } else {
747         errs() << "warning: xnack 'Off' was requested for a processor that "
748                   "does not support it!\n";
749       }
750     }
751   }
752 
753   if (SramEccRequested) {
754     if (SramEccSupported) {
755       SramEccSetting =
756           *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
757     } else {
758       // If a specific sramecc setting was requested and this GPU does not
759       // support sramecc emit a warning. Setting will remain set to
760       // "Unsupported".
761       if (*SramEccRequested) {
762         errs() << "warning: sramecc 'On' was requested for a processor that "
763                   "does not support it!\n";
764       } else {
765         errs() << "warning: sramecc 'Off' was requested for a processor that "
766                   "does not support it!\n";
767       }
768     }
769   }
770 }
771 
772 static TargetIDSetting
773 getTargetIDSettingFromFeatureString(StringRef FeatureString) {
774   if (FeatureString.ends_with("-"))
775     return TargetIDSetting::Off;
776   if (FeatureString.ends_with("+"))
777     return TargetIDSetting::On;
778 
779   llvm_unreachable("Malformed feature string");
780 }
781 
782 void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
783   SmallVector<StringRef, 3> TargetIDSplit;
784   TargetID.split(TargetIDSplit, ':');
785 
786   for (const auto &FeatureString : TargetIDSplit) {
787     if (FeatureString.starts_with("xnack"))
788       XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
789     if (FeatureString.starts_with("sramecc"))
790       SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
791   }
792 }
793 
794 std::string AMDGPUTargetID::toString() const {
795   std::string StringRep;
796   raw_string_ostream StreamRep(StringRep);
797 
798   auto TargetTriple = STI.getTargetTriple();
799   auto Version = getIsaVersion(STI.getCPU());
800 
801   StreamRep << TargetTriple.getArchName() << '-'
802             << TargetTriple.getVendorName() << '-'
803             << TargetTriple.getOSName() << '-'
804             << TargetTriple.getEnvironmentName() << '-';
805 
806   std::string Processor;
807   // TODO: Following else statement is present here because we used various
808   // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
809   // Remove once all aliases are removed from GCNProcessors.td.
810   if (Version.Major >= 9)
811     Processor = STI.getCPU().str();
812   else
813     Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
814                  Twine(Version.Stepping))
815                     .str();
816 
817   std::string Features;
818   if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
819     // sramecc.
820     if (getSramEccSetting() == TargetIDSetting::Off)
821       Features += ":sramecc-";
822     else if (getSramEccSetting() == TargetIDSetting::On)
823       Features += ":sramecc+";
824     // xnack.
825     if (getXnackSetting() == TargetIDSetting::Off)
826       Features += ":xnack-";
827     else if (getXnackSetting() == TargetIDSetting::On)
828       Features += ":xnack+";
829   }
830 
831   StreamRep << Processor << Features;
832 
833   StreamRep.flush();
834   return StringRep;
835 }
836 
837 unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
838   if (STI->getFeatureBits().test(FeatureWavefrontSize16))
839     return 16;
840   if (STI->getFeatureBits().test(FeatureWavefrontSize32))
841     return 32;
842 
843   return 64;
844 }
845 
846 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
847   unsigned BytesPerCU = 0;
848   if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
849     BytesPerCU = 32768;
850   if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
851     BytesPerCU = 65536;
852 
853   // "Per CU" really means "per whatever functional block the waves of a
854   // workgroup must share". So the effective local memory size is doubled in
855   // WGP mode on gfx10.
856   if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
857     BytesPerCU *= 2;
858 
859   return BytesPerCU;
860 }
861 
862 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
863   if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
864     return 32768;
865   if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
866     return 65536;
867   return 0;
868 }
869 
870 unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
871   // "Per CU" really means "per whatever functional block the waves of a
872   // workgroup must share". For gfx10 in CU mode this is the CU, which contains
873   // two SIMDs.
874   if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
875     return 2;
876   // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
877   // two CUs, so a total of four SIMDs.
878   return 4;
879 }
880 
881 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
882                                unsigned FlatWorkGroupSize) {
883   assert(FlatWorkGroupSize != 0);
884   if (STI->getTargetTriple().getArch() != Triple::amdgcn)
885     return 8;
886   unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
887   unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
888   if (N == 1) {
889     // Single-wave workgroups don't consume barrier resources.
890     return MaxWaves;
891   }
892 
893   unsigned MaxBarriers = 16;
894   if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
895     MaxBarriers = 32;
896 
897   return std::min(MaxWaves / N, MaxBarriers);
898 }
899 
900 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
901   return 1;
902 }
903 
904 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
905   // FIXME: Need to take scratch memory into account.
906   if (isGFX90A(*STI))
907     return 8;
908   if (!isGFX10Plus(*STI))
909     return 10;
910   return hasGFX10_3Insts(*STI) ? 16 : 20;
911 }
912 
913 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
914                                    unsigned FlatWorkGroupSize) {
915   return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
916                     getEUsPerCU(STI));
917 }
918 
919 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
920   return 1;
921 }
922 
923 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
924   // Some subtargets allow encoding 2048, but this isn't tested or supported.
925   return 1024;
926 }
927 
928 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
929                               unsigned FlatWorkGroupSize) {
930   return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
931 }
932 
933 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
934   IsaVersion Version = getIsaVersion(STI->getCPU());
935   if (Version.Major >= 10)
936     return getAddressableNumSGPRs(STI);
937   if (Version.Major >= 8)
938     return 16;
939   return 8;
940 }
941 
942 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
943   return 8;
944 }
945 
946 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
947   IsaVersion Version = getIsaVersion(STI->getCPU());
948   if (Version.Major >= 8)
949     return 800;
950   return 512;
951 }
952 
953 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
954   if (STI->getFeatureBits().test(FeatureSGPRInitBug))
955     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
956 
957   IsaVersion Version = getIsaVersion(STI->getCPU());
958   if (Version.Major >= 10)
959     return 106;
960   if (Version.Major >= 8)
961     return 102;
962   return 104;
963 }
964 
965 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
966   assert(WavesPerEU != 0);
967 
968   IsaVersion Version = getIsaVersion(STI->getCPU());
969   if (Version.Major >= 10)
970     return 0;
971 
972   if (WavesPerEU >= getMaxWavesPerEU(STI))
973     return 0;
974 
975   unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
976   if (STI->getFeatureBits().test(FeatureTrapHandler))
977     MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
978   MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
979   return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
980 }
981 
982 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
983                         bool Addressable) {
984   assert(WavesPerEU != 0);
985 
986   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
987   IsaVersion Version = getIsaVersion(STI->getCPU());
988   if (Version.Major >= 10)
989     return Addressable ? AddressableNumSGPRs : 108;
990   if (Version.Major >= 8 && !Addressable)
991     AddressableNumSGPRs = 112;
992   unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
993   if (STI->getFeatureBits().test(FeatureTrapHandler))
994     MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
995   MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
996   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
997 }
998 
999 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1000                           bool FlatScrUsed, bool XNACKUsed) {
1001   unsigned ExtraSGPRs = 0;
1002   if (VCCUsed)
1003     ExtraSGPRs = 2;
1004 
1005   IsaVersion Version = getIsaVersion(STI->getCPU());
1006   if (Version.Major >= 10)
1007     return ExtraSGPRs;
1008 
1009   if (Version.Major < 8) {
1010     if (FlatScrUsed)
1011       ExtraSGPRs = 4;
1012   } else {
1013     if (XNACKUsed)
1014       ExtraSGPRs = 4;
1015 
1016     if (FlatScrUsed ||
1017         STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
1018       ExtraSGPRs = 6;
1019   }
1020 
1021   return ExtraSGPRs;
1022 }
1023 
1024 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1025                           bool FlatScrUsed) {
1026   return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1027                           STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
1028 }
1029 
1030 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
1031   NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
1032   // SGPRBlocks is actual number of SGPR blocks minus 1.
1033   return NumSGPRs / getSGPREncodingGranule(STI) - 1;
1034 }
1035 
1036 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
1037                              std::optional<bool> EnableWavefrontSize32) {
1038   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1039     return 8;
1040 
1041   bool IsWave32 = EnableWavefrontSize32 ?
1042       *EnableWavefrontSize32 :
1043       STI->getFeatureBits().test(FeatureWavefrontSize32);
1044 
1045   if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
1046     return IsWave32 ? 24 : 12;
1047 
1048   if (hasGFX10_3Insts(*STI))
1049     return IsWave32 ? 16 : 8;
1050 
1051   return IsWave32 ? 8 : 4;
1052 }
1053 
1054 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
1055                                 std::optional<bool> EnableWavefrontSize32) {
1056   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1057     return 8;
1058 
1059   bool IsWave32 = EnableWavefrontSize32 ?
1060       *EnableWavefrontSize32 :
1061       STI->getFeatureBits().test(FeatureWavefrontSize32);
1062 
1063   return IsWave32 ? 8 : 4;
1064 }
1065 
1066 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
1067   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1068     return 512;
1069   if (!isGFX10Plus(*STI))
1070     return 256;
1071   bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
1072   if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
1073     return IsWave32 ? 1536 : 768;
1074   return IsWave32 ? 1024 : 512;
1075 }
1076 
1077 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
1078   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1079     return 512;
1080   return 256;
1081 }
1082 
1083 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
1084                                       unsigned NumVGPRs) {
1085   unsigned MaxWaves = getMaxWavesPerEU(STI);
1086   unsigned Granule = getVGPRAllocGranule(STI);
1087   if (NumVGPRs < Granule)
1088     return MaxWaves;
1089   unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1090   return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
1091 }
1092 
1093 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1094   assert(WavesPerEU != 0);
1095 
1096   unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1097   if (WavesPerEU >= MaxWavesPerEU)
1098     return 0;
1099 
1100   unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1101   unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
1102   unsigned Granule = getVGPRAllocGranule(STI);
1103   unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1104 
1105   if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1106     return 0;
1107 
1108   unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
1109   if (WavesPerEU < MinWavesPerEU)
1110     return getMinNumVGPRs(STI, MinWavesPerEU);
1111 
1112   unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1113   unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1114   return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1115 }
1116 
1117 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1118   assert(WavesPerEU != 0);
1119 
1120   unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1121                                    getVGPRAllocGranule(STI));
1122   unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
1123   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1124 }
1125 
1126 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
1127                           std::optional<bool> EnableWavefrontSize32) {
1128   NumVGPRs = alignTo(std::max(1u, NumVGPRs),
1129                      getVGPREncodingGranule(STI, EnableWavefrontSize32));
1130   // VGPRBlocks is actual number of VGPR blocks minus 1.
1131   return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
1132 }
1133 
1134 } // end namespace IsaInfo
1135 
1136 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
1137                                const MCSubtargetInfo *STI) {
1138   IsaVersion Version = getIsaVersion(STI->getCPU());
1139 
1140   memset(&Header, 0, sizeof(Header));
1141 
1142   Header.amd_kernel_code_version_major = 1;
1143   Header.amd_kernel_code_version_minor = 2;
1144   Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1145   Header.amd_machine_version_major = Version.Major;
1146   Header.amd_machine_version_minor = Version.Minor;
1147   Header.amd_machine_version_stepping = Version.Stepping;
1148   Header.kernel_code_entry_byte_offset = sizeof(Header);
1149   Header.wavefront_size = 6;
1150 
1151   // If the code object does not support indirect functions, then the value must
1152   // be 0xffffffff.
1153   Header.call_convention = -1;
1154 
1155   // These alignment values are specified in powers of two, so alignment =
1156   // 2^n.  The minimum alignment is 2^4 = 16.
1157   Header.kernarg_segment_alignment = 4;
1158   Header.group_segment_alignment = 4;
1159   Header.private_segment_alignment = 4;
1160 
1161   if (Version.Major >= 10) {
1162     if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1163       Header.wavefront_size = 5;
1164       Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1165     }
1166     Header.compute_pgm_resource_registers |=
1167       S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1168       S_00B848_MEM_ORDERED(1);
1169   }
1170 }
1171 
1172 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
1173     const MCSubtargetInfo *STI) {
1174   IsaVersion Version = getIsaVersion(STI->getCPU());
1175 
1176   amdhsa::kernel_descriptor_t KD;
1177   memset(&KD, 0, sizeof(KD));
1178 
1179   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1180                   amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
1181                   amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
1182   if (Version.Major >= 12) {
1183     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1184                     amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0);
1185     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1186                     amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0);
1187   } else {
1188     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1189                     amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1);
1190     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1191                     amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1);
1192   }
1193   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
1194                   amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
1195   if (Version.Major >= 10) {
1196     AMDHSA_BITS_SET(KD.kernel_code_properties,
1197                     amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
1198                     STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
1199     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1200                     amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
1201                     STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
1202     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1203                     amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
1204   }
1205   if (AMDGPU::isGFX90A(*STI)) {
1206     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
1207                     amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1208                     STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
1209   }
1210   return KD;
1211 }
1212 
1213 bool isGroupSegment(const GlobalValue *GV) {
1214   return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
1215 }
1216 
1217 bool isGlobalSegment(const GlobalValue *GV) {
1218   return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
1219 }
1220 
1221 bool isReadOnlySegment(const GlobalValue *GV) {
1222   unsigned AS = GV->getAddressSpace();
1223   return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1224          AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
1225 }
1226 
1227 bool shouldEmitConstantsToTextSection(const Triple &TT) {
1228   return TT.getArch() == Triple::r600;
1229 }
1230 
1231 std::pair<unsigned, unsigned>
1232 getIntegerPairAttribute(const Function &F, StringRef Name,
1233                         std::pair<unsigned, unsigned> Default,
1234                         bool OnlyFirstRequired) {
1235   Attribute A = F.getFnAttribute(Name);
1236   if (!A.isStringAttribute())
1237     return Default;
1238 
1239   LLVMContext &Ctx = F.getContext();
1240   std::pair<unsigned, unsigned> Ints = Default;
1241   std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1242   if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1243     Ctx.emitError("can't parse first integer attribute " + Name);
1244     return Default;
1245   }
1246   if (Strs.second.trim().getAsInteger(0, Ints.second)) {
1247     if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1248       Ctx.emitError("can't parse second integer attribute " + Name);
1249       return Default;
1250     }
1251   }
1252 
1253   return Ints;
1254 }
1255 
1256 unsigned getVmcntBitMask(const IsaVersion &Version) {
1257   return (1 << (getVmcntBitWidthLo(Version.Major) +
1258                 getVmcntBitWidthHi(Version.Major))) -
1259          1;
1260 }
1261 
1262 unsigned getLoadcntBitMask(const IsaVersion &Version) {
1263   return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1264 }
1265 
1266 unsigned getSamplecntBitMask(const IsaVersion &Version) {
1267   return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1268 }
1269 
1270 unsigned getBvhcntBitMask(const IsaVersion &Version) {
1271   return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1272 }
1273 
1274 unsigned getExpcntBitMask(const IsaVersion &Version) {
1275   return (1 << getExpcntBitWidth(Version.Major)) - 1;
1276 }
1277 
1278 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1279   return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1280 }
1281 
1282 unsigned getDscntBitMask(const IsaVersion &Version) {
1283   return (1 << getDscntBitWidth(Version.Major)) - 1;
1284 }
1285 
1286 unsigned getKmcntBitMask(const IsaVersion &Version) {
1287   return (1 << getKmcntBitWidth(Version.Major)) - 1;
1288 }
1289 
1290 unsigned getStorecntBitMask(const IsaVersion &Version) {
1291   return (1 << getStorecntBitWidth(Version.Major)) - 1;
1292 }
1293 
1294 unsigned getWaitcntBitMask(const IsaVersion &Version) {
1295   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1296                                 getVmcntBitWidthLo(Version.Major));
1297   unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1298                                getExpcntBitWidth(Version.Major));
1299   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1300                                 getLgkmcntBitWidth(Version.Major));
1301   unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1302                                 getVmcntBitWidthHi(Version.Major));
1303   return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1304 }
1305 
1306 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1307   unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1308                                 getVmcntBitWidthLo(Version.Major));
1309   unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1310                                 getVmcntBitWidthHi(Version.Major));
1311   return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1312 }
1313 
1314 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1315   return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1316                     getExpcntBitWidth(Version.Major));
1317 }
1318 
1319 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1320   return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1321                     getLgkmcntBitWidth(Version.Major));
1322 }
1323 
1324 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
1325                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
1326   Vmcnt = decodeVmcnt(Version, Waitcnt);
1327   Expcnt = decodeExpcnt(Version, Waitcnt);
1328   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1329 }
1330 
1331 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1332   Waitcnt Decoded;
1333   Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
1334   Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
1335   Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
1336   return Decoded;
1337 }
1338 
1339 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1340                      unsigned Vmcnt) {
1341   Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1342                      getVmcntBitWidthLo(Version.Major));
1343   return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1344                   getVmcntBitShiftHi(Version.Major),
1345                   getVmcntBitWidthHi(Version.Major));
1346 }
1347 
1348 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1349                       unsigned Expcnt) {
1350   return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1351                   getExpcntBitWidth(Version.Major));
1352 }
1353 
1354 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1355                        unsigned Lgkmcnt) {
1356   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1357                   getLgkmcntBitWidth(Version.Major));
1358 }
1359 
1360 unsigned encodeWaitcnt(const IsaVersion &Version,
1361                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
1362   unsigned Waitcnt = getWaitcntBitMask(Version);
1363   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1364   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1365   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1366   return Waitcnt;
1367 }
1368 
1369 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1370   return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
1371 }
1372 
1373 static unsigned getCombinedCountBitMask(const IsaVersion &Version,
1374                                         bool IsStore) {
1375   unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1376                               getDscntBitWidth(Version.Major));
1377   if (IsStore) {
1378     unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1379                                    getStorecntBitWidth(Version.Major));
1380     return Dscnt | Storecnt;
1381   } else {
1382     unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1383                                   getLoadcntBitWidth(Version.Major));
1384     return Dscnt | Loadcnt;
1385   }
1386 }
1387 
1388 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
1389   Waitcnt Decoded;
1390   Decoded.LoadCnt =
1391       unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
1392                  getLoadcntBitWidth(Version.Major));
1393   Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
1394                              getDscntBitWidth(Version.Major));
1395   return Decoded;
1396 }
1397 
1398 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
1399   Waitcnt Decoded;
1400   Decoded.StoreCnt =
1401       unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
1402                  getStorecntBitWidth(Version.Major));
1403   Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
1404                              getDscntBitWidth(Version.Major));
1405   return Decoded;
1406 }
1407 
1408 static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1409                               unsigned Loadcnt) {
1410   return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1411                   getLoadcntBitWidth(Version.Major));
1412 }
1413 
1414 static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1415                                unsigned Storecnt) {
1416   return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1417                   getStorecntBitWidth(Version.Major));
1418 }
1419 
1420 static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1421                             unsigned Dscnt) {
1422   return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
1423                   getDscntBitWidth(Version.Major));
1424 }
1425 
1426 static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1427                                    unsigned Dscnt) {
1428   unsigned Waitcnt = getCombinedCountBitMask(Version, false);
1429   Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1430   Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1431   return Waitcnt;
1432 }
1433 
1434 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1435   return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
1436 }
1437 
1438 static unsigned encodeStorecntDscnt(const IsaVersion &Version,
1439                                     unsigned Storecnt, unsigned Dscnt) {
1440   unsigned Waitcnt = getCombinedCountBitMask(Version, true);
1441   Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1442   Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1443   return Waitcnt;
1444 }
1445 
1446 unsigned encodeStorecntDscnt(const IsaVersion &Version,
1447                              const Waitcnt &Decoded) {
1448   return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
1449 }
1450 
1451 //===----------------------------------------------------------------------===//
1452 // Custom Operands.
1453 //
1454 // A table of custom operands shall describe "primary" operand names
// first, followed by aliases if any. It is not required, but recommended,
// to arrange operands so that each operand's encoding matches its position
// in the table. This will make disassembly a bit more efficient.
1458 // Unused slots in the table shall have an empty name.
1459 //
1460 //===----------------------------------------------------------------------===//
1461 
1462 template <class T>
1463 static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
1464                        T Context) {
1465   return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
1466          (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
1467 }
1468 
1469 template <class T>
1470 static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
1471                      const CustomOperand<T> OpInfo[], int OpInfoSize,
1472                      T Context) {
1473   int InvalidIdx = OPR_ID_UNKNOWN;
1474   for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
1475     if (Test(OpInfo[Idx])) {
1476       if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
1477         return Idx;
1478       InvalidIdx = OPR_ID_UNSUPPORTED;
1479     }
1480   }
1481   return InvalidIdx;
1482 }
1483 
1484 template <class T>
1485 static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
1486                      int OpInfoSize, T Context) {
1487   auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
1488   return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
1489 }
1490 
1491 template <class T>
1492 static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
1493                      T Context, bool QuickCheck = true) {
1494   auto Test = [=](const CustomOperand<T> &Op) {
1495     return Op.Encoding == Id && !Op.Name.empty();
1496   };
1497   // This is an optimization that should work in most cases.
1498   // As a side effect, it may cause selection of an alias
1499   // instead of a primary operand name in case of sparse tables.
1500   if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
1501       OpInfo[Id].Encoding == Id) {
1502     return Id;
1503   }
1504   return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
1505 }
1506 
1507 //===----------------------------------------------------------------------===//
1508 // Custom Operand Values
1509 //===----------------------------------------------------------------------===//
1510 
1511 static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
1512                                                 int Size,
1513                                                 const MCSubtargetInfo &STI) {
1514   unsigned Enc = 0;
1515   for (int Idx = 0; Idx < Size; ++Idx) {
1516     const auto &Op = Opr[Idx];
1517     if (Op.isSupported(STI))
1518       Enc |= Op.encode(Op.Default);
1519   }
1520   return Enc;
1521 }
1522 
1523 static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
1524                                             int Size, unsigned Code,
1525                                             bool &HasNonDefaultVal,
1526                                             const MCSubtargetInfo &STI) {
1527   unsigned UsedOprMask = 0;
1528   HasNonDefaultVal = false;
1529   for (int Idx = 0; Idx < Size; ++Idx) {
1530     const auto &Op = Opr[Idx];
1531     if (!Op.isSupported(STI))
1532       continue;
1533     UsedOprMask |= Op.getMask();
1534     unsigned Val = Op.decode(Code);
1535     if (!Op.isValid(Val))
1536       return false;
1537     HasNonDefaultVal |= (Val != Op.Default);
1538   }
1539   return (Code & ~UsedOprMask) == 0;
1540 }
1541 
1542 static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1543                                 unsigned Code, int &Idx, StringRef &Name,
1544                                 unsigned &Val, bool &IsDefault,
1545                                 const MCSubtargetInfo &STI) {
1546   while (Idx < Size) {
1547     const auto &Op = Opr[Idx++];
1548     if (Op.isSupported(STI)) {
1549       Name = Op.Name;
1550       Val = Op.decode(Code);
1551       IsDefault = (Val == Op.Default);
1552       return true;
1553     }
1554   }
1555 
1556   return false;
1557 }
1558 
1559 static int encodeCustomOperandVal(const CustomOperandVal &Op,
1560                                   int64_t InputVal) {
1561   if (InputVal < 0 || InputVal > Op.Max)
1562     return OPR_VAL_INVALID;
1563   return Op.encode(InputVal);
1564 }
1565 
1566 static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
1567                                const StringRef Name, int64_t InputVal,
1568                                unsigned &UsedOprMask,
1569                                const MCSubtargetInfo &STI) {
1570   int InvalidId = OPR_ID_UNKNOWN;
1571   for (int Idx = 0; Idx < Size; ++Idx) {
1572     const auto &Op = Opr[Idx];
1573     if (Op.Name == Name) {
1574       if (!Op.isSupported(STI)) {
1575         InvalidId = OPR_ID_UNSUPPORTED;
1576         continue;
1577       }
1578       auto OprMask = Op.getMask();
1579       if (OprMask & UsedOprMask)
1580         return OPR_ID_DUPLICATE;
1581       UsedOprMask |= OprMask;
1582       return encodeCustomOperandVal(Op, InputVal);
1583     }
1584   }
1585   return InvalidId;
1586 }
1587 
1588 //===----------------------------------------------------------------------===//
1589 // DepCtr
1590 //===----------------------------------------------------------------------===//
1591 
1592 namespace DepCtr {
1593 
1594 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
1595   static int Default = -1;
1596   if (Default == -1)
1597     Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
1598   return Default;
1599 }
1600 
1601 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1602                               const MCSubtargetInfo &STI) {
1603   return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
1604                                          HasNonDefaultVal, STI);
1605 }
1606 
1607 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1608                   bool &IsDefault, const MCSubtargetInfo &STI) {
1609   return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
1610                              IsDefault, STI);
1611 }
1612 
1613 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1614                  const MCSubtargetInfo &STI) {
1615   return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
1616                              STI);
1617 }
1618 
1619 unsigned decodeFieldVmVsrc(unsigned Encoded) {
1620   return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1621 }
1622 
1623 unsigned decodeFieldVaVdst(unsigned Encoded) {
1624   return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1625 }
1626 
1627 unsigned decodeFieldSaSdst(unsigned Encoded) {
1628   return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1629 }
1630 
1631 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
1632   return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1633 }
1634 
1635 unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
1636   return encodeFieldVmVsrc(0xffff, VmVsrc);
1637 }
1638 
1639 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
1640   return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1641 }
1642 
1643 unsigned encodeFieldVaVdst(unsigned VaVdst) {
1644   return encodeFieldVaVdst(0xffff, VaVdst);
1645 }
1646 
1647 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
1648   return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1649 }
1650 
1651 unsigned encodeFieldSaSdst(unsigned SaSdst) {
1652   return encodeFieldSaSdst(0xffff, SaSdst);
1653 }
1654 
1655 } // namespace DepCtr
1656 
1657 //===----------------------------------------------------------------------===//
1658 // hwreg
1659 //===----------------------------------------------------------------------===//
1660 
1661 namespace Hwreg {
1662 
1663 int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
1664   int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
1665   return (Idx < 0) ? Idx : Opr[Idx].Encoding;
1666 }
1667 
1668 bool isValidHwreg(int64_t Id) {
1669   return 0 <= Id && isUInt<ID_WIDTH_>(Id);
1670 }
1671 
1672 bool isValidHwregOffset(int64_t Offset) {
1673   return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
1674 }
1675 
1676 bool isValidHwregWidth(int64_t Width) {
1677   return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
1678 }
1679 
1680 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
1681   return (Id << ID_SHIFT_) |
1682          (Offset << OFFSET_SHIFT_) |
1683          ((Width - 1) << WIDTH_M1_SHIFT_);
1684 }
1685 
1686 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
1687   int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
1688   return (Idx < 0) ? "" : Opr[Idx].Name;
1689 }
1690 
1691 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
1692   Id = (Val & ID_MASK_) >> ID_SHIFT_;
1693   Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
1694   Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
1695 }
1696 
1697 } // namespace Hwreg
1698 
1699 //===----------------------------------------------------------------------===//
1700 // exp tgt
1701 //===----------------------------------------------------------------------===//
1702 
1703 namespace Exp {
1704 
1705 struct ExpTgt {
1706   StringLiteral Name;
1707   unsigned Tgt;
1708   unsigned MaxIndex;
1709 };
1710 
1711 static constexpr ExpTgt ExpTgtInfo[] = {
1712   {{"null"},           ET_NULL,            ET_NULL_MAX_IDX},
1713   {{"mrtz"},           ET_MRTZ,            ET_MRTZ_MAX_IDX},
1714   {{"prim"},           ET_PRIM,            ET_PRIM_MAX_IDX},
1715   {{"mrt"},            ET_MRT0,            ET_MRT_MAX_IDX},
1716   {{"pos"},            ET_POS0,            ET_POS_MAX_IDX},
1717   {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
1718   {{"param"},          ET_PARAM0,          ET_PARAM_MAX_IDX},
1719 };
1720 
1721 bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
1722   for (const ExpTgt &Val : ExpTgtInfo) {
1723     if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1724       Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1725       Name = Val.Name;
1726       return true;
1727     }
1728   }
1729   return false;
1730 }
1731 
1732 unsigned getTgtId(const StringRef Name) {
1733 
1734   for (const ExpTgt &Val : ExpTgtInfo) {
1735     if (Val.MaxIndex == 0 && Name == Val.Name)
1736       return Val.Tgt;
1737 
1738     if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
1739       StringRef Suffix = Name.drop_front(Val.Name.size());
1740 
1741       unsigned Id;
1742       if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
1743         return ET_INVALID;
1744 
1745       // Disable leading zeroes
1746       if (Suffix.size() > 1 && Suffix[0] == '0')
1747         return ET_INVALID;
1748 
1749       return Val.Tgt + Id;
1750     }
1751   }
1752   return ET_INVALID;
1753 }
1754 
1755 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
1756   switch (Id) {
1757   case ET_NULL:
1758     return !isGFX11Plus(STI);
1759   case ET_POS4:
1760   case ET_PRIM:
1761     return isGFX10Plus(STI);
1762   case ET_DUAL_SRC_BLEND0:
1763   case ET_DUAL_SRC_BLEND1:
1764     return isGFX11Plus(STI);
1765   default:
1766     if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
1767       return !isGFX11Plus(STI);
1768     return true;
1769   }
1770 }
1771 
1772 } // namespace Exp
1773 
1774 //===----------------------------------------------------------------------===//
1775 // MTBUF Format
1776 //===----------------------------------------------------------------------===//
1777 
1778 namespace MTBUFFormat {
1779 
1780 int64_t getDfmt(const StringRef Name) {
1781   for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
1782     if (Name == DfmtSymbolic[Id])
1783       return Id;
1784   }
1785   return DFMT_UNDEF;
1786 }
1787 
1788 StringRef getDfmtName(unsigned Id) {
1789   assert(Id <= DFMT_MAX);
1790   return DfmtSymbolic[Id];
1791 }
1792 
1793 static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
1794   if (isSI(STI) || isCI(STI))
1795     return NfmtSymbolicSICI;
1796   if (isVI(STI) || isGFX9(STI))
1797     return NfmtSymbolicVI;
1798   return NfmtSymbolicGFX10;
1799 }
1800 
1801 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
1802   auto lookupTable = getNfmtLookupTable(STI);
1803   for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
1804     if (Name == lookupTable[Id])
1805       return Id;
1806   }
1807   return NFMT_UNDEF;
1808 }
1809 
1810 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
1811   assert(Id <= NFMT_MAX);
1812   return getNfmtLookupTable(STI)[Id];
1813 }
1814 
1815 bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1816   unsigned Dfmt;
1817   unsigned Nfmt;
1818   decodeDfmtNfmt(Id, Dfmt, Nfmt);
1819   return isValidNfmt(Nfmt, STI);
1820 }
1821 
1822 bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1823   return !getNfmtName(Id, STI).empty();
1824 }
1825 
1826 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
1827   return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
1828 }
1829 
1830 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
1831   Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
1832   Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
1833 }
1834 
1835 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
1836   if (isGFX11Plus(STI)) {
1837     for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1838       if (Name == UfmtSymbolicGFX11[Id])
1839         return Id;
1840     }
1841   } else {
1842     for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1843       if (Name == UfmtSymbolicGFX10[Id])
1844         return Id;
1845     }
1846   }
1847   return UFMT_UNDEF;
1848 }
1849 
1850 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
1851   if(isValidUnifiedFormat(Id, STI))
1852     return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
1853   return "";
1854 }
1855 
1856 bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
1857   return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
1858 }
1859 
1860 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1861                              const MCSubtargetInfo &STI) {
1862   int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
1863   if (isGFX11Plus(STI)) {
1864     for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1865       if (Fmt == DfmtNfmt2UFmtGFX11[Id])
1866         return Id;
1867     }
1868   } else {
1869     for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1870       if (Fmt == DfmtNfmt2UFmtGFX10[Id])
1871         return Id;
1872     }
1873   }
1874   return UFMT_UNDEF;
1875 }
1876 
1877 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
1878   return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
1879 }
1880 
1881 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
1882   if (isGFX10Plus(STI))
1883     return UFMT_DEFAULT;
1884   return DFMT_NFMT_DEFAULT;
1885 }
1886 
1887 } // namespace MTBUFFormat
1888 
1889 //===----------------------------------------------------------------------===//
1890 // SendMsg
1891 //===----------------------------------------------------------------------===//
1892 
1893 namespace SendMsg {
1894 
1895 static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
1896   return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
1897 }
1898 
1899 int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
1900   int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
1901   return (Idx < 0) ? Idx : Msg[Idx].Encoding;
1902 }
1903 
1904 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
1905   return (MsgId & ~(getMsgIdMask(STI))) == 0;
1906 }
1907 
1908 StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
1909   int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
1910   return (Idx < 0) ? "" : Msg[Idx].Name;
1911 }
1912 
1913 int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
1914   const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
1915   const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
1916   const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
1917   for (int i = F; i < L; ++i) {
1918     if (Name == S[i]) {
1919       return i;
1920     }
1921   }
1922   return OP_UNKNOWN_;
1923 }
1924 
1925 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1926                   bool Strict) {
1927   assert(isValidMsgId(MsgId, STI));
1928 
1929   if (!Strict)
1930     return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1931 
1932   if (MsgId == ID_SYSMSG)
1933     return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
1934   if (!isGFX11Plus(STI)) {
1935     switch (MsgId) {
1936     case ID_GS_PreGFX11:
1937       return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
1938     case ID_GS_DONE_PreGFX11:
1939       return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
1940     }
1941   }
1942   return OpId == OP_NONE_;
1943 }
1944 
1945 StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
1946                        const MCSubtargetInfo &STI) {
1947   assert(msgRequiresOp(MsgId, STI));
1948   return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
1949 }
1950 
1951 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1952                       const MCSubtargetInfo &STI, bool Strict) {
1953   assert(isValidMsgOp(MsgId, OpId, STI, Strict));
1954 
1955   if (!Strict)
1956     return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
1957 
1958   if (!isGFX11Plus(STI)) {
1959     switch (MsgId) {
1960     case ID_GS_PreGFX11:
1961       return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
1962     case ID_GS_DONE_PreGFX11:
1963       return (OpId == OP_GS_NOP) ?
1964           (StreamId == STREAM_ID_NONE_) :
1965           (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
1966     }
1967   }
1968   return StreamId == STREAM_ID_NONE_;
1969 }
1970 
1971 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
1972   return MsgId == ID_SYSMSG ||
1973       (!isGFX11Plus(STI) &&
1974        (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
1975 }
1976 
1977 bool msgSupportsStream(int64_t MsgId, int64_t OpId,
1978                        const MCSubtargetInfo &STI) {
1979   return !isGFX11Plus(STI) &&
1980       (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
1981       OpId != OP_GS_NOP;
1982 }
1983 
1984 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1985                uint16_t &StreamId, const MCSubtargetInfo &STI) {
1986   MsgId = Val & getMsgIdMask(STI);
1987   if (isGFX11Plus(STI)) {
1988     OpId = 0;
1989     StreamId = 0;
1990   } else {
1991     OpId = (Val & OP_MASK_) >> OP_SHIFT_;
1992     StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
1993   }
1994 }
1995 
1996 uint64_t encodeMsg(uint64_t MsgId,
1997                    uint64_t OpId,
1998                    uint64_t StreamId) {
1999   return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2000 }
2001 
2002 } // namespace SendMsg
2003 
2004 //===----------------------------------------------------------------------===//
2005 //
2006 //===----------------------------------------------------------------------===//
2007 
2008 unsigned getInitialPSInputAddr(const Function &F) {
2009   return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2010 }
2011 
2012 bool getHasColorExport(const Function &F) {
2013   // As a safe default always respond as if PS has color exports.
2014   return F.getFnAttributeAsParsedInteger(
2015              "amdgpu-color-export",
2016              F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2017 }
2018 
2019 bool getHasDepthExport(const Function &F) {
2020   return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2021 }
2022 
2023 bool isShader(CallingConv::ID cc) {
2024   switch(cc) {
2025     case CallingConv::AMDGPU_VS:
2026     case CallingConv::AMDGPU_LS:
2027     case CallingConv::AMDGPU_HS:
2028     case CallingConv::AMDGPU_ES:
2029     case CallingConv::AMDGPU_GS:
2030     case CallingConv::AMDGPU_PS:
2031     case CallingConv::AMDGPU_CS_Chain:
2032     case CallingConv::AMDGPU_CS_ChainPreserve:
2033     case CallingConv::AMDGPU_CS:
2034       return true;
2035     default:
2036       return false;
2037   }
2038 }
2039 
2040 bool isGraphics(CallingConv::ID cc) {
2041   return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
2042 }
2043 
2044 bool isCompute(CallingConv::ID cc) {
2045   return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
2046 }
2047 
2048 bool isEntryFunctionCC(CallingConv::ID CC) {
2049   switch (CC) {
2050   case CallingConv::AMDGPU_KERNEL:
2051   case CallingConv::SPIR_KERNEL:
2052   case CallingConv::AMDGPU_VS:
2053   case CallingConv::AMDGPU_GS:
2054   case CallingConv::AMDGPU_PS:
2055   case CallingConv::AMDGPU_CS:
2056   case CallingConv::AMDGPU_ES:
2057   case CallingConv::AMDGPU_HS:
2058   case CallingConv::AMDGPU_LS:
2059     return true;
2060   default:
2061     return false;
2062   }
2063 }
2064 
2065 bool isModuleEntryFunctionCC(CallingConv::ID CC) {
2066   switch (CC) {
2067   case CallingConv::AMDGPU_Gfx:
2068     return true;
2069   default:
2070     return isEntryFunctionCC(CC) || isChainCC(CC);
2071   }
2072 }
2073 
2074 bool isChainCC(CallingConv::ID CC) {
2075   switch (CC) {
2076   case CallingConv::AMDGPU_CS_Chain:
2077   case CallingConv::AMDGPU_CS_ChainPreserve:
2078     return true;
2079   default:
2080     return false;
2081   }
2082 }
2083 
2084 bool isKernelCC(const Function *Func) {
2085   return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
2086 }
2087 
2088 bool hasXNACK(const MCSubtargetInfo &STI) {
2089   return STI.hasFeature(AMDGPU::FeatureXNACK);
2090 }
2091 
2092 bool hasSRAMECC(const MCSubtargetInfo &STI) {
2093   return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2094 }
2095 
2096 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2097   return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
2098 }
2099 
2100 bool hasA16(const MCSubtargetInfo &STI) {
2101   return STI.hasFeature(AMDGPU::FeatureA16);
2102 }
2103 
2104 bool hasG16(const MCSubtargetInfo &STI) {
2105   return STI.hasFeature(AMDGPU::FeatureG16);
2106 }
2107 
2108 bool hasPackedD16(const MCSubtargetInfo &STI) {
2109   return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2110          !isSI(STI);
2111 }
2112 
2113 bool hasGDS(const MCSubtargetInfo &STI) {
2114   return STI.hasFeature(AMDGPU::FeatureGDS);
2115 }
2116 
2117 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2118   auto Version = getIsaVersion(STI.getCPU());
2119   if (Version.Major == 10)
2120     return Version.Minor >= 3 ? 13 : 5;
2121   if (Version.Major == 11)
2122     return 5;
2123   if (Version.Major >= 12)
2124     return HasSampler ? 4 : 5;
2125   return 0;
2126 }
2127 
2128 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2129 
2130 bool isSI(const MCSubtargetInfo &STI) {
2131   return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2132 }
2133 
2134 bool isCI(const MCSubtargetInfo &STI) {
2135   return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2136 }
2137 
2138 bool isVI(const MCSubtargetInfo &STI) {
2139   return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2140 }
2141 
2142 bool isGFX9(const MCSubtargetInfo &STI) {
2143   return STI.hasFeature(AMDGPU::FeatureGFX9);
2144 }
2145 
2146 bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2147   return isGFX9(STI) || isGFX10(STI);
2148 }
2149 
2150 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2151   return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2152 }
2153 
2154 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2155   return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2156 }
2157 
2158 bool isGFX8Plus(const MCSubtargetInfo &STI) {
2159   return isVI(STI) || isGFX9Plus(STI);
2160 }
2161 
2162 bool isGFX9Plus(const MCSubtargetInfo &STI) {
2163   return isGFX9(STI) || isGFX10Plus(STI);
2164 }
2165 
2166 bool isGFX10(const MCSubtargetInfo &STI) {
2167   return STI.hasFeature(AMDGPU::FeatureGFX10);
2168 }
2169 
2170 bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2171   return isGFX10(STI) || isGFX11(STI);
2172 }
2173 
2174 bool isGFX10Plus(const MCSubtargetInfo &STI) {
2175   return isGFX10(STI) || isGFX11Plus(STI);
2176 }
2177 
2178 bool isGFX11(const MCSubtargetInfo &STI) {
2179   return STI.hasFeature(AMDGPU::FeatureGFX11);
2180 }
2181 
2182 bool isGFX11Plus(const MCSubtargetInfo &STI) {
2183   return isGFX11(STI) || isGFX12Plus(STI);
2184 }
2185 
2186 bool isGFX12(const MCSubtargetInfo &STI) {
2187   return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2188 }
2189 
2190 bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
2191 
2192 bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2193 
2194 bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
2195   return !isGFX11Plus(STI);
2196 }
2197 
2198 bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2199   return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2200 }
2201 
2202 bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2203   return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2204 }
2205 
2206 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2207   return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2208 }
2209 
2210 bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2211   return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2212 }
2213 
2214 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2215   return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2216 }
2217 
2218 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2219   return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2220 }
2221 
2222 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2223   return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2224 }
2225 
2226 bool isGFX90A(const MCSubtargetInfo &STI) {
2227   return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2228 }
2229 
2230 bool isGFX940(const MCSubtargetInfo &STI) {
2231   return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2232 }
2233 
2234 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2235   return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2236 }
2237 
2238 bool hasMAIInsts(const MCSubtargetInfo &STI) {
2239   return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2240 }
2241 
2242 bool hasVOPD(const MCSubtargetInfo &STI) {
2243   return STI.hasFeature(AMDGPU::FeatureVOPD);
2244 }
2245 
2246 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2247   return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2248 }
2249 
2250 unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2251   return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2252 }
2253 
2254 int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2255                          int32_t ArgNumVGPR) {
2256   if (has90AInsts && ArgNumAGPR)
2257     return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2258   return std::max(ArgNumVGPR, ArgNumAGPR);
2259 }
2260 
2261 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
2262   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2263   const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2264   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2265     Reg == AMDGPU::SCC;
2266 }
2267 
2268 bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
2269   return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI;
2270 }
2271 
// MAP_REG2REG expands to a complete `switch (Reg)` statement. Its default
// label returns Reg unchanged; every other label is produced by one of the
// CASE_* helper macros, which must be defined at the point of expansion.
// The macro is expanded twice below with two different sets of CASE_*
// definitions: once in getMCReg() and once in mc2PseudoReg().
2272 #define MAP_REG2REG \
2273   using namespace AMDGPU; \
2274   switch(Reg) { \
2275   default: return Reg; \
2276   CASE_CI_VI(FLAT_SCR) \
2277   CASE_CI_VI(FLAT_SCR_LO) \
2278   CASE_CI_VI(FLAT_SCR_HI) \
2279   CASE_VI_GFX9PLUS(TTMP0) \
2280   CASE_VI_GFX9PLUS(TTMP1) \
2281   CASE_VI_GFX9PLUS(TTMP2) \
2282   CASE_VI_GFX9PLUS(TTMP3) \
2283   CASE_VI_GFX9PLUS(TTMP4) \
2284   CASE_VI_GFX9PLUS(TTMP5) \
2285   CASE_VI_GFX9PLUS(TTMP6) \
2286   CASE_VI_GFX9PLUS(TTMP7) \
2287   CASE_VI_GFX9PLUS(TTMP8) \
2288   CASE_VI_GFX9PLUS(TTMP9) \
2289   CASE_VI_GFX9PLUS(TTMP10) \
2290   CASE_VI_GFX9PLUS(TTMP11) \
2291   CASE_VI_GFX9PLUS(TTMP12) \
2292   CASE_VI_GFX9PLUS(TTMP13) \
2293   CASE_VI_GFX9PLUS(TTMP14) \
2294   CASE_VI_GFX9PLUS(TTMP15) \
2295   CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2296   CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2297   CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2298   CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2299   CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2300   CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2301   CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2302   CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2303   CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2304   CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2305   CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2306   CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2307   CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2308   CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2309   CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2310   CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2311   CASE_GFXPRE11_GFX11PLUS(M0) \
2312   CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2313   CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2314   }
2315 

// Forward direction (used by getMCReg): each case maps a generic register
// name to one of its suffixed variants, chosen by a subtarget predicate.
//
// NOTE(review): the assert in CASE_CI_VI is emitted *before* its case label,
// i.e. directly after the `return` of the preceding label inside the switch.
// As written it looks unreachable and therefore never checked - confirm
// whether it was meant to sit after `case node:`.
2316 #define CASE_CI_VI(node) \
2317   assert(!isSI(STI)); \
2318   case node: return isCI(STI) ? node##_ci : node##_vi;
2319 
2320 #define CASE_VI_GFX9PLUS(node) \
2321   case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2322 
2323 #define CASE_GFXPRE11_GFX11PLUS(node) \
2324   case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2325 
// Like CASE_GFXPRE11_GFX11PLUS, but the returned variants are derived from
// `result` instead of from `node`.
2326 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2327   case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2328 

/// Maps \p Reg to the register variant selected for \p STI by the table in
/// MAP_REG2REG. Registers not listed there are returned unchanged, and so is
/// every register when the target architecture is r600.
2329 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
2330   if (STI.getTargetTriple().getArch() == Triple::r600)
2331     return Reg;
2332   MAP_REG2REG
2333 }
2334 
2335 #undef CASE_CI_VI
2336 #undef CASE_VI_GFX9PLUS
2337 #undef CASE_GFXPRE11_GFX11PLUS
2338 #undef CASE_GFXPRE11_GFX11PLUS_TO
2339 

// Reverse direction (used by mc2PseudoReg): both suffixed variants of a
// register map back to the generic name. CASE_GFXPRE11_GFX11PLUS_TO expands
// to nothing here, so its `node` argument gets no reverse mapping of its own.
2340 #define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
2341 #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2342 #define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2343 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2344 

/// Inverse of getMCReg(): maps a suffixed register variant listed in
/// MAP_REG2REG back to its generic name. Any other register is returned
/// unchanged.
2345 unsigned mc2PseudoReg(unsigned Reg) {
2346   MAP_REG2REG
2347 }
2348 
2349 bool isInlineValue(unsigned Reg) {
2350   switch (Reg) {
2351   case AMDGPU::SRC_SHARED_BASE_LO:
2352   case AMDGPU::SRC_SHARED_BASE:
2353   case AMDGPU::SRC_SHARED_LIMIT_LO:
2354   case AMDGPU::SRC_SHARED_LIMIT:
2355   case AMDGPU::SRC_PRIVATE_BASE_LO:
2356   case AMDGPU::SRC_PRIVATE_BASE:
2357   case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2358   case AMDGPU::SRC_PRIVATE_LIMIT:
2359   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2360     return true;
2361   case AMDGPU::SRC_VCCZ:
2362   case AMDGPU::SRC_EXECZ:
2363   case AMDGPU::SRC_SCC:
2364     return true;
2365   case AMDGPU::SGPR_NULL:
2366     return true;
2367   default:
2368     return false;
2369   }
2370 }
2371 
// Remove the register-mapping helper macros so that they are not visible in
// the rest of the file.
2372 #undef CASE_CI_VI
2373 #undef CASE_VI_GFX9PLUS
2374 #undef CASE_GFXPRE11_GFX11PLUS
2375 #undef CASE_GFXPRE11_GFX11PLUS_TO
2376 #undef MAP_REG2REG
2377 
2378 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2379   assert(OpNo < Desc.NumOperands);
2380   unsigned OpType = Desc.operands()[OpNo].OperandType;
2381   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2382          OpType <= AMDGPU::OPERAND_SRC_LAST;
2383 }
2384 
2385 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2386   assert(OpNo < Desc.NumOperands);
2387   unsigned OpType = Desc.operands()[OpNo].OperandType;
2388   return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2389          OpType <= AMDGPU::OPERAND_KIMM_LAST;
2390 }
2391 
2392 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2393   assert(OpNo < Desc.NumOperands);
2394   unsigned OpType = Desc.operands()[OpNo].OperandType;
2395   switch (OpType) {
2396   case AMDGPU::OPERAND_REG_IMM_FP32:
2397   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2398   case AMDGPU::OPERAND_REG_IMM_FP64:
2399   case AMDGPU::OPERAND_REG_IMM_FP16:
2400   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2401   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2402   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2403   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2404   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2405   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2406   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2407   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2408   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2409   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2410   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2411   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2412     return true;
2413   default:
2414     return false;
2415   }
2416 }
2417 
2418 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2419   assert(OpNo < Desc.NumOperands);
2420   unsigned OpType = Desc.operands()[OpNo].OperandType;
2421   return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2422           OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2423          (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2424           OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2425 }
2426 
2427 // Avoid using MCRegisterClass::getSize, since that function will go away
2428 // (move from MC* level to Target* level). Return size in bits.
2429 unsigned getRegBitWidth(unsigned RCID) {
2430   switch (RCID) {
2431   case AMDGPU::SGPR_LO16RegClassID:
2432   case AMDGPU::AGPR_LO16RegClassID:
2433     return 16;
2434   case AMDGPU::SGPR_32RegClassID:
2435   case AMDGPU::VGPR_32RegClassID:
2436   case AMDGPU::VRegOrLds_32RegClassID:
2437   case AMDGPU::AGPR_32RegClassID:
2438   case AMDGPU::VS_32RegClassID:
2439   case AMDGPU::AV_32RegClassID:
2440   case AMDGPU::SReg_32RegClassID:
2441   case AMDGPU::SReg_32_XM0RegClassID:
2442   case AMDGPU::SRegOrLds_32RegClassID:
2443     return 32;
2444   case AMDGPU::SGPR_64RegClassID:
2445   case AMDGPU::VS_64RegClassID:
2446   case AMDGPU::SReg_64RegClassID:
2447   case AMDGPU::VReg_64RegClassID:
2448   case AMDGPU::AReg_64RegClassID:
2449   case AMDGPU::SReg_64_XEXECRegClassID:
2450   case AMDGPU::VReg_64_Align2RegClassID:
2451   case AMDGPU::AReg_64_Align2RegClassID:
2452   case AMDGPU::AV_64RegClassID:
2453   case AMDGPU::AV_64_Align2RegClassID:
2454     return 64;
2455   case AMDGPU::SGPR_96RegClassID:
2456   case AMDGPU::SReg_96RegClassID:
2457   case AMDGPU::VReg_96RegClassID:
2458   case AMDGPU::AReg_96RegClassID:
2459   case AMDGPU::VReg_96_Align2RegClassID:
2460   case AMDGPU::AReg_96_Align2RegClassID:
2461   case AMDGPU::AV_96RegClassID:
2462   case AMDGPU::AV_96_Align2RegClassID:
2463     return 96;
2464   case AMDGPU::SGPR_128RegClassID:
2465   case AMDGPU::SReg_128RegClassID:
2466   case AMDGPU::VReg_128RegClassID:
2467   case AMDGPU::AReg_128RegClassID:
2468   case AMDGPU::VReg_128_Align2RegClassID:
2469   case AMDGPU::AReg_128_Align2RegClassID:
2470   case AMDGPU::AV_128RegClassID:
2471   case AMDGPU::AV_128_Align2RegClassID:
2472     return 128;
2473   case AMDGPU::SGPR_160RegClassID:
2474   case AMDGPU::SReg_160RegClassID:
2475   case AMDGPU::VReg_160RegClassID:
2476   case AMDGPU::AReg_160RegClassID:
2477   case AMDGPU::VReg_160_Align2RegClassID:
2478   case AMDGPU::AReg_160_Align2RegClassID:
2479   case AMDGPU::AV_160RegClassID:
2480   case AMDGPU::AV_160_Align2RegClassID:
2481     return 160;
2482   case AMDGPU::SGPR_192RegClassID:
2483   case AMDGPU::SReg_192RegClassID:
2484   case AMDGPU::VReg_192RegClassID:
2485   case AMDGPU::AReg_192RegClassID:
2486   case AMDGPU::VReg_192_Align2RegClassID:
2487   case AMDGPU::AReg_192_Align2RegClassID:
2488   case AMDGPU::AV_192RegClassID:
2489   case AMDGPU::AV_192_Align2RegClassID:
2490     return 192;
2491   case AMDGPU::SGPR_224RegClassID:
2492   case AMDGPU::SReg_224RegClassID:
2493   case AMDGPU::VReg_224RegClassID:
2494   case AMDGPU::AReg_224RegClassID:
2495   case AMDGPU::VReg_224_Align2RegClassID:
2496   case AMDGPU::AReg_224_Align2RegClassID:
2497   case AMDGPU::AV_224RegClassID:
2498   case AMDGPU::AV_224_Align2RegClassID:
2499     return 224;
2500   case AMDGPU::SGPR_256RegClassID:
2501   case AMDGPU::SReg_256RegClassID:
2502   case AMDGPU::VReg_256RegClassID:
2503   case AMDGPU::AReg_256RegClassID:
2504   case AMDGPU::VReg_256_Align2RegClassID:
2505   case AMDGPU::AReg_256_Align2RegClassID:
2506   case AMDGPU::AV_256RegClassID:
2507   case AMDGPU::AV_256_Align2RegClassID:
2508     return 256;
2509   case AMDGPU::SGPR_288RegClassID:
2510   case AMDGPU::SReg_288RegClassID:
2511   case AMDGPU::VReg_288RegClassID:
2512   case AMDGPU::AReg_288RegClassID:
2513   case AMDGPU::VReg_288_Align2RegClassID:
2514   case AMDGPU::AReg_288_Align2RegClassID:
2515   case AMDGPU::AV_288RegClassID:
2516   case AMDGPU::AV_288_Align2RegClassID:
2517     return 288;
2518   case AMDGPU::SGPR_320RegClassID:
2519   case AMDGPU::SReg_320RegClassID:
2520   case AMDGPU::VReg_320RegClassID:
2521   case AMDGPU::AReg_320RegClassID:
2522   case AMDGPU::VReg_320_Align2RegClassID:
2523   case AMDGPU::AReg_320_Align2RegClassID:
2524   case AMDGPU::AV_320RegClassID:
2525   case AMDGPU::AV_320_Align2RegClassID:
2526     return 320;
2527   case AMDGPU::SGPR_352RegClassID:
2528   case AMDGPU::SReg_352RegClassID:
2529   case AMDGPU::VReg_352RegClassID:
2530   case AMDGPU::AReg_352RegClassID:
2531   case AMDGPU::VReg_352_Align2RegClassID:
2532   case AMDGPU::AReg_352_Align2RegClassID:
2533   case AMDGPU::AV_352RegClassID:
2534   case AMDGPU::AV_352_Align2RegClassID:
2535     return 352;
2536   case AMDGPU::SGPR_384RegClassID:
2537   case AMDGPU::SReg_384RegClassID:
2538   case AMDGPU::VReg_384RegClassID:
2539   case AMDGPU::AReg_384RegClassID:
2540   case AMDGPU::VReg_384_Align2RegClassID:
2541   case AMDGPU::AReg_384_Align2RegClassID:
2542   case AMDGPU::AV_384RegClassID:
2543   case AMDGPU::AV_384_Align2RegClassID:
2544     return 384;
2545   case AMDGPU::SGPR_512RegClassID:
2546   case AMDGPU::SReg_512RegClassID:
2547   case AMDGPU::VReg_512RegClassID:
2548   case AMDGPU::AReg_512RegClassID:
2549   case AMDGPU::VReg_512_Align2RegClassID:
2550   case AMDGPU::AReg_512_Align2RegClassID:
2551   case AMDGPU::AV_512RegClassID:
2552   case AMDGPU::AV_512_Align2RegClassID:
2553     return 512;
2554   case AMDGPU::SGPR_1024RegClassID:
2555   case AMDGPU::SReg_1024RegClassID:
2556   case AMDGPU::VReg_1024RegClassID:
2557   case AMDGPU::AReg_1024RegClassID:
2558   case AMDGPU::VReg_1024_Align2RegClassID:
2559   case AMDGPU::AReg_1024_Align2RegClassID:
2560   case AMDGPU::AV_1024RegClassID:
2561   case AMDGPU::AV_1024_Align2RegClassID:
2562     return 1024;
2563   default:
2564     llvm_unreachable("Unexpected register class");
2565   }
2566 }
2567 
2568 unsigned getRegBitWidth(const MCRegisterClass &RC) {
2569   return getRegBitWidth(RC.getID());
2570 }
2571 
2572 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2573                            unsigned OpNo) {
2574   assert(OpNo < Desc.NumOperands);
2575   unsigned RCID = Desc.operands()[OpNo].RegClass;
2576   return getRegBitWidth(RCID) / 8;
2577 }
2578 
2579 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2580   if (isInlinableIntLiteral(Literal))
2581     return true;
2582 
2583   uint64_t Val = static_cast<uint64_t>(Literal);
2584   return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2585          (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2586          (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2587          (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2588          (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2589          (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2590          (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2591          (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2592          (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2593          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2594 }
2595 
2596 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2597   if (isInlinableIntLiteral(Literal))
2598     return true;
2599 
2600   // The actual type of the operand does not seem to matter as long
2601   // as the bits match one of the inline immediate values.  For example:
2602   //
2603   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2604   // so it is a legal inline immediate.
2605   //
2606   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2607   // floating-point, so it is a legal inline immediate.
2608 
2609   uint32_t Val = static_cast<uint32_t>(Literal);
2610   return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2611          (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2612          (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2613          (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2614          (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2615          (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2616          (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2617          (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2618          (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2619          (Val == 0x3e22f983 && HasInv2Pi);
2620 }
2621 
2622 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2623   if (!HasInv2Pi)
2624     return false;
2625 
2626   if (isInlinableIntLiteral(Literal))
2627     return true;
2628 
2629   uint16_t Val = static_cast<uint16_t>(Literal);
2630   return Val == 0x3C00 || // 1.0
2631          Val == 0xBC00 || // -1.0
2632          Val == 0x3800 || // 0.5
2633          Val == 0xB800 || // -0.5
2634          Val == 0x4000 || // 2.0
2635          Val == 0xC000 || // -2.0
2636          Val == 0x4400 || // 4.0
2637          Val == 0xC400 || // -4.0
2638          Val == 0x3118;   // 1/2pi
2639 }
2640 
/// Computes the inline-constant encoding of \p Literal for a packed 16-bit
/// operand.
///
/// \param IsFloat selects the half-precision bit patterns (true) or the
///        single-precision bit patterns (false) for the float encodings.
/// \returns The encoding, or std::nullopt if \p Literal is not inlinable.
std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
  // The Instruction Set Architecture Reference Guide is unfortunately
  // misleading about inline operands of (packed) 16-bit instructions. What
  // the HW actually does, in short:
  //
  //  - integer encodings (-16 .. 64) always yield sign-extended 32-bit values
  //  - float encodings yield:
  //    - for F16 instructions: the matching half-precision float value in
  //      the LSBs, 0 in the MSBs
  //    - for UI16 instructions: the matching single-precision float value
  const int32_t SignedLit = static_cast<int32_t>(Literal);

  // 0 .. 64 occupy encodings 128 .. 192.
  if (0 <= SignedLit && SignedLit <= 64)
    return 128 + SignedLit;

  // -1 .. -16 occupy encodings 193 .. 208.
  if (-16 <= SignedLit && SignedLit < 0)
    return 192 - SignedLit;

  // Float constants occupy consecutive encodings starting at 240, in the
  // order: 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0, 1.0 / (2.0 * pi).
  constexpr unsigned NumFloatEncodings = 9;
  constexpr unsigned FirstFloatEncoding = 240;
  // clang-format off
  static constexpr uint32_t HalfPatterns[NumFloatEncodings] = {
      0x3800, 0xB800, 0x3C00, 0xBC00, 0x4000, 0xC000, 0x4400, 0xC400, 0x3118};
  static constexpr uint32_t SinglePatterns[NumFloatEncodings] = {
      0x3F000000, 0xBF000000, 0x3F800000, 0xBF800000, 0x40000000,
      0xC0000000, 0x40800000, 0xC0800000, 0x3E22F983};
  // clang-format on

  const uint32_t *Patterns = IsFloat ? HalfPatterns : SinglePatterns;
  for (unsigned Idx = 0; Idx != NumFloatEncodings; ++Idx)
    if (Patterns[Idx] == Literal)
      return FirstFloatEncoding + Idx;

  return std::nullopt;
}
2693 
2694 // Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
2695 // or nullopt.
2696 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
2697   return getInlineEncodingV216(false, Literal);
2698 }
2699 
2700 // Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
2701 // or nullopt.
2702 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
2703   return getInlineEncodingV216(true, Literal);
2704 }
2705 
2706 // Whether the given literal can be inlined for a V_PK_* instruction.
2707 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
2708   switch (OpType) {
2709   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2710   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2711   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2712     return getInlineEncodingV216(false, Literal).has_value();
2713   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2714   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2715   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2716     return getInlineEncodingV216(true, Literal).has_value();
2717   default:
2718     llvm_unreachable("bad packed operand type");
2719   }
2720 }
2721 
2722 // Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
2723 bool isInlinableLiteralV2I16(uint32_t Literal) {
2724   return getInlineEncodingV2I16(Literal).has_value();
2725 }
2726 
2727 // Whether the given literal can be inlined for a V_PK_*_F16 instruction.
2728 bool isInlinableLiteralV2F16(uint32_t Literal) {
2729   return getInlineEncodingV2F16(Literal).has_value();
2730 }
2731 
2732 bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
2733   if (IsFP64)
2734     return !(Val & 0xffffffffu);
2735 
2736   return isUInt<32>(Val) || isInt<32>(Val);
2737 }
2738 
2739 bool isArgPassedInSGPR(const Argument *A) {
2740   const Function *F = A->getParent();
2741 
2742   // Arguments to compute shaders are never a source of divergence.
2743   CallingConv::ID CC = F->getCallingConv();
2744   switch (CC) {
2745   case CallingConv::AMDGPU_KERNEL:
2746   case CallingConv::SPIR_KERNEL:
2747     return true;
2748   case CallingConv::AMDGPU_VS:
2749   case CallingConv::AMDGPU_LS:
2750   case CallingConv::AMDGPU_HS:
2751   case CallingConv::AMDGPU_ES:
2752   case CallingConv::AMDGPU_GS:
2753   case CallingConv::AMDGPU_PS:
2754   case CallingConv::AMDGPU_CS:
2755   case CallingConv::AMDGPU_Gfx:
2756   case CallingConv::AMDGPU_CS_Chain:
2757   case CallingConv::AMDGPU_CS_ChainPreserve:
2758     // For non-compute shaders, SGPR inputs are marked with either inreg or
2759     // byval. Everything else is in VGPRs.
2760     return A->hasAttribute(Attribute::InReg) ||
2761            A->hasAttribute(Attribute::ByVal);
2762   default:
2763     // TODO: treat i1 as divergent?
2764     return A->hasAttribute(Attribute::InReg);
2765   }
2766 }
2767 
2768 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2769   // Arguments to compute shaders are never a source of divergence.
2770   CallingConv::ID CC = CB->getCallingConv();
2771   switch (CC) {
2772   case CallingConv::AMDGPU_KERNEL:
2773   case CallingConv::SPIR_KERNEL:
2774     return true;
2775   case CallingConv::AMDGPU_VS:
2776   case CallingConv::AMDGPU_LS:
2777   case CallingConv::AMDGPU_HS:
2778   case CallingConv::AMDGPU_ES:
2779   case CallingConv::AMDGPU_GS:
2780   case CallingConv::AMDGPU_PS:
2781   case CallingConv::AMDGPU_CS:
2782   case CallingConv::AMDGPU_Gfx:
2783   case CallingConv::AMDGPU_CS_Chain:
2784   case CallingConv::AMDGPU_CS_ChainPreserve:
2785     // For non-compute shaders, SGPR inputs are marked with either inreg or
2786     // byval. Everything else is in VGPRs.
2787     return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2788            CB->paramHasAttr(ArgNo, Attribute::ByVal);
2789   default:
2790     return CB->paramHasAttr(ArgNo, Attribute::InReg);
2791   }
2792 }
2793 
2794 static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2795   return isGCN3Encoding(ST) || isGFX10Plus(ST);
2796 }
2797 
2798 static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
2799   return isGFX9Plus(ST);
2800 }
2801 
2802 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2803                                       int64_t EncodedOffset) {
2804   if (isGFX12Plus(ST))
2805     return isUInt<23>(EncodedOffset);
2806 
2807   return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2808                                : isUInt<8>(EncodedOffset);
2809 }
2810 
2811 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2812                                     int64_t EncodedOffset,
2813                                     bool IsBuffer) {
2814   if (isGFX12Plus(ST))
2815     return isInt<24>(EncodedOffset);
2816 
2817   return !IsBuffer &&
2818          hasSMRDSignedImmOffset(ST) &&
2819          isInt<21>(EncodedOffset);
2820 }
2821 
/// \returns true if \p ByteOffset is a multiple of 4, i.e. its two lowest
/// bits are clear.
static bool isDwordAligned(uint64_t ByteOffset) {
  constexpr uint64_t LowBitsMask = 3;
  return !(ByteOffset & LowBitsMask);
}
2825 
2826 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2827                                 uint64_t ByteOffset) {
2828   if (hasSMEMByteOffset(ST))
2829     return ByteOffset;
2830 
2831   assert(isDwordAligned(ByteOffset));
2832   return ByteOffset >> 2;
2833 }
2834 
2835 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2836                                             int64_t ByteOffset, bool IsBuffer) {
2837   if (isGFX12Plus(ST)) // 24 bit signed offsets
2838     return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2839                                  : std::nullopt;
2840 
2841   // The signed version is always a byte offset.
2842   if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2843     assert(hasSMEMByteOffset(ST));
2844     return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2845                                  : std::nullopt;
2846   }
2847 
2848   if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2849     return std::nullopt;
2850 
2851   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2852   return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2853              ? std::optional<int64_t>(EncodedOffset)
2854              : std::nullopt;
2855 }
2856 
2857 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2858                                                      int64_t ByteOffset) {
2859   if (!isCI(ST) || !isDwordAligned(ByteOffset))
2860     return std::nullopt;
2861 
2862   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2863   return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2864                                    : std::nullopt;
2865 }
2866 
2867 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
2868   if (AMDGPU::isGFX10(ST))
2869     return 12;
2870 
2871   if (AMDGPU::isGFX12(ST))
2872     return 24;
2873   return 13;
2874 }
2875 
namespace {

// Record holding a single intrinsic ID; element type returned by
// lookupSourceOfDivergence.
struct SourceOfDivergence {
  unsigned Intr;
};
// Declared here; the definition is presumably emitted by the generated
// AMDGPUGenSearchableTables.inc included below (enabled through
// GET_SourcesOfDivergence_IMPL) -- TODO confirm.
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

// Record holding a single intrinsic ID; element type returned by
// lookupAlwaysUniform.
struct AlwaysUniform {
  unsigned Intr;
};
// Declared here; the definition is presumably emitted by the generated
// AMDGPUGenSearchableTables.inc included below (enabled through
// GET_UniformIntrinsics_IMPL) -- TODO confirm.
const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

// Select which table implementations the generated include provides. These
// macros must be defined before the #include that follows.
#define GET_SourcesOfDivergence_IMPL
#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace
2896 
2897 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2898   return lookupSourceOfDivergence(IntrID);
2899 }
2900 
2901 bool isIntrinsicAlwaysUniform(unsigned IntrID) {
2902   return lookupAlwaysUniform(IntrID);
2903 }
2904 
2905 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
2906                                                   uint8_t NumComponents,
2907                                                   uint8_t NumFormat,
2908                                                   const MCSubtargetInfo &STI) {
2909   return isGFX11Plus(STI)
2910              ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2911                                             NumFormat)
2912              : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2913                                                        NumComponents, NumFormat)
2914                             : getGfx9BufferFormatInfo(BitsPerComp,
2915                                                       NumComponents, NumFormat);
2916 }
2917 
2918 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
2919                                                   const MCSubtargetInfo &STI) {
2920   return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
2921                           : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
2922                                          : getGfx9BufferFormatInfo(Format);
2923 }
2924 
2925 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
2926   for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
2927                        OpName::src2 }) {
2928     int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
2929     if (Idx == -1)
2930       continue;
2931 
2932     if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
2933         OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
2934       return true;
2935   }
2936 
2937   return false;
2938 }
2939 
2940 bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
2941   return hasAny64BitVGPROperands(OpDesc);
2942 }
2943 
2944 } // namespace AMDGPU
2945 
2946 raw_ostream &operator<<(raw_ostream &OS,
2947                         const AMDGPU::IsaInfo::TargetIDSetting S) {
2948   switch (S) {
2949   case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
2950     OS << "Unsupported";
2951     break;
2952   case (AMDGPU::IsaInfo::TargetIDSetting::Any):
2953     OS << "Any";
2954     break;
2955   case (AMDGPU::IsaInfo::TargetIDSetting::Off):
2956     OS << "Off";
2957     break;
2958   case (AMDGPU::IsaInfo::TargetIDSetting::On):
2959     OS << "On";
2960     break;
2961   }
2962   return OS;
2963 }
2964 
2965 } // namespace llvm
2966