xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (revision 56b17de1e8360fe131d425de20b5e75ff3ea897c)
1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUBaseInfo.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUAsmUtils.h"
12 #include "AMDKernelCodeT.h"
13 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14 #include "Utils/AMDKernelCodeTUtils.h"
15 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/BinaryFormat/ELF.h"
17 #include "llvm/IR/Attributes.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/Function.h"
20 #include "llvm/IR/GlobalValue.h"
21 #include "llvm/IR/IntrinsicsAMDGPU.h"
22 #include "llvm/IR/IntrinsicsR600.h"
23 #include "llvm/IR/LLVMContext.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/MC/MCSubtargetInfo.h"
27 #include "llvm/Support/AMDHSAKernelDescriptor.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/TargetParser/TargetParser.h"
30 #include <optional>
31 
32 #define GET_INSTRINFO_NAMED_OPS
33 #define GET_INSTRMAP_INFO
34 #include "AMDGPUGenInstrInfo.inc"
35 
36 static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
37     "amdhsa-code-object-version", llvm::cl::Hidden,
38     llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
39     llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
40                    "or asm directive still take priority if present)"));
41 
42 namespace {
43 
/// \returns Bit mask for given bit \p Shift and bit \p Width.
///
/// The mask has \p Width consecutive one bits starting at bit \p Shift. A
/// \p Width of zero yields an empty mask. A \p Width that covers the whole
/// `unsigned` (or more) yields all ones before shifting, and a \p Shift that
/// is at least the bit width of `unsigned` yields zero.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  constexpr unsigned NumBits = sizeof(unsigned) * 8;
  // Shifting by an amount >= the width of the type is undefined, and a signed
  // `1 << 31` overflows, so do everything in unsigned arithmetic and handle
  // the out-of-range amounts explicitly.
  const unsigned Field = Width >= NumBits ? ~0u : (1u << Width) - 1u;
  return Shift >= NumBits ? 0u : Field << Shift;
}
48 
49 /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
50 ///
51 /// \returns Packed \p Dst.
52 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
53   unsigned Mask = getBitMask(Shift, Width);
54   return ((Src << Shift) & Mask) | (Dst & ~Mask);
55 }
56 
57 /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
58 ///
59 /// \returns Unpacked bits.
60 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
61   return (Src & getBitMask(Shift, Width)) >> Shift;
62 }
63 
/// \returns Vmcnt bit shift (lower bits): 10 when \p VersionMajor is 11 or
/// newer, 0 otherwise.
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 10;
  return 0;
}
68 
/// \returns Vmcnt bit width (lower bits): 6 when \p VersionMajor is 11 or
/// newer, 4 otherwise.
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 6;
  return 4;
}
73 
/// \returns Expcnt bit shift: 0 when \p VersionMajor is 11 or newer,
/// 4 otherwise.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 0;
  return 4;
}
78 
/// \returns Expcnt bit width. The value is the same for every
/// \p VersionMajor.
unsigned getExpcntBitWidth(unsigned VersionMajor) {
  (void)VersionMajor;
  constexpr unsigned Width = 3;
  return Width;
}
81 
/// \returns Lgkmcnt bit shift: 4 when \p VersionMajor is 11 or newer,
/// 8 otherwise.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 4;
  return 8;
}
86 
/// \returns Lgkmcnt bit width: 6 when \p VersionMajor is 10 or newer,
/// 4 otherwise.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 10)
    return 6;
  return 4;
}
91 
/// \returns Vmcnt bit shift (higher bits). The value is the same for every
/// \p VersionMajor.
unsigned getVmcntBitShiftHi(unsigned VersionMajor) {
  (void)VersionMajor;
  constexpr unsigned Shift = 14;
  return Shift;
}
94 
/// \returns Vmcnt bit width (higher bits): 2 for \p VersionMajor 9 and 10,
/// 0 for every other version.
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  switch (VersionMajor) {
  case 9:
  case 10:
    return 2;
  default:
    return 0;
  }
}
99 
/// \returns Loadcnt bit width: 6 when \p VersionMajor is 12 or newer,
/// 0 otherwise.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}
104 
/// \returns Samplecnt bit width: 6 when \p VersionMajor is 12 or newer,
/// 0 otherwise.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}
109 
/// \returns Bvhcnt bit width: 3 when \p VersionMajor is 12 or newer,
/// 0 otherwise.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 3;
  return 0;
}
114 
/// \returns Dscnt bit width: 6 when \p VersionMajor is 12 or newer,
/// 0 otherwise.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}
119 
/// \returns Dscnt bit shift in combined S_WAIT instructions. The value is the
/// same for every \p VersionMajor.
unsigned getDscntBitShift(unsigned VersionMajor) {
  (void)VersionMajor;
  constexpr unsigned Shift = 0;
  return Shift;
}
122 
/// \returns Storecnt or Vscnt bit width, depending on VersionMajor: 6 when
/// \p VersionMajor is 10 or newer, 0 otherwise.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 10)
    return 6;
  return 0;
}
127 
/// \returns Kmcnt bit width: 5 when \p VersionMajor is 12 or newer,
/// 0 otherwise.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 5;
  return 0;
}
132 
/// \returns Shift for Loadcnt/Storecnt in combined S_WAIT instructions: 8 when
/// \p VersionMajor is 12 or newer, 0 otherwise.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 8;
  return 0;
}
137 
/// \returns Width, in bits, of the VmVsrc field.
inline unsigned getVmVsrcBitWidth() {
  constexpr unsigned Width = 3;
  return Width;
}
140 
/// \returns Bit position at which the VmVsrc field starts.
inline unsigned getVmVsrcBitShift() {
  constexpr unsigned Shift = 2;
  return Shift;
}
143 
/// \returns Width, in bits, of the VaVdst field.
inline unsigned getVaVdstBitWidth() {
  constexpr unsigned Width = 4;
  return Width;
}
146 
/// \returns Bit position at which the VaVdst field starts.
inline unsigned getVaVdstBitShift() {
  constexpr unsigned Shift = 12;
  return Shift;
}
149 
/// \returns Width, in bits, of the SaSdst field.
inline unsigned getSaSdstBitWidth() {
  constexpr unsigned Width = 1;
  return Width;
}
152 
/// \returns Bit position at which the SaSdst field starts.
inline unsigned getSaSdstBitShift() {
  constexpr unsigned Shift = 0;
  return Shift;
}
155 
156 } // end anonymous namespace
157 
158 namespace llvm {
159 
160 namespace AMDGPU {
161 
162 /// \returns true if the target supports signed immediate offset for SMRD
163 /// instructions.
164 bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
165   return isGFX9Plus(ST);
166 }
167 
168 /// \returns True if \p STI is AMDHSA.
169 bool isHsaAbi(const MCSubtargetInfo &STI) {
170   return STI.getTargetTriple().getOS() == Triple::AMDHSA;
171 }
172 
173 unsigned getAMDHSACodeObjectVersion(const Module &M) {
174   if (auto Ver = mdconst::extract_or_null<ConstantInt>(
175           M.getModuleFlag("amdhsa_code_object_version"))) {
176     return (unsigned)Ver->getZExtValue() / 100;
177   }
178 
179   return getDefaultAMDHSACodeObjectVersion();
180 }
181 
182 unsigned getDefaultAMDHSACodeObjectVersion() {
183   return DefaultAMDHSACodeObjectVersion;
184 }
185 
186 unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
187   switch (ABIVersion) {
188   case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
189     return 4;
190   case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
191     return 5;
192   case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
193     return 6;
194   default:
195     return getDefaultAMDHSACodeObjectVersion();
196   }
197 }
198 
199 uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
200   if (T.getOS() != Triple::AMDHSA)
201     return 0;
202 
203   switch (CodeObjectVersion) {
204   case 4:
205     return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
206   case 5:
207     return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
208   case 6:
209     return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
210   default:
211     report_fatal_error("Unsupported AMDHSA Code Object Version " +
212                        Twine(CodeObjectVersion));
213   }
214 }
215 
216 unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
217   switch (CodeObjectVersion) {
218   case AMDHSA_COV4:
219     return 48;
220   case AMDHSA_COV5:
221   case AMDHSA_COV6:
222   default:
223     return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
224   }
225 }
226 
227 
228 // FIXME: All such magic numbers about the ABI should be in a
229 // central TD file.
230 unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
231   switch (CodeObjectVersion) {
232   case AMDHSA_COV4:
233     return 24;
234   case AMDHSA_COV5:
235   case AMDHSA_COV6:
236   default:
237     return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
238   }
239 }
240 
241 unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
242   switch (CodeObjectVersion) {
243   case AMDHSA_COV4:
244     return 32;
245   case AMDHSA_COV5:
246   case AMDHSA_COV6:
247   default:
248     return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
249   }
250 }
251 
252 unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
253   switch (CodeObjectVersion) {
254   case AMDHSA_COV4:
255     return 40;
256   case AMDHSA_COV5:
257   case AMDHSA_COV6:
258   default:
259     return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
260   }
261 }
262 
263 #define GET_MIMGBaseOpcodesTable_IMPL
264 #define GET_MIMGDimInfoTable_IMPL
265 #define GET_MIMGInfoTable_IMPL
266 #define GET_MIMGLZMappingTable_IMPL
267 #define GET_MIMGMIPMappingTable_IMPL
268 #define GET_MIMGBiasMappingTable_IMPL
269 #define GET_MIMGOffsetMappingTable_IMPL
270 #define GET_MIMGG16MappingTable_IMPL
271 #define GET_MAIInstInfoTable_IMPL
272 #include "AMDGPUGenSearchableTables.inc"
273 
274 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
275                   unsigned VDataDwords, unsigned VAddrDwords) {
276   const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
277                                              VDataDwords, VAddrDwords);
278   return Info ? Info->Opcode : -1;
279 }
280 
281 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
282   const MIMGInfo *Info = getMIMGInfo(Opc);
283   return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
284 }
285 
286 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
287   const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
288   const MIMGInfo *NewInfo =
289       getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
290                           NewChannels, OrigInfo->VAddrDwords);
291   return NewInfo ? NewInfo->Opcode : -1;
292 }
293 
294 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
295                            const MIMGDimInfo *Dim, bool IsA16,
296                            bool IsG16Supported) {
297   unsigned AddrWords = BaseOpcode->NumExtraArgs;
298   unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
299                             (BaseOpcode->LodOrClampOrMip ? 1 : 0);
300   if (IsA16)
301     AddrWords += divideCeil(AddrComponents, 2);
302   else
303     AddrWords += AddrComponents;
304 
305   // Note: For subtargets that support A16 but not G16, enabling A16 also
306   // enables 16 bit gradients.
307   // For subtargets that support A16 (operand) and G16 (done with a different
308   // instruction encoding), they are independent.
309 
310   if (BaseOpcode->Gradients) {
311     if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
312       // There are two gradients per coordinate, we pack them separately.
313       // For the 3d case,
314       // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
315       AddrWords += alignTo<2>(Dim->NumGradients / 2);
316     else
317       AddrWords += Dim->NumGradients;
318   }
319   return AddrWords;
320 }
321 
// Row type of the MUBUF info table (GET_MUBUFInfoTable_*). The fields are
// read by the getMUBUF*() accessors in this file.
struct MUBUFInfo {
  uint16_t Opcode;     // Result of getMUBUFOpcode().
  uint16_t BaseOpcode; // Result of getMUBUFBaseOpcode().
  uint8_t elements;    // Result of getMUBUFElements().
  bool has_vaddr;      // Result of getMUBUFHasVAddr().
  bool has_srsrc;      // Result of getMUBUFHasSrsrc().
  bool has_soffset;    // Result of getMUBUFHasSoffset().
  bool IsBufferInv;    // Result of getMUBUFIsBufferInv().
  bool tfe;            // Result of getMUBUFTfe().
};
332 
// Row type of the MTBUF info table (GET_MTBUFInfoTable_*). The fields are
// read by the getMTBUF*() accessors in this file.
struct MTBUFInfo {
  uint16_t Opcode;     // Result of getMTBUFOpcode().
  uint16_t BaseOpcode; // Result of getMTBUFBaseOpcode().
  uint8_t elements;    // Result of getMTBUFElements().
  bool has_vaddr;      // Result of getMTBUFHasVAddr().
  bool has_srsrc;      // Result of getMTBUFHasSrsrc().
  bool has_soffset;    // Result of getMTBUFHasSoffset().
};
341 
// Row type of the SM info table (GET_SMInfoTable_*).
struct SMInfo {
  uint16_t Opcode; // Opcode this row describes.
  bool IsBuffer;   // Result of getSMEMIsBuffer().
};
346 
// Row type consulted by getVOP1IsSingle(), getVOP2IsSingle() and
// getVOP3IsSingle().
struct VOPInfo {
  uint16_t Opcode; // Opcode this row describes.
  bool IsSingle;   // Value the getVOP*IsSingle() accessors return.
};
351 
// Single-column row type; presumably the row of the VOPC64DPP / VOPC64DPP8
// tables queried by isVOPC64DPP() -- confirm against the generated tables.
struct VOPC64DPPInfo {
  uint16_t Opcode; // Opcode listed in the table.
};
355 
// Single-column row type; presumably the row of the VOPCAsmOnlyInfoTable
// queried by isVOPCAsmOnly() -- confirm against the generated tables.
struct VOPCDPPAsmOnlyInfo {
  uint16_t Opcode; // Opcode listed in the table.
};
359 
// Single-column row type; presumably the row of the VOP3CAsmOnlyInfoTable --
// confirm against the generated tables.
struct VOP3CDPPAsmOnlyInfo {
  uint16_t Opcode; // Opcode listed in the table.
};
363 
// Row type of the VOPD component table (GET_VOPDComponentTable_*).
struct VOPDComponentInfo {
  uint16_t BaseVOP; // Opcode reported by getVOPDComponents().
  uint16_t VOPDOp;  // Result of getVOPDOpcode().
  bool CanBeVOPDX;  // First member of the getCanBeVOPD() result.
};
369 
// Row type of the VOPD pairs table (GET_VOPDPairs_*).
struct VOPDInfo {
  uint16_t Opcode;    // Result of getVOPDFull().
  uint16_t OpX;       // X component, resolved in getVOPDComponents().
  uint16_t OpY;       // Y component, resolved in getVOPDComponents().
  uint16_t Subtarget; // Not read in this part of the file.
};
376 
// Row type of the VOP true16 table (GET_VOPTrue16Table_*).
struct VOPTrue16Info {
  uint16_t Opcode; // Opcode this row describes.
  bool IsTrue16;   // Result of isTrue16Inst().
};
381 
// Row type of the single-use exception table (GET_SingleUseExceptionTable_*).
struct SingleUseExceptionInfo {
  uint16_t Opcode;                 // Opcode this row describes.
  bool IsInvalidSingleUseConsumer; // Result of isInvalidSingleUseConsumerInst().
  bool IsInvalidSingleUseProducer; // Result of isInvalidSingleUseProducerInst().
};
387 
388 #define GET_MTBUFInfoTable_DECL
389 #define GET_MTBUFInfoTable_IMPL
390 #define GET_MUBUFInfoTable_DECL
391 #define GET_MUBUFInfoTable_IMPL
392 #define GET_SingleUseExceptionTable_DECL
393 #define GET_SingleUseExceptionTable_IMPL
394 #define GET_SMInfoTable_DECL
395 #define GET_SMInfoTable_IMPL
396 #define GET_VOP1InfoTable_DECL
397 #define GET_VOP1InfoTable_IMPL
398 #define GET_VOP2InfoTable_DECL
399 #define GET_VOP2InfoTable_IMPL
400 #define GET_VOP3InfoTable_DECL
401 #define GET_VOP3InfoTable_IMPL
402 #define GET_VOPC64DPPTable_DECL
403 #define GET_VOPC64DPPTable_IMPL
404 #define GET_VOPC64DPP8Table_DECL
405 #define GET_VOPC64DPP8Table_IMPL
406 #define GET_VOPCAsmOnlyInfoTable_DECL
407 #define GET_VOPCAsmOnlyInfoTable_IMPL
408 #define GET_VOP3CAsmOnlyInfoTable_DECL
409 #define GET_VOP3CAsmOnlyInfoTable_IMPL
410 #define GET_VOPDComponentTable_DECL
411 #define GET_VOPDComponentTable_IMPL
412 #define GET_VOPDPairs_DECL
413 #define GET_VOPDPairs_IMPL
414 #define GET_VOPTrue16Table_DECL
415 #define GET_VOPTrue16Table_IMPL
416 #define GET_WMMAOpcode2AddrMappingTable_DECL
417 #define GET_WMMAOpcode2AddrMappingTable_IMPL
418 #define GET_WMMAOpcode3AddrMappingTable_DECL
419 #define GET_WMMAOpcode3AddrMappingTable_IMPL
420 #include "AMDGPUGenSearchableTables.inc"
421 
422 int getMTBUFBaseOpcode(unsigned Opc) {
423   const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
424   return Info ? Info->BaseOpcode : -1;
425 }
426 
427 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
428   const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
429   return Info ? Info->Opcode : -1;
430 }
431 
432 int getMTBUFElements(unsigned Opc) {
433   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
434   return Info ? Info->elements : 0;
435 }
436 
437 bool getMTBUFHasVAddr(unsigned Opc) {
438   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
439   return Info ? Info->has_vaddr : false;
440 }
441 
442 bool getMTBUFHasSrsrc(unsigned Opc) {
443   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
444   return Info ? Info->has_srsrc : false;
445 }
446 
447 bool getMTBUFHasSoffset(unsigned Opc) {
448   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
449   return Info ? Info->has_soffset : false;
450 }
451 
452 int getMUBUFBaseOpcode(unsigned Opc) {
453   const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
454   return Info ? Info->BaseOpcode : -1;
455 }
456 
457 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
458   const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
459   return Info ? Info->Opcode : -1;
460 }
461 
462 int getMUBUFElements(unsigned Opc) {
463   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
464   return Info ? Info->elements : 0;
465 }
466 
467 bool getMUBUFHasVAddr(unsigned Opc) {
468   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
469   return Info ? Info->has_vaddr : false;
470 }
471 
472 bool getMUBUFHasSrsrc(unsigned Opc) {
473   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
474   return Info ? Info->has_srsrc : false;
475 }
476 
477 bool getMUBUFHasSoffset(unsigned Opc) {
478   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
479   return Info ? Info->has_soffset : false;
480 }
481 
482 bool getMUBUFIsBufferInv(unsigned Opc) {
483   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
484   return Info ? Info->IsBufferInv : false;
485 }
486 
487 bool getMUBUFTfe(unsigned Opc) {
488   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
489   return Info ? Info->tfe : false;
490 }
491 
492 bool getSMEMIsBuffer(unsigned Opc) {
493   const SMInfo *Info = getSMEMOpcodeHelper(Opc);
494   return Info ? Info->IsBuffer : false;
495 }
496 
497 bool getVOP1IsSingle(unsigned Opc) {
498   const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
499   return Info ? Info->IsSingle : true;
500 }
501 
502 bool getVOP2IsSingle(unsigned Opc) {
503   const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
504   return Info ? Info->IsSingle : true;
505 }
506 
507 bool getVOP3IsSingle(unsigned Opc) {
508   const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
509   return Info ? Info->IsSingle : true;
510 }
511 
512 bool isVOPC64DPP(unsigned Opc) {
513   return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
514 }
515 
516 bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }
517 
518 bool getMAIIsDGEMM(unsigned Opc) {
519   const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
520   return Info ? Info->is_dgemm : false;
521 }
522 
523 bool getMAIIsGFX940XDL(unsigned Opc) {
524   const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
525   return Info ? Info->is_gfx940_xdl : false;
526 }
527 
528 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
529   if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
530     return SIEncodingFamily::GFX12;
531   if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
532     return SIEncodingFamily::GFX11;
533   llvm_unreachable("Subtarget generation does not support VOPD!");
534 }
535 
536 CanBeVOPD getCanBeVOPD(unsigned Opc) {
537   const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
538   if (Info)
539     return {Info->CanBeVOPDX, true};
540   return {false, false};
541 }
542 
543 unsigned getVOPDOpcode(unsigned Opc) {
544   const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
545   return Info ? Info->VOPDOp : ~0u;
546 }
547 
548 bool isVOPD(unsigned Opc) {
549   return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
550 }
551 
552 bool isMAC(unsigned Opc) {
553   return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
554          Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
555          Opc == AMDGPU::V_MAC_F32_e64_vi ||
556          Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
557          Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
558          Opc == AMDGPU::V_MAC_F16_e64_vi ||
559          Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
560          Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
561          Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
562          Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
563          Opc == AMDGPU::V_FMAC_F32_e64_vi ||
564          Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
565          Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
566          Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
567          Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
568          Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
569          Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
570          Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
571          Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
572          Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
573 }
574 
575 bool isPermlane16(unsigned Opc) {
576   return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
577          Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
578          Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
579          Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
580          Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
581          Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
582          Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
583          Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
584 }
585 
586 bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
587   return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
588          Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
589          Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
590          Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
591          Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
592          Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
593          Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
594          Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
595 }
596 
597 bool isGenericAtomic(unsigned Opc) {
598   return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
599          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
600          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
601          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
602          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
603          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
604          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
605          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
606          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
607          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
608          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
609          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
610          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
611          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
612          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
613          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
614          Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
615 }
616 
617 bool isTrue16Inst(unsigned Opc) {
618   const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
619   return Info ? Info->IsTrue16 : false;
620 }
621 
622 bool isInvalidSingleUseConsumerInst(unsigned Opc) {
623   const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc);
624   return Info && Info->IsInvalidSingleUseConsumer;
625 }
626 
627 bool isInvalidSingleUseProducerInst(unsigned Opc) {
628   const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc);
629   return Info && Info->IsInvalidSingleUseProducer;
630 }
631 
632 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
633   const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
634   return Info ? Info->Opcode3Addr : ~0u;
635 }
636 
637 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
638   const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
639   return Info ? Info->Opcode2Addr : ~0u;
640 }
641 
642 // Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
643 // header files, so we need to wrap it in a function that takes unsigned
644 // instead.
645 int getMCOpcode(uint16_t Opcode, unsigned Gen) {
646   return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
647 }
648 
649 int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
650   const VOPDInfo *Info =
651       getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
652   return Info ? Info->Opcode : -1;
653 }
654 
655 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
656   const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
657   assert(Info);
658   auto OpX = getVOPDBaseFromComponent(Info->OpX);
659   auto OpY = getVOPDBaseFromComponent(Info->OpY);
660   assert(OpX && OpY);
661   return {OpX->BaseVOP, OpY->BaseVOP};
662 }
663 
664 namespace VOPD {
665 
666 ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
667   assert(OpDesc.getNumDefs() == Component::DST_NUM);
668 
669   assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
670   assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
671   auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
672   assert(TiedIdx == -1 || TiedIdx == Component::DST);
673   HasSrc2Acc = TiedIdx != -1;
674 
675   SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
676   assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
677 
678   auto OperandsNum = OpDesc.getNumOperands();
679   unsigned CompOprIdx;
680   for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
681     if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
682       MandatoryLiteralIdx = CompOprIdx;
683       break;
684     }
685   }
686 }
687 
688 unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
689   assert(CompOprIdx < Component::MAX_OPR_NUM);
690 
691   if (CompOprIdx == Component::DST)
692     return getIndexOfDstInParsedOperands();
693 
694   auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
695   if (CompSrcIdx < getCompParsedSrcOperandsNum())
696     return getIndexOfSrcInParsedOperands(CompSrcIdx);
697 
698   // The specified operand does not exist.
699   return 0;
700 }
701 
702 std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
703     std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {
704 
705   auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
706   auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);
707 
708   const unsigned CompOprNum =
709       SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
710   unsigned CompOprIdx;
711   for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
712     unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
713     if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
714         ((OpXRegs[CompOprIdx] & BanksMasks) ==
715          (OpYRegs[CompOprIdx] & BanksMasks)))
716       return CompOprIdx;
717   }
718 
719   return {};
720 }
721 
722 // Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
723 // by the specified component. If an operand is unused
724 // or is not a VGPR, the corresponding value is 0.
725 //
726 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
727 // for the specified component and MC operand. The callback must return 0
728 // if the operand is not a register or not a VGPR.
729 InstInfo::RegIndices InstInfo::getRegIndices(
730     unsigned CompIdx,
731     std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
732   assert(CompIdx < COMPONENTS_NUM);
733 
734   const auto &Comp = CompInfo[CompIdx];
735   InstInfo::RegIndices RegIndices;
736 
737   RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
738 
739   for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
740     unsigned CompSrcIdx = CompOprIdx - DST_NUM;
741     RegIndices[CompOprIdx] =
742         Comp.hasRegSrcOperand(CompSrcIdx)
743             ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
744             : 0;
745   }
746   return RegIndices;
747 }
748 
749 } // namespace VOPD
750 
751 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
752   return VOPD::InstInfo(OpX, OpY);
753 }
754 
755 VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
756                                const MCInstrInfo *InstrInfo) {
757   auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
758   const auto &OpXDesc = InstrInfo->get(OpX);
759   const auto &OpYDesc = InstrInfo->get(OpY);
760   VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
761   VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
762   return VOPD::InstInfo(OpXInfo, OpYInfo);
763 }
764 
765 namespace IsaInfo {
766 
767 AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
768     : STI(STI), XnackSetting(TargetIDSetting::Any),
769       SramEccSetting(TargetIDSetting::Any) {
770   if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
771     XnackSetting = TargetIDSetting::Unsupported;
772   if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
773     SramEccSetting = TargetIDSetting::Unsupported;
774 }
775 
776 void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
777   // Check if xnack or sramecc is explicitly enabled or disabled.  In the
778   // absence of the target features we assume we must generate code that can run
779   // in any environment.
780   SubtargetFeatures Features(FS);
781   std::optional<bool> XnackRequested;
782   std::optional<bool> SramEccRequested;
783 
784   for (const std::string &Feature : Features.getFeatures()) {
785     if (Feature == "+xnack")
786       XnackRequested = true;
787     else if (Feature == "-xnack")
788       XnackRequested = false;
789     else if (Feature == "+sramecc")
790       SramEccRequested = true;
791     else if (Feature == "-sramecc")
792       SramEccRequested = false;
793   }
794 
795   bool XnackSupported = isXnackSupported();
796   bool SramEccSupported = isSramEccSupported();
797 
798   if (XnackRequested) {
799     if (XnackSupported) {
800       XnackSetting =
801           *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
802     } else {
803       // If a specific xnack setting was requested and this GPU does not support
804       // xnack emit a warning. Setting will remain set to "Unsupported".
805       if (*XnackRequested) {
806         errs() << "warning: xnack 'On' was requested for a processor that does "
807                   "not support it!\n";
808       } else {
809         errs() << "warning: xnack 'Off' was requested for a processor that "
810                   "does not support it!\n";
811       }
812     }
813   }
814 
815   if (SramEccRequested) {
816     if (SramEccSupported) {
817       SramEccSetting =
818           *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
819     } else {
820       // If a specific sramecc setting was requested and this GPU does not
821       // support sramecc emit a warning. Setting will remain set to
822       // "Unsupported".
823       if (*SramEccRequested) {
824         errs() << "warning: sramecc 'On' was requested for a processor that "
825                   "does not support it!\n";
826       } else {
827         errs() << "warning: sramecc 'Off' was requested for a processor that "
828                   "does not support it!\n";
829       }
830     }
831   }
832 }
833 
834 static TargetIDSetting
835 getTargetIDSettingFromFeatureString(StringRef FeatureString) {
836   if (FeatureString.ends_with("-"))
837     return TargetIDSetting::Off;
838   if (FeatureString.ends_with("+"))
839     return TargetIDSetting::On;
840 
841   llvm_unreachable("Malformed feature string");
842 }
843 
844 void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
845   SmallVector<StringRef, 3> TargetIDSplit;
846   TargetID.split(TargetIDSplit, ':');
847 
848   for (const auto &FeatureString : TargetIDSplit) {
849     if (FeatureString.starts_with("xnack"))
850       XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
851     if (FeatureString.starts_with("sramecc"))
852       SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
853   }
854 }
855 
856 std::string AMDGPUTargetID::toString() const {
857   std::string StringRep;
858   raw_string_ostream StreamRep(StringRep);
859 
860   auto TargetTriple = STI.getTargetTriple();
861   auto Version = getIsaVersion(STI.getCPU());
862 
863   StreamRep << TargetTriple.getArchName() << '-'
864             << TargetTriple.getVendorName() << '-'
865             << TargetTriple.getOSName() << '-'
866             << TargetTriple.getEnvironmentName() << '-';
867 
868   std::string Processor;
869   // TODO: Following else statement is present here because we used various
870   // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
871   // Remove once all aliases are removed from GCNProcessors.td.
872   if (Version.Major >= 9)
873     Processor = STI.getCPU().str();
874   else
875     Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
876                  Twine(Version.Stepping))
877                     .str();
878 
879   std::string Features;
880   if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
881     // sramecc.
882     if (getSramEccSetting() == TargetIDSetting::Off)
883       Features += ":sramecc-";
884     else if (getSramEccSetting() == TargetIDSetting::On)
885       Features += ":sramecc+";
886     // xnack.
887     if (getXnackSetting() == TargetIDSetting::Off)
888       Features += ":xnack-";
889     else if (getXnackSetting() == TargetIDSetting::On)
890       Features += ":xnack+";
891   }
892 
893   StreamRep << Processor << Features;
894 
895   StreamRep.flush();
896   return StringRep;
897 }
898 
899 unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
900   if (STI->getFeatureBits().test(FeatureWavefrontSize16))
901     return 16;
902   if (STI->getFeatureBits().test(FeatureWavefrontSize32))
903     return 32;
904 
905   return 64;
906 }
907 
908 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
909   unsigned BytesPerCU = 0;
910   if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
911     BytesPerCU = 32768;
912   if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
913     BytesPerCU = 65536;
914 
915   // "Per CU" really means "per whatever functional block the waves of a
916   // workgroup must share". So the effective local memory size is doubled in
917   // WGP mode on gfx10.
918   if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
919     BytesPerCU *= 2;
920 
921   return BytesPerCU;
922 }
923 
924 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
925   if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
926     return 32768;
927   if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
928     return 65536;
929   return 0;
930 }
931 
932 unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
933   // "Per CU" really means "per whatever functional block the waves of a
934   // workgroup must share". For gfx10 in CU mode this is the CU, which contains
935   // two SIMDs.
936   if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
937     return 2;
938   // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
939   // two CUs, so a total of four SIMDs.
940   return 4;
941 }
942 
943 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
944                                unsigned FlatWorkGroupSize) {
945   assert(FlatWorkGroupSize != 0);
946   if (STI->getTargetTriple().getArch() != Triple::amdgcn)
947     return 8;
948   unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
949   unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
950   if (N == 1) {
951     // Single-wave workgroups don't consume barrier resources.
952     return MaxWaves;
953   }
954 
955   unsigned MaxBarriers = 16;
956   if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
957     MaxBarriers = 32;
958 
959   return std::min(MaxWaves / N, MaxBarriers);
960 }
961 
962 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
963   return 1;
964 }
965 
966 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
967   // FIXME: Need to take scratch memory into account.
968   if (isGFX90A(*STI))
969     return 8;
970   if (!isGFX10Plus(*STI))
971     return 10;
972   return hasGFX10_3Insts(*STI) ? 16 : 20;
973 }
974 
975 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
976                                    unsigned FlatWorkGroupSize) {
977   return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
978                     getEUsPerCU(STI));
979 }
980 
981 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
982   return 1;
983 }
984 
985 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
986   // Some subtargets allow encoding 2048, but this isn't tested or supported.
987   return 1024;
988 }
989 
990 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
991                               unsigned FlatWorkGroupSize) {
992   return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
993 }
994 
995 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
996   IsaVersion Version = getIsaVersion(STI->getCPU());
997   if (Version.Major >= 10)
998     return getAddressableNumSGPRs(STI);
999   if (Version.Major >= 8)
1000     return 16;
1001   return 8;
1002 }
1003 
1004 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
1005   return 8;
1006 }
1007 
1008 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
1009   IsaVersion Version = getIsaVersion(STI->getCPU());
1010   if (Version.Major >= 8)
1011     return 800;
1012   return 512;
1013 }
1014 
1015 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
1016   if (STI->getFeatureBits().test(FeatureSGPRInitBug))
1017     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
1018 
1019   IsaVersion Version = getIsaVersion(STI->getCPU());
1020   if (Version.Major >= 10)
1021     return 106;
1022   if (Version.Major >= 8)
1023     return 102;
1024   return 104;
1025 }
1026 
1027 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1028   assert(WavesPerEU != 0);
1029 
1030   IsaVersion Version = getIsaVersion(STI->getCPU());
1031   if (Version.Major >= 10)
1032     return 0;
1033 
1034   if (WavesPerEU >= getMaxWavesPerEU(STI))
1035     return 0;
1036 
1037   unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
1038   if (STI->getFeatureBits().test(FeatureTrapHandler))
1039     MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1040   MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
1041   return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
1042 }
1043 
1044 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1045                         bool Addressable) {
1046   assert(WavesPerEU != 0);
1047 
1048   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
1049   IsaVersion Version = getIsaVersion(STI->getCPU());
1050   if (Version.Major >= 10)
1051     return Addressable ? AddressableNumSGPRs : 108;
1052   if (Version.Major >= 8 && !Addressable)
1053     AddressableNumSGPRs = 112;
1054   unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
1055   if (STI->getFeatureBits().test(FeatureTrapHandler))
1056     MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1057   MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
1058   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
1059 }
1060 
1061 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1062                           bool FlatScrUsed, bool XNACKUsed) {
1063   unsigned ExtraSGPRs = 0;
1064   if (VCCUsed)
1065     ExtraSGPRs = 2;
1066 
1067   IsaVersion Version = getIsaVersion(STI->getCPU());
1068   if (Version.Major >= 10)
1069     return ExtraSGPRs;
1070 
1071   if (Version.Major < 8) {
1072     if (FlatScrUsed)
1073       ExtraSGPRs = 4;
1074   } else {
1075     if (XNACKUsed)
1076       ExtraSGPRs = 4;
1077 
1078     if (FlatScrUsed ||
1079         STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
1080       ExtraSGPRs = 6;
1081   }
1082 
1083   return ExtraSGPRs;
1084 }
1085 
1086 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1087                           bool FlatScrUsed) {
1088   return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1089                           STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
1090 }
1091 
1092 static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
1093                                                unsigned Granule) {
1094   return divideCeil(std::max(1u, NumRegs), Granule);
1095 }
1096 
1097 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
1098   // SGPRBlocks is actual number of SGPR blocks minus 1.
1099   return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
1100          1;
1101 }
1102 
1103 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
1104                              std::optional<bool> EnableWavefrontSize32) {
1105   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1106     return 8;
1107 
1108   bool IsWave32 = EnableWavefrontSize32 ?
1109       *EnableWavefrontSize32 :
1110       STI->getFeatureBits().test(FeatureWavefrontSize32);
1111 
1112   if (STI->getFeatureBits().test(Feature1_5xVGPRs))
1113     return IsWave32 ? 24 : 12;
1114 
1115   if (hasGFX10_3Insts(*STI))
1116     return IsWave32 ? 16 : 8;
1117 
1118   return IsWave32 ? 8 : 4;
1119 }
1120 
1121 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
1122                                 std::optional<bool> EnableWavefrontSize32) {
1123   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1124     return 8;
1125 
1126   bool IsWave32 = EnableWavefrontSize32 ?
1127       *EnableWavefrontSize32 :
1128       STI->getFeatureBits().test(FeatureWavefrontSize32);
1129 
1130   return IsWave32 ? 8 : 4;
1131 }
1132 
1133 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
1134   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1135     return 512;
1136   if (!isGFX10Plus(*STI))
1137     return 256;
1138   bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
1139   if (STI->getFeatureBits().test(Feature1_5xVGPRs))
1140     return IsWave32 ? 1536 : 768;
1141   return IsWave32 ? 1024 : 512;
1142 }
1143 
1144 unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }
1145 
1146 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
1147   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1148     return 512;
1149   return getAddressableNumArchVGPRs(STI);
1150 }
1151 
1152 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
1153                                       unsigned NumVGPRs) {
1154   return getNumWavesPerEUWithNumVGPRs(NumVGPRs, getVGPRAllocGranule(STI),
1155                                       getMaxWavesPerEU(STI),
1156                                       getTotalNumVGPRs(STI));
1157 }
1158 
1159 unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
1160                                       unsigned MaxWaves,
1161                                       unsigned TotalNumVGPRs) {
1162   if (NumVGPRs < Granule)
1163     return MaxWaves;
1164   unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1165   return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
1166 }
1167 
1168 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
1169                                   AMDGPUSubtarget::Generation Gen) {
1170   if (Gen >= AMDGPUSubtarget::GFX10)
1171     return MaxWaves;
1172 
1173   if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
1174     if (SGPRs <= 80)
1175       return 10;
1176     if (SGPRs <= 88)
1177       return 9;
1178     if (SGPRs <= 100)
1179       return 8;
1180     return 7;
1181   }
1182   if (SGPRs <= 48)
1183     return 10;
1184   if (SGPRs <= 56)
1185     return 9;
1186   if (SGPRs <= 64)
1187     return 8;
1188   if (SGPRs <= 72)
1189     return 7;
1190   if (SGPRs <= 80)
1191     return 6;
1192   return 5;
1193 }
1194 
1195 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1196   assert(WavesPerEU != 0);
1197 
1198   unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1199   if (WavesPerEU >= MaxWavesPerEU)
1200     return 0;
1201 
1202   unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1203   unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
1204   unsigned Granule = getVGPRAllocGranule(STI);
1205   unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1206 
1207   if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1208     return 0;
1209 
1210   unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
1211   if (WavesPerEU < MinWavesPerEU)
1212     return getMinNumVGPRs(STI, MinWavesPerEU);
1213 
1214   unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1215   unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1216   return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1217 }
1218 
1219 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1220   assert(WavesPerEU != 0);
1221 
1222   unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1223                                    getVGPRAllocGranule(STI));
1224   unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
1225   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1226 }
1227 
1228 unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
1229                                  std::optional<bool> EnableWavefrontSize32) {
1230   return getGranulatedNumRegisterBlocks(
1231              NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
1232          1;
1233 }
1234 
1235 unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
1236                                    unsigned NumVGPRs,
1237                                    std::optional<bool> EnableWavefrontSize32) {
1238   return getGranulatedNumRegisterBlocks(
1239       NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32));
1240 }
1241 } // end namespace IsaInfo
1242 
1243 void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
1244                                const MCSubtargetInfo *STI) {
1245   IsaVersion Version = getIsaVersion(STI->getCPU());
1246   KernelCode.amd_kernel_code_version_major = 1;
1247   KernelCode.amd_kernel_code_version_minor = 2;
1248   KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1249   KernelCode.amd_machine_version_major = Version.Major;
1250   KernelCode.amd_machine_version_minor = Version.Minor;
1251   KernelCode.amd_machine_version_stepping = Version.Stepping;
1252   KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
1253   if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1254     KernelCode.wavefront_size = 5;
1255     KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1256   } else {
1257     KernelCode.wavefront_size = 6;
1258   }
1259 
1260   // If the code object does not support indirect functions, then the value must
1261   // be 0xffffffff.
1262   KernelCode.call_convention = -1;
1263 
1264   // These alignment values are specified in powers of two, so alignment =
1265   // 2^n.  The minimum alignment is 2^4 = 16.
1266   KernelCode.kernarg_segment_alignment = 4;
1267   KernelCode.group_segment_alignment = 4;
1268   KernelCode.private_segment_alignment = 4;
1269 
1270   if (Version.Major >= 10) {
1271     KernelCode.compute_pgm_resource_registers |=
1272         S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1273         S_00B848_MEM_ORDERED(1);
1274   }
1275 }
1276 
1277 bool isGroupSegment(const GlobalValue *GV) {
1278   return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
1279 }
1280 
1281 bool isGlobalSegment(const GlobalValue *GV) {
1282   return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
1283 }
1284 
1285 bool isReadOnlySegment(const GlobalValue *GV) {
1286   unsigned AS = GV->getAddressSpace();
1287   return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1288          AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
1289 }
1290 
1291 bool shouldEmitConstantsToTextSection(const Triple &TT) {
1292   return TT.getArch() == Triple::r600;
1293 }
1294 
1295 std::pair<unsigned, unsigned>
1296 getIntegerPairAttribute(const Function &F, StringRef Name,
1297                         std::pair<unsigned, unsigned> Default,
1298                         bool OnlyFirstRequired) {
1299   Attribute A = F.getFnAttribute(Name);
1300   if (!A.isStringAttribute())
1301     return Default;
1302 
1303   LLVMContext &Ctx = F.getContext();
1304   std::pair<unsigned, unsigned> Ints = Default;
1305   std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1306   if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1307     Ctx.emitError("can't parse first integer attribute " + Name);
1308     return Default;
1309   }
1310   if (Strs.second.trim().getAsInteger(0, Ints.second)) {
1311     if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1312       Ctx.emitError("can't parse second integer attribute " + Name);
1313       return Default;
1314     }
1315   }
1316 
1317   return Ints;
1318 }
1319 
1320 SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1321                                              unsigned Size) {
1322   assert(Size > 2);
1323   SmallVector<unsigned> Default(Size, 0);
1324 
1325   Attribute A = F.getFnAttribute(Name);
1326   if (!A.isStringAttribute())
1327     return Default;
1328 
1329   SmallVector<unsigned> Vals(Size, 0);
1330 
1331   LLVMContext &Ctx = F.getContext();
1332 
1333   StringRef S = A.getValueAsString();
1334   unsigned i = 0;
1335   for (; !S.empty() && i < Size; i++) {
1336     std::pair<StringRef, StringRef> Strs = S.split(',');
1337     unsigned IntVal;
1338     if (Strs.first.trim().getAsInteger(0, IntVal)) {
1339       Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
1340                     Name);
1341       return Default;
1342     }
1343     Vals[i] = IntVal;
1344     S = Strs.second;
1345   }
1346 
1347   if (!S.empty() || i < Size) {
1348     Ctx.emitError("attribute " + Name +
1349                   " has incorrect number of integers; expected " +
1350                   llvm::utostr(Size));
1351     return Default;
1352   }
1353   return Vals;
1354 }
1355 
1356 unsigned getVmcntBitMask(const IsaVersion &Version) {
1357   return (1 << (getVmcntBitWidthLo(Version.Major) +
1358                 getVmcntBitWidthHi(Version.Major))) -
1359          1;
1360 }
1361 
1362 unsigned getLoadcntBitMask(const IsaVersion &Version) {
1363   return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1364 }
1365 
1366 unsigned getSamplecntBitMask(const IsaVersion &Version) {
1367   return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1368 }
1369 
1370 unsigned getBvhcntBitMask(const IsaVersion &Version) {
1371   return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1372 }
1373 
1374 unsigned getExpcntBitMask(const IsaVersion &Version) {
1375   return (1 << getExpcntBitWidth(Version.Major)) - 1;
1376 }
1377 
1378 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1379   return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1380 }
1381 
1382 unsigned getDscntBitMask(const IsaVersion &Version) {
1383   return (1 << getDscntBitWidth(Version.Major)) - 1;
1384 }
1385 
1386 unsigned getKmcntBitMask(const IsaVersion &Version) {
1387   return (1 << getKmcntBitWidth(Version.Major)) - 1;
1388 }
1389 
1390 unsigned getStorecntBitMask(const IsaVersion &Version) {
1391   return (1 << getStorecntBitWidth(Version.Major)) - 1;
1392 }
1393 
1394 unsigned getWaitcntBitMask(const IsaVersion &Version) {
1395   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1396                                 getVmcntBitWidthLo(Version.Major));
1397   unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1398                                getExpcntBitWidth(Version.Major));
1399   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1400                                 getLgkmcntBitWidth(Version.Major));
1401   unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1402                                 getVmcntBitWidthHi(Version.Major));
1403   return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1404 }
1405 
1406 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1407   unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1408                                 getVmcntBitWidthLo(Version.Major));
1409   unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1410                                 getVmcntBitWidthHi(Version.Major));
1411   return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1412 }
1413 
1414 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1415   return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1416                     getExpcntBitWidth(Version.Major));
1417 }
1418 
1419 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1420   return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1421                     getLgkmcntBitWidth(Version.Major));
1422 }
1423 
1424 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
1425                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
1426   Vmcnt = decodeVmcnt(Version, Waitcnt);
1427   Expcnt = decodeExpcnt(Version, Waitcnt);
1428   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1429 }
1430 
1431 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1432   Waitcnt Decoded;
1433   Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
1434   Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
1435   Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
1436   return Decoded;
1437 }
1438 
1439 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1440                      unsigned Vmcnt) {
1441   Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1442                      getVmcntBitWidthLo(Version.Major));
1443   return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1444                   getVmcntBitShiftHi(Version.Major),
1445                   getVmcntBitWidthHi(Version.Major));
1446 }
1447 
1448 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1449                       unsigned Expcnt) {
1450   return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1451                   getExpcntBitWidth(Version.Major));
1452 }
1453 
1454 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1455                        unsigned Lgkmcnt) {
1456   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1457                   getLgkmcntBitWidth(Version.Major));
1458 }
1459 
1460 unsigned encodeWaitcnt(const IsaVersion &Version,
1461                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
1462   unsigned Waitcnt = getWaitcntBitMask(Version);
1463   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1464   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1465   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1466   return Waitcnt;
1467 }
1468 
1469 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1470   return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
1471 }
1472 
1473 static unsigned getCombinedCountBitMask(const IsaVersion &Version,
1474                                         bool IsStore) {
1475   unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1476                               getDscntBitWidth(Version.Major));
1477   if (IsStore) {
1478     unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1479                                    getStorecntBitWidth(Version.Major));
1480     return Dscnt | Storecnt;
1481   }
1482   unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1483                                 getLoadcntBitWidth(Version.Major));
1484   return Dscnt | Loadcnt;
1485 }
1486 
1487 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
1488   Waitcnt Decoded;
1489   Decoded.LoadCnt =
1490       unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
1491                  getLoadcntBitWidth(Version.Major));
1492   Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
1493                              getDscntBitWidth(Version.Major));
1494   return Decoded;
1495 }
1496 
1497 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
1498   Waitcnt Decoded;
1499   Decoded.StoreCnt =
1500       unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
1501                  getStorecntBitWidth(Version.Major));
1502   Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
1503                              getDscntBitWidth(Version.Major));
1504   return Decoded;
1505 }
1506 
1507 static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1508                               unsigned Loadcnt) {
1509   return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1510                   getLoadcntBitWidth(Version.Major));
1511 }
1512 
1513 static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1514                                unsigned Storecnt) {
1515   return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1516                   getStorecntBitWidth(Version.Major));
1517 }
1518 
1519 static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1520                             unsigned Dscnt) {
1521   return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
1522                   getDscntBitWidth(Version.Major));
1523 }
1524 
1525 static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1526                                    unsigned Dscnt) {
1527   unsigned Waitcnt = getCombinedCountBitMask(Version, false);
1528   Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1529   Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1530   return Waitcnt;
1531 }
1532 
1533 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1534   return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
1535 }
1536 
1537 static unsigned encodeStorecntDscnt(const IsaVersion &Version,
1538                                     unsigned Storecnt, unsigned Dscnt) {
1539   unsigned Waitcnt = getCombinedCountBitMask(Version, true);
1540   Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1541   Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1542   return Waitcnt;
1543 }
1544 
1545 unsigned encodeStorecntDscnt(const IsaVersion &Version,
1546                              const Waitcnt &Decoded) {
1547   return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
1548 }
1549 
1550 //===----------------------------------------------------------------------===//
1551 // Custom Operand Values
1552 //===----------------------------------------------------------------------===//
1553 
1554 static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
1555                                                 int Size,
1556                                                 const MCSubtargetInfo &STI) {
1557   unsigned Enc = 0;
1558   for (int Idx = 0; Idx < Size; ++Idx) {
1559     const auto &Op = Opr[Idx];
1560     if (Op.isSupported(STI))
1561       Enc |= Op.encode(Op.Default);
1562   }
1563   return Enc;
1564 }
1565 
1566 static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
1567                                             int Size, unsigned Code,
1568                                             bool &HasNonDefaultVal,
1569                                             const MCSubtargetInfo &STI) {
1570   unsigned UsedOprMask = 0;
1571   HasNonDefaultVal = false;
1572   for (int Idx = 0; Idx < Size; ++Idx) {
1573     const auto &Op = Opr[Idx];
1574     if (!Op.isSupported(STI))
1575       continue;
1576     UsedOprMask |= Op.getMask();
1577     unsigned Val = Op.decode(Code);
1578     if (!Op.isValid(Val))
1579       return false;
1580     HasNonDefaultVal |= (Val != Op.Default);
1581   }
1582   return (Code & ~UsedOprMask) == 0;
1583 }
1584 
1585 static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1586                                 unsigned Code, int &Idx, StringRef &Name,
1587                                 unsigned &Val, bool &IsDefault,
1588                                 const MCSubtargetInfo &STI) {
1589   while (Idx < Size) {
1590     const auto &Op = Opr[Idx++];
1591     if (Op.isSupported(STI)) {
1592       Name = Op.Name;
1593       Val = Op.decode(Code);
1594       IsDefault = (Val == Op.Default);
1595       return true;
1596     }
1597   }
1598 
1599   return false;
1600 }
1601 
1602 static int encodeCustomOperandVal(const CustomOperandVal &Op,
1603                                   int64_t InputVal) {
1604   if (InputVal < 0 || InputVal > Op.Max)
1605     return OPR_VAL_INVALID;
1606   return Op.encode(InputVal);
1607 }
1608 
1609 static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
1610                                const StringRef Name, int64_t InputVal,
1611                                unsigned &UsedOprMask,
1612                                const MCSubtargetInfo &STI) {
1613   int InvalidId = OPR_ID_UNKNOWN;
1614   for (int Idx = 0; Idx < Size; ++Idx) {
1615     const auto &Op = Opr[Idx];
1616     if (Op.Name == Name) {
1617       if (!Op.isSupported(STI)) {
1618         InvalidId = OPR_ID_UNSUPPORTED;
1619         continue;
1620       }
1621       auto OprMask = Op.getMask();
1622       if (OprMask & UsedOprMask)
1623         return OPR_ID_DUPLICATE;
1624       UsedOprMask |= OprMask;
1625       return encodeCustomOperandVal(Op, InputVal);
1626     }
1627   }
1628   return InvalidId;
1629 }
1630 
1631 //===----------------------------------------------------------------------===//
1632 // DepCtr
1633 //===----------------------------------------------------------------------===//
1634 
1635 namespace DepCtr {
1636 
1637 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
1638   static int Default = -1;
1639   if (Default == -1)
1640     Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
1641   return Default;
1642 }
1643 
1644 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1645                               const MCSubtargetInfo &STI) {
1646   return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
1647                                          HasNonDefaultVal, STI);
1648 }
1649 
1650 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1651                   bool &IsDefault, const MCSubtargetInfo &STI) {
1652   return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
1653                              IsDefault, STI);
1654 }
1655 
1656 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1657                  const MCSubtargetInfo &STI) {
1658   return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
1659                              STI);
1660 }
1661 
1662 unsigned decodeFieldVmVsrc(unsigned Encoded) {
1663   return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1664 }
1665 
1666 unsigned decodeFieldVaVdst(unsigned Encoded) {
1667   return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1668 }
1669 
1670 unsigned decodeFieldSaSdst(unsigned Encoded) {
1671   return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1672 }
1673 
1674 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
1675   return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1676 }
1677 
1678 unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
1679   return encodeFieldVmVsrc(0xffff, VmVsrc);
1680 }
1681 
1682 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
1683   return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1684 }
1685 
1686 unsigned encodeFieldVaVdst(unsigned VaVdst) {
1687   return encodeFieldVaVdst(0xffff, VaVdst);
1688 }
1689 
1690 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
1691   return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1692 }
1693 
1694 unsigned encodeFieldSaSdst(unsigned SaSdst) {
1695   return encodeFieldSaSdst(0xffff, SaSdst);
1696 }
1697 
1698 } // namespace DepCtr
1699 
1700 //===----------------------------------------------------------------------===//
1701 // exp tgt
1702 //===----------------------------------------------------------------------===//
1703 
1704 namespace Exp {
1705 
1706 struct ExpTgt {
1707   StringLiteral Name;
1708   unsigned Tgt;
1709   unsigned MaxIndex;
1710 };
1711 
1712 static constexpr ExpTgt ExpTgtInfo[] = {
1713   {{"null"},           ET_NULL,            ET_NULL_MAX_IDX},
1714   {{"mrtz"},           ET_MRTZ,            ET_MRTZ_MAX_IDX},
1715   {{"prim"},           ET_PRIM,            ET_PRIM_MAX_IDX},
1716   {{"mrt"},            ET_MRT0,            ET_MRT_MAX_IDX},
1717   {{"pos"},            ET_POS0,            ET_POS_MAX_IDX},
1718   {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
1719   {{"param"},          ET_PARAM0,          ET_PARAM_MAX_IDX},
1720 };
1721 
1722 bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
1723   for (const ExpTgt &Val : ExpTgtInfo) {
1724     if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1725       Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1726       Name = Val.Name;
1727       return true;
1728     }
1729   }
1730   return false;
1731 }
1732 
1733 unsigned getTgtId(const StringRef Name) {
1734 
1735   for (const ExpTgt &Val : ExpTgtInfo) {
1736     if (Val.MaxIndex == 0 && Name == Val.Name)
1737       return Val.Tgt;
1738 
1739     if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
1740       StringRef Suffix = Name.drop_front(Val.Name.size());
1741 
1742       unsigned Id;
1743       if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
1744         return ET_INVALID;
1745 
1746       // Disable leading zeroes
1747       if (Suffix.size() > 1 && Suffix[0] == '0')
1748         return ET_INVALID;
1749 
1750       return Val.Tgt + Id;
1751     }
1752   }
1753   return ET_INVALID;
1754 }
1755 
1756 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
1757   switch (Id) {
1758   case ET_NULL:
1759     return !isGFX11Plus(STI);
1760   case ET_POS4:
1761   case ET_PRIM:
1762     return isGFX10Plus(STI);
1763   case ET_DUAL_SRC_BLEND0:
1764   case ET_DUAL_SRC_BLEND1:
1765     return isGFX11Plus(STI);
1766   default:
1767     if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
1768       return !isGFX11Plus(STI);
1769     return true;
1770   }
1771 }
1772 
1773 } // namespace Exp
1774 
1775 //===----------------------------------------------------------------------===//
1776 // MTBUF Format
1777 //===----------------------------------------------------------------------===//
1778 
1779 namespace MTBUFFormat {
1780 
1781 int64_t getDfmt(const StringRef Name) {
1782   for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
1783     if (Name == DfmtSymbolic[Id])
1784       return Id;
1785   }
1786   return DFMT_UNDEF;
1787 }
1788 
1789 StringRef getDfmtName(unsigned Id) {
1790   assert(Id <= DFMT_MAX);
1791   return DfmtSymbolic[Id];
1792 }
1793 
1794 static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
1795   if (isSI(STI) || isCI(STI))
1796     return NfmtSymbolicSICI;
1797   if (isVI(STI) || isGFX9(STI))
1798     return NfmtSymbolicVI;
1799   return NfmtSymbolicGFX10;
1800 }
1801 
1802 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
1803   auto lookupTable = getNfmtLookupTable(STI);
1804   for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
1805     if (Name == lookupTable[Id])
1806       return Id;
1807   }
1808   return NFMT_UNDEF;
1809 }
1810 
1811 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
1812   assert(Id <= NFMT_MAX);
1813   return getNfmtLookupTable(STI)[Id];
1814 }
1815 
1816 bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1817   unsigned Dfmt;
1818   unsigned Nfmt;
1819   decodeDfmtNfmt(Id, Dfmt, Nfmt);
1820   return isValidNfmt(Nfmt, STI);
1821 }
1822 
1823 bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1824   return !getNfmtName(Id, STI).empty();
1825 }
1826 
1827 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
1828   return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
1829 }
1830 
1831 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
1832   Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
1833   Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
1834 }
1835 
1836 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
1837   if (isGFX11Plus(STI)) {
1838     for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1839       if (Name == UfmtSymbolicGFX11[Id])
1840         return Id;
1841     }
1842   } else {
1843     for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1844       if (Name == UfmtSymbolicGFX10[Id])
1845         return Id;
1846     }
1847   }
1848   return UFMT_UNDEF;
1849 }
1850 
1851 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
1852   if(isValidUnifiedFormat(Id, STI))
1853     return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
1854   return "";
1855 }
1856 
1857 bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
1858   return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
1859 }
1860 
1861 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1862                              const MCSubtargetInfo &STI) {
1863   int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
1864   if (isGFX11Plus(STI)) {
1865     for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1866       if (Fmt == DfmtNfmt2UFmtGFX11[Id])
1867         return Id;
1868     }
1869   } else {
1870     for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1871       if (Fmt == DfmtNfmt2UFmtGFX10[Id])
1872         return Id;
1873     }
1874   }
1875   return UFMT_UNDEF;
1876 }
1877 
1878 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
1879   return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
1880 }
1881 
1882 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
1883   if (isGFX10Plus(STI))
1884     return UFMT_DEFAULT;
1885   return DFMT_NFMT_DEFAULT;
1886 }
1887 
1888 } // namespace MTBUFFormat
1889 
1890 //===----------------------------------------------------------------------===//
1891 // SendMsg
1892 //===----------------------------------------------------------------------===//
1893 
1894 namespace SendMsg {
1895 
1896 static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
1897   return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
1898 }
1899 
1900 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
1901   return (MsgId & ~(getMsgIdMask(STI))) == 0;
1902 }
1903 
1904 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1905                   bool Strict) {
1906   assert(isValidMsgId(MsgId, STI));
1907 
1908   if (!Strict)
1909     return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1910 
1911   if (msgRequiresOp(MsgId, STI)) {
1912     if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
1913       return false;
1914 
1915     return !getMsgOpName(MsgId, OpId, STI).empty();
1916   }
1917 
1918   return OpId == OP_NONE_;
1919 }
1920 
1921 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1922                       const MCSubtargetInfo &STI, bool Strict) {
1923   assert(isValidMsgOp(MsgId, OpId, STI, Strict));
1924 
1925   if (!Strict)
1926     return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
1927 
1928   if (!isGFX11Plus(STI)) {
1929     switch (MsgId) {
1930     case ID_GS_PreGFX11:
1931       return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
1932     case ID_GS_DONE_PreGFX11:
1933       return (OpId == OP_GS_NOP) ?
1934           (StreamId == STREAM_ID_NONE_) :
1935           (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
1936     }
1937   }
1938   return StreamId == STREAM_ID_NONE_;
1939 }
1940 
1941 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
1942   return MsgId == ID_SYSMSG ||
1943       (!isGFX11Plus(STI) &&
1944        (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
1945 }
1946 
1947 bool msgSupportsStream(int64_t MsgId, int64_t OpId,
1948                        const MCSubtargetInfo &STI) {
1949   return !isGFX11Plus(STI) &&
1950       (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
1951       OpId != OP_GS_NOP;
1952 }
1953 
1954 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1955                uint16_t &StreamId, const MCSubtargetInfo &STI) {
1956   MsgId = Val & getMsgIdMask(STI);
1957   if (isGFX11Plus(STI)) {
1958     OpId = 0;
1959     StreamId = 0;
1960   } else {
1961     OpId = (Val & OP_MASK_) >> OP_SHIFT_;
1962     StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
1963   }
1964 }
1965 
1966 uint64_t encodeMsg(uint64_t MsgId,
1967                    uint64_t OpId,
1968                    uint64_t StreamId) {
1969   return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
1970 }
1971 
1972 } // namespace SendMsg
1973 
1974 //===----------------------------------------------------------------------===//
1975 //
1976 //===----------------------------------------------------------------------===//
1977 
1978 unsigned getInitialPSInputAddr(const Function &F) {
1979   return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
1980 }
1981 
1982 bool getHasColorExport(const Function &F) {
1983   // As a safe default always respond as if PS has color exports.
1984   return F.getFnAttributeAsParsedInteger(
1985              "amdgpu-color-export",
1986              F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
1987 }
1988 
1989 bool getHasDepthExport(const Function &F) {
1990   return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
1991 }
1992 
1993 bool isShader(CallingConv::ID cc) {
1994   switch(cc) {
1995     case CallingConv::AMDGPU_VS:
1996     case CallingConv::AMDGPU_LS:
1997     case CallingConv::AMDGPU_HS:
1998     case CallingConv::AMDGPU_ES:
1999     case CallingConv::AMDGPU_GS:
2000     case CallingConv::AMDGPU_PS:
2001     case CallingConv::AMDGPU_CS_Chain:
2002     case CallingConv::AMDGPU_CS_ChainPreserve:
2003     case CallingConv::AMDGPU_CS:
2004       return true;
2005     default:
2006       return false;
2007   }
2008 }
2009 
2010 bool isGraphics(CallingConv::ID cc) {
2011   return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
2012 }
2013 
2014 bool isCompute(CallingConv::ID cc) {
2015   return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
2016 }
2017 
2018 bool isEntryFunctionCC(CallingConv::ID CC) {
2019   switch (CC) {
2020   case CallingConv::AMDGPU_KERNEL:
2021   case CallingConv::SPIR_KERNEL:
2022   case CallingConv::AMDGPU_VS:
2023   case CallingConv::AMDGPU_GS:
2024   case CallingConv::AMDGPU_PS:
2025   case CallingConv::AMDGPU_CS:
2026   case CallingConv::AMDGPU_ES:
2027   case CallingConv::AMDGPU_HS:
2028   case CallingConv::AMDGPU_LS:
2029     return true;
2030   default:
2031     return false;
2032   }
2033 }
2034 
2035 bool isModuleEntryFunctionCC(CallingConv::ID CC) {
2036   switch (CC) {
2037   case CallingConv::AMDGPU_Gfx:
2038     return true;
2039   default:
2040     return isEntryFunctionCC(CC) || isChainCC(CC);
2041   }
2042 }
2043 
2044 bool isChainCC(CallingConv::ID CC) {
2045   switch (CC) {
2046   case CallingConv::AMDGPU_CS_Chain:
2047   case CallingConv::AMDGPU_CS_ChainPreserve:
2048     return true;
2049   default:
2050     return false;
2051   }
2052 }
2053 
2054 bool isKernelCC(const Function *Func) {
2055   return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
2056 }
2057 
2058 bool hasXNACK(const MCSubtargetInfo &STI) {
2059   return STI.hasFeature(AMDGPU::FeatureXNACK);
2060 }
2061 
2062 bool hasSRAMECC(const MCSubtargetInfo &STI) {
2063   return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2064 }
2065 
2066 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2067   return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
2068 }
2069 
2070 bool hasA16(const MCSubtargetInfo &STI) {
2071   return STI.hasFeature(AMDGPU::FeatureA16);
2072 }
2073 
2074 bool hasG16(const MCSubtargetInfo &STI) {
2075   return STI.hasFeature(AMDGPU::FeatureG16);
2076 }
2077 
2078 bool hasPackedD16(const MCSubtargetInfo &STI) {
2079   return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2080          !isSI(STI);
2081 }
2082 
2083 bool hasGDS(const MCSubtargetInfo &STI) {
2084   return STI.hasFeature(AMDGPU::FeatureGDS);
2085 }
2086 
2087 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2088   auto Version = getIsaVersion(STI.getCPU());
2089   if (Version.Major == 10)
2090     return Version.Minor >= 3 ? 13 : 5;
2091   if (Version.Major == 11)
2092     return 5;
2093   if (Version.Major >= 12)
2094     return HasSampler ? 4 : 5;
2095   return 0;
2096 }
2097 
2098 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2099 
2100 bool isSI(const MCSubtargetInfo &STI) {
2101   return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2102 }
2103 
2104 bool isCI(const MCSubtargetInfo &STI) {
2105   return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2106 }
2107 
2108 bool isVI(const MCSubtargetInfo &STI) {
2109   return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2110 }
2111 
2112 bool isGFX9(const MCSubtargetInfo &STI) {
2113   return STI.hasFeature(AMDGPU::FeatureGFX9);
2114 }
2115 
2116 bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2117   return isGFX9(STI) || isGFX10(STI);
2118 }
2119 
2120 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2121   return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2122 }
2123 
2124 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2125   return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2126 }
2127 
2128 bool isGFX8Plus(const MCSubtargetInfo &STI) {
2129   return isVI(STI) || isGFX9Plus(STI);
2130 }
2131 
2132 bool isGFX9Plus(const MCSubtargetInfo &STI) {
2133   return isGFX9(STI) || isGFX10Plus(STI);
2134 }
2135 
2136 bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2137 
2138 bool isGFX10(const MCSubtargetInfo &STI) {
2139   return STI.hasFeature(AMDGPU::FeatureGFX10);
2140 }
2141 
2142 bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2143   return isGFX10(STI) || isGFX11(STI);
2144 }
2145 
2146 bool isGFX10Plus(const MCSubtargetInfo &STI) {
2147   return isGFX10(STI) || isGFX11Plus(STI);
2148 }
2149 
2150 bool isGFX11(const MCSubtargetInfo &STI) {
2151   return STI.hasFeature(AMDGPU::FeatureGFX11);
2152 }
2153 
2154 bool isGFX11Plus(const MCSubtargetInfo &STI) {
2155   return isGFX11(STI) || isGFX12Plus(STI);
2156 }
2157 
2158 bool isGFX12(const MCSubtargetInfo &STI) {
2159   return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2160 }
2161 
2162 bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
2163 
2164 bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2165 
2166 bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
2167   return !isGFX11Plus(STI);
2168 }
2169 
2170 bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2171   return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2172 }
2173 
2174 bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2175   return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2176 }
2177 
2178 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2179   return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2180 }
2181 
2182 bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2183   return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2184 }
2185 
2186 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2187   return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2188 }
2189 
2190 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2191   return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2192 }
2193 
2194 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2195   return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2196 }
2197 
2198 bool isGFX90A(const MCSubtargetInfo &STI) {
2199   return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2200 }
2201 
2202 bool isGFX940(const MCSubtargetInfo &STI) {
2203   return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2204 }
2205 
2206 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2207   return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2208 }
2209 
2210 bool hasMAIInsts(const MCSubtargetInfo &STI) {
2211   return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2212 }
2213 
2214 bool hasVOPD(const MCSubtargetInfo &STI) {
2215   return STI.hasFeature(AMDGPU::FeatureVOPD);
2216 }
2217 
2218 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2219   return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2220 }
2221 
2222 unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2223   return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2224 }
2225 
2226 int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2227                          int32_t ArgNumVGPR) {
2228   if (has90AInsts && ArgNumAGPR)
2229     return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2230   return std::max(ArgNumVGPR, ArgNumAGPR);
2231 }
2232 
2233 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
2234   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2235   const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2236   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2237     Reg == AMDGPU::SCC;
2238 }
2239 
2240 bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
2241   return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI;
2242 }
2243 
// MAP_REG2REG expands to the complete body of a register translation
// function: a switch over a variable named `Reg` whose default returns `Reg`
// unchanged. Each row names one register and is expanded through one of the
// CASE_* helper macros, which must be defined before MAP_REG2REG is used.
// The table is expanded twice below with two different sets of CASE_*
// definitions, once per translation direction.
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9PLUS(TTMP0) \
  CASE_VI_GFX9PLUS(TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2) \
  CASE_VI_GFX9PLUS(TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4) \
  CASE_VI_GFX9PLUS(TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6) \
  CASE_VI_GFX9PLUS(TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8) \
  CASE_VI_GFX9PLUS(TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10) \
  CASE_VI_GFX9PLUS(TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12) \
  CASE_VI_GFX9PLUS(TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14) \
  CASE_VI_GFX9PLUS(TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_GFXPRE11_GFX11PLUS(M0) \
  CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
  CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
  }

// First set of CASE_* helpers: map a pseudo register to the variant that
// matches the subtarget `STI` (used by getMCReg below).
//
// NOTE(review): the assert in CASE_CI_VI is emitted in front of the `case`
// label, i.e. directly after the `return` of the preceding table row. The
// switch jumps straight to the label, so the assert does not appear to be
// reachable as written -- confirm whether it was meant to sit after the
// label.
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9PLUS(node) \
  case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

#define CASE_GFXPRE11_GFX11PLUS(node) \
  case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

// Like CASE_GFXPRE11_GFX11PLUS, but `node` is translated to the variants of a
// different register, `result`.
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
  case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;

/// Translates the pseudo register \p Reg into the subtarget-specific register
/// for \p STI. Registers that are not listed in MAP_REG2REG, and every
/// register on r600 targets, are returned unchanged.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO

// Second set of CASE_* helpers: the reverse direction, mapping each
// subtarget-specific variant back to its pseudo register (used by
// mc2PseudoReg below). CASE_GFXPRE11_GFX11PLUS_TO expands to nothing here:
// the variants it would list are the same ones the SGPR_NULL row already
// maps back, so the SGPR_NULL64 row contributes no case labels.
#define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)

/// Translates a subtarget-specific register \p Reg back to its pseudo
/// register. Registers that are not listed in MAP_REG2REG are returned
/// unchanged.
unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}
2320 
2321 bool isInlineValue(unsigned Reg) {
2322   switch (Reg) {
2323   case AMDGPU::SRC_SHARED_BASE_LO:
2324   case AMDGPU::SRC_SHARED_BASE:
2325   case AMDGPU::SRC_SHARED_LIMIT_LO:
2326   case AMDGPU::SRC_SHARED_LIMIT:
2327   case AMDGPU::SRC_PRIVATE_BASE_LO:
2328   case AMDGPU::SRC_PRIVATE_BASE:
2329   case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2330   case AMDGPU::SRC_PRIVATE_LIMIT:
2331   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2332     return true;
2333   case AMDGPU::SRC_VCCZ:
2334   case AMDGPU::SRC_EXECZ:
2335   case AMDGPU::SRC_SCC:
2336     return true;
2337   case AMDGPU::SGPR_NULL:
2338     return true;
2339   default:
2340     return false;
2341   }
2342 }
2343 
// Drop the CASE_* helpers that were last defined for mc2PseudoReg, together
// with the MAP_REG2REG table itself, so none of them leak into the rest of
// the file.
#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO
#undef MAP_REG2REG
2349 
2350 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2351   assert(OpNo < Desc.NumOperands);
2352   unsigned OpType = Desc.operands()[OpNo].OperandType;
2353   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2354          OpType <= AMDGPU::OPERAND_SRC_LAST;
2355 }
2356 
2357 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2358   assert(OpNo < Desc.NumOperands);
2359   unsigned OpType = Desc.operands()[OpNo].OperandType;
2360   return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2361          OpType <= AMDGPU::OPERAND_KIMM_LAST;
2362 }
2363 
2364 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2365   assert(OpNo < Desc.NumOperands);
2366   unsigned OpType = Desc.operands()[OpNo].OperandType;
2367   switch (OpType) {
2368   case AMDGPU::OPERAND_REG_IMM_FP32:
2369   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2370   case AMDGPU::OPERAND_REG_IMM_FP64:
2371   case AMDGPU::OPERAND_REG_IMM_FP16:
2372   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2373   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2374   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2375   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2376   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2377   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2378   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2379   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2380   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2381   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2382   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2383   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2384     return true;
2385   default:
2386     return false;
2387   }
2388 }
2389 
2390 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2391   assert(OpNo < Desc.NumOperands);
2392   unsigned OpType = Desc.operands()[OpNo].OperandType;
2393   return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2394           OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2395          (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2396           OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2397 }
2398 
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
//
// The width is looked up purely by register class id: each group of case
// labels below lists the classes that share one width. An id that is not
// listed reaches llvm_unreachable(), so a register class that callers may
// pass here needs a case label of its own.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::AV_64_Align2RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
  case AMDGPU::AV_96_Align2RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
  case AMDGPU::AV_128_Align2RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
  case AMDGPU::AV_160_Align2RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
  case AMDGPU::AV_192RegClassID:
  case AMDGPU::AV_192_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_224RegClassID:
  case AMDGPU::SReg_224RegClassID:
  case AMDGPU::VReg_224RegClassID:
  case AMDGPU::AReg_224RegClassID:
  case AMDGPU::VReg_224_Align2RegClassID:
  case AMDGPU::AReg_224_Align2RegClassID:
  case AMDGPU::AV_224RegClassID:
  case AMDGPU::AV_224_Align2RegClassID:
    return 224;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
  case AMDGPU::AV_256RegClassID:
  case AMDGPU::AV_256_Align2RegClassID:
    return 256;
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::SReg_288RegClassID:
  case AMDGPU::VReg_288RegClassID:
  case AMDGPU::AReg_288RegClassID:
  case AMDGPU::VReg_288_Align2RegClassID:
  case AMDGPU::AReg_288_Align2RegClassID:
  case AMDGPU::AV_288RegClassID:
  case AMDGPU::AV_288_Align2RegClassID:
    return 288;
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::SReg_320RegClassID:
  case AMDGPU::VReg_320RegClassID:
  case AMDGPU::AReg_320RegClassID:
  case AMDGPU::VReg_320_Align2RegClassID:
  case AMDGPU::AReg_320_Align2RegClassID:
  case AMDGPU::AV_320RegClassID:
  case AMDGPU::AV_320_Align2RegClassID:
    return 320;
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::SReg_352RegClassID:
  case AMDGPU::VReg_352RegClassID:
  case AMDGPU::AReg_352RegClassID:
  case AMDGPU::VReg_352_Align2RegClassID:
  case AMDGPU::AReg_352_Align2RegClassID:
  case AMDGPU::AV_352RegClassID:
  case AMDGPU::AV_352_Align2RegClassID:
    return 352;
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::SReg_384RegClassID:
  case AMDGPU::VReg_384RegClassID:
  case AMDGPU::AReg_384RegClassID:
  case AMDGPU::VReg_384_Align2RegClassID:
  case AMDGPU::AReg_384_Align2RegClassID:
  case AMDGPU::AV_384RegClassID:
  case AMDGPU::AV_384_Align2RegClassID:
    return 384;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
  case AMDGPU::AV_512RegClassID:
  case AMDGPU::AV_512_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
  case AMDGPU::AV_1024RegClassID:
  case AMDGPU::AV_1024_Align2RegClassID:
    return 1024;
  default:
    // Not a register class this table knows about.
    llvm_unreachable("Unexpected register class");
  }
}
2539 
2540 unsigned getRegBitWidth(const MCRegisterClass &RC) {
2541   return getRegBitWidth(RC.getID());
2542 }
2543 
2544 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2545                            unsigned OpNo) {
2546   assert(OpNo < Desc.NumOperands);
2547   unsigned RCID = Desc.operands()[OpNo].RegClass;
2548   return getRegBitWidth(RCID) / 8;
2549 }
2550 
2551 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2552   if (isInlinableIntLiteral(Literal))
2553     return true;
2554 
2555   uint64_t Val = static_cast<uint64_t>(Literal);
2556   return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2557          (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2558          (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2559          (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2560          (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2561          (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2562          (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2563          (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2564          (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2565          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2566 }
2567 
2568 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2569   if (isInlinableIntLiteral(Literal))
2570     return true;
2571 
2572   // The actual type of the operand does not seem to matter as long
2573   // as the bits match one of the inline immediate values.  For example:
2574   //
2575   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2576   // so it is a legal inline immediate.
2577   //
2578   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2579   // floating-point, so it is a legal inline immediate.
2580 
2581   uint32_t Val = static_cast<uint32_t>(Literal);
2582   return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2583          (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2584          (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2585          (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2586          (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2587          (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2588          (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2589          (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2590          (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2591          (Val == 0x3e22f983 && HasInv2Pi);
2592 }
2593 
2594 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
2595   if (!HasInv2Pi)
2596     return false;
2597   if (isInlinableIntLiteral(Literal))
2598     return true;
2599   uint16_t Val = static_cast<uint16_t>(Literal);
2600   return Val == 0x3F00 || // 0.5
2601          Val == 0xBF00 || // -0.5
2602          Val == 0x3F80 || // 1.0
2603          Val == 0xBF80 || // -1.0
2604          Val == 0x4000 || // 2.0
2605          Val == 0xC000 || // -2.0
2606          Val == 0x4080 || // 4.0
2607          Val == 0xC080 || // -4.0
2608          Val == 0x3E22;   // 1.0 / (2.0 * pi)
2609 }
2610 
2611 bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
2612   return isInlinableLiteral32(Literal, HasInv2Pi);
2613 }
2614 
2615 bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
2616   if (!HasInv2Pi)
2617     return false;
2618   if (isInlinableIntLiteral(Literal))
2619     return true;
2620   uint16_t Val = static_cast<uint16_t>(Literal);
2621   return Val == 0x3C00 || // 1.0
2622          Val == 0xBC00 || // -1.0
2623          Val == 0x3800 || // 0.5
2624          Val == 0xB800 || // -0.5
2625          Val == 0x4000 || // 2.0
2626          Val == 0xC000 || // -2.0
2627          Val == 0x4400 || // 4.0
2628          Val == 0xC400 || // -4.0
2629          Val == 0x3118;   // 1/2pi
2630 }
2631 
/// Computes the inline-constant encoding of \p Literal for a packed 16-bit
/// instruction.
///
/// \param IsFloat selects the half-precision bit patterns for the
///        floating-point constants; otherwise the single-precision bit
///        patterns are matched.
/// \returns 128..192 for the integers 0..64, 193..208 for -1..-16, 240..248
///          for the floating-point constants, or std::nullopt if \p Literal
///          has no inline encoding.
std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
  // Unfortunately, the Instruction Set Architecture Reference Guide is
  // misleading about how the inline operands work for (packed) 16-bit
  // instructions. In a nutshell, the actual HW behavior is:
  //
  //  - integer encodings (-16 .. 64) are always produced as sign-extended
  //    32-bit values
  //  - float encodings are produced as:
  //    - for F16 instructions: corresponding half-precision float values in
  //      the LSBs, 0 in the MSBs
  //    - for UI16 instructions: corresponding single-precision float value
  const int32_t AsSigned = static_cast<int32_t>(Literal);
  if (0 <= AsSigned && AsSigned <= 64)
    return 128 + AsSigned;

  // Negative integers: -1 encodes as 193, down to -16 as 208.
  if (-16 <= AsSigned && AsSigned < 0)
    return 192 - AsSigned;

  // Bit patterns of 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0 and
  // 1.0 / (2.0 * pi), in encoding order starting at 240.
  // clang-format off
  static constexpr uint32_t HalfPatterns[] = {
      0x3800, 0xB800, 0x3C00, 0xBC00, 0x4000, 0xC000, 0x4400, 0xC400, 0x3118};
  static constexpr uint32_t SinglePatterns[] = {
      0x3F000000, 0xBF000000, 0x3F800000, 0xBF800000, 0x40000000,
      0xC0000000, 0x40800000, 0xC0800000, 0x3E22F983};
  // clang-format on
  constexpr unsigned NumPatterns = 9;
  constexpr unsigned FirstFloatEncoding = 240;

  const uint32_t *Patterns = IsFloat ? HalfPatterns : SinglePatterns;
  for (unsigned I = 0; I != NumPatterns; ++I) {
    if (Patterns[I] == Literal)
      return FirstFloatEncoding + I;
  }

  return std::nullopt;
}
2684 
2685 // Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
2686 // or nullopt.
2687 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
2688   return getInlineEncodingV216(false, Literal);
2689 }
2690 
// Inline-constant encoding of \p Literal for a V_PK_*_BF16 instruction, or
// nullopt when the literal has no such encoding.
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
  const int32_t AsSigned = static_cast<int32_t>(Literal);

  // 0 .. 64 occupy encodings 128 .. 192.
  if (AsSigned >= 0 && AsSigned <= 64)
    return 128 + AsSigned;

  // -1 .. -16 occupy encodings 193 .. 208.
  if (AsSigned < 0 && AsSigned >= -16)
    return 192 - AsSigned;

  // bfloat16 inline constants, ordered so that entry I has encoding 240 + I:
  // 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0, 1.0 / (2.0 * pi).
  constexpr unsigned NumPatterns = 9;
  static constexpr uint32_t BF16Patterns[NumPatterns] = {
      0x3F00, 0xBF00, 0x3F80, 0xBF80, 0x4000, 0xC000, 0x4080, 0xC080, 0x3E22};

  for (unsigned Idx = 0; Idx != NumPatterns; ++Idx) {
    if (BF16Patterns[Idx] == Literal)
      return 240 + Idx;
  }

  return std::nullopt;
}
2718 
2719 // Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
2720 // or nullopt.
2721 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
2722   return getInlineEncodingV216(true, Literal);
2723 }
2724 
2725 // Whether the given literal can be inlined for a V_PK_* instruction.
2726 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
2727   switch (OpType) {
2728   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2729   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2730   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2731     return getInlineEncodingV216(false, Literal).has_value();
2732   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2733   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2734   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2735     return getInlineEncodingV216(true, Literal).has_value();
2736   case AMDGPU::OPERAND_REG_IMM_V2BF16:
2737   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2738   case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
2739     return isInlinableLiteralV2BF16(Literal);
2740   default:
2741     llvm_unreachable("bad packed operand type");
2742   }
2743 }
2744 
2745 // Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
2746 bool isInlinableLiteralV2I16(uint32_t Literal) {
2747   return getInlineEncodingV2I16(Literal).has_value();
2748 }
2749 
2750 // Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
2751 bool isInlinableLiteralV2BF16(uint32_t Literal) {
2752   return getInlineEncodingV2BF16(Literal).has_value();
2753 }
2754 
2755 // Whether the given literal can be inlined for a V_PK_*_F16 instruction.
2756 bool isInlinableLiteralV2F16(uint32_t Literal) {
2757   return getInlineEncodingV2F16(Literal).has_value();
2758 }
2759 
2760 bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
2761   if (IsFP64)
2762     return !(Val & 0xffffffffu);
2763 
2764   return isUInt<32>(Val) || isInt<32>(Val);
2765 }
2766 
2767 bool isArgPassedInSGPR(const Argument *A) {
2768   const Function *F = A->getParent();
2769 
2770   // Arguments to compute shaders are never a source of divergence.
2771   CallingConv::ID CC = F->getCallingConv();
2772   switch (CC) {
2773   case CallingConv::AMDGPU_KERNEL:
2774   case CallingConv::SPIR_KERNEL:
2775     return true;
2776   case CallingConv::AMDGPU_VS:
2777   case CallingConv::AMDGPU_LS:
2778   case CallingConv::AMDGPU_HS:
2779   case CallingConv::AMDGPU_ES:
2780   case CallingConv::AMDGPU_GS:
2781   case CallingConv::AMDGPU_PS:
2782   case CallingConv::AMDGPU_CS:
2783   case CallingConv::AMDGPU_Gfx:
2784   case CallingConv::AMDGPU_CS_Chain:
2785   case CallingConv::AMDGPU_CS_ChainPreserve:
2786     // For non-compute shaders, SGPR inputs are marked with either inreg or
2787     // byval. Everything else is in VGPRs.
2788     return A->hasAttribute(Attribute::InReg) ||
2789            A->hasAttribute(Attribute::ByVal);
2790   default:
2791     // TODO: treat i1 as divergent?
2792     return A->hasAttribute(Attribute::InReg);
2793   }
2794 }
2795 
2796 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2797   // Arguments to compute shaders are never a source of divergence.
2798   CallingConv::ID CC = CB->getCallingConv();
2799   switch (CC) {
2800   case CallingConv::AMDGPU_KERNEL:
2801   case CallingConv::SPIR_KERNEL:
2802     return true;
2803   case CallingConv::AMDGPU_VS:
2804   case CallingConv::AMDGPU_LS:
2805   case CallingConv::AMDGPU_HS:
2806   case CallingConv::AMDGPU_ES:
2807   case CallingConv::AMDGPU_GS:
2808   case CallingConv::AMDGPU_PS:
2809   case CallingConv::AMDGPU_CS:
2810   case CallingConv::AMDGPU_Gfx:
2811   case CallingConv::AMDGPU_CS_Chain:
2812   case CallingConv::AMDGPU_CS_ChainPreserve:
2813     // For non-compute shaders, SGPR inputs are marked with either inreg or
2814     // byval. Everything else is in VGPRs.
2815     return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2816            CB->paramHasAttr(ArgNo, Attribute::ByVal);
2817   default:
2818     return CB->paramHasAttr(ArgNo, Attribute::InReg);
2819   }
2820 }
2821 
2822 static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2823   return isGCN3Encoding(ST) || isGFX10Plus(ST);
2824 }
2825 
2826 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2827                                       int64_t EncodedOffset) {
2828   if (isGFX12Plus(ST))
2829     return isUInt<23>(EncodedOffset);
2830 
2831   return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2832                                : isUInt<8>(EncodedOffset);
2833 }
2834 
2835 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2836                                     int64_t EncodedOffset,
2837                                     bool IsBuffer) {
2838   if (isGFX12Plus(ST))
2839     return isInt<24>(EncodedOffset);
2840 
2841   return !IsBuffer &&
2842          hasSMRDSignedImmOffset(ST) &&
2843          isInt<21>(EncodedOffset);
2844 }
2845 
/// \returns true if \p ByteOffset is a multiple of four bytes (one dword).
static bool isDwordAligned(uint64_t ByteOffset) {
  return ByteOffset % 4 == 0;
}
2849 
2850 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2851                                 uint64_t ByteOffset) {
2852   if (hasSMEMByteOffset(ST))
2853     return ByteOffset;
2854 
2855   assert(isDwordAligned(ByteOffset));
2856   return ByteOffset >> 2;
2857 }
2858 
2859 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2860                                             int64_t ByteOffset, bool IsBuffer,
2861                                             bool HasSOffset) {
2862   // For unbuffered smem loads, it is illegal for the Immediate Offset to be
2863   // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
2864   // Handle case where SOffset is not present.
2865   if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
2866     return std::nullopt;
2867 
2868   if (isGFX12Plus(ST)) // 24 bit signed offsets
2869     return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2870                                  : std::nullopt;
2871 
2872   // The signed version is always a byte offset.
2873   if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2874     assert(hasSMEMByteOffset(ST));
2875     return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2876                                  : std::nullopt;
2877   }
2878 
2879   if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2880     return std::nullopt;
2881 
2882   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2883   return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2884              ? std::optional<int64_t>(EncodedOffset)
2885              : std::nullopt;
2886 }
2887 
2888 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2889                                                      int64_t ByteOffset) {
2890   if (!isCI(ST) || !isDwordAligned(ByteOffset))
2891     return std::nullopt;
2892 
2893   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2894   return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2895                                    : std::nullopt;
2896 }
2897 
2898 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
2899   if (AMDGPU::isGFX10(ST))
2900     return 12;
2901 
2902   if (AMDGPU::isGFX12(ST))
2903     return 24;
2904   return 13;
2905 }
2906 
2907 namespace {
2908 
2909 struct SourceOfDivergence {
2910   unsigned Intr;
2911 };
2912 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2913 
2914 struct AlwaysUniform {
2915   unsigned Intr;
2916 };
2917 const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
2918 
2919 #define GET_SourcesOfDivergence_IMPL
2920 #define GET_UniformIntrinsics_IMPL
2921 #define GET_Gfx9BufferFormat_IMPL
2922 #define GET_Gfx10BufferFormat_IMPL
2923 #define GET_Gfx11PlusBufferFormat_IMPL
2924 #include "AMDGPUGenSearchableTables.inc"
2925 
2926 } // end anonymous namespace
2927 
2928 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2929   return lookupSourceOfDivergence(IntrID);
2930 }
2931 
2932 bool isIntrinsicAlwaysUniform(unsigned IntrID) {
2933   return lookupAlwaysUniform(IntrID);
2934 }
2935 
2936 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
2937                                                   uint8_t NumComponents,
2938                                                   uint8_t NumFormat,
2939                                                   const MCSubtargetInfo &STI) {
2940   return isGFX11Plus(STI)
2941              ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2942                                             NumFormat)
2943              : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2944                                                        NumComponents, NumFormat)
2945                             : getGfx9BufferFormatInfo(BitsPerComp,
2946                                                       NumComponents, NumFormat);
2947 }
2948 
2949 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
2950                                                   const MCSubtargetInfo &STI) {
2951   return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
2952                           : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
2953                                          : getGfx9BufferFormatInfo(Format);
2954 }
2955 
2956 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
2957   for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
2958                        OpName::src2 }) {
2959     int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
2960     if (Idx == -1)
2961       continue;
2962 
2963     if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
2964         OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
2965       return true;
2966   }
2967 
2968   return false;
2969 }
2970 
2971 bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
2972   return hasAny64BitVGPROperands(OpDesc);
2973 }
2974 
2975 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
2976   // Currently this is 128 for all subtargets
2977   return 128;
2978 }
2979 
2980 } // namespace AMDGPU
2981 
2982 raw_ostream &operator<<(raw_ostream &OS,
2983                         const AMDGPU::IsaInfo::TargetIDSetting S) {
2984   switch (S) {
2985   case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
2986     OS << "Unsupported";
2987     break;
2988   case (AMDGPU::IsaInfo::TargetIDSetting::Any):
2989     OS << "Any";
2990     break;
2991   case (AMDGPU::IsaInfo::TargetIDSetting::Off):
2992     OS << "Off";
2993     break;
2994   case (AMDGPU::IsaInfo::TargetIDSetting::On):
2995     OS << "On";
2996     break;
2997   }
2998   return OS;
2999 }
3000 
3001 } // namespace llvm
3002