//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCAsmInfo.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;
    bool Lit = false;
    bool Lit64 = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
      return 0;
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
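
  // Illustrative sketch (not part of the parser): for a source operand written
  // as "-|v0|", the parsed state is Abs = Neg = true, so getModifiersOperand()
  // returns SISrcMods::ABS | SISrcMods::NEG, the value later emitted into the
  // matching src*_modifiers operand of the MCInst.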

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClamp,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyIndexKey32bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
    ImmTyBitOp3,
    ImmTyMatrixAReuse,
    ImmTyMatrixBReuse,
    ImmTyByteSel,
  };

  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report location of mandatory operands only for VOPD,
  // when both OpX and OpY have a KImm and there are no other literals.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyMandatoryLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    MCRegister RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrInlineImmWithFP64InputMods() const {
    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }

  bool isVRegWithFP32InputMods() const {
    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
  }

  bool isVRegWithFP64InputMods() const {
    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isPackedVGPRFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
  bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
  bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
           (((const MCTargetAsmParser *)AsmParser)
                ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
            isExpr());
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrc_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_512_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isKImmFP64() const { return isLiteralImm(MVT::f64); }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [=](){ return P(*this); };
  }

  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
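    // Note the emission order: the packed modifiers immediate is added ahead
    // of the source register/immediate itself, matching the
    // (src*_modifiers, src*) operand pairing of VOP3-style MCInst layouts.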
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyIndexKey32bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    case ImmTyMatrixAReuse: OS << "MatrixAReuse"; break;
    case ImmTyMatrixBReuse: OS << "MatrixBReuse"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;
    }
    // clang-format on
  }

  void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
         << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr ";
      MAI.printExpr(OS, *Expr);
      OS << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      MCRegister Reg, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = Reg;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1255   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};
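
// Illustrative sketch (hypothetical values, not part of the parser): after
// parsing "s[10:11]" the caller invokes
//   usesRegister(IS_SGPR, /*DwordRegIndex=*/10, /*RegWidth=*/64);
// Since divideCeil(64, 32) - 1 == 1, SGPR index 11 is marked used and the
// .kernel.sgpr_count symbol is updated to 12.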

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          const MCExpr *NextFreeVGPR, SMRange VGPRRange,
                          const MCExpr *NextFreeSGPR, SMRange SGPRRange,
                          const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                          unsigned &RegWidth,
                          SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
                           unsigned SubReg, unsigned RegWidth, SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

public:
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    FeatureBitset FB = getFeatureBits();
    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {
      // Generations before gfx10 always have FeatureWavefrontSize64 in their
      // definition, so if neither wave size feature is set this must be a
      // gfx10+ target; default those to wave32.
      copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());

    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }
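
  // For example (illustrative, subtarget-dependent): constructing this parser
  // for gfx900 under the HSA ABI defines .amdgcn.gfx_generation_number,
  // .amdgcn.gfx_generation_minor and .amdgcn.gfx_generation_stepping as
  // 9, 0 and 0 respectively, plus the UC_VERSION_* constants above.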
1497 
1498   bool hasMIMG_R128() const {
1499     return AMDGPU::hasMIMG_R128(getSTI());
1500   }
1501 
1502   bool hasPackedD16() const {
1503     return AMDGPU::hasPackedD16(getSTI());
1504   }
1505 
1506   bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1507 
1508   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1509 
1510   bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1511 
1512   bool isSI() const {
1513     return AMDGPU::isSI(getSTI());
1514   }
1515 
1516   bool isCI() const {
1517     return AMDGPU::isCI(getSTI());
1518   }
1519 
1520   bool isVI() const {
1521     return AMDGPU::isVI(getSTI());
1522   }
1523 
1524   bool isGFX9() const {
1525     return AMDGPU::isGFX9(getSTI());
1526   }
1527 
1528   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1529   bool isGFX90A() const {
1530     return AMDGPU::isGFX90A(getSTI());
1531   }
1532 
1533   bool isGFX940() const {
1534     return AMDGPU::isGFX940(getSTI());
1535   }
1536 
1537   bool isGFX9Plus() const {
1538     return AMDGPU::isGFX9Plus(getSTI());
1539   }
1540 
1541   bool isGFX10() const {
1542     return AMDGPU::isGFX10(getSTI());
1543   }
1544 
1545   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1546 
1547   bool isGFX11() const {
1548     return AMDGPU::isGFX11(getSTI());
1549   }
1550 
1551   bool isGFX11Plus() const {
1552     return AMDGPU::isGFX11Plus(getSTI());
1553   }
1554 
1555   bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1556 
1557   bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1558 
1559   bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1560 
1561   bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1562 
1563   bool isGFX10_BEncoding() const {
1564     return AMDGPU::isGFX10_BEncoding(getSTI());
1565   }
1566 
1567   bool hasInv2PiInlineImm() const {
1568     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1569   }
1570 
1571   bool has64BitLiterals() const {
1572     return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1573   }
1574 
1575   bool hasFlatOffsets() const {
1576     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1577   }
1578 
1579   bool hasTrue16Insts() const {
1580     return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1581   }
1582 
1583   bool hasArchitectedFlatScratch() const {
1584     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1585   }
1586 
1587   bool hasSGPR102_SGPR103() const {
1588     return !isVI() && !isGFX9();
1589   }
1590 
1591   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1592 
1593   bool hasIntClamp() const {
1594     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1595   }
1596 
1597   bool hasPartialNSAEncoding() const {
1598     return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1599   }
1600 
1601   unsigned getNSAMaxSize(bool HasSampler = false) const {
1602     return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1603   }
1604 
1605   unsigned getMaxNumUserSGPRs() const {
1606     return AMDGPU::getMaxNumUserSGPRs(getSTI());
1607   }
1608 
1609   bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1610 
1611   AMDGPUTargetStreamer &getTargetStreamer() {
1612     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1613     return static_cast<AMDGPUTargetStreamer &>(TS);
1614   }
1615 
1616   const MCRegisterInfo *getMRI() const {
1617     // We need this const_cast because for some reason getContext() is not const
1618     // in MCAsmParser.
1619     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1620   }
1621 
1622   const MCInstrInfo *getMII() const {
1623     return &MII;
1624   }
1625 
1626   const FeatureBitset &getFeatureBits() const {
1627     return getSTI().getFeatureBits();
1628   }
1629 
1630   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1631   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1632   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1633 
1634   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1635   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1636   bool isForcedDPP() const { return ForcedDPP; }
1637   bool isForcedSDWA() const { return ForcedSDWA; }
1638   ArrayRef<unsigned> getMatchedVariants() const;
1639   StringRef getMatchedVariantName() const;
1640 
1641   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1642   bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1643                      bool RestoreOnFailure);
1644   bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1645   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1646                                SMLoc &EndLoc) override;
1647   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1648   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1649                                       unsigned Kind) override;
1650   bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1651                                OperandVector &Operands, MCStreamer &Out,
1652                                uint64_t &ErrorInfo,
1653                                bool MatchingInlineAsm) override;
1654   bool ParseDirective(AsmToken DirectiveID) override;
1655   ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1656                            OperandMode Mode = OperandMode_Default);
1657   StringRef parseMnemonicSuffix(StringRef Name);
1658   bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1659                         SMLoc NameLoc, OperandVector &Operands) override;
1660
1661 
1662   ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1663 
1664   ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1665 
1666   ParseStatus
1667   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1668                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1669                      std::function<bool(int64_t &)> ConvertResult = nullptr);
1670 
1671   ParseStatus parseOperandArrayWithPrefix(
1672       const char *Prefix, OperandVector &Operands,
1673       AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1674       bool (*ConvertResult)(int64_t &) = nullptr);
1675 
1676   ParseStatus
1677   parseNamedBit(StringRef Name, OperandVector &Operands,
1678                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1679   unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1680   ParseStatus parseCPol(OperandVector &Operands);
1681   ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1682   ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1683   ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1684                                     SMLoc &StringLoc);
1685   ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1686                                          StringRef Name,
1687                                          ArrayRef<const char *> Ids,
1688                                          int64_t &IntVal);
1689   ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1690                                          StringRef Name,
1691                                          ArrayRef<const char *> Ids,
1692                                          AMDGPUOperand::ImmTy Type);
1693 
1694   bool isModifier();
1695   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1696   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1697   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1698   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1699   bool parseSP3NegModifier();
1700   ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1701                        bool HasLit = false, bool HasLit64 = false);
1702   ParseStatus parseReg(OperandVector &Operands);
1703   ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1704                             bool HasLit = false, bool HasLit64 = false);
1705   ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1706                                            bool AllowImm = true);
1707   ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1708                                             bool AllowImm = true);
1709   ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1710   ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1711   ParseStatus parseVReg32OrOff(OperandVector &Operands);
1712   ParseStatus tryParseIndexKey(OperandVector &Operands,
1713                                AMDGPUOperand::ImmTy ImmTy);
1714   ParseStatus parseIndexKey8bit(OperandVector &Operands);
1715   ParseStatus parseIndexKey16bit(OperandVector &Operands);
1716   ParseStatus parseIndexKey32bit(OperandVector &Operands);
1717 
1718   ParseStatus parseDfmtNfmt(int64_t &Format);
1719   ParseStatus parseUfmt(int64_t &Format);
1720   ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1721                                        int64_t &Format);
1722   ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1723                                          int64_t &Format);
1724   ParseStatus parseFORMAT(OperandVector &Operands);
1725   ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1726   ParseStatus parseNumericFormat(int64_t &Format);
1727   ParseStatus parseFlatOffset(OperandVector &Operands);
1728   ParseStatus parseR128A16(OperandVector &Operands);
1729   ParseStatus parseBLGP(OperandVector &Operands);
1730   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1731   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1732 
1733   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1734 
1735   bool parseCnt(int64_t &IntVal);
1736   ParseStatus parseSWaitCnt(OperandVector &Operands);
1737 
1738   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1739   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1740   ParseStatus parseDepCtr(OperandVector &Operands);
1741 
1742   bool parseDelay(int64_t &Delay);
1743   ParseStatus parseSDelayALU(OperandVector &Operands);
1744 
1745   ParseStatus parseHwreg(OperandVector &Operands);
1746 
1747 private:
1748   struct OperandInfoTy {
1749     SMLoc Loc;
1750     int64_t Val;
1751     bool IsSymbolic = false;
1752     bool IsDefined = false;
1753 
1754     OperandInfoTy(int64_t Val) : Val(Val) {}
1755   };
1756 
1757   struct StructuredOpField : OperandInfoTy {
1758     StringLiteral Id;
1759     StringLiteral Desc;
1760     unsigned Width;
1761     bool IsDefined = false;
1762 
1763     StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1764                       int64_t Default)
1765         : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1766     virtual ~StructuredOpField() = default;
1767 
1768     bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1769       Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1770       return false;
1771     }
1772 
1773     virtual bool validate(AMDGPUAsmParser &Parser) const {
1774       if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1775         return Error(Parser, "not supported on this GPU");
1776       if (!isUIntN(Width, Val))
1777         return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1778       return true;
1779     }
1780   };
1781 
1782   ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1783   bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1784 
1785   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1786   bool validateSendMsg(const OperandInfoTy &Msg,
1787                        const OperandInfoTy &Op,
1788                        const OperandInfoTy &Stream);
1789 
1790   ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1791                              OperandInfoTy &Width);
1792 
1793   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1794   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1795   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1796 
1797   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1798                       const OperandVector &Operands) const;
1799   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1800   SMLoc getRegLoc(MCRegister Reg, const OperandVector &Operands) const;
1801   SMLoc getLitLoc(const OperandVector &Operands,
1802                   bool SearchMandatoryLiterals = false) const;
1803   SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1804   SMLoc getConstLoc(const OperandVector &Operands) const;
1805   SMLoc getInstLoc(const OperandVector &Operands) const;
1806 
1807   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1808   bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1809   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1810   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1811   bool validateSOPLiteral(const MCInst &Inst) const;
1812   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1813   std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1814                                                       bool AsVOPD3);
1815   bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1816   bool tryVOPD(const MCInst &Inst);
1817   bool tryVOPD3(const MCInst &Inst);
1818   bool tryAnotherVOPDEncoding(const MCInst &Inst);
1819 
1820   bool validateIntClampSupported(const MCInst &Inst);
1821   bool validateMIMGAtomicDMask(const MCInst &Inst);
1822   bool validateMIMGGatherDMask(const MCInst &Inst);
1823   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1824   bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1825   bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1826   bool validateMIMGD16(const MCInst &Inst);
1827   bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1828   bool validateTensorR128(const MCInst &Inst);
1829   bool validateMIMGMSAA(const MCInst &Inst);
1830   bool validateOpSel(const MCInst &Inst);
1831   bool validateTrue16OpSel(const MCInst &Inst);
1832   bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1833   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1834   bool validateVccOperand(MCRegister Reg) const;
1835   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1836   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1837   bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1838   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1839   bool validateAGPRLdSt(const MCInst &Inst) const;
1840   bool validateVGPRAlign(const MCInst &Inst) const;
1841   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1842   bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1843   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1844   bool validateDivScale(const MCInst &Inst);
1845   bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1846   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1847                              const SMLoc &IDLoc);
1848   bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1849                               const unsigned CPol);
1850   bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1851   std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1852   unsigned getConstantBusLimit(unsigned Opcode) const;
1853   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1854   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1855   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1856 
1857   bool isSupportedMnemo(StringRef Mnemo,
1858                         const FeatureBitset &FBS);
1859   bool isSupportedMnemo(StringRef Mnemo,
1860                         const FeatureBitset &FBS,
1861                         ArrayRef<unsigned> Variants);
1862   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1863 
1864   bool isId(const StringRef Id) const;
1865   bool isId(const AsmToken &Token, const StringRef Id) const;
1866   bool isToken(const AsmToken::TokenKind Kind) const;
1867   StringRef getId() const;
1868   bool trySkipId(const StringRef Id);
1869   bool trySkipId(const StringRef Pref, const StringRef Id);
1870   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1871   bool trySkipToken(const AsmToken::TokenKind Kind);
1872   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1873   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1874   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1875 
1876   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1877   AsmToken::TokenKind getTokenKind() const;
1878   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1879   bool parseExpr(OperandVector &Operands);
1880   StringRef getTokenStr() const;
1881   AsmToken peekToken(bool ShouldSkipSpace = true);
1882   AsmToken getToken() const;
1883   SMLoc getLoc() const;
1884   void lex();
1885 
1886 public:
1887   void onBeginOfFile() override;
1888   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1889 
1890   ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1891 
1892   ParseStatus parseExpTgt(OperandVector &Operands);
1893   ParseStatus parseSendMsg(OperandVector &Operands);
1894   ParseStatus parseInterpSlot(OperandVector &Operands);
1895   ParseStatus parseInterpAttr(OperandVector &Operands);
1896   ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1897   ParseStatus parseBoolReg(OperandVector &Operands);
1898 
1899   bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1900                            const unsigned MaxVal, const Twine &ErrMsg,
1901                            SMLoc &Loc);
1902   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1903                             const unsigned MinVal,
1904                             const unsigned MaxVal,
1905                             const StringRef ErrMsg);
1906   ParseStatus parseSwizzle(OperandVector &Operands);
1907   bool parseSwizzleOffset(int64_t &Imm);
1908   bool parseSwizzleMacro(int64_t &Imm);
1909   bool parseSwizzleQuadPerm(int64_t &Imm);
1910   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1911   bool parseSwizzleBroadcast(int64_t &Imm);
1912   bool parseSwizzleSwap(int64_t &Imm);
1913   bool parseSwizzleReverse(int64_t &Imm);
1914   bool parseSwizzleFFT(int64_t &Imm);
1915   bool parseSwizzleRotate(int64_t &Imm);
1916 
1917   ParseStatus parseGPRIdxMode(OperandVector &Operands);
1918   int64_t parseGPRIdxMacro();
1919 
1920   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1921   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1922 
1923   ParseStatus parseOModSI(OperandVector &Operands);
1924 
1925   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1926                OptionalImmIndexMap &OptionalIdx);
1927   void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1928   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1929   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1930   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1931   void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1932 
1933   void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1934   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1935                     OptionalImmIndexMap &OptionalIdx);
1936   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1937                 OptionalImmIndexMap &OptionalIdx);
1938 
1939   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1940   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1941 
1942   bool parseDimId(unsigned &Encoding);
1943   ParseStatus parseDim(OperandVector &Operands);
1944   bool convertDppBoundCtrl(int64_t &BoundCtrl);
1945   ParseStatus parseDPP8(OperandVector &Operands);
1946   ParseStatus parseDPPCtrl(OperandVector &Operands);
1947   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1948   int64_t parseDPPCtrlSel(StringRef Ctrl);
1949   int64_t parseDPPCtrlPerm();
1950   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1951   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1952     cvtDPP(Inst, Operands, true);
1953   }
1954   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1955                   bool IsDPP8 = false);
1956   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1957     cvtVOP3DPP(Inst, Operands, true);
1958   }
1959 
1960   ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1961                            AMDGPUOperand::ImmTy Type);
1962   ParseStatus parseSDWADstUnused(OperandVector &Operands);
1963   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1964   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1965   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1966   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1967   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1968   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1969                uint64_t BasicInstType,
1970                bool SkipDstVcc = false,
1971                bool SkipSrcVcc = false);
1972 
1973   ParseStatus parseEndpgm(OperandVector &Operands);
1974 
1975   ParseStatus parseVOPD(OperandVector &Operands);
1976 };
1977 
1978 } // end anonymous namespace
1979 
1980 // May be called with integer type with equivalent bitwidth.
1981 static const fltSemantics *getFltSemantics(unsigned Size) {
1982   switch (Size) {
1983   case 4:
1984     return &APFloat::IEEEsingle();
1985   case 8:
1986     return &APFloat::IEEEdouble();
1987   case 2:
1988     return &APFloat::IEEEhalf();
1989   default:
1990     llvm_unreachable("unsupported fp type");
1991   }
1992 }
1993 
1994 static const fltSemantics *getFltSemantics(MVT VT) {
1995   return getFltSemantics(VT.getSizeInBits() / 8);
1996 }
1997 
1998 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1999   switch (OperandType) {
2000   // When a floating-point immediate is used as an operand of type i16, the
2001   // 32-bit representation of the constant truncated to the 16 LSBs should be used.
2002   case AMDGPU::OPERAND_REG_IMM_INT16:
2003   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2004   case AMDGPU::OPERAND_REG_IMM_INT32:
2005   case AMDGPU::OPERAND_REG_IMM_FP32:
2006   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2007   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2008   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2009   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2010   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2011   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2012   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2013   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2014   case AMDGPU::OPERAND_KIMM32:
2015   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
2016     return &APFloat::IEEEsingle();
2017   case AMDGPU::OPERAND_REG_IMM_INT64:
2018   case AMDGPU::OPERAND_REG_IMM_FP64:
2019   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2020   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2021   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2022   case AMDGPU::OPERAND_KIMM64:
2023     return &APFloat::IEEEdouble();
2024   case AMDGPU::OPERAND_REG_IMM_FP16:
2025   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2026   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2027   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2028   case AMDGPU::OPERAND_KIMM16:
2029     return &APFloat::IEEEhalf();
2030   case AMDGPU::OPERAND_REG_IMM_BF16:
2031   case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2032   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2033   case AMDGPU::OPERAND_REG_IMM_V2BF16:
2034     return &APFloat::BFloat();
2035   default:
2036     llvm_unreachable("unsupported fp type");
2037   }
2038 }
2039 
2040 //===----------------------------------------------------------------------===//
2041 // Operand
2042 //===----------------------------------------------------------------------===//
2043 
2044 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2045   bool Lost;
2046 
2047   // Convert the literal to the semantics of the requested type.
2048   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2049                                                APFloat::rmNearestTiesToEven,
2050                                                &Lost);
2051   // We allow precision loss but not overflow or underflow.
2052   if (Status != APFloat::opOK &&
2053       Lost &&
2054       ((Status & APFloat::opOverflow)  != 0 ||
2055        (Status & APFloat::opUnderflow) != 0)) {
2056     return false;
2057   }
2058 
2059   return true;
2060 }
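
// For example (illustrative values, not from the original source):
//   APFloat F(APFloat::IEEEdouble(), "0.1");
//   canLosslesslyConvertToFPType(F, MVT::f16);   // true: inexact but in range
//   APFloat G(APFloat::IEEEdouble(), "1.0e10");
//   canLosslesslyConvertToFPType(G, MVT::f16);   // false: overflows f16's range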
2061 
2062 static bool isSafeTruncation(int64_t Val, unsigned Size) {
2063   return isUIntN(Size, Val) || isIntN(Size, Val);
2064 }
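
// For example, with Size == 16 (illustrative):
//   isSafeTruncation(-1, 16)      -> true  (valid as a signed 16-bit value)
//   isSafeTruncation(0xFFFF, 16)  -> true  (valid as an unsigned 16-bit value)
//   isSafeTruncation(0x1FFFF, 16) -> false (fits neither representation)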
2065 
2066 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2067   if (VT.getScalarType() == MVT::i16)
2068     return isInlinableLiteral32(Val, HasInv2Pi);
2069 
2070   if (VT.getScalarType() == MVT::f16)
2071     return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2072 
2073   assert(VT.getScalarType() == MVT::bf16);
2074 
2075   return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2076 }
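
// For example (bit patterns per the AMDGPU inline-constant rules):
//   isInlineableLiteralOp16(0x3118, MVT::f16, /*HasInv2Pi=*/true) -> true,
//     since 0x3118 is the f16 encoding of 1/(2*pi);
//   isInlineableLiteralOp16(64, MVT::i16, HasInv2Pi) -> true,
//     since i16 operands follow the 32-bit integer inline-constant rules.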
2077 
2078 bool AMDGPUOperand::isInlinableImm(MVT type) const {
2079 
2080   // This is a hack to enable named inline values like shared_base with
2081   // both 32-bit and 64-bit operands. Note that these values are defined
2082   // as 32-bit operands only.
2083
2084   if (isInlineValue()) {
2085     return true;
2086   }
2087 
2088   if (!isImmTy(ImmTyNone)) {
2089     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2090     return false;
2091   }
2092   // TODO: We should avoid using host floats here. It would be better to
2093   // check the float bit values, which is what a few other places do.
2094   // We've had bot failures before due to weird NaN support on MIPS hosts.
2095 
2096   APInt Literal(64, Imm.Val);
2097 
2098   if (Imm.IsFPImm) { // We got fp literal token
2099     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2100       return AMDGPU::isInlinableLiteral64(Imm.Val,
2101                                           AsmParser->hasInv2PiInlineImm());
2102     }
2103 
2104     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2105     if (!canLosslesslyConvertToFPType(FPLiteral, type))
2106       return false;
2107 
2108     if (type.getScalarSizeInBits() == 16) {
2109       bool Lost = false;
2110       switch (type.getScalarType().SimpleTy) {
2111       default:
2112         llvm_unreachable("unknown 16-bit type");
2113       case MVT::bf16:
2114         FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2115                           &Lost);
2116         break;
2117       case MVT::f16:
2118         FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2119                           &Lost);
2120         break;
2121       case MVT::i16:
2122         FPLiteral.convert(APFloatBase::IEEEsingle(),
2123                           APFloat::rmNearestTiesToEven, &Lost);
2124         break;
2125       }
2126       // We need to use the 32-bit representation here because when a
2127       // floating-point inline constant is used as an i16 operand, its 32-bit
2128       // representation will be used. We will need the 32-bit value to check
2129       // whether it is an FP inline constant.
2130       uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2131       return isInlineableLiteralOp16(ImmVal, type,
2132                                      AsmParser->hasInv2PiInlineImm());
2133     }
2134 
2135     // Check if single precision literal is inlinable
2136     return AMDGPU::isInlinableLiteral32(
2137       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2138       AsmParser->hasInv2PiInlineImm());
2139   }
2140 
2141   // We got int literal token.
2142   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2143     return AMDGPU::isInlinableLiteral64(Imm.Val,
2144                                         AsmParser->hasInv2PiInlineImm());
2145   }
2146 
2147   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2148     return false;
2149   }
2150 
2151   if (type.getScalarSizeInBits() == 16) {
2152     return isInlineableLiteralOp16(
2153       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2154       type, AsmParser->hasInv2PiInlineImm());
2155   }
2156 
2157   return AMDGPU::isInlinableLiteral32(
2158     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2159     AsmParser->hasInv2PiInlineImm());
2160 }
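
// Illustrative outcomes for a 32-bit operand (per the AMDGPU inline-constant
// rules: integers -16..64 plus a small set of FP constants):
//   int token 64   -> inlinable;  int token 65 -> not inlinable (literal)
//   fp token -0.5  -> inlinable;  fp token 0.1 -> not inlinable (literal)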
2161 
2162 bool AMDGPUOperand::isLiteralImm(MVT type) const {
2163   // Check that this immediate can be added as literal
2164   if (!isImmTy(ImmTyNone)) {
2165     return false;
2166   }
2167 
2168   bool Allow64Bit =
2169       (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2170 
2171   if (!Imm.IsFPImm) {
2172     // We got int literal token.
2173 
2174     if (type == MVT::f64 && hasFPModifiers()) {
2175       // FP modifiers cannot be applied to int literals while preserving the
2176       // same semantics for VOP1/2/C and VOP3 because of integer truncation.
2177       // To avoid ambiguity, disable these cases.
2178       return false;
2179     }
2180 
2181     unsigned Size = type.getSizeInBits();
2182     if (Size == 64) {
2183       if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2184         return true;
2185       Size = 32;
2186     }
2187 
2188     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2189     // types.
2190     return isSafeTruncation(Imm.Val, Size);
2191   }
2192 
2193   // We got fp literal token
2194   if (type == MVT::f64) { // Expected 64-bit fp operand
2195     // We may set the low 32 bits of the literal to zeroes, but we accept such literals.
2196     return true;
2197   }
2198 
2199   if (type == MVT::i64) { // Expected 64-bit int operand
2200     // We don't allow fp literals in 64-bit integer instructions. It is
2201     // unclear how we should encode them.
2202     return false;
2203   }
2204 
2205   // We allow fp literals with f16x2 operands assuming that the specified
2206   // literal goes into the lower half and the upper half is zero. We also
2207   // require that the literal may be losslessly converted to f16.
2208   //
2209   // For i16x2 operands, we assume that the specified literal is encoded as a
2210   // single-precision float. This is pretty odd, but it matches SP3 and what
2211   // happens in hardware.
2212   MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
2213                      : (type == MVT::v2i16) ? MVT::f32
2214                      : (type == MVT::v2f32) ? MVT::f32
2215                                             : type;
2216 
2217   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2218   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2219 }
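
// For example (illustrative): with a v2f16 operand the fp literal 1.5 is
// accepted and later encoded as 0x3E00 (f16 1.5) in the low half with a zero
// high half, while for a v2i16 operand the same literal is encoded as the
// f32 pattern 0x3FC00000, matching SP3.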
2220 
2221 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2222   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2223 }
2224 
2225 bool AMDGPUOperand::isVRegWithInputMods() const {
2226   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2227          // GFX90A allows DPP on 64-bit operands.
2228          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2229           AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2230 }
2231 
2232 template <bool IsFake16>
2233 bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2234   return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2235                              : AMDGPU::VGPR_16_Lo128RegClassID);
2236 }
2237 
2238 template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2239   return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2240                              : AMDGPU::VGPR_16RegClassID);
2241 }
2242 
2243 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2244   if (AsmParser->isVI())
2245     return isVReg32();
2246   if (AsmParser->isGFX9Plus())
2247     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2248   return false;
2249 }
2250 
2251 bool AMDGPUOperand::isSDWAFP16Operand() const {
2252   return isSDWAOperand(MVT::f16);
2253 }
2254 
2255 bool AMDGPUOperand::isSDWAFP32Operand() const {
2256   return isSDWAOperand(MVT::f32);
2257 }
2258 
2259 bool AMDGPUOperand::isSDWAInt16Operand() const {
2260   return isSDWAOperand(MVT::i16);
2261 }
2262 
2263 bool AMDGPUOperand::isSDWAInt32Operand() const {
2264   return isSDWAOperand(MVT::i32);
2265 }
2266 
2267 bool AMDGPUOperand::isBoolReg() const {
2268   auto FB = AsmParser->getFeatureBits();
2269   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2270                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2271 }
2272 
2273 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2274 {
2275   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2276   assert(Size == 2 || Size == 4 || Size == 8);
2277 
2278   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2279 
2280   if (Imm.Mods.Abs) {
2281     Val &= ~FpSignMask;
2282   }
2283   if (Imm.Mods.Neg) {
2284     Val ^= FpSignMask;
2285   }
2286 
2287   return Val;
2288 }
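
// For example, with Size == 4 (FpSignMask == 0x80000000, illustrative):
//   abs of 0xC0400000 (-3.0f) clears the sign bit -> 0x40400000 (3.0f)
//   neg of 0x40400000 (3.0f)  flips the sign bit  -> 0xC0400000 (-3.0f)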
2289 
2290 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2291   if (isExpr()) {
2292     Inst.addOperand(MCOperand::createExpr(Expr));
2293     return;
2294   }
2295 
2296   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2297                              Inst.getNumOperands())) {
2298     addLiteralImmOperand(Inst, Imm.Val,
2299                          ApplyModifiers &&
2300                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2301   } else {
2302     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2303     Inst.addOperand(MCOperand::createImm(Imm.Val));
2304     setImmKindNone();
2305   }
2306 }
2307 
2308 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2309   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2310   auto OpNum = Inst.getNumOperands();
2311   // Check that this operand accepts literals
2312   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2313 
2314   if (ApplyModifiers) {
2315     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2316     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2317     Val = applyInputFPModifiers(Val, Size);
2318   }
2319 
2320   APInt Literal(64, Val);
2321   uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2322 
2323   if (Imm.IsFPImm) { // We got fp literal token
2324     switch (OpTy) {
2325     case AMDGPU::OPERAND_REG_IMM_INT64:
2326     case AMDGPU::OPERAND_REG_IMM_FP64:
2327     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2328     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2329     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2330       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2331                                        AsmParser->hasInv2PiInlineImm())) {
2332         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2333         setImmKindConst();
2334         return;
2335       }
2336 
2337       // Non-inlineable
2338       if (AMDGPU::isSISrcFPOperand(InstDesc,
2339                                    OpNum)) { // Expected 64-bit fp operand
2340         bool HasMandatoryLiteral =
2341             AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2342         // For fp operands we check whether the low 32 bits are zeros.
2343         if (Literal.getLoBits(32) != 0 &&
2344             (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2345             !HasMandatoryLiteral) {
2346           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2347               Inst.getLoc(),
2348               "Can't encode literal as exact 64-bit floating-point operand. "
2349               "Low 32-bits will be set to zero");
2350           Val &= 0xffffffff00000000u;
2351         }
2352 
2353         Inst.addOperand(MCOperand::createImm(Val));
2354         setImmKindLiteral();
2355         return;
2356       }
2357 
2358       // We don't allow fp literals in 64-bit integer instructions. It is
2359       // unclear how we should encode them. This case should be checked earlier
2360       // in predicate methods (isLiteralImm()).
2361       llvm_unreachable("fp literal in 64-bit integer instruction.");
2362 
2363     case AMDGPU::OPERAND_KIMM64:
2364       Inst.addOperand(MCOperand::createImm(Val));
2365       setImmKindMandatoryLiteral();
2366       return;
2367 
2368     case AMDGPU::OPERAND_REG_IMM_BF16:
2369     case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2370     case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2371     case AMDGPU::OPERAND_REG_IMM_V2BF16:
2372       if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2373         // This is 1/(2*pi), which is going to be truncated to bf16 with a
2374         // loss of precision. The constant is the idiomatic fp32 value of
2375         // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16
2376         // bits cleared. Prevent rounding below.
2377         Inst.addOperand(MCOperand::createImm(0x3e22));
2378         setImmKindLiteral();
2379         return;
2380       }
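      // Illustration (values derived here, not part of the original comment):
      // the f32 encoding of 1/(2*pi) is 0x3E22F983; truncation keeps the top
      // 16 bits, 0x3E22, the bf16 inline constant the hardware recognizes,
      // while round-to-nearest would instead produce 0x3E23.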
2381       [[fallthrough]];
2382 
2383     case AMDGPU::OPERAND_REG_IMM_INT32:
2384     case AMDGPU::OPERAND_REG_IMM_FP32:
2385     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2386     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2387     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2388     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2389     case AMDGPU::OPERAND_REG_IMM_INT16:
2390     case AMDGPU::OPERAND_REG_IMM_FP16:
2391     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2392     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2393     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2394     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2395     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2396     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2397     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2398     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2399     case AMDGPU::OPERAND_KIMM32:
2400     case AMDGPU::OPERAND_KIMM16:
2401     case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
2402       bool lost;
2403       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2404       // Convert the literal to the operand's FP semantics.
2405       FPLiteral.convert(*getOpFltSemantics(OpTy),
2406                         APFloat::rmNearestTiesToEven, &lost);
2407       // We allow precision loss but not overflow or underflow. This should
2408       // have been checked earlier in isLiteralImm().
2409 
2410       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2411       Inst.addOperand(MCOperand::createImm(ImmVal));
2412       if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2413         setImmKindMandatoryLiteral();
2414       } else {
2415         setImmKindLiteral();
2416       }
2417       return;
2418     }
2419     default:
2420       llvm_unreachable("invalid operand size");
2421     }
2422 
2423     return;
2424   }
2425 
2426   // We got int literal token.
2427   // Only sign extend inline immediates.
2428   switch (OpTy) {
2429   case AMDGPU::OPERAND_REG_IMM_INT32:
2430   case AMDGPU::OPERAND_REG_IMM_FP32:
2431   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2432   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2433   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2434   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2435   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2436   case AMDGPU::OPERAND_REG_IMM_V2BF16:
2437   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2438   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2439   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2440   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
2441     if (isSafeTruncation(Val, 32) &&
2442         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2443                                      AsmParser->hasInv2PiInlineImm())) {
2444       Inst.addOperand(MCOperand::createImm(Val));
2445       setImmKindConst();
2446       return;
2447     }
2448 
2449     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2450     setImmKindLiteral();
2451     return;
2452 
2453   case AMDGPU::OPERAND_REG_IMM_INT64:
2454   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2455     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2456       Inst.addOperand(MCOperand::createImm(Val));
2457       setImmKindConst();
2458       return;
2459     }
2460 
2461     // If the target doesn't support 64-bit literals, or if the lit modifier
2462     // is explicitly used, truncate the value to its 32 LSBs. This only has an
2463     // effect when the 32 MSBs are not zero, i.e. when the value can't be
2464     // safely truncated to uint32_t as-is.
2465     if (!AsmParser->has64BitLiterals() || getModifiers().Lit)
2466       Val = Lo_32(Val);
2467 
2468     Inst.addOperand(MCOperand::createImm(Val));
2469     setImmKindLiteral();
2470     return;
2471 
2472   case AMDGPU::OPERAND_REG_IMM_FP64:
2473   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2474   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2475     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2476       Inst.addOperand(MCOperand::createImm(Val));
2477       setImmKindConst();
2478       return;
2479     }
2480 
2481     // If the target doesn't support 64-bit literals, we need to place the
2482     // constant in the 32 MSBs of the double-precision floating-point value.
2483     if (!AsmParser->has64BitLiterals()) {
2484       Val = static_cast<uint64_t>(Val) << 32;
2485     } else {
2486       // Now that the target does support 64-bit literals, there are two
2487       // cases where we still want to use the src_literal encoding:
2488       // 1) it is explicitly forced by the lit modifier;
2489       // 2) the value has a valid 32-bit representation (signed or unsigned)
2490       //    and is not forced by the lit64 modifier.
2491       if (getModifiers().Lit ||
2492           (!getModifiers().Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2493         Val = static_cast<uint64_t>(Val) << 32;
2494     }
2495 
2496     Inst.addOperand(MCOperand::createImm(Val));
2497     setImmKindLiteral();
2498     return;
2499 
2500   case AMDGPU::OPERAND_REG_IMM_INT16:
2501   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2502     if (isSafeTruncation(Val, 16) &&
2503         AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2504       Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2505       setImmKindConst();
2506       return;
2507     }
2508 
2509     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2510     setImmKindLiteral();
2511     return;
2512 
2513   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2514   case AMDGPU::OPERAND_REG_IMM_FP16:
2515     if (isSafeTruncation(Val, 16) &&
2516         AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2517                                        AsmParser->hasInv2PiInlineImm())) {
2518       Inst.addOperand(MCOperand::createImm(Val));
2519       setImmKindConst();
2520       return;
2521     }
2522 
2523     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2524     setImmKindLiteral();
2525     return;
2526 
2527   case AMDGPU::OPERAND_REG_IMM_BF16:
2528   case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2529     if (isSafeTruncation(Val, 16) &&
2530         AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2531                                        AsmParser->hasInv2PiInlineImm())) {
2532       Inst.addOperand(MCOperand::createImm(Val));
2533       setImmKindConst();
2534       return;
2535     }
2536 
2537     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2538     setImmKindLiteral();
2539     return;
2540 
2541   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: {
2542     assert(isSafeTruncation(Val, 16));
2543     assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2544     Inst.addOperand(MCOperand::createImm(Val));
2545     return;
2546   }
2547   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
2548     assert(isSafeTruncation(Val, 16));
2549     assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2550                                           AsmParser->hasInv2PiInlineImm()));
2551 
2552     Inst.addOperand(MCOperand::createImm(Val));
2553     return;
2554   }
2555 
2556   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: {
2557     assert(isSafeTruncation(Val, 16));
2558     assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2559                                           AsmParser->hasInv2PiInlineImm()));
2560 
2561     Inst.addOperand(MCOperand::createImm(Val));
2562     return;
2563   }
2564 
2565   case AMDGPU::OPERAND_KIMM32:
2566     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2567     setImmKindMandatoryLiteral();
2568     return;
2569   case AMDGPU::OPERAND_KIMM16:
2570     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2571     setImmKindMandatoryLiteral();
2572     return;
2573   case AMDGPU::OPERAND_KIMM64:
2574     if ((isInt<32>(Val) || isUInt<32>(Val)) && !getModifiers().Lit64)
2575       Val <<= 32;
2576 
2577     Inst.addOperand(MCOperand::createImm(Val));
2578     setImmKindMandatoryLiteral();
2579     return;
2580   default:
2581     llvm_unreachable("invalid operand size");
2582   }
2583 }
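
// Illustrative end-to-end behavior for a 32-bit src operand (values assumed):
// an int token 64 is emitted as an inline constant, while an int token
// 0x12345678 is emitted as a 32-bit literal (the Lo_32 of the value).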
2584 
2585 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2586   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2587 }
2588 
2589 bool AMDGPUOperand::isInlineValue() const {
2590   return isRegKind() && ::isInlineValue(getReg());
2591 }
2592 
2593 //===----------------------------------------------------------------------===//
2594 // AsmParser
2595 //===----------------------------------------------------------------------===//
2596 
2597 void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2598   // TODO: make these pre-defined variables read-only.
2599   // Currently there is no suitable machinery in core llvm-mc for this.
2600   // MCSymbol::isRedefinable is intended for another purpose, and
2601   // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2602   MCContext &Ctx = getContext();
2603   MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2604   Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2605 }
2606 
2607 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2608   if (Is == IS_VGPR) {
2609     switch (RegWidth) {
2610       default: return -1;
2611       case 32:
2612         return AMDGPU::VGPR_32RegClassID;
2613       case 64:
2614         return AMDGPU::VReg_64RegClassID;
2615       case 96:
2616         return AMDGPU::VReg_96RegClassID;
2617       case 128:
2618         return AMDGPU::VReg_128RegClassID;
2619       case 160:
2620         return AMDGPU::VReg_160RegClassID;
2621       case 192:
2622         return AMDGPU::VReg_192RegClassID;
2623       case 224:
2624         return AMDGPU::VReg_224RegClassID;
2625       case 256:
2626         return AMDGPU::VReg_256RegClassID;
2627       case 288:
2628         return AMDGPU::VReg_288RegClassID;
2629       case 320:
2630         return AMDGPU::VReg_320RegClassID;
2631       case 352:
2632         return AMDGPU::VReg_352RegClassID;
2633       case 384:
2634         return AMDGPU::VReg_384RegClassID;
2635       case 512:
2636         return AMDGPU::VReg_512RegClassID;
2637       case 1024:
2638         return AMDGPU::VReg_1024RegClassID;
2639     }
2640   } else if (Is == IS_TTMP) {
2641     switch (RegWidth) {
2642       default: return -1;
2643       case 32:
2644         return AMDGPU::TTMP_32RegClassID;
2645       case 64:
2646         return AMDGPU::TTMP_64RegClassID;
2647       case 128:
2648         return AMDGPU::TTMP_128RegClassID;
2649       case 256:
2650         return AMDGPU::TTMP_256RegClassID;
2651       case 512:
2652         return AMDGPU::TTMP_512RegClassID;
2653     }
2654   } else if (Is == IS_SGPR) {
2655     switch (RegWidth) {
2656       default: return -1;
2657       case 32:
2658         return AMDGPU::SGPR_32RegClassID;
2659       case 64:
2660         return AMDGPU::SGPR_64RegClassID;
2661       case 96:
2662         return AMDGPU::SGPR_96RegClassID;
2663       case 128:
2664         return AMDGPU::SGPR_128RegClassID;
2665       case 160:
2666         return AMDGPU::SGPR_160RegClassID;
2667       case 192:
2668         return AMDGPU::SGPR_192RegClassID;
2669       case 224:
2670         return AMDGPU::SGPR_224RegClassID;
2671       case 256:
2672         return AMDGPU::SGPR_256RegClassID;
2673       case 288:
2674         return AMDGPU::SGPR_288RegClassID;
2675       case 320:
2676         return AMDGPU::SGPR_320RegClassID;
2677       case 352:
2678         return AMDGPU::SGPR_352RegClassID;
2679       case 384:
2680         return AMDGPU::SGPR_384RegClassID;
2681       case 512:
2682         return AMDGPU::SGPR_512RegClassID;
2683     }
2684   } else if (Is == IS_AGPR) {
2685     switch (RegWidth) {
2686       default: return -1;
2687       case 32:
2688         return AMDGPU::AGPR_32RegClassID;
2689       case 64:
2690         return AMDGPU::AReg_64RegClassID;
2691       case 96:
2692         return AMDGPU::AReg_96RegClassID;
2693       case 128:
2694         return AMDGPU::AReg_128RegClassID;
2695       case 160:
2696         return AMDGPU::AReg_160RegClassID;
2697       case 192:
2698         return AMDGPU::AReg_192RegClassID;
2699       case 224:
2700         return AMDGPU::AReg_224RegClassID;
2701       case 256:
2702         return AMDGPU::AReg_256RegClassID;
2703       case 288:
2704         return AMDGPU::AReg_288RegClassID;
2705       case 320:
2706         return AMDGPU::AReg_320RegClassID;
2707       case 352:
2708         return AMDGPU::AReg_352RegClassID;
2709       case 384:
2710         return AMDGPU::AReg_384RegClassID;
2711       case 512:
2712         return AMDGPU::AReg_512RegClassID;
2713       case 1024:
2714         return AMDGPU::AReg_1024RegClassID;
2715     }
2716   }
2717   return -1;
2718 }
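
// For example (illustrative):
//   getRegClass(IS_VGPR, 96) -> AMDGPU::VReg_96RegClassID
//   getRegClass(IS_TTMP, 96) -> -1 (there is no 96-bit TTMP class)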
2719 
2720 static MCRegister getSpecialRegForName(StringRef RegName) {
2721   return StringSwitch<unsigned>(RegName)
2722     .Case("exec", AMDGPU::EXEC)
2723     .Case("vcc", AMDGPU::VCC)
2724     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2725     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2726     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2727     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2728     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2729     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2730     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2731     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2732     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2733     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2734     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2735     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2736     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2737     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2738     .Case("m0", AMDGPU::M0)
2739     .Case("vccz", AMDGPU::SRC_VCCZ)
2740     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2741     .Case("execz", AMDGPU::SRC_EXECZ)
2742     .Case("src_execz", AMDGPU::SRC_EXECZ)
2743     .Case("scc", AMDGPU::SRC_SCC)
2744     .Case("src_scc", AMDGPU::SRC_SCC)
2745     .Case("tba", AMDGPU::TBA)
2746     .Case("tma", AMDGPU::TMA)
2747     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2748     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2749     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2750     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2751     .Case("vcc_lo", AMDGPU::VCC_LO)
2752     .Case("vcc_hi", AMDGPU::VCC_HI)
2753     .Case("exec_lo", AMDGPU::EXEC_LO)
2754     .Case("exec_hi", AMDGPU::EXEC_HI)
2755     .Case("tma_lo", AMDGPU::TMA_LO)
2756     .Case("tma_hi", AMDGPU::TMA_HI)
2757     .Case("tba_lo", AMDGPU::TBA_LO)
2758     .Case("tba_hi", AMDGPU::TBA_HI)
2759     .Case("pc", AMDGPU::PC_REG)
2760     .Case("null", AMDGPU::SGPR_NULL)
2761     .Default(AMDGPU::NoRegister);
2762 }
2763 
2764 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2765                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2766   auto R = parseRegister();
2767   if (!R) return true;
2768   assert(R->isReg());
2769   RegNo = R->getReg();
2770   StartLoc = R->getStartLoc();
2771   EndLoc = R->getEndLoc();
2772   return false;
2773 }
2774 
2775 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2776                                     SMLoc &EndLoc) {
2777   return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2778 }
2779 
2780 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2781                                               SMLoc &EndLoc) {
2782   bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2783   bool PendingErrors = getParser().hasPendingError();
2784   getParser().clearPendingErrors();
2785   if (PendingErrors)
2786     return ParseStatus::Failure;
2787   if (Result)
2788     return ParseStatus::NoMatch;
2789   return ParseStatus::Success;
2790 }
2791 
2792 bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2793                                             RegisterKind RegKind,
2794                                             MCRegister Reg1, SMLoc Loc) {
2795   switch (RegKind) {
2796   case IS_SPECIAL:
2797     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2798       Reg = AMDGPU::EXEC;
2799       RegWidth = 64;
2800       return true;
2801     }
2802     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2803       Reg = AMDGPU::FLAT_SCR;
2804       RegWidth = 64;
2805       return true;
2806     }
2807     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2808       Reg = AMDGPU::XNACK_MASK;
2809       RegWidth = 64;
2810       return true;
2811     }
2812     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2813       Reg = AMDGPU::VCC;
2814       RegWidth = 64;
2815       return true;
2816     }
2817     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2818       Reg = AMDGPU::TBA;
2819       RegWidth = 64;
2820       return true;
2821     }
2822     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2823       Reg = AMDGPU::TMA;
2824       RegWidth = 64;
2825       return true;
2826     }
2827     Error(Loc, "register does not fit in the list");
2828     return false;
2829   case IS_VGPR:
2830   case IS_SGPR:
2831   case IS_AGPR:
2832   case IS_TTMP:
2833     if (Reg1 != Reg + RegWidth / 32) {
2834       Error(Loc, "registers in a list must have consecutive indices");
2835       return false;
2836     }
2837     RegWidth += 32;
2838     return true;
2839   default:
2840     llvm_unreachable("unexpected register kind");
2841   }
2842 }
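
// For example (illustrative): while parsing [exec_lo,exec_hi] the pair is
// merged into EXEC with RegWidth == 64; while parsing [v4,v5,v6] each step
// checks that the next register is Reg + RegWidth / 32 and grows RegWidth
// by 32.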
2843 
2844 struct RegInfo {
2845   StringLiteral Name;
2846   RegisterKind Kind;
2847 };
2848 
2849 static constexpr RegInfo RegularRegisters[] = {
2850   {{"v"},    IS_VGPR},
2851   {{"s"},    IS_SGPR},
2852   {{"ttmp"}, IS_TTMP},
2853   {{"acc"},  IS_AGPR},
2854   {{"a"},    IS_AGPR},
2855 };
2856 
2857 static bool isRegularReg(RegisterKind Kind) {
2858   return Kind == IS_VGPR ||
2859          Kind == IS_SGPR ||
2860          Kind == IS_TTMP ||
2861          Kind == IS_AGPR;
2862 }
2863 
2864 static const RegInfo* getRegularRegInfo(StringRef Str) {
2865   for (const RegInfo &Reg : RegularRegisters)
2866     if (Str.starts_with(Reg.Name))
2867       return &Reg;
2868   return nullptr;
2869 }
2870 
2871 static bool getRegNum(StringRef Str, unsigned& Num) {
2872   return !Str.getAsInteger(10, Num);
2873 }
2874 
2875 bool
2876 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2877                             const AsmToken &NextToken) const {
2878 
2879   // A list of consecutive registers: [s0,s1,s2,s3]
2880   if (Token.is(AsmToken::LBrac))
2881     return true;
2882 
2883   if (!Token.is(AsmToken::Identifier))
2884     return false;
2885 
2886   // A single register like s0 or a range of registers like s[0:1]
2887 
2888   StringRef Str = Token.getString();
2889   const RegInfo *Reg = getRegularRegInfo(Str);
2890   if (Reg) {
2891     StringRef RegName = Reg->Name;
2892     StringRef RegSuffix = Str.substr(RegName.size());
2893     if (!RegSuffix.empty()) {
2894       RegSuffix.consume_back(".l");
2895       RegSuffix.consume_back(".h");
2896       unsigned Num;
2897       // A single register with an index: rXX
2898       if (getRegNum(RegSuffix, Num))
2899         return true;
2900     } else {
2901       // A range of registers: r[XX:YY].
2902       if (NextToken.is(AsmToken::LBrac))
2903         return true;
2904     }
2905   }
2906 
2907   return getSpecialRegForName(Str).isValid();
2908 }
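
// Examples of token sequences recognized above (illustrative):
//   "v0", "v255.h", "a5"    -> a regular register with an index
//   "s" followed by "["     -> the start of a range such as s[0:1]
//   "[" alone               -> the start of a list such as [s0,s1]
//   "vcc", "exec_lo", "m0"  -> special registers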
2909 
2910 bool
2911 AMDGPUAsmParser::isRegister()
2912 {
2913   return isRegister(getToken(), peekToken());
2914 }
2915 
2916 MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2917                                           unsigned SubReg, unsigned RegWidth,
2918                                           SMLoc Loc) {
2919   assert(isRegularReg(RegKind));
2920 
2921   unsigned AlignSize = 1;
2922   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2923     // SGPR and TTMP registers must be aligned.
2924     // Max required alignment is 4 dwords.
2925     AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2926   }
2927 
2928   if (RegNum % AlignSize != 0) {
2929     Error(Loc, "invalid register alignment");
2930     return MCRegister();
2931   }
2932 
2933   unsigned RegIdx = RegNum / AlignSize;
2934   int RCID = getRegClass(RegKind, RegWidth);
2935   if (RCID == -1) {
2936     Error(Loc, "invalid or unsupported register size");
2937     return MCRegister();
2938   }
2939 
2940   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2941   const MCRegisterClass RC = TRI->getRegClass(RCID);
2942   if (RegIdx >= RC.getNumRegs()) {
2943     Error(Loc, "register index is out of range");
2944     return MCRegister();
2945   }
2946 
2947   MCRegister Reg = RC.getRegister(RegIdx);
2948 
2949   if (SubReg) {
2950     Reg = TRI->getSubReg(Reg, SubReg);
2951 
2952     // Currently all regular registers have their .l and .h subregisters, so
2953     // we should never need to generate an error here.
2954     assert(Reg && "Invalid subregister!");
2955   }
2956 
2957   return Reg;
2958 }
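
// For example (illustrative): s[4:7] has RegWidth == 128 and AlignSize == 4,
// so RegNum 4 is accepted (RegIdx == 1 in SGPR_128), while s[2:5] fails with
// "invalid register alignment". VGPRs and AGPRs use AlignSize == 1, so
// v[3:4] is fine.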
2959 
2960 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2961                                     unsigned &SubReg) {
2962   int64_t RegLo, RegHi;
2963   if (!skipToken(AsmToken::LBrac, "missing register index"))
2964     return false;
2965 
2966   SMLoc FirstIdxLoc = getLoc();
2967   SMLoc SecondIdxLoc;
2968 
2969   if (!parseExpr(RegLo))
2970     return false;
2971 
2972   if (trySkipToken(AsmToken::Colon)) {
2973     SecondIdxLoc = getLoc();
2974     if (!parseExpr(RegHi))
2975       return false;
2976   } else {
2977     RegHi = RegLo;
2978   }
2979 
2980   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2981     return false;
2982 
2983   if (!isUInt<32>(RegLo)) {
2984     Error(FirstIdxLoc, "invalid register index");
2985     return false;
2986   }
2987 
2988   if (!isUInt<32>(RegHi)) {
2989     Error(SecondIdxLoc, "invalid register index");
2990     return false;
2991   }
2992 
2993   if (RegLo > RegHi) {
2994     Error(FirstIdxLoc, "first register index should not exceed second index");
2995     return false;
2996   }
2997 
2998   if (RegHi == RegLo) {
2999     StringRef RegSuffix = getTokenStr();
3000     if (RegSuffix == ".l") {
3001       SubReg = AMDGPU::lo16;
3002       lex();
3003     } else if (RegSuffix == ".h") {
3004       SubReg = AMDGPU::hi16;
3005       lex();
3006     }
3007   }
3008 
3009   Num = static_cast<unsigned>(RegLo);
3010   RegWidth = 32 * ((RegHi - RegLo) + 1);
3011 
3012   return true;
3013 }
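
// For example (illustrative): the input "[8:11]" yields Num == 8 and
// RegWidth == 128; "[5]" yields Num == 5 and RegWidth == 32.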
3014 
3015 MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3016                                             unsigned &RegNum,
3017                                             unsigned &RegWidth,
3018                                             SmallVectorImpl<AsmToken> &Tokens) {
3019   assert(isToken(AsmToken::Identifier));
3020   MCRegister Reg = getSpecialRegForName(getTokenStr());
3021   if (Reg) {
3022     RegNum = 0;
3023     RegWidth = 32;
3024     RegKind = IS_SPECIAL;
3025     Tokens.push_back(getToken());
3026     lex(); // skip register name
3027   }
3028   return Reg;
3029 }
3030 
3031 MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3032                                             unsigned &RegNum,
3033                                             unsigned &RegWidth,
3034                                             SmallVectorImpl<AsmToken> &Tokens) {
3035   assert(isToken(AsmToken::Identifier));
3036   StringRef RegName = getTokenStr();
3037   auto Loc = getLoc();
3038 
3039   const RegInfo *RI = getRegularRegInfo(RegName);
3040   if (!RI) {
3041     Error(Loc, "invalid register name");
3042     return MCRegister();
3043   }
3044 
3045   Tokens.push_back(getToken());
3046   lex(); // skip register name
3047 
3048   RegKind = RI->Kind;
3049   StringRef RegSuffix = RegName.substr(RI->Name.size());
3050   unsigned SubReg = NoSubRegister;
3051   if (!RegSuffix.empty()) {
3052     if (RegSuffix.consume_back(".l"))
3053       SubReg = AMDGPU::lo16;
3054     else if (RegSuffix.consume_back(".h"))
3055       SubReg = AMDGPU::hi16;
3056 
3057     // Single 32-bit register: vXX.
3058     if (!getRegNum(RegSuffix, RegNum)) {
3059       Error(Loc, "invalid register index");
3060       return MCRegister();
3061     }
3062     RegWidth = 32;
3063   } else {
3064     // Range of registers: v[XX:YY]. ":YY" is optional.
3065     if (!ParseRegRange(RegNum, RegWidth, SubReg))
3066       return MCRegister();
3067   }
3068 
3069   return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3070 }
3071 
3072 MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3073                                          unsigned &RegNum, unsigned &RegWidth,
3074                                          SmallVectorImpl<AsmToken> &Tokens) {
3075   MCRegister Reg;
3076   auto ListLoc = getLoc();
3077 
3078   if (!skipToken(AsmToken::LBrac,
3079                  "expected a register or a list of registers")) {
3080     return MCRegister();
3081   }
3082 
3083   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3084 
3085   auto Loc = getLoc();
3086   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3087     return MCRegister();
3088   if (RegWidth != 32) {
3089     Error(Loc, "expected a single 32-bit register");
3090     return MCRegister();
3091   }
3092 
3093   while (trySkipToken(AsmToken::Comma)) {
3094     RegisterKind NextRegKind;
3095     MCRegister NextReg;
3096     unsigned NextRegNum, NextRegWidth;
3097     Loc = getLoc();
3098 
3099     if (!ParseAMDGPURegister(NextRegKind, NextReg,
3100                              NextRegNum, NextRegWidth,
3101                              Tokens)) {
3102       return MCRegister();
3103     }
3104     if (NextRegWidth != 32) {
3105       Error(Loc, "expected a single 32-bit register");
3106       return MCRegister();
3107     }
3108     if (NextRegKind != RegKind) {
3109       Error(Loc, "registers in a list must be of the same kind");
3110       return MCRegister();
3111     }
3112     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3113       return MCRegister();
3114   }
3115 
3116   if (!skipToken(AsmToken::RBrac,
3117                  "expected a comma or a closing square bracket")) {
3118     return MCRegister();
3119   }
3120 
3121   if (isRegularReg(RegKind))
3122     Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3123 
3124   return Reg;
3125 }
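
// Illustrative equivalence (an assumed example, not from the original
// source): a list of consecutive 32-bit registers of the same kind is
// folded into one tuple by the loop above, so for regular registers
//
//   [s0, s1, s2, s3]   is equivalent to   s[0:3]
//
// whereas mixed kinds ([s0, v1]) or non-32-bit elements ([s[0:1], s2])
// are rejected by the checks above.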
3126 
3127 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3128                                           MCRegister &Reg, unsigned &RegNum,
3129                                           unsigned &RegWidth,
3130                                           SmallVectorImpl<AsmToken> &Tokens) {
3131   auto Loc = getLoc();
3132   Reg = MCRegister();
3133 
3134   if (isToken(AsmToken::Identifier)) {
3135     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3136     if (!Reg)
3137       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3138   } else {
3139     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3140   }
3141 
3142   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3143   if (!Reg) {
3144     assert(Parser.hasPendingError());
3145     return false;
3146   }
3147 
3148   if (!subtargetHasRegister(*TRI, Reg)) {
3149     if (Reg == AMDGPU::SGPR_NULL) {
3150       Error(Loc, "'null' operand is not supported on this GPU");
3151     } else {
3152       Error(Loc, Twine(AMDGPUInstPrinter::getRegisterName(Reg)) +
3153                      " register not available on this GPU");
3154     }
3155     return false;
3156   }
3157 
3158   return true;
3159 }
3160 
3161 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3162                                           MCRegister &Reg, unsigned &RegNum,
3163                                           unsigned &RegWidth,
3164                                           bool RestoreOnFailure /*=false*/) {
3165   Reg = MCRegister();
3166 
3167   SmallVector<AsmToken, 1> Tokens;
3168   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3169     if (RestoreOnFailure) {
3170       while (!Tokens.empty()) {
3171         getLexer().UnLex(Tokens.pop_back_val());
3172       }
3173     }
3174     return true;
3175   }
3176   return false;
3177 }
3178 
3179 std::optional<StringRef>
3180 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3181   switch (RegKind) {
3182   case IS_VGPR:
3183     return StringRef(".amdgcn.next_free_vgpr");
3184   case IS_SGPR:
3185     return StringRef(".amdgcn.next_free_sgpr");
3186   default:
3187     return std::nullopt;
3188   }
3189 }
3190 
3191 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3192   auto SymbolName = getGprCountSymbolName(RegKind);
3193   assert(SymbolName && "initializing invalid register kind");
3194   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3195   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3196   Sym->setRedefinable(true);
3197 }
3198 
3199 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3200                                             unsigned DwordRegIndex,
3201                                             unsigned RegWidth) {
3202   // Symbols are only defined for GCN targets
3203   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3204     return true;
3205 
3206   auto SymbolName = getGprCountSymbolName(RegKind);
3207   if (!SymbolName)
3208     return true;
3209   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3210 
3211   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3212   int64_t OldCount;
3213 
3214   if (!Sym->isVariable())
3215     return !Error(getLoc(),
3216                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3217   if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3218     return !Error(
3219         getLoc(),
3220         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3221 
3222   if (OldCount <= NewMax)
3223     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3224 
3225   return true;
3226 }
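
// A minimal worked example (an assumption for exposition, not from the
// original source) of the running maximum maintained above for VGPRs:
//
//   v0       ->  NewMax = 0 + divideCeil(32, 32) - 1 = 0   ->  symbol = 1
//   v[4:5]   ->  NewMax = 4 + divideCeil(64, 32) - 1 = 5   ->  symbol = 6
//   v2       ->  NewMax = 2, but 6 > 2                     ->  unchanged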
3227 
3228 std::unique_ptr<AMDGPUOperand>
3229 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3230   const auto &Tok = getToken();
3231   SMLoc StartLoc = Tok.getLoc();
3232   SMLoc EndLoc = Tok.getEndLoc();
3233   RegisterKind RegKind;
3234   MCRegister Reg;
3235   unsigned RegNum, RegWidth;
3236 
3237   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3238     return nullptr;
3239   }
3240   if (isHsaAbi(getSTI())) {
3241     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3242       return nullptr;
3243   } else
3244     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3245   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3246 }
3247 
3248 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3249                                       bool HasSP3AbsModifier, bool HasLit,
3250                                       bool HasLit64) {
3251   // TODO: add syntactic sugar for 1/(2*PI)
3252 
3253   if (isRegister() || isModifier())
3254     return ParseStatus::NoMatch;
3255 
3256   if (!HasLit && !HasLit64) {
3257     HasLit64 = trySkipId("lit64");
3258     HasLit = !HasLit64 && trySkipId("lit");
3259     if (HasLit || HasLit64) {
3260       if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3261         return ParseStatus::Failure;
3262       ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit, HasLit64);
3263       if (S.isSuccess() &&
3264           !skipToken(AsmToken::RParen, "expected closing parentheses"))
3265         return ParseStatus::Failure;
3266       return S;
3267     }
3268   }
3269 
3270   const auto& Tok = getToken();
3271   const auto& NextTok = peekToken();
3272   bool IsReal = Tok.is(AsmToken::Real);
3273   SMLoc S = getLoc();
3274   bool Negate = false;
3275 
3276   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3277     lex();
3278     IsReal = true;
3279     Negate = true;
3280   }
3281 
3282   AMDGPUOperand::Modifiers Mods;
3283   Mods.Lit = HasLit;
3284   Mods.Lit64 = HasLit64;
3285 
3286   if (IsReal) {
3287     // Floating-point expressions are not supported;
3288     // only floating-point literals with an optional
3289     // sign are allowed.
3290 
3291     StringRef Num = getTokenStr();
3292     lex();
3293 
3294     APFloat RealVal(APFloat::IEEEdouble());
3295     auto roundMode = APFloat::rmNearestTiesToEven;
3296     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3297       return ParseStatus::Failure;
3298     if (Negate)
3299       RealVal.changeSign();
3300 
3301     Operands.push_back(
3302       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3303                                AMDGPUOperand::ImmTyNone, true));
3304     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3305     Op.setModifiers(Mods);
3306 
3307     return ParseStatus::Success;
3308 
3309   } else {
3310     int64_t IntVal;
3311     const MCExpr *Expr;
3312     SMLoc S = getLoc();
3313 
3314     if (HasSP3AbsModifier) {
3315       // This is a workaround for handling expressions
3316       // as arguments of SP3 'abs' modifier, for example:
3317       //     |1.0|
3318       //     |-1|
3319       //     |1+x|
3320       // This syntax is not compatible with syntax of standard
3321       // MC expressions (due to the trailing '|').
3322       SMLoc EndLoc;
3323       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3324         return ParseStatus::Failure;
3325     } else {
3326       if (Parser.parseExpression(Expr))
3327         return ParseStatus::Failure;
3328     }
3329 
3330     if (Expr->evaluateAsAbsolute(IntVal)) {
3331       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3332       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3333       Op.setModifiers(Mods);
3334     } else {
3335       if (HasLit || HasLit64)
3336         return ParseStatus::NoMatch;
3337       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3338     }
3339 
3340     return ParseStatus::Success;
3341   }
3342 
3343   return ParseStatus::NoMatch;
3344 }
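
// Illustrative immediate forms (assumed examples, not from the original
// source) handled by parseImm above:
//
//   v_add_f32 v0, -2.0, v1        // fp literal with an optional sign
//   v_add_f32 v0, lit(1.0), v1    // explicitly request a 32-bit literal
//   v_add_f32 v0, x+1, v1         // integer expression, if absolute
//
// Note that fp *expressions* such as 2.0+x are not supported, and
// lit/lit64 require parentheses around the operand.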
3345 
3346 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3347   if (!isRegister())
3348     return ParseStatus::NoMatch;
3349 
3350   if (auto R = parseRegister()) {
3351     assert(R->isReg());
3352     Operands.push_back(std::move(R));
3353     return ParseStatus::Success;
3354   }
3355   return ParseStatus::Failure;
3356 }
3357 
3358 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3359                                            bool HasSP3AbsMod, bool HasLit,
3360                                            bool HasLit64) {
3361   ParseStatus Res = parseReg(Operands);
3362   if (!Res.isNoMatch())
3363     return Res;
3364   if (isModifier())
3365     return ParseStatus::NoMatch;
3366   return parseImm(Operands, HasSP3AbsMod, HasLit, HasLit64);
3367 }
3368 
3369 bool
3370 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3371   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3372     const auto &str = Token.getString();
3373     return str == "abs" || str == "neg" || str == "sext";
3374   }
3375   return false;
3376 }
3377 
3378 bool
3379 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3380   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3381 }
3382 
3383 bool
3384 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3385   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3386 }
3387 
3388 bool
3389 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3390   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3391 }
3392 
3393 // Check if this is an operand modifier or an opcode modifier
3394 // which may look like an expression but is not. We should
3395 // avoid parsing these modifiers as expressions. Currently
3396 // recognized sequences are:
3397 //   |...|
3398 //   abs(...)
3399 //   neg(...)
3400 //   sext(...)
3401 //   -reg
3402 //   -|...|
3403 //   -abs(...)
3404 //   name:...
3405 //
3406 bool
3407 AMDGPUAsmParser::isModifier() {
3408 
3409   AsmToken Tok = getToken();
3410   AsmToken NextToken[2];
3411   peekTokens(NextToken);
3412 
3413   return isOperandModifier(Tok, NextToken[0]) ||
3414          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3415          isOpcodeModifierWithVal(Tok, NextToken[0]);
3416 }
3417 
3418 // Check if the current token is an SP3 'neg' modifier.
3419 // Currently this modifier is allowed in the following context:
3420 //
3421 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3422 // 2. Before an 'abs' modifier: -abs(...)
3423 // 3. Before an SP3 'abs' modifier: -|...|
3424 //
3425 // In all other cases "-" is handled as a part
3426 // of an expression that follows the sign.
3427 //
3428 // Note: When "-" is followed by an integer literal,
3429 // it is interpreted as integer negation rather than
3430 // a floating-point NEG modifier applied to the literal.
3431 // Besides being counter-intuitive, such use of the
3432 // floating-point NEG modifier would result in different
3433 // meanings of integer literals used with VOP1/2/C and
3434 // VOP3, for example:
3435 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3436 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3437 // Negative fp literals with a preceding "-" are
3438 // handled likewise for uniformity.
3439 //
3440 bool
3441 AMDGPUAsmParser::parseSP3NegModifier() {
3442 
3443   AsmToken NextToken[2];
3444   peekTokens(NextToken);
3445 
3446   if (isToken(AsmToken::Minus) &&
3447       (isRegister(NextToken[0], NextToken[1]) ||
3448        NextToken[0].is(AsmToken::Pipe) ||
3449        isId(NextToken[0], "abs"))) {
3450     lex();
3451     return true;
3452   }
3453 
3454   return false;
3455 }
3456 
3457 ParseStatus
3458 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3459                                               bool AllowImm) {
3460   bool Neg, SP3Neg;
3461   bool Abs, SP3Abs;
3462   bool Lit64, Lit;
3463   SMLoc Loc;
3464 
3465   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3466   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3467     return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3468 
3469   SP3Neg = parseSP3NegModifier();
3470 
3471   Loc = getLoc();
3472   Neg = trySkipId("neg");
3473   if (Neg && SP3Neg)
3474     return Error(Loc, "expected register or immediate");
3475   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3476     return ParseStatus::Failure;
3477 
3478   Abs = trySkipId("abs");
3479   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3480     return ParseStatus::Failure;
3481 
3482   Lit64 = trySkipId("lit64");
3483   if (Lit64) {
3484     if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3485       return ParseStatus::Failure;
3486     if (!has64BitLiterals())
3487       return Error(Loc, "lit64 is not supported on this GPU");
3488   }
3489 
3490   Lit = !Lit64 && trySkipId("lit");
3491   if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3492     return ParseStatus::Failure;
3493 
3494   Loc = getLoc();
3495   SP3Abs = trySkipToken(AsmToken::Pipe);
3496   if (Abs && SP3Abs)
3497     return Error(Loc, "expected register or immediate");
3498 
3499   ParseStatus Res;
3500   if (AllowImm) {
3501     Res = parseRegOrImm(Operands, SP3Abs, Lit, Lit64);
3502   } else {
3503     Res = parseReg(Operands);
3504   }
3505   if (!Res.isSuccess())
3506     return (SP3Neg || Neg || SP3Abs || Abs || Lit || Lit64)
3507                ? ParseStatus::Failure
3508                : Res;
3509 
3510   if ((Lit || Lit64) && !Operands.back()->isImm())
3511     Error(Loc, "expected immediate with lit modifier");
3512 
3513   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3514     return ParseStatus::Failure;
3515   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3516     return ParseStatus::Failure;
3517   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3518     return ParseStatus::Failure;
3519   if ((Lit || Lit64) &&
3520       !skipToken(AsmToken::RParen, "expected closing parentheses"))
3521     return ParseStatus::Failure;
3522 
3523   AMDGPUOperand::Modifiers Mods;
3524   Mods.Abs = Abs || SP3Abs;
3525   Mods.Neg = Neg || SP3Neg;
3526   Mods.Lit = Lit;
3527   Mods.Lit64 = Lit64;
3528 
3529   if (Mods.hasFPModifiers() || Lit || Lit64) {
3530     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3531     if (Op.isExpr())
3532       return Error(Op.getStartLoc(), "expected an absolute expression");
3533     Op.setModifiers(Mods);
3534   }
3535   return ParseStatus::Success;
3536 }
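
// Illustrative operand spellings (assumed examples, not from the original
// source) covered by the FP input modifier parsing above; named and SP3
// forms of the same modifier are mutually exclusive on one operand:
//
//   -v0         // SP3 neg            neg(v0)    // named neg
//   |v0|        // SP3 abs            abs(v0)    // named abs
//   -|v0|       // neg of abs         -abs(v0)   // mixed form
//   --1         // rejected: "invalid syntax, expected 'neg' modifier"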
3537 
3538 ParseStatus
3539 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3540                                                bool AllowImm) {
3541   bool Sext = trySkipId("sext");
3542   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3543     return ParseStatus::Failure;
3544 
3545   ParseStatus Res;
3546   if (AllowImm) {
3547     Res = parseRegOrImm(Operands);
3548   } else {
3549     Res = parseReg(Operands);
3550   }
3551   if (!Res.isSuccess())
3552     return Sext ? ParseStatus::Failure : Res;
3553 
3554   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3555     return ParseStatus::Failure;
3556 
3557   AMDGPUOperand::Modifiers Mods;
3558   Mods.Sext = Sext;
3559 
3560   if (Mods.hasIntModifiers()) {
3561     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3562     if (Op.isExpr())
3563       return Error(Op.getStartLoc(), "expected an absolute expression");
3564     Op.setModifiers(Mods);
3565   }
3566 
3567   return ParseStatus::Success;
3568 }
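
// Illustrative example (an assumption, not from the original source): the
// only named integer input modifier is sign extension, e.g.
//
//   sext(v0)    // register operand, sign-extended
//   sext(0x7f)  // immediate operand, where the encoding permits it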
3569 
3570 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3571   return parseRegOrImmWithFPInputMods(Operands, false);
3572 }
3573 
3574 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3575   return parseRegOrImmWithIntInputMods(Operands, false);
3576 }
3577 
3578 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3579   auto Loc = getLoc();
3580   if (trySkipId("off")) {
3581     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3582                                                 AMDGPUOperand::ImmTyOff, false));
3583     return ParseStatus::Success;
3584   }
3585 
3586   if (!isRegister())
3587     return ParseStatus::NoMatch;
3588 
3589   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3590   if (Reg) {
3591     Operands.push_back(std::move(Reg));
3592     return ParseStatus::Success;
3593   }
3594 
3595   return ParseStatus::Failure;
3596 }
3597 
3598 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3599   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3600 
3601   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3602       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3603       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3604       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3605     return Match_InvalidOperand;
3606 
3607   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3608       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3609     // v_mac_f32/16 allow only dst_sel == DWORD.
3610     auto OpNum =
3611         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3612     const auto &Op = Inst.getOperand(OpNum);
3613     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3614       return Match_InvalidOperand;
3615     }
3616   }
3617 
3618   // Asm can first try to match VOPD or VOPD3. By failing early here with
3619   // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3620   // Checking later during validateInstruction does not give a chance to retry
3621   // parsing as a different encoding.
3622   if (tryAnotherVOPDEncoding(Inst))
3623     return Match_InvalidOperand;
3624 
3625   return Match_Success;
3626 }
3627 
3628 static ArrayRef<unsigned> getAllVariants() {
3629   static const unsigned Variants[] = {
3630     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3631     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3632     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3633   };
3634 
3635   return ArrayRef(Variants);
3636 }
3637 
3638 // Which asm variants we should check.
3639 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3640   if (isForcedDPP() && isForcedVOP3()) {
3641     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3642     return ArrayRef(Variants);
3643   }
3644   if (getForcedEncodingSize() == 32) {
3645     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3646     return ArrayRef(Variants);
3647   }
3648 
3649   if (isForcedVOP3()) {
3650     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3651     return ArrayRef(Variants);
3652   }
3653 
3654   if (isForcedSDWA()) {
3655     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3656                                         AMDGPUAsmVariants::SDWA9};
3657     return ArrayRef(Variants);
3658   }
3659 
3660   if (isForcedDPP()) {
3661     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3662     return ArrayRef(Variants);
3663   }
3664 
3665   return getAllVariants();
3666 }
3667 
3668 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3669   if (isForcedDPP() && isForcedVOP3())
3670     return "e64_dpp";
3671 
3672   if (getForcedEncodingSize() == 32)
3673     return "e32";
3674 
3675   if (isForcedVOP3())
3676     return "e64";
3677 
3678   if (isForcedSDWA())
3679     return "sdwa";
3680 
3681   if (isForcedDPP())
3682     return "dpp";
3683 
3684   return "";
3685 }
3686 
3687 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3688   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3689   for (MCPhysReg Reg : Desc.implicit_uses()) {
3690     switch (Reg) {
3691     case AMDGPU::FLAT_SCR:
3692     case AMDGPU::VCC:
3693     case AMDGPU::VCC_LO:
3694     case AMDGPU::VCC_HI:
3695     case AMDGPU::M0:
3696       return Reg;
3697     default:
3698       break;
3699     }
3700   }
3701   return AMDGPU::NoRegister;
3702 }
3703 
3704 // NB: This code is correct only when used to check constant
3705 // bus limitations because GFX7 supports no f16 inline constants.
3706 // Note that there are no cases when a GFX7 opcode violates
3707 // constant bus limitations due to the use of an f16 constant.
3708 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3709                                        unsigned OpIdx) const {
3710   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3711 
3712   if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3713       AMDGPU::isKImmOperand(Desc, OpIdx)) {
3714     return false;
3715   }
3716 
3717   const MCOperand &MO = Inst.getOperand(OpIdx);
3718 
3719   int64_t Val = MO.getImm();
3720   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3721 
3722   switch (OpSize) { // expected operand size
3723   case 8:
3724     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3725   case 4:
3726     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3727   case 2: {
3728     const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3729     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3730         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16)
3731       return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3732 
3733     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3734         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3735       return AMDGPU::isInlinableLiteralV2I16(Val);
3736 
3737     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3738         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3739       return AMDGPU::isInlinableLiteralV2F16(Val);
3740 
3741     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
3742         OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
3743       return AMDGPU::isInlinableLiteralV2BF16(Val);
3744 
3745     if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3746         OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16)
3747       return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3748 
3749     if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
3750         OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16)
3751       return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3752 
3753     llvm_unreachable("invalid operand type");
3754   }
3755   default:
3756     llvm_unreachable("invalid operand size");
3757   }
3758 }
3759 
3760 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3761   if (!isGFX10Plus())
3762     return 1;
3763 
3764   switch (Opcode) {
3765   // 64-bit shift instructions can use only one scalar value input
3766   case AMDGPU::V_LSHLREV_B64_e64:
3767   case AMDGPU::V_LSHLREV_B64_gfx10:
3768   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3769   case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3770   case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3771   case AMDGPU::V_LSHRREV_B64_e64:
3772   case AMDGPU::V_LSHRREV_B64_gfx10:
3773   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3774   case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3775   case AMDGPU::V_ASHRREV_I64_e64:
3776   case AMDGPU::V_ASHRREV_I64_gfx10:
3777   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3778   case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3779   case AMDGPU::V_LSHL_B64_e64:
3780   case AMDGPU::V_LSHR_B64_e64:
3781   case AMDGPU::V_ASHR_I64_e64:
3782     return 1;
3783   default:
3784     return 2;
3785   }
3786 }
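
// Illustrative consequence (assumed examples, not from the original source):
// on GFX10+ most VOP3 opcodes may read two scalar values, but the 64-bit
// shifts listed above remain limited to one:
//
//   v_add3_u32 v0, s1, s2, v3           // OK on GFX10+: two SGPRs
//   v_lshlrev_b64 v[0:1], s1, s[2:3]    // rejected: one scalar value only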
3787 
3788 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3789 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3790 
3791 // Get regular operand indices in the same order as specified
3792 // in the instruction (but append mandatory literals to the end).
3793 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3794                                            bool AddMandatoryLiterals = false) {
3795 
3796   int16_t ImmIdx =
3797       AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3798 
3799   if (isVOPD(Opcode)) {
3800     int16_t ImmXIdx =
3801         AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3802 
3803     return {getNamedOperandIdx(Opcode, OpName::src0X),
3804             getNamedOperandIdx(Opcode, OpName::vsrc1X),
3805             getNamedOperandIdx(Opcode, OpName::vsrc2X),
3806             getNamedOperandIdx(Opcode, OpName::src0Y),
3807             getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3808             getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3809             ImmXIdx,
3810             ImmIdx};
3811   }
3812 
3813   return {getNamedOperandIdx(Opcode, OpName::src0),
3814           getNamedOperandIdx(Opcode, OpName::src1),
3815           getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3816 }
3817 
3818 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3819   const MCOperand &MO = Inst.getOperand(OpIdx);
3820   if (MO.isImm())
3821     return !isInlineConstant(Inst, OpIdx);
3822   if (MO.isReg()) {
3823     auto Reg = MO.getReg();
3824     if (!Reg)
3825       return false;
3826     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3827     auto PReg = mc2PseudoReg(Reg);
3828     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3829   }
3830   return true;
3831 }
3832 
3833 // Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3834 // Writelane is special in that it can use SGPR and M0 (which would normally
3835 // count as using the constant bus twice - but in this case it is allowed since
3836 // the lane selector doesn't count as a use of the constant bus). However, it is
3837 // still required to abide by the 1 SGPR rule.
3838 static bool checkWriteLane(const MCInst &Inst) {
3839   const unsigned Opcode = Inst.getOpcode();
3840   if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3841     return false;
3842   const MCOperand &LaneSelOp = Inst.getOperand(2);
3843   if (!LaneSelOp.isReg())
3844     return false;
3845   auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3846   return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3847 }
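
// Illustrative case (an assumed example, not from the original source) of
// the exception handled above:
//
//   v_writelane_b32 v0, s1, m0   // SGPR data + M0 lane select is accepted;
//                                // the lane selector is not a constant bus use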
3848 
3849 bool AMDGPUAsmParser::validateConstantBusLimitations(
3850     const MCInst &Inst, const OperandVector &Operands) {
3851   const unsigned Opcode = Inst.getOpcode();
3852   const MCInstrDesc &Desc = MII.get(Opcode);
3853   MCRegister LastSGPR;
3854   unsigned ConstantBusUseCount = 0;
3855   unsigned NumLiterals = 0;
3856   unsigned LiteralSize;
3857 
3858   if (!(Desc.TSFlags &
3859         (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3860          SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3861       !isVOPD(Opcode))
3862     return true;
3863 
3864   if (checkWriteLane(Inst))
3865     return true;
3866 
3867   // Check special imm operands (used by madmk, etc)
3868   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3869     ++NumLiterals;
3870     LiteralSize = 4;
3871   }
3872 
3873   SmallDenseSet<unsigned> SGPRsUsed;
3874   unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3875   if (SGPRUsed != AMDGPU::NoRegister) {
3876     SGPRsUsed.insert(SGPRUsed);
3877     ++ConstantBusUseCount;
3878   }
3879 
3880   OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3881 
3882   for (int OpIdx : OpIndices) {
3883     if (OpIdx == -1)
3884       continue;
3885 
3886     const MCOperand &MO = Inst.getOperand(OpIdx);
3887     if (usesConstantBus(Inst, OpIdx)) {
3888       if (MO.isReg()) {
3889         LastSGPR = mc2PseudoReg(MO.getReg());
3890         // Pairs of registers with partial intersections like these
3891         //   s0, s[0:1]
3892         //   flat_scratch_lo, flat_scratch
3893         //   flat_scratch_lo, flat_scratch_hi
3894         // are theoretically valid but they are disabled anyway.
3895         // Note that this code mimics SIInstrInfo::verifyInstruction
3896         if (SGPRsUsed.insert(LastSGPR).second) {
3897           ++ConstantBusUseCount;
3898         }
3899       } else { // Expression or a literal
3900 
3901         if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3902           continue; // special operand like VINTERP attr_chan
3903 
3904         // An instruction may use only one literal.
3905         // This has been validated in a previous step;
3906         // see validateVOPLiteral.
3907         // The literal may be used as more than one operand.
3908         // If all of these operands are of the same size,
3909         // the literal counts as one scalar value;
3910         // otherwise it counts as 2 scalar values.
3911         // See "GFX10 Shader Programming", section 3.6.2.3.
3912 
3913         unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3914         if (Size < 4)
3915           Size = 4;
3916 
3917         if (NumLiterals == 0) {
3918           NumLiterals = 1;
3919           LiteralSize = Size;
3920         } else if (LiteralSize != Size) {
3921           NumLiterals = 2;
3922         }
3923       }
3924     }
3925   }
3926   ConstantBusUseCount += NumLiterals;
3927 
3928   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3929     return true;
3930 
3931   SMLoc LitLoc = getLitLoc(Operands);
3932   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3933   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3934   Error(Loc, "invalid operand (violates constant bus restrictions)");
3935   return false;
3936 }
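
// Illustrative violations (assumed examples, not from the original source):
//
//   v_add_f32_e64 v0, s1, s2    // pre-GFX10: two SGPRs exceed the limit of 1
//   v_fma_f32 v0, s1, s2, s3    // GFX10+: three distinct SGPRs exceed 2
//
// Repeated reads of the same SGPR count once (tracked in SGPRsUsed above),
// and a literal reused at a single size counts as one scalar value.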
3937 
3938 std::optional<unsigned>
3939 AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3940 
3941   const unsigned Opcode = Inst.getOpcode();
3942   if (!isVOPD(Opcode))
3943     return {};
3944 
3945   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3946 
3947   auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3948     const MCOperand &Opr = Inst.getOperand(OperandIdx);
3949     return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3950                ? Opr.getReg()
3951                : MCRegister();
3952   };
3953 
3954   // On GFX12+, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2
3955   // source-cache.
3956   bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3957                  Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3958                  Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3959   bool AllowSameVGPR = isGFX1250();
3960 
3961   if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3962     for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3963       int I = getNamedOperandIdx(Opcode, OpName);
3964       const MCOperand &Op = Inst.getOperand(I);
3965       if (!Op.isImm())
3966         continue;
3967       int64_t Imm = Op.getImm();
3968       if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3969           !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3970         return (unsigned)I;
3971     }
3972 
3973     for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3974                         OpName::vsrc2Y, OpName::imm}) {
3975       int I = getNamedOperandIdx(Opcode, OpName);
3976       if (I == -1)
3977         continue;
3978       const MCOperand &Op = Inst.getOperand(I);
3979       if (Op.isImm())
3980         return (unsigned)I;
3981     }
3982   }
3983 
3984   const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3985   auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
3986       getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3987 
3988   return InvalidCompOprIdx;
3989 }
3990 
3991 bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
3992                                    const OperandVector &Operands) {
3993 
3994   unsigned Opcode = Inst.getOpcode();
3995   bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
3996 
3997   if (AsVOPD3) {
3998     for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
3999       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4000       if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4001           (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4002         Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4003     }
4004   }
4005 
4006   auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4007   if (!InvalidCompOprIdx.has_value())
4008     return true;
4009 
4010   auto CompOprIdx = *InvalidCompOprIdx;
4011   const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4012   auto ParsedIdx =
4013       std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4014                InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4015   assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4016 
4017   auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4018   if (CompOprIdx == VOPD::Component::DST) {
4019     if (AsVOPD3)
4020       Error(Loc, "dst registers must be distinct");
4021     else
4022       Error(Loc, "one dst register must be even and the other odd");
4023   } else {
4024     auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4025     Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4026                    " operands must use different VGPR banks");
4027   }
4028 
4029   return false;
4030 }
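
// Illustrative diagnostics (assumed examples, not from the original source),
// taking a VGPR bank to be the register index modulo 4:
//
//   v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v2, v3
//     -> "one dst register must be even and the other odd" (v0 and v2)
//   v_dual_add_f32 v1, v4, v5 :: v_dual_add_f32 v0, v7, v10
//     -> OK: dsts are odd/even and the srcX/srcY pairs use different banks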
4031 
4032 // \returns true if \p Inst does not satisfy VOPD constraints, but can
4033 // potentially be used as VOPD3 with the same operands.
4034 bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4035   // First check if it fits VOPD
4036   auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4037   if (!InvalidCompOprIdx.has_value())
4038     return false;
4039 
4040   // Then if it fits VOPD3
4041   InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4042   if (InvalidCompOprIdx.has_value()) {
4043     // If the failed operand is the dst, it is better to report the error
4044     // against the VOPD3 instruction, as it has more capabilities and the
4045     // error message will be more informative. If the dst is not legal for
4046     // VOPD3, then it is not legal for VOPD either.
4047     if (*InvalidCompOprIdx == VOPD::Component::DST)
4048       return true;
4049 
4050     // Otherwise prefer VOPD, as we may find ourselves in an awkward
4051     // situation with a conflict in the tied implicit src2 of fmac and no
4052     // asm operand to point to.
4053     return false;
4054   }
4055   return true;
4056 }
4057 
4058 // \returns true if a VOPD3 instruction can also be represented as a shorter
4059 // VOPD encoding.
4060 bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4061   const unsigned Opcode = Inst.getOpcode();
4062   const auto &II = getVOPDInstInfo(Opcode, &MII);
4063   unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4064   if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4065       !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4066     return false;
4067 
4068   // This is an awkward exception: the VOPD3 variant of V_DUAL_CNDMASK_B32
4069   // has an explicit src2 even if it is vcc_lo. If it was parsed as VOPD3,
4070   // it cannot be parsed as VOPD, which does not accept src2.
4071   if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4072       II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4073     return false;
4074 
4075   // If any modifiers are set this cannot be VOPD.
4076   for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4077                       OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4078                       OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4079     int I = getNamedOperandIdx(Opcode, OpName);
4080     if (I == -1)
4081       continue;
4082     if (Inst.getOperand(I).getImm())
4083       return false;
4084   }
4085 
4086   return !tryVOPD3(Inst);
4087 }
4088 
4089 // VOPD3 has more relaxed register constraints than VOPD. We prefer the
4090 // shorter VOPD form but switch to VOPD3 otherwise.
4091 bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4092   const unsigned Opcode = Inst.getOpcode();
4093   if (!isGFX1250() || !isVOPD(Opcode))
4094     return false;
4095 
4096   if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4097     return tryVOPD(Inst);
4098   return tryVOPD3(Inst);
4099 }
4100 
4101 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4102 
4103   const unsigned Opc = Inst.getOpcode();
4104   const MCInstrDesc &Desc = MII.get(Opc);
4105 
4106   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4107     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4108     assert(ClampIdx != -1);
4109     return Inst.getOperand(ClampIdx).getImm() == 0;
4110   }
4111 
4112   return true;
4113 }
4114 
4115 constexpr uint64_t MIMGFlags =
4116     SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4117 
4118 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
4119                                            const SMLoc &IDLoc) {
4120 
4121   const unsigned Opc = Inst.getOpcode();
4122   const MCInstrDesc &Desc = MII.get(Opc);
4123 
4124   if ((Desc.TSFlags & MIMGFlags) == 0)
4125     return true;
4126 
4127   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4128   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4129   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4130 
4131   if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4132     return true;
4133 
4134   if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4135     return true;
4136 
4137   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
4138   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4139   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4140   if (DMask == 0)
4141     DMask = 1;
4142 
4143   bool IsPackedD16 = false;
4144   unsigned DataSize =
4145       (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4146   if (hasPackedD16()) {
4147     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4148     IsPackedD16 = D16Idx >= 0;
4149     if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4150       DataSize = (DataSize + 1) / 2;
4151   }
4152 
4153   if ((VDataSize / 4) == DataSize + TFESize)
4154     return true;
4155 
4156   StringRef Modifiers;
4157   if (isGFX90A())
4158     Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4159   else
4160     Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4161 
4162   Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4163   return false;
4164 }
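
// Illustrative sizing (assumed examples, not from the original source):
// DataSize = popcount(dmask), +1 VGPR when tfe is set, halved (rounded up)
// for packed d16:
//
//   dmask:0xf            -> vdata must be 4 VGPRs, e.g. v[0:3]
//   dmask:0xf tfe        -> vdata must be 5 VGPRs, e.g. v[0:4]
//   dmask:0xf d16        -> vdata must be 2 VGPRs on packed-d16 targets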
4165 
4166 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
4167                                            const SMLoc &IDLoc) {
4168   const unsigned Opc = Inst.getOpcode();
4169   const MCInstrDesc &Desc = MII.get(Opc);
4170 
4171   if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4172     return true;
4173 
4174   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4175 
4176   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4177       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4178   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4179   AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4180                                   ? AMDGPU::OpName::srsrc
4181                                   : AMDGPU::OpName::rsrc;
4182   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4183   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4184   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4185 
4186   assert(VAddr0Idx != -1);
4187   assert(SrsrcIdx != -1);
4188   assert(SrsrcIdx > VAddr0Idx);
4189 
4190   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4191   if (BaseOpcode->BVH) {
4192     if (IsA16 == BaseOpcode->A16)
4193       return true;
4194     Error(IDLoc, "image address size does not match a16");
4195     return false;
4196   }
4197 
4198   unsigned Dim = Inst.getOperand(DimIdx).getImm();
4199   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4200   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4201   unsigned ActualAddrSize =
4202       IsNSA ? SrsrcIdx - VAddr0Idx
4203             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
4204 
4205   unsigned ExpectedAddrSize =
4206       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4207 
4208   if (IsNSA) {
4209     if (hasPartialNSAEncoding() &&
4210         ExpectedAddrSize >
4211             getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
4212       int VAddrLastIdx = SrsrcIdx - 1;
4213       unsigned VAddrLastSize =
4214           AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
4215 
4216       ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4217     }
4218   } else {
4219     if (ExpectedAddrSize > 12)
4220       ExpectedAddrSize = 16;
4221 
4222     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4223     // This provides backward compatibility for assembly created
4224     // before 160b/192b/224b types were directly supported.
4225     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4226       return true;
4227   }
4228 
4229   if (ActualAddrSize == ExpectedAddrSize)
4230     return true;
4231 
4232   Error(IDLoc, "image address size does not match dim and a16");
4233   return false;
4234 }
4235 
4236 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4237 
4238   const unsigned Opc = Inst.getOpcode();
4239   const MCInstrDesc &Desc = MII.get(Opc);
4240 
4241   if ((Desc.TSFlags & MIMGFlags) == 0)
4242     return true;
4243   if (!Desc.mayLoad() || !Desc.mayStore())
4244     return true; // Not atomic
4245 
4246   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4247   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4248 
4249   // This is an incomplete check because image_atomic_cmpswap
4250   // may only use 0x3 and 0xf while other atomic operations
4251   // may use 0x1 and 0x3. However, these limitations are
4252   // verified when we check that dmask matches dst size.
4253   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4254 }
4255 
4256 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4257 
4258   const unsigned Opc = Inst.getOpcode();
4259   const MCInstrDesc &Desc = MII.get(Opc);
4260 
4261   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4262     return true;
4263 
4264   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4265   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4266 
4267   // GATHER4 instructions use dmask in a different fashion compared to
4268   // other MIMG instructions. The only useful DMASK values are
4269   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4270   // (red,red,red,red) etc.) The ISA document doesn't mention
4271   // this.
4272   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4273 }
4274 
4275 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4276                                       const OperandVector &Operands) {
4277   if (!isGFX10Plus())
4278     return true;
4279 
4280   const unsigned Opc = Inst.getOpcode();
4281   const MCInstrDesc &Desc = MII.get(Opc);
4282 
4283   if ((Desc.TSFlags & MIMGFlags) == 0)
4284     return true;
4285 
4286   // image_bvh_intersect_ray instructions do not have dim
4287   if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4288     return true;
4289 
4290   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4291     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4292     if (Op.isDim())
4293       return true;
4294   }
4295   return false;
4296 }
4297 
4298 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4299   const unsigned Opc = Inst.getOpcode();
4300   const MCInstrDesc &Desc = MII.get(Opc);
4301 
4302   if ((Desc.TSFlags & MIMGFlags) == 0)
4303     return true;
4304 
4305   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4306   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4307       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4308 
4309   if (!BaseOpcode->MSAA)
4310     return true;
4311 
4312   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4313   assert(DimIdx != -1);
4314 
4315   unsigned Dim = Inst.getOperand(DimIdx).getImm();
4316   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4317 
4318   return DimInfo->MSAA;
4319 }
4320 
4321 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
4323   switch (Opcode) {
4324   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4325   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4326   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4327     return true;
4328   default:
4329     return false;
4330   }
4331 }
4332 
4333 // movrels* opcodes should only allow VGPRs as src0.
4334 // This is specified in the .td description for vop1/vop3,
4335 // but sdwa is handled differently. See isSDWAOperand.
4336 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4337                                       const OperandVector &Operands) {
4338 
4339   const unsigned Opc = Inst.getOpcode();
4340   const MCInstrDesc &Desc = MII.get(Opc);
4341 
4342   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4343     return true;
4344 
4345   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4346   assert(Src0Idx != -1);
4347 
4348   SMLoc ErrLoc;
4349   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4350   if (Src0.isReg()) {
4351     auto Reg = mc2PseudoReg(Src0.getReg());
4352     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4353     if (!isSGPR(Reg, TRI))
4354       return true;
4355     ErrLoc = getRegLoc(Reg, Operands);
4356   } else {
4357     ErrLoc = getConstLoc(Operands);
4358   }
4359 
4360   Error(ErrLoc, "source operand must be a VGPR");
4361   return false;
4362 }
4363 
4364 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4365                                           const OperandVector &Operands) {
4366 
4367   const unsigned Opc = Inst.getOpcode();
4368 
4369   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4370     return true;
4371 
4372   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4373   assert(Src0Idx != -1);
4374 
4375   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4376   if (!Src0.isReg())
4377     return true;
4378 
4379   auto Reg = mc2PseudoReg(Src0.getReg());
4380   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4381   if (!isGFX90A() && isSGPR(Reg, TRI)) {
4382     Error(getRegLoc(Reg, Operands),
4383           "source operand must be either a VGPR or an inline constant");
4384     return false;
4385   }
4386 
4387   return true;
4388 }
4389 
4390 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4391                                       const OperandVector &Operands) {
4392   unsigned Opcode = Inst.getOpcode();
4393   const MCInstrDesc &Desc = MII.get(Opcode);
4394 
4395   if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4396       !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4397     return true;
4398 
4399   const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4400   if (Src2Idx == -1)
4401     return true;
4402 
4403   if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4404     Error(getConstLoc(Operands),
4405           "inline constants are not allowed for this operand");
4406     return false;
4407   }
4408 
4409   return true;
4410 }
4411 
4412 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4413                                    const OperandVector &Operands) {
4414   const unsigned Opc = Inst.getOpcode();
4415   const MCInstrDesc &Desc = MII.get(Opc);
4416 
4417   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4418     return true;
4419 
4420   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4421   if (BlgpIdx != -1) {
4422     if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4423       int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4424 
4425       unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4426       unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4427 
4428       // Validate that the correct register size was used for the
4429       // floating-point format operands.
4430 
4431       bool Success = true;
4432       if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4433         int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4434         Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()),
4435                         Operands),
4436               "wrong register tuple size for cbsz value " + Twine(CBSZ));
4437         Success = false;
4438       }
4439 
4440       if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4441         int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4442         Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()),
4443                         Operands),
4444               "wrong register tuple size for blgp value " + Twine(BLGP));
4445         Success = false;
4446       }
4447 
4448       return Success;
4449     }
4450   }
4451 
4452   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4453   if (Src2Idx == -1)
4454     return true;
4455 
4456   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4457   if (!Src2.isReg())
4458     return true;
4459 
4460   MCRegister Src2Reg = Src2.getReg();
4461   MCRegister DstReg = Inst.getOperand(0).getReg();
4462   if (Src2Reg == DstReg)
4463     return true;
4464 
4465   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4466   if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4467     return true;
4468 
4469   if (TRI->regsOverlap(Src2Reg, DstReg)) {
4470     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4471           "source 2 operand must not partially overlap with dst");
4472     return false;
4473   }
4474 
4475   return true;
4476 }
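
// Illustrative overlap case (an assumed example, not from the original
// source) for the src2/dst check above on wide (>128-bit) accumulators:
//
//   v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[0:15]    // OK: exact match
//   v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[14:29]   // rejected:
//       "source 2 operand must not partially overlap with dst"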
4477 
4478 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4479   switch (Inst.getOpcode()) {
4480   default:
4481     return true;
4482   case V_DIV_SCALE_F32_gfx6_gfx7:
4483   case V_DIV_SCALE_F32_vi:
4484   case V_DIV_SCALE_F32_gfx10:
4485   case V_DIV_SCALE_F64_gfx6_gfx7:
4486   case V_DIV_SCALE_F64_vi:
4487   case V_DIV_SCALE_F64_gfx10:
4488     break;
4489   }
4490 
4491   // TODO: Check that src0 = src1 or src2.
4492 
4493   for (auto Name : {AMDGPU::OpName::src0_modifiers,
4494                     AMDGPU::OpName::src1_modifiers,
4495                     AMDGPU::OpName::src2_modifiers}) {
4496     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4497             .getImm() &
4498         SISrcMods::ABS) {
4499       return false;
4500     }
4501   }
4502 
4503   return true;
4504 }
4505 
4506 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4507 
4508   const unsigned Opc = Inst.getOpcode();
4509   const MCInstrDesc &Desc = MII.get(Opc);
4510 
4511   if ((Desc.TSFlags & MIMGFlags) == 0)
4512     return true;
4513 
4514   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4515   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4516     if (isCI() || isSI())
4517       return false;
4518   }
4519 
4520   return true;
4521 }
4522 
4523 bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4524   const unsigned Opc = Inst.getOpcode();
4525   const MCInstrDesc &Desc = MII.get(Opc);
4526 
4527   if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4528     return true;
4529 
4530   int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4531 
4532   return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4533 }
4534 
4535 static bool IsRevOpcode(const unsigned Opcode) {
4537   switch (Opcode) {
4538   case AMDGPU::V_SUBREV_F32_e32:
4539   case AMDGPU::V_SUBREV_F32_e64:
4540   case AMDGPU::V_SUBREV_F32_e32_gfx10:
4541   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4542   case AMDGPU::V_SUBREV_F32_e32_vi:
4543   case AMDGPU::V_SUBREV_F32_e64_gfx10:
4544   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4545   case AMDGPU::V_SUBREV_F32_e64_vi:
4546 
4547   case AMDGPU::V_SUBREV_CO_U32_e32:
4548   case AMDGPU::V_SUBREV_CO_U32_e64:
4549   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4550   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4551 
4552   case AMDGPU::V_SUBBREV_U32_e32:
4553   case AMDGPU::V_SUBBREV_U32_e64:
4554   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4555   case AMDGPU::V_SUBBREV_U32_e32_vi:
4556   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4557   case AMDGPU::V_SUBBREV_U32_e64_vi:
4558 
4559   case AMDGPU::V_SUBREV_U32_e32:
4560   case AMDGPU::V_SUBREV_U32_e64:
4561   case AMDGPU::V_SUBREV_U32_e32_gfx9:
4562   case AMDGPU::V_SUBREV_U32_e32_vi:
4563   case AMDGPU::V_SUBREV_U32_e64_gfx9:
4564   case AMDGPU::V_SUBREV_U32_e64_vi:
4565 
4566   case AMDGPU::V_SUBREV_F16_e32:
4567   case AMDGPU::V_SUBREV_F16_e64:
4568   case AMDGPU::V_SUBREV_F16_e32_gfx10:
4569   case AMDGPU::V_SUBREV_F16_e32_vi:
4570   case AMDGPU::V_SUBREV_F16_e64_gfx10:
4571   case AMDGPU::V_SUBREV_F16_e64_vi:
4572 
4573   case AMDGPU::V_SUBREV_U16_e32:
4574   case AMDGPU::V_SUBREV_U16_e64:
4575   case AMDGPU::V_SUBREV_U16_e32_vi:
4576   case AMDGPU::V_SUBREV_U16_e64_vi:
4577 
4578   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4579   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4580   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4581 
4582   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4583   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4584 
4585   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4586   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4587 
4588   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4589   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4590 
4591   case AMDGPU::V_LSHRREV_B32_e32:
4592   case AMDGPU::V_LSHRREV_B32_e64:
4593   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4594   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4595   case AMDGPU::V_LSHRREV_B32_e32_vi:
4596   case AMDGPU::V_LSHRREV_B32_e64_vi:
4597   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4598   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4599 
4600   case AMDGPU::V_ASHRREV_I32_e32:
4601   case AMDGPU::V_ASHRREV_I32_e64:
4602   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4603   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4604   case AMDGPU::V_ASHRREV_I32_e32_vi:
4605   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4606   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4607   case AMDGPU::V_ASHRREV_I32_e64_vi:
4608 
4609   case AMDGPU::V_LSHLREV_B32_e32:
4610   case AMDGPU::V_LSHLREV_B32_e64:
4611   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4612   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4613   case AMDGPU::V_LSHLREV_B32_e32_vi:
4614   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4615   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4616   case AMDGPU::V_LSHLREV_B32_e64_vi:
4617 
4618   case AMDGPU::V_LSHLREV_B16_e32:
4619   case AMDGPU::V_LSHLREV_B16_e64:
4620   case AMDGPU::V_LSHLREV_B16_e32_vi:
4621   case AMDGPU::V_LSHLREV_B16_e64_vi:
4622   case AMDGPU::V_LSHLREV_B16_gfx10:
4623 
4624   case AMDGPU::V_LSHRREV_B16_e32:
4625   case AMDGPU::V_LSHRREV_B16_e64:
4626   case AMDGPU::V_LSHRREV_B16_e32_vi:
4627   case AMDGPU::V_LSHRREV_B16_e64_vi:
4628   case AMDGPU::V_LSHRREV_B16_gfx10:
4629 
4630   case AMDGPU::V_ASHRREV_I16_e32:
4631   case AMDGPU::V_ASHRREV_I16_e64:
4632   case AMDGPU::V_ASHRREV_I16_e32_vi:
4633   case AMDGPU::V_ASHRREV_I16_e64_vi:
4634   case AMDGPU::V_ASHRREV_I16_gfx10:
4635 
4636   case AMDGPU::V_LSHLREV_B64_e64:
4637   case AMDGPU::V_LSHLREV_B64_gfx10:
4638   case AMDGPU::V_LSHLREV_B64_vi:
4639 
4640   case AMDGPU::V_LSHRREV_B64_e64:
4641   case AMDGPU::V_LSHRREV_B64_gfx10:
4642   case AMDGPU::V_LSHRREV_B64_vi:
4643 
4644   case AMDGPU::V_ASHRREV_I64_e64:
4645   case AMDGPU::V_ASHRREV_I64_gfx10:
4646   case AMDGPU::V_ASHRREV_I64_vi:
4647 
4648   case AMDGPU::V_PK_LSHLREV_B16:
4649   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4650   case AMDGPU::V_PK_LSHLREV_B16_vi:
4651 
4652   case AMDGPU::V_PK_LSHRREV_B16:
4653   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4654   case AMDGPU::V_PK_LSHRREV_B16_vi:
4655   case AMDGPU::V_PK_ASHRREV_I16:
4656   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4657   case AMDGPU::V_PK_ASHRREV_I16_vi:
4658     return true;
4659   default:
4660     return false;
4661   }
4662 }
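
// The "rev" opcodes above swap the roles of src0 and src1: for example,
// v_subrev_f32 computes src1 - src0 and v_lshlrev_b32 computes src1 << src0.
// validateLdsDirect below relies on this to reject lds_direct with these
// opcodes, since lds_direct is only legal as src0.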
4663 
4664 std::optional<StringRef>
4665 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4666 
4667   using namespace SIInstrFlags;
4668   const unsigned Opcode = Inst.getOpcode();
4669   const MCInstrDesc &Desc = MII.get(Opcode);
4670 
4671   // The lds_direct register is defined so that it can be used only with
4672   // 9-bit source operands. Ignore encodings that do not accept these.
4673   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4674   if ((Desc.TSFlags & Enc) == 0)
4675     return std::nullopt;
4676 
4677   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4678     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4679     if (SrcIdx == -1)
4680       break;
4681     const auto &Src = Inst.getOperand(SrcIdx);
4682     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4683 
4684       if (isGFX90A() || isGFX11Plus())
4685         return StringRef("lds_direct is not supported on this GPU");
4686 
4687       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4688         return StringRef("lds_direct cannot be used with this instruction");
4689 
4690       if (SrcName != OpName::src0)
4691         return StringRef("lds_direct may be used as src0 only");
4692     }
4693   }
4694 
4695   return std::nullopt;
4696 }
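
// Illustrative examples (invented for this note), assuming a pre-GFX90A
// target that still supports lds_direct:
//   v_add_f32 v0, lds_direct, v1   ; accepted: lds_direct is src0
//   v_add_f32 v0, v1, lds_direct   ; rejected: lds_direct may be src0 only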
4697 
4698 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4699   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4700     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4701     if (Op.isFlatOffset())
4702       return Op.getStartLoc();
4703   }
4704   return getLoc();
4705 }
4706 
4707 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4708                                      const OperandVector &Operands) {
4709   auto Opcode = Inst.getOpcode();
4710   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4711   if (OpNum == -1)
4712     return true;
4713 
4714   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4715   if ((TSFlags & SIInstrFlags::FLAT))
4716     return validateFlatOffset(Inst, Operands);
4717 
4718   if ((TSFlags & SIInstrFlags::SMRD))
4719     return validateSMEMOffset(Inst, Operands);
4720 
4721   const auto &Op = Inst.getOperand(OpNum);
4722   if (isGFX12Plus() &&
4723       (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4724     const unsigned OffsetSize = 24;
4725     if (!isIntN(OffsetSize, Op.getImm())) {
4726       Error(getFlatOffsetLoc(Operands),
4727             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4728       return false;
4729     }
4730   } else {
4731     const unsigned OffsetSize = 16;
4732     if (!isUIntN(OffsetSize, Op.getImm())) {
4733       Error(getFlatOffsetLoc(Operands),
4734             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4735       return false;
4736     }
4737   }
4738   return true;
4739 }
4740 
4741 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4742                                          const OperandVector &Operands) {
4743   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4744   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4745     return true;
4746 
4747   auto Opcode = Inst.getOpcode();
4748   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4749   assert(OpNum != -1);
4750 
4751   const auto &Op = Inst.getOperand(OpNum);
4752   if (!hasFlatOffsets() && Op.getImm() != 0) {
4753     Error(getFlatOffsetLoc(Operands),
4754           "flat offset modifier is not supported on this GPU");
4755     return false;
4756   }
4757 
4758   // For pre-GFX12 FLAT instructions the offset must be positive;
4759   // MSB is ignored and forced to zero.
4760   unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4761   bool AllowNegative =
4762       (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4763       isGFX12Plus();
4764   if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4765     Error(getFlatOffsetLoc(Operands),
4766           Twine("expected a ") +
4767               (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4768                              : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4769     return false;
4770   }
4771 
4772   return true;
4773 }
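
// Illustrative sketch (offset widths are per-subtarget; 13 bits on GFX9 is an
// assumption of this note): segment addresses take signed offsets, e.g.
//   global_load_dword v0, v[0:1], off offset:-8   ; accepted
// while a plain FLAT access on the same pre-GFX12 target takes only an
// unsigned (OffsetSize - 1)-bit offset:
//   flat_load_dword v0, v[0:1] offset:-8          ; rejected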
4774 
4775 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4776   // Start with second operand because SMEM Offset cannot be dst or src0.
4777   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4778     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4779     if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4780       return Op.getStartLoc();
4781   }
4782   return getLoc();
4783 }
4784 
4785 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4786                                          const OperandVector &Operands) {
4787   if (isCI() || isSI())
4788     return true;
4789 
4790   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4791   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4792     return true;
4793 
4794   auto Opcode = Inst.getOpcode();
4795   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4796   if (OpNum == -1)
4797     return true;
4798 
4799   const auto &Op = Inst.getOperand(OpNum);
4800   if (!Op.isImm())
4801     return true;
4802 
4803   uint64_t Offset = Op.getImm();
4804   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4805   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4806       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4807     return true;
4808 
4809   Error(getSMEMOffsetLoc(Operands),
4810         isGFX12Plus()          ? "expected a 24-bit signed offset"
4811         : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4812                                : "expected a 21-bit signed offset");
4813 
4814   return false;
4815 }
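
// Illustrative examples (invented for this note):
//   s_load_dword s0, s[0:1], 0xfffff    ; accepted on VI (20-bit unsigned)
//   s_load_dword s0, s[0:1], 0x100000   ; rejected on VI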
4816 
4817 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4818   unsigned Opcode = Inst.getOpcode();
4819   const MCInstrDesc &Desc = MII.get(Opcode);
4820   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4821     return true;
4822 
4823   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4824   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4825 
4826   const int OpIndices[] = { Src0Idx, Src1Idx };
4827 
4828   unsigned NumExprs = 0;
4829   unsigned NumLiterals = 0;
4830   uint64_t LiteralValue;
4831 
4832   for (int OpIdx : OpIndices) {
4833     if (OpIdx == -1) break;
4834 
4835     const MCOperand &MO = Inst.getOperand(OpIdx);
4836     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
4837     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4838       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4839         uint64_t Value = static_cast<uint64_t>(MO.getImm());
4840         if (NumLiterals == 0 || LiteralValue != Value) {
4841           LiteralValue = Value;
4842           ++NumLiterals;
4843         }
4844       } else if (MO.isExpr()) {
4845         ++NumExprs;
4846       }
4847     }
4848   }
4849 
4850   return NumLiterals + NumExprs <= 1;
4851 }
4852 
4853 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4854   const unsigned Opc = Inst.getOpcode();
4855   if (isPermlane16(Opc)) {
4856     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4857     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4858 
4859     if (OpSel & ~3)
4860       return false;
4861   }
4862 
4863   uint64_t TSFlags = MII.get(Opc).TSFlags;
4864 
4865   if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4866     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4867     if (OpSelIdx != -1) {
4868       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4869         return false;
4870     }
4871     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4872     if (OpSelHiIdx != -1) {
4873       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4874         return false;
4875     }
4876   }
4877 
4878   // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4879   if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4880       (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4881     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4882     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4883     if (OpSel & 3)
4884       return false;
4885   }
4886 
4887   return true;
4888 }
4889 
4890 bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4891   if (!hasTrue16Insts())
4892     return true;
4893   const MCRegisterInfo *MRI = getMRI();
4894   const unsigned Opc = Inst.getOpcode();
4895   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4896   if (OpSelIdx == -1)
4897     return true;
4898   unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4899   // If the value is 0, this could be a default op_sel operand, so
4900   // conservatively allow it.
4901   if (OpSelOpValue == 0)
4902     return true;
4903   unsigned OpCount = 0;
4904   for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4905                                 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4906     int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4907     if (OpIdx == -1)
4908       continue;
4909     const MCOperand &Op = Inst.getOperand(OpIdx);
4910     if (Op.isReg() &&
4911         MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4912       bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4913       bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4914       if (OpSelOpIsHi != VGPRSuffixIsHi)
4915         return false;
4916     }
4917     ++OpCount;
4918   }
4919 
4920   return true;
4921 }
4922 
4923 bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
4924   assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4925 
4926   const unsigned Opc = Inst.getOpcode();
4927   uint64_t TSFlags = MII.get(Opc).TSFlags;
4928 
4929   // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4930   // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4931   // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4932   // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4933   if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4934       !(TSFlags & SIInstrFlags::IsSWMMAC))
4935     return true;
4936 
4937   int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4938   if (NegIdx == -1)
4939     return true;
4940 
4941   unsigned Neg = Inst.getOperand(NegIdx).getImm();
4942 
4943   // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4944   // allowed on some of their src operands and not on others. Conveniently, such
4945   // instructions do not have a src_modifiers operand for the src operands that
4946   // disallow neg, because those operands also disallow opsel.
4947 
4948   const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4949                                      AMDGPU::OpName::src1_modifiers,
4950                                      AMDGPU::OpName::src2_modifiers};
4951 
4952   for (unsigned i = 0; i < 3; ++i) {
4953     if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4954       if (Neg & (1 << i))
4955         return false;
4956     }
4957   }
4958 
4959   return true;
4960 }
4961 
4962 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4963                                   const OperandVector &Operands) {
4964   const unsigned Opc = Inst.getOpcode();
4965   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4966   if (DppCtrlIdx >= 0) {
4967     unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4968 
4969     if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4970         AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4971       // On GFX9*, DP ALU DPP supports only the row_newbcast control.
4972       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4973       Error(S, "DP ALU dpp only supports row_newbcast");
4974       return false;
4975     }
4976   }
4977 
4978   int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4979   bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4980 
4981   if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4982     int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4983     if (Src1Idx >= 0) {
4984       const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4985       const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4986       if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4987         auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4988         SMLoc S = getRegLoc(Reg, Operands);
4989         Error(S, "invalid operand for instruction");
4990         return false;
4991       }
4992       if (Src1.isImm()) {
4993         Error(getInstLoc(Operands),
4994               "src1 immediate operand invalid for instruction");
4995         return false;
4996       }
4997     }
4998   }
4999 
5000   return true;
5001 }
5002 
5003 // Check if VCC register matches wavefront size
5004 bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5005   auto FB = getFeatureBits();
5006   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
5007     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
5008 }
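
// Illustrative examples (invented for this note):
//   v_cndmask_b32_e32 v0, v1, v2, vcc      ; wave64: full vcc pair
//   v_cndmask_b32_e32 v0, v1, v2, vcc_lo   ; wave32: vcc_lo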
5009 
5010 // Only one unique literal can be used. A VOP3 literal is allowed only on GFX10+.
5011 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5012                                          const OperandVector &Operands) {
5013   unsigned Opcode = Inst.getOpcode();
5014   const MCInstrDesc &Desc = MII.get(Opcode);
5015   bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5016   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5017       !HasMandatoryLiteral && !isVOPD(Opcode))
5018     return true;
5019 
5020   OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5021 
5022   unsigned NumExprs = 0;
5023   unsigned NumLiterals = 0;
5024   uint64_t LiteralValue;
5025 
5026   for (int OpIdx : OpIndices) {
5027     if (OpIdx == -1)
5028       continue;
5029 
5030     const MCOperand &MO = Inst.getOperand(OpIdx);
5031     if (!MO.isImm() && !MO.isExpr())
5032       continue;
5033     if (!isSISrcOperand(Desc, OpIdx))
5034       continue;
5035 
5036     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
5037       uint64_t Value = static_cast<uint64_t>(MO.getImm());
5038       bool IsForcedFP64 =
5039           Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5040           (Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_REG_IMM_FP64 &&
5041            HasMandatoryLiteral);
5042       bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5043                     AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5044       bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5045 
5046       if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5047           !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5048         Error(getLitLoc(Operands), "invalid operand for instruction");
5049         return false;
5050       }
5051 
5052       if (IsFP64 && IsValid32Op && !IsForcedFP64)
5053         Value = Hi_32(Value);
5054 
5055       if (NumLiterals == 0 || LiteralValue != Value) {
5056         LiteralValue = Value;
5057         ++NumLiterals;
5058       }
5059     } else if (MO.isExpr()) {
5060       ++NumExprs;
5061     }
5062   }
5063   NumLiterals += NumExprs;
5064 
5065   if (!NumLiterals)
5066     return true;
5067 
5068   if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
5069     Error(getLitLoc(Operands), "literal operands are not supported");
5070     return false;
5071   }
5072 
5073   if (NumLiterals > 1) {
5074     Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
5075     return false;
5076   }
5077 
5078   return true;
5079 }
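
// Illustrative examples (invented for this note):
//   v_add3_u32 v0, v1, v2, 0x64    ; accepted on GFX10+ (VOP3 literal),
//                                  ; rejected on GFX9 and earlier
//   v_add3_u32 v0, 0x64, 0x65, v1  ; rejected: two unique literals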
5080 
5081 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5082 static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5083                          const MCRegisterInfo *MRI) {
5084   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5085   if (OpIdx < 0)
5086     return -1;
5087 
5088   const MCOperand &Op = Inst.getOperand(OpIdx);
5089   if (!Op.isReg())
5090     return -1;
5091 
5092   MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5093   auto Reg = Sub ? Sub : Op.getReg();
5094   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5095   return AGPR32.contains(Reg) ? 1 : 0;
5096 }
5097 
5098 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5099   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5100   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5101                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5102                   SIInstrFlags::DS)) == 0)
5103     return true;
5104 
5105   AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5106                                 ? AMDGPU::OpName::data0
5107                                 : AMDGPU::OpName::vdata;
5108 
5109   const MCRegisterInfo *MRI = getMRI();
5110   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5111   int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5112 
5113   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5114     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5115     if (Data2Areg >= 0 && Data2Areg != DataAreg)
5116       return false;
5117   }
5118 
5119   auto FB = getFeatureBits();
5120   if (FB[AMDGPU::FeatureGFX90AInsts]) {
5121     if (DataAreg < 0 || DstAreg < 0)
5122       return true;
5123     return DstAreg == DataAreg;
5124   }
5125 
5126   return DstAreg < 1 && DataAreg < 1;
5127 }
5128 
5129 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5130   auto FB = getFeatureBits();
5131   unsigned Opc = Inst.getOpcode();
5132   // DS_READ_B96_TR_B6 is the only DS instruction on GFX950 that allows an
5133   // unaligned VGPR. All others allow only even-aligned VGPRs.
5134   if (!(FB[AMDGPU::FeatureGFX90AInsts]) || Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5135     return true;
5136 
5137   const MCRegisterInfo *MRI = getMRI();
5138   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5139   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5140   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5141     const MCOperand &Op = Inst.getOperand(I);
5142     if (!Op.isReg())
5143       continue;
5144 
5145     MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5146     if (!Sub)
5147       continue;
5148 
5149     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5150       return false;
5151     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5152       return false;
5153   }
5154 
5155   return true;
5156 }
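
// Illustrative examples on gfx90a (invented for this note):
//   global_load_dwordx2 v[2:3], v[0:1], off   ; accepted: even-aligned tuple
//   global_load_dwordx2 v[3:4], v[0:1], off   ; rejected: odd base register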
5157 
5158 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5159   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5160     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5161     if (Op.isBLGP())
5162       return Op.getStartLoc();
5163   }
5164   return SMLoc();
5165 }
5166 
5167 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5168                                    const OperandVector &Operands) {
5169   unsigned Opc = Inst.getOpcode();
5170   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5171   if (BlgpIdx == -1)
5172     return true;
5173   SMLoc BLGPLoc = getBLGPLoc(Operands);
5174   if (!BLGPLoc.isValid())
5175     return true;
5176   bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5177   auto FB = getFeatureBits();
5178   bool UsesNeg = false;
5179   if (FB[AMDGPU::FeatureGFX940Insts]) {
5180     switch (Opc) {
5181     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5182     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5183     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5184     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5185       UsesNeg = true;
5186     }
5187   }
5188 
5189   if (IsNeg == UsesNeg)
5190     return true;
5191 
5192   Error(BLGPLoc,
5193         UsesNeg ? "invalid modifier: blgp is not supported"
5194                 : "invalid modifier: neg is not supported");
5195 
5196   return false;
5197 }
5198 
5199 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5200                                       const OperandVector &Operands) {
5201   if (!isGFX11Plus())
5202     return true;
5203 
5204   unsigned Opc = Inst.getOpcode();
5205   if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5206       Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5207       Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5208       Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5209     return true;
5210 
5211   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5212   assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5213   auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5214   if (Reg == AMDGPU::SGPR_NULL)
5215     return true;
5216 
5217   SMLoc RegLoc = getRegLoc(Reg, Operands);
5218   Error(RegLoc, "src0 must be null");
5219   return false;
5220 }
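
// Illustrative examples on GFX11 (invented for this note):
//   s_waitcnt_vscnt null, 0x0   ; accepted
//   s_waitcnt_vscnt s0, 0x0     ; rejected: "src0 must be null"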
5221 
5222 bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5223                                  const OperandVector &Operands) {
5224   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5225   if ((TSFlags & SIInstrFlags::DS) == 0)
5226     return true;
5227   if (TSFlags & SIInstrFlags::GWS)
5228     return validateGWS(Inst, Operands);
5229   // Only validate GDS for non-GWS instructions.
5230   if (hasGDS())
5231     return true;
5232   int GDSIdx =
5233       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5234   if (GDSIdx < 0)
5235     return true;
5236   unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5237   if (GDS) {
5238     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5239     Error(S, "gds modifier is not supported on this GPU");
5240     return false;
5241   }
5242   return true;
5243 }
5244 
5245 // gfx90a has an undocumented limitation:
5246 // DS_GWS opcodes must use even aligned registers.
5247 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5248                                   const OperandVector &Operands) {
5249   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5250     return true;
5251 
5252   int Opc = Inst.getOpcode();
5253   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5254       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5255     return true;
5256 
5257   const MCRegisterInfo *MRI = getMRI();
5258   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5259   int Data0Pos =
5260       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5261   assert(Data0Pos != -1);
5262   auto Reg = Inst.getOperand(Data0Pos).getReg();
5263   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5264   if (RegIdx & 1) {
5265     SMLoc RegLoc = getRegLoc(Reg, Operands);
5266     Error(RegLoc, "vgpr must be even aligned");
5267     return false;
5268   }
5269 
5270   return true;
5271 }
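
// Illustrative examples on gfx90a (invented for this note):
//   ds_gws_init v2 gds   ; accepted: data0 register is even
//   ds_gws_init v3 gds   ; rejected: "vgpr must be even aligned"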
5272 
5273 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5274                                             const OperandVector &Operands,
5275                                             const SMLoc &IDLoc) {
5276   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5277                                            AMDGPU::OpName::cpol);
5278   if (CPolPos == -1)
5279     return true;
5280 
5281   unsigned CPol = Inst.getOperand(CPolPos).getImm();
5282 
5283   if (isGFX12Plus())
5284     return validateTHAndScopeBits(Inst, Operands, CPol);
5285 
5286   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5287   if (TSFlags & SIInstrFlags::SMRD) {
5288     if (CPol && (isSI() || isCI())) {
5289       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5290       Error(S, "cache policy is not supported for SMRD instructions");
5291       return false;
5292     }
5293     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5294       Error(IDLoc, "invalid cache policy for SMEM instruction");
5295       return false;
5296     }
5297   }
5298 
5299   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5300     const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5301                                       SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5302                                       SIInstrFlags::FLAT;
5303     if (!(TSFlags & AllowSCCModifier)) {
5304       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5305       StringRef CStr(S.getPointer());
5306       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5307       Error(S,
5308             "scc modifier is not supported for this instruction on this GPU");
5309       return false;
5310     }
5311   }
5312 
5313   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
5314     return true;
5315 
5316   if (TSFlags & SIInstrFlags::IsAtomicRet) {
5317     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5318       Error(IDLoc, isGFX940() ? "instruction must use sc0"
5319                               : "instruction must use glc");
5320       return false;
5321     }
5322   } else {
5323     if (CPol & CPol::GLC) {
5324       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5325       StringRef CStr(S.getPointer());
5326       S = SMLoc::getFromPointer(
5327           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5328       Error(S, isGFX940() ? "instruction must not use sc0"
5329                           : "instruction must not use glc");
5330       return false;
5331     }
5332   }
5333 
5334   return true;
5335 }
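
// Illustrative sketch (invented for this note): on pre-GFX12 targets an
// atomic that returns its result must carry glc (sc0 on gfx940) while a
// non-returning atomic must not, e.g.
//   global_atomic_add v0, v[0:1], v2, off glc   ; returning form: glc required
//   global_atomic_add v[0:1], v2, off           ; non-returning form: no glc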
5336 
5337 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5338                                              const OperandVector &Operands,
5339                                              const unsigned CPol) {
5340   const unsigned TH = CPol & AMDGPU::CPol::TH;
5341   const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5342 
5343   const unsigned Opcode = Inst.getOpcode();
5344   const MCInstrDesc &TID = MII.get(Opcode);
5345 
5346   auto PrintError = [&](StringRef Msg) {
5347     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5348     Error(S, Msg);
5349     return false;
5350   };
5351 
5352   if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5353       (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
5354       (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
5355     return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5356 
5357   if (TH == 0)
5358     return true;
5359 
5360   if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5361       ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5362        (TH == AMDGPU::CPol::TH_NT_HT)))
5363     return PrintError("invalid th value for SMEM instruction");
5364 
5365   if (TH == AMDGPU::CPol::TH_BYPASS) {
5366     if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5367          CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5368         (Scope == AMDGPU::CPol::SCOPE_SYS &&
5369          !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5370       return PrintError("scope and th combination is not valid");
5371   }
5372 
5373   unsigned THType = AMDGPU::getTemporalHintType(TID);
5374   if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5375     if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5376       return PrintError("invalid th value for atomic instructions");
5377   } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5378     if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5379       return PrintError("invalid th value for store instructions");
5380   } else {
5381     if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5382       return PrintError("invalid th value for load instructions");
5383   }
5384 
5385   return true;
5386 }
5387 
5388 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5389                                   const OperandVector &Operands) {
5390   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5391   if (Desc.mayStore() &&
5392       (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5393     SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5394     if (Loc != getInstLoc(Operands)) {
5395       Error(Loc, "TFE modifier has no meaning for store instructions");
5396       return false;
5397     }
5398   }
5399 
5400   return true;
5401 }
5402 
5403 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5404                                           const SMLoc &IDLoc,
5405                                           const OperandVector &Operands) {
5406   if (auto ErrMsg = validateLdsDirect(Inst)) {
5407     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5408     return false;
5409   }
5410   if (!validateTrue16OpSel(Inst)) {
5411     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5412           "op_sel operand conflicts with 16-bit operand suffix");
5413     return false;
5414   }
5415   if (!validateSOPLiteral(Inst)) {
5416     Error(getLitLoc(Operands),
5417       "only one unique literal operand is allowed");
5418     return false;
5419   }
5420   if (!validateVOPLiteral(Inst, Operands)) {
5421     return false;
5422   }
5423   if (!validateConstantBusLimitations(Inst, Operands)) {
5424     return false;
5425   }
5426   if (!validateVOPD(Inst, Operands)) {
5427     return false;
5428   }
5429   if (!validateIntClampSupported(Inst)) {
5430     Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5431           "integer clamping is not supported on this GPU");
5432     return false;
5433   }
5434   if (!validateOpSel(Inst)) {
5435     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5436       "invalid op_sel operand");
5437     return false;
5438   }
5439   if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5440     Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5441           "invalid neg_lo operand");
5442     return false;
5443   }
5444   if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5445     Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5446           "invalid neg_hi operand");
5447     return false;
5448   }
5449   if (!validateDPP(Inst, Operands)) {
5450     return false;
5451   }
5452   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5453   if (!validateMIMGD16(Inst)) {
5454     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5455       "d16 modifier is not supported on this GPU");
5456     return false;
5457   }
5458   if (!validateMIMGDim(Inst, Operands)) {
5459     Error(IDLoc, "missing dim operand");
5460     return false;
5461   }
5462   if (!validateTensorR128(Inst)) {
5463     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5464           "instruction must set modifier r128=0");
5465     return false;
5466   }
5467   if (!validateMIMGMSAA(Inst)) {
5468     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5469           "invalid dim; must be MSAA type");
5470     return false;
5471   }
5472   if (!validateMIMGDataSize(Inst, IDLoc)) {
5473     return false;
5474   }
5475   if (!validateMIMGAddrSize(Inst, IDLoc))
5476     return false;
5477   if (!validateMIMGAtomicDMask(Inst)) {
5478     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5479       "invalid atomic image dmask");
5480     return false;
5481   }
5482   if (!validateMIMGGatherDMask(Inst)) {
5483     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5484       "invalid image_gather dmask: only one bit must be set");
5485     return false;
5486   }
5487   if (!validateMovrels(Inst, Operands)) {
5488     return false;
5489   }
5490   if (!validateOffset(Inst, Operands)) {
5491     return false;
5492   }
5493   if (!validateMAIAccWrite(Inst, Operands)) {
5494     return false;
5495   }
5496   if (!validateMAISrc2(Inst, Operands)) {
5497     return false;
5498   }
5499   if (!validateMFMA(Inst, Operands)) {
5500     return false;
5501   }
5502   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5503     return false;
5504   }
5505 
5506   if (!validateAGPRLdSt(Inst)) {
5507     Error(IDLoc,
5508           getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5509               ? "invalid register class: data and dst should be all VGPR or AGPR"
5510               : "invalid register class: agpr loads and stores not supported on this GPU");
5511     return false;
5512   }
5513   if (!validateVGPRAlign(Inst)) {
5514     Error(IDLoc,
5515       "invalid register class: vgpr tuples must be 64 bit aligned");
5516     return false;
5517   }
5518   if (!validateDS(Inst, Operands)) {
5519     return false;
5520   }
5521 
5522   if (!validateBLGP(Inst, Operands)) {
5523     return false;
5524   }
5525 
5526   if (!validateDivScale(Inst)) {
5527     Error(IDLoc, "ABS not allowed in VOP3B instructions");
5528     return false;
5529   }
5530   if (!validateWaitCnt(Inst, Operands)) {
5531     return false;
5532   }
5533   if (!validateTFE(Inst, Operands)) {
5534     return false;
5535   }
5536 
5537   return true;
5538 }
5539 
5540 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5541                                             const FeatureBitset &FBS,
5542                                             unsigned VariantID = 0);
5543 
5544 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5545                                 const FeatureBitset &AvailableFeatures,
5546                                 unsigned VariantID);
5547 
5548 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5549                                        const FeatureBitset &FBS) {
5550   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5551 }
5552 
5553 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5554                                        const FeatureBitset &FBS,
5555                                        ArrayRef<unsigned> Variants) {
5556   for (auto Variant : Variants) {
5557     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5558       return true;
5559   }
5560 
5561   return false;
5562 }
5563 
5564 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5565                                                   const SMLoc &IDLoc) {
5566   FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5567 
5568   // Check if requested instruction variant is supported.
5569   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5570     return false;
5571 
5572   // This instruction is not supported.
5573   // Clear any other pending errors because they are no longer relevant.
5574   getParser().clearPendingErrors();
5575 
5576   // Requested instruction variant is not supported.
5577   // Check if any other variants are supported.
5578   StringRef VariantName = getMatchedVariantName();
5579   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5580     return Error(IDLoc,
5581                  Twine(VariantName,
5582                        " variant of this instruction is not supported"));
5583   }
5584 
5585   // Check if this instruction may be used with a different wavesize.
5586   if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5587       !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5588 
5589     FeatureBitset FeaturesWS32 = getFeatureBits();
5590     FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5591         .flip(AMDGPU::FeatureWavefrontSize32);
5592     FeatureBitset AvailableFeaturesWS32 =
5593         ComputeAvailableFeatures(FeaturesWS32);
5594 
5595     if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5596       return Error(IDLoc, "instruction requires wavesize=32");
5597   }
5598 
5599   // Finally check if this instruction is supported on any other GPU.
5600   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5601     return Error(IDLoc, "instruction not supported on this GPU");
5602   }
5603 
5604   // Instruction not supported on any GPU. Probably a typo.
5605   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5606   return Error(IDLoc, "invalid instruction" + Suggestion);
5607 }
5608 
5609 static bool isInvalidVOPDY(const OperandVector &Operands,
5610                            uint64_t InvalidOprIdx) {
5611   assert(InvalidOprIdx < Operands.size());
5612   const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5613   if (Op.isToken() && InvalidOprIdx > 1) {
5614     const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5615     return PrevOp.isToken() && PrevOp.getToken() == "::";
5616   }
5617   return false;
5618 }
5619 
5620 bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5621                                               OperandVector &Operands,
5622                                               MCStreamer &Out,
5623                                               uint64_t &ErrorInfo,
5624                                               bool MatchingInlineAsm) {
5625   MCInst Inst;
5626   unsigned Result = Match_Success;
5627   for (auto Variant : getMatchedVariants()) {
5628     uint64_t EI;
5629     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5630                                   Variant);
5631     // Match statuses are ordered from least to most specific; the most
5632     // specific status wins:
5633     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5634     if (R == Match_Success || R == Match_MissingFeature ||
5635         (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5636         (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5637          Result != Match_MissingFeature)) {
5638       Result = R;
5639       ErrorInfo = EI;
5640     }
5641     if (R == Match_Success)
5642       break;
5643   }
5644 
5645   if (Result == Match_Success) {
5646     if (!validateInstruction(Inst, IDLoc, Operands)) {
5647       return true;
5648     }
5649     Inst.setLoc(IDLoc);
5650     Out.emitInstruction(Inst, getSTI());
5651     return false;
5652   }
5653 
5654   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5655   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5656     return true;
5657   }
5658 
5659   switch (Result) {
5660   default: break;
5661   case Match_MissingFeature:
5662     // It has been verified that the specified instruction
5663     // mnemonic is valid. A match was found but it requires
5664     // features which are not supported on this GPU.
5665     return Error(IDLoc, "operands are not valid for this GPU or mode");
5666 
5667   case Match_InvalidOperand: {
5668     SMLoc ErrorLoc = IDLoc;
5669     if (ErrorInfo != ~0ULL) {
5670       if (ErrorInfo >= Operands.size()) {
5671         return Error(IDLoc, "too few operands for instruction");
5672       }
5673       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5674       if (ErrorLoc == SMLoc())
5675         ErrorLoc = IDLoc;
5676 
5677       if (isInvalidVOPDY(Operands, ErrorInfo))
5678         return Error(ErrorLoc, "invalid VOPDY instruction");
5679     }
5680     return Error(ErrorLoc, "invalid operand for instruction");
5681   }
5682 
5683   case Match_MnemonicFail:
5684     llvm_unreachable("Invalid instructions should have been handled already");
5685   }
5686   llvm_unreachable("Implement any new match types added!");
5687 }
5688 
5689 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5690   int64_t Tmp = -1;
5691   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5692     return true;
5693   }
5694   if (getParser().parseAbsoluteExpression(Tmp)) {
5695     return true;
5696   }
5697   Ret = static_cast<uint32_t>(Tmp);
5698   return false;
5699 }
5700 
5701 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5702   if (!getSTI().getTargetTriple().isAMDGCN())
5703     return TokError("directive only supported for amdgcn architecture");
5704 
5705   std::string TargetIDDirective;
5706   SMLoc TargetStart = getTok().getLoc();
5707   if (getParser().parseEscapedString(TargetIDDirective))
5708     return true;
5709 
5710   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5711   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5712     return getParser().Error(TargetRange.Start,
5713         (Twine(".amdgcn_target directive's target id ") +
5714          Twine(TargetIDDirective) +
5715          Twine(" does not match the specified target id ") +
5716          Twine(getTargetStreamer().getTargetID()->toString())).str());
5717 
5718   return false;
5719 }
5720 
5721 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5722   return Error(Range.Start, "value out of range", Range);
5723 }
5724 
5725 bool AMDGPUAsmParser::calculateGPRBlocks(
5726     const FeatureBitset &Features, const MCExpr *VCCUsed,
5727     const MCExpr *FlatScrUsed, bool XNACKUsed,
5728     std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5729     SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5730     const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5731   // TODO(scott.linder): These calculations are duplicated from
5732   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5733   IsaVersion Version = getIsaVersion(getSTI().getCPU());
5734   MCContext &Ctx = getContext();
5735 
5736   const MCExpr *NumSGPRs = NextFreeSGPR;
5737   int64_t EvaluatedSGPRs;
5738 
5739   if (Version.Major >= 10)
5740     NumSGPRs = MCConstantExpr::create(0, Ctx);
5741   else {
5742     unsigned MaxAddressableNumSGPRs =
5743         IsaInfo::getAddressableNumSGPRs(&getSTI());
5744 
5745     if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5746         !Features.test(FeatureSGPRInitBug) &&
5747         static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5748       return OutOfRangeError(SGPRRange);
5749 
5750     const MCExpr *ExtraSGPRs =
5751         AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5752     NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5753 
5754     if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5755         (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5756         static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5757       return OutOfRangeError(SGPRRange);
5758 
5759     if (Features.test(FeatureSGPRInitBug))
5760       NumSGPRs =
5761           MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5762   }
5763 
5764   // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5765   // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5766   auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5767                                 unsigned Granule) -> const MCExpr * {
5768     const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5769     const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5770     const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5771     const MCExpr *AlignToGPR =
5772         AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5773     const MCExpr *DivGPR =
5774         MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5775     const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5776     return SubGPR;
5777   };
5778 
5779   VGPRBlocks = GetNumGPRBlocks(
5780       NextFreeVGPR,
5781       IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5782   SGPRBlocks =
5783       GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5784 
5785   return false;
5786 }
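
// Worked example of GetNumGPRBlocks (assuming a VGPR encoding granule of 4,
// as on pre-GFX10 targets): NextFreeVGPR = 37 gives
//   alignTo(max(1, 37), 4) / 4 - 1 = 40 / 4 - 1 = 9,
// i.e. ten granule-sized blocks encoded as 9 in the kernel descriptor.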
5787 
5788 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5789   if (!getSTI().getTargetTriple().isAMDGCN())
5790     return TokError("directive only supported for amdgcn architecture");
5791 
5792   if (!isHsaAbi(getSTI()))
5793     return TokError("directive only supported for amdhsa OS");
5794 
5795   StringRef KernelName;
5796   if (getParser().parseIdentifier(KernelName))
5797     return true;
5798 
5799   AMDGPU::MCKernelDescriptor KD =
5800       AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5801           &getSTI(), getContext());
5802 
5803   StringSet<> Seen;
5804 
5805   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5806 
5807   const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5808   const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5809 
5810   SMRange VGPRRange;
5811   const MCExpr *NextFreeVGPR = ZeroExpr;
5812   const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5813   uint64_t SharedVGPRCount = 0;
5814   uint64_t PreloadLength = 0;
5815   uint64_t PreloadOffset = 0;
5816   SMRange SGPRRange;
5817   const MCExpr *NextFreeSGPR = ZeroExpr;
5818 
5819   // Count the number of user SGPRs implied from the enabled feature bits.
5820   unsigned ImpliedUserSGPRCount = 0;
5821 
5822   // Track if the asm explicitly contains the directive for the user SGPR
5823   // count.
5824   std::optional<unsigned> ExplicitUserSGPRCount;
5825   const MCExpr *ReserveVCC = OneExpr;
5826   const MCExpr *ReserveFlatScr = OneExpr;
5827   std::optional<bool> EnableWavefrontSize32;
5828 
5829   while (true) {
5830     while (trySkipToken(AsmToken::EndOfStatement));
5831 
5832     StringRef ID;
5833     SMRange IDRange = getTok().getLocRange();
5834     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5835       return true;
5836 
5837     if (ID == ".end_amdhsa_kernel")
5838       break;
5839 
5840     if (!Seen.insert(ID).second)
5841       return TokError(".amdhsa_ directives cannot be repeated");
5842 
5843     SMLoc ValStart = getLoc();
5844     const MCExpr *ExprVal;
5845     if (getParser().parseExpression(ExprVal))
5846       return true;
5847     SMLoc ValEnd = getLoc();
5848     SMRange ValRange = SMRange(ValStart, ValEnd);
5849 
5850     int64_t IVal = 0;
5851     uint64_t Val = IVal;
5852     bool EvaluatableExpr;
5853     if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5854       if (IVal < 0)
5855         return OutOfRangeError(ValRange);
5856       Val = IVal;
5857     }
5858 
5859 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
5860   if (!isUInt<ENTRY##_WIDTH>(Val))                                             \
5861     return OutOfRangeError(RANGE);                                             \
5862   AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY,     \
5863                                        getContext());
5864 
5865 // Some fields use the parsed value immediately, which requires the expression
5866 // to be resolvable.
5867 #define EXPR_RESOLVE_OR_ERROR(RESOLVED)                                        \
5868   if (!(RESOLVED))                                                             \
5869     return Error(IDRange.Start, "directive should have resolvable expression", \
5870                  IDRange);
5871 
5872     if (ID == ".amdhsa_group_segment_fixed_size") {
5873       if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5874                   CHAR_BIT>(Val))
5875         return OutOfRangeError(ValRange);
5876       KD.group_segment_fixed_size = ExprVal;
5877     } else if (ID == ".amdhsa_private_segment_fixed_size") {
5878       if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5879                   CHAR_BIT>(Val))
5880         return OutOfRangeError(ValRange);
5881       KD.private_segment_fixed_size = ExprVal;
5882     } else if (ID == ".amdhsa_kernarg_size") {
5883       if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5884         return OutOfRangeError(ValRange);
5885       KD.kernarg_size = ExprVal;
5886     } else if (ID == ".amdhsa_user_sgpr_count") {
5887       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5888       ExplicitUserSGPRCount = Val;
5889     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5890       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5891       if (hasArchitectedFlatScratch())
5892         return Error(IDRange.Start,
5893                      "directive is not supported with architected flat scratch",
5894                      IDRange);
5895       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5896                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5897                        ExprVal, ValRange);
5898       if (Val)
5899         ImpliedUserSGPRCount += 4;
5900     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5901       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5902       if (!hasKernargPreload())
5903         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5904 
5905       if (Val > getMaxNumUserSGPRs())
5906         return OutOfRangeError(ValRange);
5907       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5908                        ValRange);
5909       if (Val) {
5910         ImpliedUserSGPRCount += Val;
5911         PreloadLength = Val;
5912       }
5913     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5914       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5915       if (!hasKernargPreload())
5916         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5917 
5918       if (Val >= 1024)
5919         return OutOfRangeError(ValRange);
5920       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5921                        ValRange);
5922       if (Val)
5923         PreloadOffset = Val;
5924     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5925       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5926       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5927                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5928                        ValRange);
5929       if (Val)
5930         ImpliedUserSGPRCount += 2;
5931     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5932       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5933       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5934                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5935                        ValRange);
5936       if (Val)
5937         ImpliedUserSGPRCount += 2;
5938     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5939       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5940       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5941                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5942                        ExprVal, ValRange);
5943       if (Val)
5944         ImpliedUserSGPRCount += 2;
5945     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5946       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5947       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5948                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5949                        ValRange);
5950       if (Val)
5951         ImpliedUserSGPRCount += 2;
5952     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5953       if (hasArchitectedFlatScratch())
5954         return Error(IDRange.Start,
5955                      "directive is not supported with architected flat scratch",
5956                      IDRange);
5957       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5958       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5959                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5960                        ExprVal, ValRange);
5961       if (Val)
5962         ImpliedUserSGPRCount += 2;
5963     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5964       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5965       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5966                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5967                        ExprVal, ValRange);
5968       if (Val)
5969         ImpliedUserSGPRCount += 1;
5970     } else if (ID == ".amdhsa_wavefront_size32") {
5971       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5972       if (IVersion.Major < 10)
5973         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5974       EnableWavefrontSize32 = Val;
5975       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5976                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5977                        ValRange);
5978     } else if (ID == ".amdhsa_uses_dynamic_stack") {
5979       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5980                        KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5981                        ValRange);
5982     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5983       if (hasArchitectedFlatScratch())
5984         return Error(IDRange.Start,
5985                      "directive is not supported with architected flat scratch",
5986                      IDRange);
5987       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5988                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5989                        ValRange);
5990     } else if (ID == ".amdhsa_enable_private_segment") {
5991       if (!hasArchitectedFlatScratch())
5992         return Error(
5993             IDRange.Start,
5994             "directive is not supported without architected flat scratch",
5995             IDRange);
5996       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5997                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5998                        ValRange);
5999     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6000       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6001                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6002                        ValRange);
6003     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6004       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6005                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6006                        ValRange);
6007     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6008       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6009                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6010                        ValRange);
6011     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6012       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6013                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6014                        ValRange);
6015     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6016       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6017                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6018                        ValRange);
6019     } else if (ID == ".amdhsa_next_free_vgpr") {
6020       VGPRRange = ValRange;
6021       NextFreeVGPR = ExprVal;
6022     } else if (ID == ".amdhsa_next_free_sgpr") {
6023       SGPRRange = ValRange;
6024       NextFreeSGPR = ExprVal;
6025     } else if (ID == ".amdhsa_accum_offset") {
6026       if (!isGFX90A())
6027         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6028       AccumOffset = ExprVal;
6029     } else if (ID == ".amdhsa_reserve_vcc") {
6030       if (EvaluatableExpr && !isUInt<1>(Val))
6031         return OutOfRangeError(ValRange);
6032       ReserveVCC = ExprVal;
6033     } else if (ID == ".amdhsa_reserve_flat_scratch") {
6034       if (IVersion.Major < 7)
6035         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6036       if (hasArchitectedFlatScratch())
6037         return Error(IDRange.Start,
6038                      "directive is not supported with architected flat scratch",
6039                      IDRange);
6040       if (EvaluatableExpr && !isUInt<1>(Val))
6041         return OutOfRangeError(ValRange);
6042       ReserveFlatScr = ExprVal;
6043     } else if (ID == ".amdhsa_reserve_xnack_mask") {
6044       if (IVersion.Major < 8)
6045         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6046       if (!isUInt<1>(Val))
6047         return OutOfRangeError(ValRange);
6048       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6049         return Error(IDRange.Start,
6050                      ".amdhsa_reserve_xnack_mask does not match target id",
                          IDRange);
6051     } else if (ID == ".amdhsa_float_round_mode_32") {
6052       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6053                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6054                        ValRange);
6055     } else if (ID == ".amdhsa_float_round_mode_16_64") {
6056       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6057                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6058                        ValRange);
6059     } else if (ID == ".amdhsa_float_denorm_mode_32") {
6060       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6061                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6062                        ValRange);
6063     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6064       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6065                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6066                        ValRange);
6067     } else if (ID == ".amdhsa_dx10_clamp") {
6068       if (IVersion.Major >= 12)
6069         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6070       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6071                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6072                        ValRange);
6073     } else if (ID == ".amdhsa_ieee_mode") {
6074       if (IVersion.Major >= 12)
6075         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6076       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6077                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6078                        ValRange);
6079     } else if (ID == ".amdhsa_fp16_overflow") {
6080       if (IVersion.Major < 9)
6081         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6082       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6083                        COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6084                        ValRange);
6085     } else if (ID == ".amdhsa_tg_split") {
6086       if (!isGFX90A())
6087         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6088       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6089                        ExprVal, ValRange);
6090     } else if (ID == ".amdhsa_workgroup_processor_mode") {
6091       if (IVersion.Major < 10)
6092         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6093       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6094                        COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6095                        ValRange);
6096     } else if (ID == ".amdhsa_memory_ordered") {
6097       if (IVersion.Major < 10)
6098         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6099       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6100                        COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6101                        ValRange);
6102     } else if (ID == ".amdhsa_forward_progress") {
6103       if (IVersion.Major < 10)
6104         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6105       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6106                        COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6107                        ValRange);
6108     } else if (ID == ".amdhsa_shared_vgpr_count") {
6109       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6110       if (IVersion.Major < 10 || IVersion.Major >= 12)
6111         return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6112                      IDRange);
6113       SharedVGPRCount = Val;
6114       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6115                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6116                        ValRange);
6117     } else if (ID == ".amdhsa_inst_pref_size") {
6118       if (IVersion.Major < 11)
6119         return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6120       if (IVersion.Major == 11) {
6121         PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6122                          COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6123                          ValRange);
6124       } else {
6125         PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6126                          COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6127                          ValRange);
6128       }
6129     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6130       PARSE_BITS_ENTRY(
6131           KD.compute_pgm_rsrc2,
6132           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6133           ExprVal, ValRange);
6134     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6135       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6136                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6137                        ExprVal, ValRange);
6138     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6139       PARSE_BITS_ENTRY(
6140           KD.compute_pgm_rsrc2,
6141           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6142           ExprVal, ValRange);
6143     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6144       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6145                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6146                        ExprVal, ValRange);
6147     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6148       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6149                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6150                        ExprVal, ValRange);
6151     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6152       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6153                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6154                        ExprVal, ValRange);
6155     } else if (ID == ".amdhsa_exception_int_div_zero") {
6156       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6157                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6158                        ExprVal, ValRange);
6159     } else if (ID == ".amdhsa_round_robin_scheduling") {
6160       if (IVersion.Major < 12)
6161         return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6162       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6163                        COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6164                        ValRange);
6165     } else {
6166       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6167     }
6168 
6169 #undef PARSE_BITS_ENTRY
6170   }
6171 
6172   if (!Seen.contains(".amdhsa_next_free_vgpr"))
6173     return TokError(".amdhsa_next_free_vgpr directive is required");
6174 
6175   if (!Seen.contains(".amdhsa_next_free_sgpr"))
6176     return TokError(".amdhsa_next_free_sgpr directive is required");
6177 
6178   unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6179 
6180   // Consider the case where the total number of UserSGPRs, including
6181   // trailing allocated preload SGPRs, is greater than the number of
6182   // explicitly referenced SGPRs.
6183   if (PreloadLength) {
6184     MCContext &Ctx = getContext();
6185     NextFreeSGPR = AMDGPUMCExpr::createMax(
6186         {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6187   }
6188 
6189   const MCExpr *VGPRBlocks;
6190   const MCExpr *SGPRBlocks;
6191   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6192                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6193                          EnableWavefrontSize32, NextFreeVGPR,
6194                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6195                          SGPRBlocks))
6196     return true;
6197 
6198   int64_t EvaluatedVGPRBlocks;
6199   bool VGPRBlocksEvaluatable =
6200       VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6201   if (VGPRBlocksEvaluatable &&
6202       !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6203           static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6204     return OutOfRangeError(VGPRRange);
6205   }
6206   AMDGPU::MCKernelDescriptor::bits_set(
6207       KD.compute_pgm_rsrc1, VGPRBlocks,
6208       COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6209       COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6210 
6211   int64_t EvaluatedSGPRBlocks;
6212   if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6213       !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6214           static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6215     return OutOfRangeError(SGPRRange);
6216   AMDGPU::MCKernelDescriptor::bits_set(
6217       KD.compute_pgm_rsrc1, SGPRBlocks,
6218       COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6219       COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6220 
6221   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6222     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
6223                     "enabled user SGPRs");
6224 
6225   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
6226     return TokError("too many user SGPRs enabled");
6227   AMDGPU::MCKernelDescriptor::bits_set(
6228       KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
6229       COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
6230       COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
6231 
6232   int64_t IVal = 0;
6233   if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6234     return TokError("Kernarg size should be resolvable");
6235   uint64_t kernarg_size = IVal;
6236   if (PreloadLength && kernarg_size &&
6237       (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6238     return TokError("Kernarg preload length + offset is larger than the "
6239                     "kernarg segment size");
6240 
6241   if (isGFX90A()) {
6242     if (!Seen.contains(".amdhsa_accum_offset"))
6243       return TokError(".amdhsa_accum_offset directive is required");
6244     int64_t EvaluatedAccum;
6245     bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6246     uint64_t UEvaluatedAccum = EvaluatedAccum;
6247     if (AccumEvaluatable &&
6248         (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6249       return TokError("accum_offset should be in range [4..256] in "
6250                       "increments of 4");
6251 
6252     int64_t EvaluatedNumVGPR;
6253     if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6254         AccumEvaluatable &&
6255         UEvaluatedAccum >
6256             alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6257       return TokError("accum_offset exceeds total VGPR allocation");
6258     const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6259         MCBinaryExpr::createDiv(
6260             AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6261         MCConstantExpr::create(1, getContext()), getContext());
6262     MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
6263                                  COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6264                                  COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6265                                  getContext());
6266   }
6267 
6268   if (IVersion.Major >= 10 && IVersion.Major < 12) {
6269     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY.
6270     if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6271       return TokError("shared_vgpr_count directive not valid on "
6272                       "wavefront size 32");
6273     }
6274 
6275     if (VGPRBlocksEvaluatable &&
6276         (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6277          63)) {
6278       return TokError("shared_vgpr_count*2 + "
6279                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6280                       "exceed 63\n");
6281     }
6282   }
6283 
6284   getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6285                                                  NextFreeVGPR, NextFreeSGPR,
6286                                                  ReserveVCC, ReserveFlatScr);
6287   return false;
6288 }
6289 
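     /// Parse the .amdhsa_code_object_version directive. Illustrative usage
     /// (the version value is an example only; any absolute expression is
     /// accepted and forwarded to the target streamer):
     ///   .amdhsa_code_object_version 5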
6290 bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6291   uint32_t Version;
6292   if (ParseAsAbsoluteExpression(Version))
6293     return true;
6294 
6295   getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6296   return false;
6297 }
6298 
6299 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6300                                                AMDGPUMCKernelCodeT &C) {
6301   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6302   // assembly for backwards compatibility.
6303   if (ID == "max_scratch_backing_memory_byte_size") {
6304     Parser.eatToEndOfStatement();
6305     return false;
6306   }
6307 
6308   SmallString<40> ErrStr;
6309   raw_svector_ostream Err(ErrStr);
6310   if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6311     return TokError(Err.str());
6312   }
6313   Lex();
6314 
6315   if (ID == "enable_wavefront_size32") {
6316     if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6317       if (!isGFX10Plus())
6318         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6319       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6320         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6321     } else {
6322       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6323         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6324     }
6325   }
6326 
6327   if (ID == "wavefront_size") {
6328     if (C.wavefront_size == 5) {
6329       if (!isGFX10Plus())
6330         return TokError("wavefront_size=5 is only allowed on GFX10+");
6331       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6332         return TokError("wavefront_size=5 requires +WavefrontSize32");
6333     } else if (C.wavefront_size == 6) {
6334       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6335         return TokError("wavefront_size=6 requires +WavefrontSize64");
6336     }
6337   }
6338 
6339   return false;
6340 }
6341 
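     /// Parse a legacy kernel code block. Illustrative sketch (field values
     /// are examples; names are validated by ParseAMDKernelCodeTValue above):
     ///   .amd_kernel_code_t
     ///     enable_wavefront_size32 = 0
     ///     wavefront_size = 6
     ///   .end_amd_kernel_code_t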
6342 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6343   AMDGPUMCKernelCodeT KernelCode;
6344   KernelCode.initDefault(&getSTI(), getContext());
6345 
6346   while (true) {
6347     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
6348     // will set the current token to EndOfStatement.
6349     while (trySkipToken(AsmToken::EndOfStatement));
6350 
6351     StringRef ID;
6352     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6353       return true;
6354 
6355     if (ID == ".end_amd_kernel_code_t")
6356       break;
6357 
6358     if (ParseAMDKernelCodeTValue(ID, KernelCode))
6359       return true;
6360   }
6361 
6362   KernelCode.validate(&getSTI(), getContext());
6363   getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6364 
6365   return false;
6366 }
6367 
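     /// ParseDirectiveAMDGPUHsaKernel
     ///  ::= .amdgpu_hsa_kernel identifier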
6368 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6369   StringRef KernelName;
6370   if (!parseId(KernelName, "expected symbol name"))
6371     return true;
6372 
6373   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6374                                            ELF::STT_AMDGPU_HSA_KERNEL);
6375 
6376   KernelScope.initialize(getContext());
6377   return false;
6378 }
6379 
6380 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6381   if (!getSTI().getTargetTriple().isAMDGCN()) {
6382     return Error(getLoc(),
6383                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
6384                  "architectures");
6385   }
6386 
6387   auto TargetIDDirective = getLexer().getTok().getStringContents();
6388   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6389     return Error(getParser().getTok().getLoc(), "target id must match options");
6390 
6391   getTargetStreamer().EmitISAVersion();
6392   Lex();
6393 
6394   return false;
6395 }
6396 
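     /// Parse an HSA metadata block bracketed by the V3 begin/end directives.
     /// The body is collected verbatim and handed to the streamer. Sketch
     /// (directive spellings and the YAML key shown are illustrative only):
     ///   .amdgpu_metadata
     ///     amdhsa.version: [ 1, 2 ]
     ///   .end_amdgpu_metadata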
6397 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6398   assert(isHsaAbi(getSTI()));
6399 
6400   std::string HSAMetadataString;
6401   if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6402                           HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6403     return true;
6404 
6405   if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6406     return Error(getLoc(), "invalid HSA metadata");
6407 
6408   return false;
6409 }
6410 
6411 /// Common code to parse out a block of text (typically YAML) between start and
6412 /// end directives.
6413 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6414                                           const char *AssemblerDirectiveEnd,
6415                                           std::string &CollectString) {
6416 
6417   raw_string_ostream CollectStream(CollectString);
6418 
6419   getLexer().setSkipSpace(false);
6420 
6421   bool FoundEnd = false;
6422   while (!isToken(AsmToken::Eof)) {
6423     while (isToken(AsmToken::Space)) {
6424       CollectStream << getTokenStr();
6425       Lex();
6426     }
6427 
6428     if (trySkipId(AssemblerDirectiveEnd)) {
6429       FoundEnd = true;
6430       break;
6431     }
6432 
6433     CollectStream << Parser.parseStringToEndOfStatement()
6434                   << getContext().getAsmInfo()->getSeparatorString();
6435 
6436     Parser.eatToEndOfStatement();
6437   }
6438 
6439   getLexer().setSkipSpace(true);
6440 
6441   if (isToken(AsmToken::Eof) && !FoundEnd) {
6442     return TokError(Twine("expected directive ") +
6443                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6444   }
6445 
6446   return false;
6447 }
6448 
6449 /// Parse the assembler directive for new MsgPack-format PAL metadata.
6450 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6451   std::string String;
6452   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6453                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6454     return true;
6455 
6456   auto *PALMetadata = getTargetStreamer().getPALMetadata();
6457   if (!PALMetadata->setFromString(String))
6458     return Error(getLoc(), "invalid PAL metadata");
6459   return false;
6460 }
6461 
6462 /// Parse the assembler directive for old linear-format PAL metadata.
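     /// The payload is a comma-separated list of key/value register pairs, so
     /// an even number of values is required, e.g. (keys and values
     /// illustrative): .amd_amdgpu_pal_metadata 0x2c0a, 0x42000000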
6463 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6464   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6465     return Error(getLoc(),
6466                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6467                  "not available on non-amdpal OSes")).str());
6468   }
6469 
6470   auto *PALMetadata = getTargetStreamer().getPALMetadata();
6471   PALMetadata->setLegacy();
6472   for (;;) {
6473     uint32_t Key, Value;
6474     if (ParseAsAbsoluteExpression(Key)) {
6475       return TokError(Twine("invalid value in ") +
6476                       Twine(PALMD::AssemblerDirective));
6477     }
6478     if (!trySkipToken(AsmToken::Comma)) {
6479       return TokError(Twine("expected an even number of values in ") +
6480                       Twine(PALMD::AssemblerDirective));
6481     }
6482     if (ParseAsAbsoluteExpression(Value)) {
6483       return TokError(Twine("invalid value in ") +
6484                       Twine(PALMD::AssemblerDirective));
6485     }
6486     PALMetadata->setRegister(Key, Value);
6487     if (!trySkipToken(AsmToken::Comma))
6488       break;
6489   }
6490   return false;
6491 }
6492 
6493 /// ParseDirectiveAMDGPULDS
6494 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
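     ///  e.g. (symbol name and values illustrative):
     ///    .amdgpu_lds lds_sym, 512, 16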
6495 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6496   if (getParser().checkForValidSection())
6497     return true;
6498 
6499   StringRef Name;
6500   SMLoc NameLoc = getLoc();
6501   if (getParser().parseIdentifier(Name))
6502     return TokError("expected identifier in directive");
6503 
6504   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6505   if (getParser().parseComma())
6506     return true;
6507 
6508   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6509 
6510   int64_t Size;
6511   SMLoc SizeLoc = getLoc();
6512   if (getParser().parseAbsoluteExpression(Size))
6513     return true;
6514   if (Size < 0)
6515     return Error(SizeLoc, "size must be non-negative");
6516   if (Size > LocalMemorySize)
6517     return Error(SizeLoc, "size is too large");
6518 
6519   int64_t Alignment = 4;
6520   if (trySkipToken(AsmToken::Comma)) {
6521     SMLoc AlignLoc = getLoc();
6522     if (getParser().parseAbsoluteExpression(Alignment))
6523       return true;
6524     if (Alignment < 0 || !isPowerOf2_64(Alignment))
6525       return Error(AlignLoc, "alignment must be a power of two");
6526 
6527     // Alignment larger than the size of LDS is possible in theory, as long
6528     // as the linker manages to place the symbol at address 0, but we do want
6529     // to make sure the alignment fits nicely into a 32-bit integer.
6530     if (Alignment >= 1u << 31)
6531       return Error(AlignLoc, "alignment is too large");
6532   }
6533 
6534   if (parseEOL())
6535     return true;
6536 
6537   Symbol->redefineIfPossible();
6538   if (!Symbol->isUndefined())
6539     return Error(NameLoc, "invalid symbol redefinition");
6540 
6541   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6542   return false;
6543 }
6544 
6545 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6546   StringRef IDVal = DirectiveID.getString();
6547 
6548   if (isHsaAbi(getSTI())) {
6549     if (IDVal == ".amdhsa_kernel")
6550       return ParseDirectiveAMDHSAKernel();
6551 
6552     if (IDVal == ".amdhsa_code_object_version")
6553       return ParseDirectiveAMDHSACodeObjectVersion();
6554 
6555     // TODO: Restructure/combine with PAL metadata directive.
6556     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
6557       return ParseDirectiveHSAMetadata();
6558   } else {
6559     if (IDVal == ".amd_kernel_code_t")
6560       return ParseDirectiveAMDKernelCodeT();
6561 
6562     if (IDVal == ".amdgpu_hsa_kernel")
6563       return ParseDirectiveAMDGPUHsaKernel();
6564 
6565     if (IDVal == ".amd_amdgpu_isa")
6566       return ParseDirectiveISAVersion();
6567 
6568     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
6569       return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6570                               Twine(" directive is "
6571                                     "not available on non-amdhsa OSes"))
6572                                  .str());
6573     }
6574   }
6575 
6576   if (IDVal == ".amdgcn_target")
6577     return ParseDirectiveAMDGCNTarget();
6578 
6579   if (IDVal == ".amdgpu_lds")
6580     return ParseDirectiveAMDGPULDS();
6581 
6582   if (IDVal == PALMD::AssemblerDirectiveBegin)
6583     return ParseDirectivePALMetadataBegin();
6584 
6585   if (IDVal == PALMD::AssemblerDirective)
6586     return ParseDirectivePALMetadata();
6587 
6588   return true;
6589 }
6590 
6591 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6592                                            MCRegister Reg) {
6593   if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6594     return isGFX9Plus();
6595 
6596   // GFX10+ has 2 more SGPRs: 104 and 105.
6597   if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6598     return hasSGPR104_SGPR105();
6599 
6600   switch (Reg.id()) {
6601   case SRC_SHARED_BASE_LO:
6602   case SRC_SHARED_BASE:
6603   case SRC_SHARED_LIMIT_LO:
6604   case SRC_SHARED_LIMIT:
6605   case SRC_PRIVATE_BASE_LO:
6606   case SRC_PRIVATE_BASE:
6607   case SRC_PRIVATE_LIMIT_LO:
6608   case SRC_PRIVATE_LIMIT:
6609     return isGFX9Plus();
6610   case SRC_POPS_EXITING_WAVE_ID:
6611     return isGFX9Plus() && !isGFX11Plus();
6612   case TBA:
6613   case TBA_LO:
6614   case TBA_HI:
6615   case TMA:
6616   case TMA_LO:
6617   case TMA_HI:
6618     return !isGFX9Plus();
6619   case XNACK_MASK:
6620   case XNACK_MASK_LO:
6621   case XNACK_MASK_HI:
6622     return (isVI() || isGFX9()) &&
                getTargetStreamer().getTargetID()->isXnackSupported();
6623   case SGPR_NULL:
6624     return isGFX10Plus();
6625   case SRC_EXECZ:
6626   case SRC_VCCZ:
6627     return !isGFX11Plus();
6628   default:
6629     break;
6630   }
6631 
6632   if (isCI())
6633     return true;
6634 
6635   if (isSI() || isGFX10Plus()) {
6636     // No flat_scr on SI.
6637     // On GFX10Plus flat scratch is not a valid register operand and can only be
6638     // accessed with s_setreg/s_getreg.
6639     switch (Reg.id()) {
6640     case FLAT_SCR:
6641     case FLAT_SCR_LO:
6642     case FLAT_SCR_HI:
6643       return false;
6644     default:
6645       return true;
6646     }
6647   }
6648 
6649   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6650   // SI/CI have.
6651   if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6652     return hasSGPR102_SGPR103();
6653 
6654   return true;
6655 }
6656 
6657 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6658                                           StringRef Mnemonic,
6659                                           OperandMode Mode) {
6660   ParseStatus Res = parseVOPD(Operands);
6661   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6662     return Res;
6663 
6664   // Try to parse with a custom parser
6665   Res = MatchOperandParserImpl(Operands, Mnemonic);
6666 
6667   // If we successfully parsed the operand or if there was an error parsing,
6668   // we are done.
6669   //
6670   // If we are parsing after we reach EndOfStatement then this means we
6671   // are appending default values to the Operands list. This is only done
6672   // by the custom parser, so we shouldn't continue on to the generic parsing.
6673   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6674     return Res;
6675 
6676   SMLoc RBraceLoc;
6677   SMLoc LBraceLoc = getLoc();
6678   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6679     unsigned Prefix = Operands.size();
6680 
6681     for (;;) {
6682       auto Loc = getLoc();
6683       Res = parseReg(Operands);
6684       if (Res.isNoMatch())
6685         Error(Loc, "expected a register");
6686       if (!Res.isSuccess())
6687         return ParseStatus::Failure;
6688 
6689       RBraceLoc = getLoc();
6690       if (trySkipToken(AsmToken::RBrac))
6691         break;
6692 
6693       if (!skipToken(AsmToken::Comma,
6694                      "expected a comma or a closing square bracket"))
6695         return ParseStatus::Failure;
6696     }
6697 
6698     if (Operands.size() - Prefix > 1) {
6699       Operands.insert(Operands.begin() + Prefix,
6700                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6701       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6702     }
6703 
6704     return ParseStatus::Success;
6705   }
6706 
6707   return parseRegOrImm(Operands);
6708 }
6709 
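     // Strip any forced-encoding suffix from the mnemonic and record it, e.g.
     // (mnemonic illustrative): "v_add_f32_e64" is matched as "v_add_f32" with
     // a forced 64-bit encoding; "_dpp" and "_sdwa" force those variants.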
6710 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6711   // Clear any forced encodings from the previous instruction.
6712   setForcedEncodingSize(0);
6713   setForcedDPP(false);
6714   setForcedSDWA(false);
6715 
6716   if (Name.consume_back("_e64_dpp")) {
6717     setForcedDPP(true);
6718     setForcedEncodingSize(64);
6719     return Name;
6720   }
6721   if (Name.consume_back("_e64")) {
6722     setForcedEncodingSize(64);
6723     return Name;
6724   }
6725   if (Name.consume_back("_e32")) {
6726     setForcedEncodingSize(32);
6727     return Name;
6728   }
6729   if (Name.consume_back("_dpp")) {
6730     setForcedDPP(true);
6731     return Name;
6732   }
6733   if (Name.consume_back("_sdwa")) {
6734     setForcedSDWA(true);
6735     return Name;
6736   }
6737   return Name;
6738 }
6739 
6740 static void applyMnemonicAliases(StringRef &Mnemonic,
6741                                  const FeatureBitset &Features,
6742                                  unsigned VariantID);
6743 
6744 bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6745                                        StringRef Name, SMLoc NameLoc,
6746                                        OperandVector &Operands) {
6747   // Add the instruction mnemonic
6748   Name = parseMnemonicSuffix(Name);
6749 
6750   // If the target architecture uses MnemonicAlias, call it here to parse
6751   // operands correctly.
6752   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6753 
6754   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6755 
6756   bool IsMIMG = Name.starts_with("image_");
6757 
6758   while (!trySkipToken(AsmToken::EndOfStatement)) {
6759     OperandMode Mode = OperandMode_Default;
6760     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6761       Mode = OperandMode_NSA;
6762     ParseStatus Res = parseOperand(Operands, Name, Mode);
6763 
6764     if (!Res.isSuccess()) {
6765       checkUnsupportedInstruction(Name, NameLoc);
6766       if (!Parser.hasPendingError()) {
6767         // FIXME: use real operand location rather than the current location.
6768         StringRef Msg = Res.isFailure() ? "failed parsing operand."
6769                                         : "not a valid operand.";
6770         Error(getLoc(), Msg);
6771       }
6772       while (!trySkipToken(AsmToken::EndOfStatement)) {
6773         lex();
6774       }
6775       return true;
6776     }
6777 
6778     // Eat the comma or space if there is one.
6779     trySkipToken(AsmToken::Comma);
6780   }
6781 
6782   return false;
6783 }
6784 
6785 //===----------------------------------------------------------------------===//
6786 // Utility functions
6787 //===----------------------------------------------------------------------===//
6788 
6789 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6790                                           OperandVector &Operands) {
6791   SMLoc S = getLoc();
6792   if (!trySkipId(Name))
6793     return ParseStatus::NoMatch;
6794 
6795   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6796   return ParseStatus::Success;
6797 }
6798 
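     // Parse an integer operand written as "<prefix>:<expr>", e.g. (prefix and
     // value illustrative): offset:16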
6799 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6800                                                 int64_t &IntVal) {
6801 
6802   if (!trySkipId(Prefix, AsmToken::Colon))
6803     return ParseStatus::NoMatch;
6804 
6805   return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6806 }
6807 
6808 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6809     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6810     std::function<bool(int64_t &)> ConvertResult) {
6811   SMLoc S = getLoc();
6812   int64_t Value = 0;
6813 
6814   ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6815   if (!Res.isSuccess())
6816     return Res;
6817 
6818   if (ConvertResult && !ConvertResult(Value)) {
6819     Error(S, "invalid " + StringRef(Prefix) + " value.");
6820   }
6821 
6822   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6823   return ParseStatus::Success;
6824 }
6825 
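     // Parse an operand written as "<prefix>:[elt0,elt1,...]" where each
     // element must be 0 or 1 and at most 4 elements are accepted, e.g.
     // (illustrative): neg:[0,1,1]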
6826 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6827     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6828     bool (*ConvertResult)(int64_t &)) {
6829   SMLoc S = getLoc();
6830   if (!trySkipId(Prefix, AsmToken::Colon))
6831     return ParseStatus::NoMatch;
6832 
6833   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6834     return ParseStatus::Failure;
6835 
6836   unsigned Val = 0;
6837   const unsigned MaxSize = 4;
6838 
6839   // FIXME: How to verify the number of elements matches the number of src
6840   // operands?
6841   for (int I = 0; ; ++I) {
6842     int64_t Op;
6843     SMLoc Loc = getLoc();
6844     if (!parseExpr(Op))
6845       return ParseStatus::Failure;
6846 
6847     if (Op != 0 && Op != 1)
6848       return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6849 
6850     Val |= (Op << I);
6851 
6852     if (trySkipToken(AsmToken::RBrac))
6853       break;
6854 
6855     if (I + 1 == MaxSize)
6856       return Error(getLoc(), "expected a closing square bracket");
6857 
6858     if (!skipToken(AsmToken::Comma, "expected a comma"))
6859       return ParseStatus::Failure;
6860   }
6861 
6862   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6863   return ParseStatus::Success;
6864 }
6865 
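     // Parse a named bit, e.g. "r128" (sets the bit) or its negated form
     // "nor128" (clears it); if neither spelling is present, report NoMatch.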
6866 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6867                                            OperandVector &Operands,
6868                                            AMDGPUOperand::ImmTy ImmTy) {
6869   int64_t Bit;
6870   SMLoc S = getLoc();
6871 
6872   if (trySkipId(Name)) {
6873     Bit = 1;
6874   } else if (trySkipId("no", Name)) {
6875     Bit = 0;
6876   } else {
6877     return ParseStatus::NoMatch;
6878   }
6879 
6880   if (Name == "r128" && !hasMIMG_R128())
6881     return Error(S, "r128 modifier is not supported on this GPU");
6882   if (Name == "a16" && !hasA16())
6883     return Error(S, "a16 modifier is not supported on this GPU");
6884 
6885   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6886     ImmTy = AMDGPUOperand::ImmTyR128A16;
6887 
6888   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6889   return ParseStatus::Success;
6890 }
6891 
6892 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6893                                       bool &Disabling) const {
6894   Disabling = Id.consume_front("no");
6895 
6896   if (isGFX940() && !Mnemo.starts_with("s_")) {
6897     return StringSwitch<unsigned>(Id)
6898         .Case("nt", AMDGPU::CPol::NT)
6899         .Case("sc0", AMDGPU::CPol::SC0)
6900         .Case("sc1", AMDGPU::CPol::SC1)
6901         .Default(0);
6902   }
6903 
6904   return StringSwitch<unsigned>(Id)
6905       .Case("dlc", AMDGPU::CPol::DLC)
6906       .Case("glc", AMDGPU::CPol::GLC)
6907       .Case("scc", AMDGPU::CPol::SCC)
6908       .Case("slc", AMDGPU::CPol::SLC)
6909       .Default(0);
6910 }
6911 
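     // Parse the cache-policy operand. Before GFX12 this is a set of named
     // bits, e.g. (illustrative) "glc slc", each clearable with a "no" prefix
     // such as "noglc"; on GFX12+ it is the th:/scope: pair handled below.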
6912 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6913   if (isGFX12Plus()) {
6914     SMLoc StringLoc = getLoc();
6915 
6916     int64_t CPolVal = 0;
6917     ParseStatus ResTH = ParseStatus::NoMatch;
6918     ParseStatus ResScope = ParseStatus::NoMatch;
6919 
6920     for (;;) {
6921       if (ResTH.isNoMatch()) {
6922         int64_t TH;
6923         ResTH = parseTH(Operands, TH);
6924         if (ResTH.isFailure())
6925           return ResTH;
6926         if (ResTH.isSuccess()) {
6927           CPolVal |= TH;
6928           continue;
6929         }
6930       }
6931 
6932       if (ResScope.isNoMatch()) {
6933         int64_t Scope;
6934         ResScope = parseScope(Operands, Scope);
6935         if (ResScope.isFailure())
6936           return ResScope;
6937         if (ResScope.isSuccess()) {
6938           CPolVal |= Scope;
6939           continue;
6940         }
6941       }
6942 
6943       break;
6944     }
6945 
6946     if (ResTH.isNoMatch() && ResScope.isNoMatch())
6947       return ParseStatus::NoMatch;
6948 
6949     Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6950                                                 AMDGPUOperand::ImmTyCPol));
6951     return ParseStatus::Success;
6952   }
6953 
6954   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6955   SMLoc OpLoc = getLoc();
6956   unsigned Enabled = 0, Seen = 0;
6957   for (;;) {
6958     SMLoc S = getLoc();
6959     bool Disabling;
6960     unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6961     if (!CPol)
6962       break;
6963 
6964     lex();
6965 
6966     if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6967       return Error(S, "dlc modifier is not supported on this GPU");
6968 
6969     if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6970       return Error(S, "scc modifier is not supported on this GPU");
6971 
6972     if (Seen & CPol)
6973       return Error(S, "duplicate cache policy modifier");
6974 
6975     if (!Disabling)
6976       Enabled |= CPol;
6977 
6978     Seen |= CPol;
6979   }
6980 
6981   if (!Seen)
6982     return ParseStatus::NoMatch;
6983 
6984   Operands.push_back(
6985       AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6986   return ParseStatus::Success;
6987 }
6988 
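     // Parse the GFX12+ scope operand, e.g. (illustrative): scope:SCOPE_SE.
     // Symbolic names come from the list below; a bare integer index into
     // that list is also accepted.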
6989 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6990                                         int64_t &Scope) {
6991   static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
6992                                     CPol::SCOPE_DEV, CPol::SCOPE_SYS};
6993 
6994   ParseStatus Res = parseStringOrIntWithPrefix(
6995       Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
6996       Scope);
6997 
6998   if (Res.isSuccess())
6999     Scope = Scopes[Scope];
7000 
7001   return Res;
7002 }
7003 
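     // Parse the GFX12+ temporal-hint operand, e.g. (names assembled from the
     // prefix/suffix cases handled below): th:TH_LOAD_NT or
     // th:TH_ATOMIC_RETURN.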
7004 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7005   TH = AMDGPU::CPol::TH_RT; // default
7006 
7007   StringRef Value;
7008   SMLoc StringLoc;
7009   ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7010   if (!Res.isSuccess())
7011     return Res;
7012 
7013   if (Value == "TH_DEFAULT")
7014     TH = AMDGPU::CPol::TH_RT;
7015   else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7016            Value == "TH_LOAD_NT_WB") {
7017     return Error(StringLoc, "invalid th value");
7018   } else if (Value.consume_front("TH_ATOMIC_")) {
7019     TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7020   } else if (Value.consume_front("TH_LOAD_")) {
7021     TH = AMDGPU::CPol::TH_TYPE_LOAD;
7022   } else if (Value.consume_front("TH_STORE_")) {
7023     TH = AMDGPU::CPol::TH_TYPE_STORE;
7024   } else {
7025     return Error(StringLoc, "invalid th value");
7026   }
7027 
7028   if (Value == "BYPASS")
7029     TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7030 
7031   if (TH != 0) {
7032     if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
7033       TH |= StringSwitch<int64_t>(Value)
7034                 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7035                 .Case("RT", AMDGPU::CPol::TH_RT)
7036                 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7037                 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7038                 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7039                                        AMDGPU::CPol::TH_ATOMIC_RETURN)
7040                 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7041                 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7042                                         AMDGPU::CPol::TH_ATOMIC_NT)
7043                 .Default(0xffffffff);
7044     else
7045       TH |= StringSwitch<int64_t>(Value)
7046                 .Case("RT", AMDGPU::CPol::TH_RT)
7047                 .Case("NT", AMDGPU::CPol::TH_NT)
7048                 .Case("HT", AMDGPU::CPol::TH_HT)
7049                 .Case("LU", AMDGPU::CPol::TH_LU)
7050                 .Case("WB", AMDGPU::CPol::TH_WB)
7051                 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7052                 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7053                 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7054                 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7055                 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7056                 .Default(0xffffffff);
7057   }
7058 
7059   if (TH == 0xffffffff)
7060     return Error(StringLoc, "invalid th value");
7061 
7062   return ParseStatus::Success;
7063 }
7064 
7065 static void
7066 addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7067                       AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7068                       AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7069                       std::optional<unsigned> InsertAt = std::nullopt) {
7070   auto i = OptionalIdx.find(ImmT);
7071   if (i != OptionalIdx.end()) {
7072     unsigned Idx = i->second;
7073     const AMDGPUOperand &Op =
7074         static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7075     if (InsertAt)
7076       Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7077     else
7078       Op.addImmOperands(Inst, 1);
7079   } else {
7080     if (InsertAt.has_value())
7081       Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7082     else
7083       Inst.addOperand(MCOperand::createImm(Default));
7084   }
7085 }
7086 
7087 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7088                                                    StringRef &Value,
7089                                                    SMLoc &StringLoc) {
7090   if (!trySkipId(Prefix, AsmToken::Colon))
7091     return ParseStatus::NoMatch;
7092 
7093   StringLoc = getLoc();
7094   return parseId(Value, "expected an identifier") ? ParseStatus::Success
7095                                                   : ParseStatus::Failure;
7096 }
7097 
7098 ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
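     // Parse "<name>:<value>" where <value> is either one of the symbolic
     // names in Ids or an integer index into that list, e.g. (illustrative):
     // scope:SCOPE_CU or the equivalent scope:0.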
7099     OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7100     int64_t &IntVal) {
7101   if (!trySkipId(Name, AsmToken::Colon))
7102     return ParseStatus::NoMatch;
7103 
7104   SMLoc StringLoc = getLoc();
7105 
7106   StringRef Value;
7107   if (isToken(AsmToken::Identifier)) {
7108     Value = getTokenStr();
7109     lex();
7110 
7111     for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7112       if (Value == Ids[IntVal])
7113         break;
7114   } else if (!parseExpr(IntVal))
7115     return ParseStatus::Failure;
7116 
7117   if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7118     return Error(StringLoc, "invalid " + Twine(Name) + " value");
7119 
7120   return ParseStatus::Success;
7121 }
7122 
7123 ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7124     OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7125     AMDGPUOperand::ImmTy Type) {
7126   SMLoc S = getLoc();
7127   int64_t IntVal;
7128 
7129   ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7130   if (Res.isSuccess())
7131     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7132 
7133   return Res;
7134 }
7135 
7136 //===----------------------------------------------------------------------===//
7137 // MTBUF format
7138 //===----------------------------------------------------------------------===//
7139 
7140 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7141                                   int64_t MaxVal,
7142                                   int64_t &Fmt) {
7143   int64_t Val;
7144   SMLoc Loc = getLoc();
7145 
7146   auto Res = parseIntWithPrefix(Pref, Val);
7147   if (Res.isFailure())
7148     return false;
7149   if (Res.isNoMatch())
7150     return true;
7151 
7152   if (Val < 0 || Val > MaxVal) {
7153     Error(Loc, Twine("out of range ", StringRef(Pref)));
7154     return false;
7155   }
7156 
7157   Fmt = Val;
7158   return true;
7159 }
7160 
7161 ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7162                                               AMDGPUOperand::ImmTy ImmTy) {
7163   const char *Pref = "index_key";
7164   int64_t ImmVal = 0;
7165   SMLoc Loc = getLoc();
7166   auto Res = parseIntWithPrefix(Pref, ImmVal);
7167   if (!Res.isSuccess())
7168     return Res;
7169 
7170   if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7171        ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7172       (ImmVal < 0 || ImmVal > 1))
7173     return Error(Loc, Twine("out of range ", StringRef(Pref)));
7174 
7175   if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7176     return Error(Loc, Twine("out of range ", StringRef(Pref)));
7177 
7178   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7179   return ParseStatus::Success;
7180 }
7181 
7182 ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7183   return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7184 }
7185 
7186 ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7187   return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7188 }
7189 
7190 ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7191   return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7192 }
7193 
7194 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7195 // values to live in a joint format operand in the MCInst encoding.
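     // E.g. (values illustrative): "dfmt:4, nfmt:7", with either field
     // optional and the pair accepted in either order.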
7196 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7197   using namespace llvm::AMDGPU::MTBUFFormat;
7198 
7199   int64_t Dfmt = DFMT_UNDEF;
7200   int64_t Nfmt = NFMT_UNDEF;
7201 
7202   // dfmt and nfmt can appear in either order, and each is optional.
7203   for (int I = 0; I < 2; ++I) {
7204     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7205       return ParseStatus::Failure;
7206 
7207     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7208       return ParseStatus::Failure;
7209 
7210     // Skip optional comma between dfmt/nfmt
7211     // but guard against 2 commas following each other.
7212     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7213         !peekToken().is(AsmToken::Comma)) {
7214       trySkipToken(AsmToken::Comma);
7215     }
7216   }
7217 
7218   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7219     return ParseStatus::NoMatch;
7220 
7221   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7222   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7223 
7224   Format = encodeDfmtNfmt(Dfmt, Nfmt);
7225   return ParseStatus::Success;
7226 }
7227 
7228 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7229   using namespace llvm::AMDGPU::MTBUFFormat;
7230 
7231   int64_t Fmt = UFMT_UNDEF;
7232 
7233   if (!tryParseFmt("format", UFMT_MAX, Fmt))
7234     return ParseStatus::Failure;
7235 
7236   if (Fmt == UFMT_UNDEF)
7237     return ParseStatus::NoMatch;
7238 
7239   Format = Fmt;
7240   return ParseStatus::Success;
7241 }
7242 
7243 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7244                                     int64_t &Nfmt,
7245                                     StringRef FormatStr,
7246                                     SMLoc Loc) {
7247   using namespace llvm::AMDGPU::MTBUFFormat;
7248   int64_t Format;
7249 
7250   Format = getDfmt(FormatStr);
7251   if (Format != DFMT_UNDEF) {
7252     Dfmt = Format;
7253     return true;
7254   }
7255 
7256   Format = getNfmt(FormatStr, getSTI());
7257   if (Format != NFMT_UNDEF) {
7258     Nfmt = Format;
7259     return true;
7260   }
7261 
7262   Error(Loc, "unsupported format");
7263   return false;
7264 }
7265 
7266 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7267                                                       SMLoc FormatLoc,
7268                                                       int64_t &Format) {
7269   using namespace llvm::AMDGPU::MTBUFFormat;
7270 
7271   int64_t Dfmt = DFMT_UNDEF;
7272   int64_t Nfmt = NFMT_UNDEF;
7273   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7274     return ParseStatus::Failure;
7275 
7276   if (trySkipToken(AsmToken::Comma)) {
7277     StringRef Str;
7278     SMLoc Loc = getLoc();
7279     if (!parseId(Str, "expected a format string") ||
7280         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7281       return ParseStatus::Failure;
7282     if (Dfmt == DFMT_UNDEF)
7283       return Error(Loc, "duplicate numeric format");
7284     if (Nfmt == NFMT_UNDEF)
7285       return Error(Loc, "duplicate data format");
7286   }
7287 
7288   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7289   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7290 
7291   if (isGFX10Plus()) {
7292     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7293     if (Ufmt == UFMT_UNDEF)
7294       return Error(FormatLoc, "unsupported format");
7295     Format = Ufmt;
7296   } else {
7297     Format = encodeDfmtNfmt(Dfmt, Nfmt);
7298   }
7299 
7300   return ParseStatus::Success;
7301 }
7302 
7303 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7304                                                         SMLoc Loc,
7305                                                         int64_t &Format) {
7306   using namespace llvm::AMDGPU::MTBUFFormat;
7307 
7308   auto Id = getUnifiedFormat(FormatStr, getSTI());
7309   if (Id == UFMT_UNDEF)
7310     return ParseStatus::NoMatch;
7311 
7312   if (!isGFX10Plus())
7313     return Error(Loc, "unified format is not supported on this GPU");
7314 
7315   Format = Id;
7316   return ParseStatus::Success;
7317 }
7318 
7319 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7320   using namespace llvm::AMDGPU::MTBUFFormat;
7321   SMLoc Loc = getLoc();
7322 
7323   if (!parseExpr(Format))
7324     return ParseStatus::Failure;
7325   if (!isValidFormatEncoding(Format, getSTI()))
7326     return Error(Loc, "out of range format");
7327 
7328   return ParseStatus::Success;
7329 }
7330 
7331 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7332   using namespace llvm::AMDGPU::MTBUFFormat;
7333 
7334   if (!trySkipId("format", AsmToken::Colon))
7335     return ParseStatus::NoMatch;
7336 
7337   if (trySkipToken(AsmToken::LBrac)) {
7338     StringRef FormatStr;
7339     SMLoc Loc = getLoc();
7340     if (!parseId(FormatStr, "expected a format string"))
7341       return ParseStatus::Failure;
7342 
7343     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7344     if (Res.isNoMatch())
7345       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7346     if (!Res.isSuccess())
7347       return Res;
7348 
7349     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7350       return ParseStatus::Failure;
7351 
7352     return ParseStatus::Success;
7353   }
7354 
7355   return parseNumericFormat(Format);
7356 }
7357 
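     // Parse an MTBUF format operand in either position relative to soffset,
     // e.g. (values and names illustrative): numeric "format:22", or symbolic
     // "format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]" (pre-GFX10 split
     // form; GFX10+ uses unified format names).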
7358 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7359   using namespace llvm::AMDGPU::MTBUFFormat;
7360 
7361   int64_t Format = getDefaultFormatEncoding(getSTI());
7362   ParseStatus Res;
7363   SMLoc Loc = getLoc();
7364 
7365   // Parse legacy format syntax.
7366   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7367   if (Res.isFailure())
7368     return Res;
7369 
7370   bool FormatFound = Res.isSuccess();
7371 
7372   Operands.push_back(
7373     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7374 
7375   if (FormatFound)
7376     trySkipToken(AsmToken::Comma);
7377 
7378   if (isToken(AsmToken::EndOfStatement)) {
7379     // We are expecting an soffset operand,
7380     // but let the matcher handle the error.
7381     return ParseStatus::Success;
7382   }
7383 
7384   // Parse soffset.
7385   Res = parseRegOrImm(Operands);
7386   if (!Res.isSuccess())
7387     return Res;
7388 
7389   trySkipToken(AsmToken::Comma);
7390 
7391   if (!FormatFound) {
7392     Res = parseSymbolicOrNumericFormat(Format);
7393     if (Res.isFailure())
7394       return Res;
7395     if (Res.isSuccess()) {
7396       auto Size = Operands.size();
7397       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7398       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7399       Op.setImm(Format);
7400     }
7401     return ParseStatus::Success;
7402   }
7403 
7404   if (isId("format") && peekToken().is(AsmToken::Colon))
7405     return Error(getLoc(), "duplicate format");
7406   return ParseStatus::Success;
7407 }
7408 
7409 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7410   ParseStatus Res =
7411       parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7412   if (Res.isNoMatch()) {
7413     Res = parseIntWithPrefix("inst_offset", Operands,
7414                              AMDGPUOperand::ImmTyInstOffset);
7415   }
7416   return Res;
7417 }
7418 
7419 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7420   ParseStatus Res =
7421       parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7422   if (Res.isNoMatch())
7423     Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7424   return Res;
7425 }
7426 
7427 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7428   ParseStatus Res =
7429       parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7430   if (Res.isNoMatch()) {
7431     Res =
7432         parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7433   }
7434   return Res;
7435 }
7436 
7437 //===----------------------------------------------------------------------===//
7438 // Exp
7439 //===----------------------------------------------------------------------===//
7440 
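     // Convert a parsed export instruction to an MCInst, e.g. (registers and
     // target illustrative): "exp mrt0 v0, v1, off, off done". Sources given
     // as "off" become null registers and clear their enable-mask bits.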
7441 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7442   OptionalImmIndexMap OptionalIdx;
7443 
7444   unsigned OperandIdx[4];
7445   unsigned EnMask = 0;
7446   int SrcIdx = 0;
7447 
7448   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7449     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7450 
7451     // Add the register arguments
7452     if (Op.isReg()) {
7453       assert(SrcIdx < 4);
7454       OperandIdx[SrcIdx] = Inst.size();
7455       Op.addRegOperands(Inst, 1);
7456       ++SrcIdx;
7457       continue;
7458     }
7459 
7460     if (Op.isOff()) {
7461       assert(SrcIdx < 4);
7462       OperandIdx[SrcIdx] = Inst.size();
7463       Inst.addOperand(MCOperand::createReg(MCRegister()));
7464       ++SrcIdx;
7465       continue;
7466     }
7467 
7468     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7469       Op.addImmOperands(Inst, 1);
7470       continue;
7471     }
7472 
7473     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7474       continue;
7475 
7476     // Handle optional arguments
7477     OptionalIdx[Op.getImmTy()] = i;
7478   }
7479 
7480   assert(SrcIdx == 4);
7481 
7482   bool Compr = false;
7483   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7484     Compr = true;
7485     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7486     Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7487     Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7488   }
7489 
7490   for (auto i = 0; i < SrcIdx; ++i) {
7491     if (Inst.getOperand(OperandIdx[i]).getReg()) {
7492       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7493     }
7494   }
7495 
7496   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7497   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7498 
7499   Inst.addOperand(MCOperand::createImm(EnMask));
7500 }
7501 
7502 //===----------------------------------------------------------------------===//
7503 // s_waitcnt
7504 //===----------------------------------------------------------------------===//
7505 
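// Illustrative note (assumed syntax, not from the upstream source): the legacy
// waitcnt syntax parsed in this section looks like
//   s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)
// Counter groups may be separated by '&' or ','. A "_sat" suffix (e.g.
// vmcnt_sat) clamps an out-of-range value to its maximum instead of failing.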
7506 static bool
7507 encodeCnt(
7508   const AMDGPU::IsaVersion ISA,
7509   int64_t &IntVal,
7510   int64_t CntVal,
7511   bool Saturate,
7512   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7513   unsigned (*decode)(const IsaVersion &Version, unsigned))
7514 {
7515   bool Failed = false;
7516 
7517   IntVal = encode(ISA, IntVal, CntVal);
7518   if (CntVal != decode(ISA, IntVal)) {
7519     if (Saturate) {
7520       IntVal = encode(ISA, IntVal, -1);
7521     } else {
7522       Failed = true;
7523     }
7524   }
7525   return Failed;
7526 }
7527 
7528 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7529 
7530   SMLoc CntLoc = getLoc();
7531   StringRef CntName = getTokenStr();
7532 
7533   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7534       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7535     return false;
7536 
7537   int64_t CntVal;
7538   SMLoc ValLoc = getLoc();
7539   if (!parseExpr(CntVal))
7540     return false;
7541 
7542   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7543 
7544   bool Failed = true;
7545   bool Sat = CntName.ends_with("_sat");
7546 
7547   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7548     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7549   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7550     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7551   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7552     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7553   } else {
7554     Error(CntLoc, "invalid counter name " + CntName);
7555     return false;
7556   }
7557 
7558   if (Failed) {
7559     Error(ValLoc, "value too large for " + CntName);
7560     return false;
7561   }
7562 
7563   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7564     return false;
7565 
7566   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7567     if (isToken(AsmToken::EndOfStatement)) {
7568       Error(getLoc(), "expected a counter name");
7569       return false;
7570     }
7571   }
7572 
7573   return true;
7574 }
7575 
7576 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7577   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7578   int64_t Waitcnt = getWaitcntBitMask(ISA);
7579   SMLoc S = getLoc();
7580 
7581   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7582     while (!isToken(AsmToken::EndOfStatement)) {
7583       if (!parseCnt(Waitcnt))
7584         return ParseStatus::Failure;
7585     }
7586   } else {
7587     if (!parseExpr(Waitcnt))
7588       return ParseStatus::Failure;
7589   }
7590 
7591   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7592   return ParseStatus::Success;
7593 }
7594 
7595 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7596   SMLoc FieldLoc = getLoc();
7597   StringRef FieldName = getTokenStr();
7598   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7599       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7600     return false;
7601 
7602   SMLoc ValueLoc = getLoc();
7603   StringRef ValueName = getTokenStr();
7604   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7605       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7606     return false;
7607 
7608   unsigned Shift;
7609   if (FieldName == "instid0") {
7610     Shift = 0;
7611   } else if (FieldName == "instskip") {
7612     Shift = 4;
7613   } else if (FieldName == "instid1") {
7614     Shift = 7;
7615   } else {
7616     Error(FieldLoc, "invalid field name " + FieldName);
7617     return false;
7618   }
7619 
7620   int Value;
7621   if (Shift == 4) {
7622     // Parse values for instskip.
7623     Value = StringSwitch<int>(ValueName)
7624                 .Case("SAME", 0)
7625                 .Case("NEXT", 1)
7626                 .Case("SKIP_1", 2)
7627                 .Case("SKIP_2", 3)
7628                 .Case("SKIP_3", 4)
7629                 .Case("SKIP_4", 5)
7630                 .Default(-1);
7631   } else {
7632     // Parse values for instid0 and instid1.
7633     Value = StringSwitch<int>(ValueName)
7634                 .Case("NO_DEP", 0)
7635                 .Case("VALU_DEP_1", 1)
7636                 .Case("VALU_DEP_2", 2)
7637                 .Case("VALU_DEP_3", 3)
7638                 .Case("VALU_DEP_4", 4)
7639                 .Case("TRANS32_DEP_1", 5)
7640                 .Case("TRANS32_DEP_2", 6)
7641                 .Case("TRANS32_DEP_3", 7)
7642                 .Case("FMA_ACCUM_CYCLE_1", 8)
7643                 .Case("SALU_CYCLE_1", 9)
7644                 .Case("SALU_CYCLE_2", 10)
7645                 .Case("SALU_CYCLE_3", 11)
7646                 .Default(-1);
7647   }
7648   if (Value < 0) {
7649     Error(ValueLoc, "invalid value name " + ValueName);
7650     return false;
7651   }
7652 
7653   Delay |= Value << Shift;
7654   return true;
7655 }
7656 
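// Illustrative note (assumed syntax, not from the upstream source): each
// parseDelay() call consumes one "<field>(<value>)" group of an s_delay_alu
// operand, e.g.
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
// instid0 occupies bits [3:0], instskip bits [6:4] and instid1 bits [10:7].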
7657 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7658   int64_t Delay = 0;
7659   SMLoc S = getLoc();
7660 
7661   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7662     do {
7663       if (!parseDelay(Delay))
7664         return ParseStatus::Failure;
7665     } while (trySkipToken(AsmToken::Pipe));
7666   } else {
7667     if (!parseExpr(Delay))
7668       return ParseStatus::Failure;
7669   }
7670 
7671   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7672   return ParseStatus::Success;
7673 }
7674 
7675 bool
7676 AMDGPUOperand::isSWaitCnt() const {
7677   return isImm();
7678 }
7679 
7680 bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7681 
7682 //===----------------------------------------------------------------------===//
7683 // DepCtr
7684 //===----------------------------------------------------------------------===//
7685 
7686 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7687                                   StringRef DepCtrName) {
7688   switch (ErrorId) {
7689   case OPR_ID_UNKNOWN:
7690     Error(Loc, Twine("invalid counter name ", DepCtrName));
7691     return;
7692   case OPR_ID_UNSUPPORTED:
7693     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7694     return;
7695   case OPR_ID_DUPLICATE:
7696     Error(Loc, Twine("duplicate counter name ", DepCtrName));
7697     return;
7698   case OPR_VAL_INVALID:
7699     Error(Loc, Twine("invalid value for ", DepCtrName));
7700     return;
7701   default:
7702     assert(false);
7703   }
7704 }
7705 
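// Illustrative note (assumed field names, not from the upstream source): each
// parseDepCtr() call consumes one "<name>(<value>)" group of an
// s_waitcnt_depctr operand, e.g.
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_vm_vsrc(0)
// Groups may be separated by '&' or ','; a given counter may appear only once.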
7706 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7707 
7708   using namespace llvm::AMDGPU::DepCtr;
7709 
7710   SMLoc DepCtrLoc = getLoc();
7711   StringRef DepCtrName = getTokenStr();
7712 
7713   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7714       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7715     return false;
7716 
7717   int64_t ExprVal;
7718   if (!parseExpr(ExprVal))
7719     return false;
7720 
7721   unsigned PrevOprMask = UsedOprMask;
7722   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7723 
7724   if (CntVal < 0) {
7725     depCtrError(DepCtrLoc, CntVal, DepCtrName);
7726     return false;
7727   }
7728 
7729   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7730     return false;
7731 
7732   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7733     if (isToken(AsmToken::EndOfStatement)) {
7734       Error(getLoc(), "expected a counter name");
7735       return false;
7736     }
7737   }
7738 
7739   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7740   DepCtr = (DepCtr & ~CntValMask) | CntVal;
7741   return true;
7742 }
7743 
7744 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7745   using namespace llvm::AMDGPU::DepCtr;
7746 
7747   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7748   SMLoc Loc = getLoc();
7749 
7750   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7751     unsigned UsedOprMask = 0;
7752     while (!isToken(AsmToken::EndOfStatement)) {
7753       if (!parseDepCtr(DepCtr, UsedOprMask))
7754         return ParseStatus::Failure;
7755     }
7756   } else {
7757     if (!parseExpr(DepCtr))
7758       return ParseStatus::Failure;
7759   }
7760 
7761   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7762   return ParseStatus::Success;
7763 }
7764 
7765 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7766 
7767 //===----------------------------------------------------------------------===//
7768 // hwreg
7769 //===----------------------------------------------------------------------===//
7770 
7771 ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7772                                             OperandInfoTy &Offset,
7773                                             OperandInfoTy &Width) {
7774   using namespace llvm::AMDGPU::Hwreg;
7775 
7776   if (!trySkipId("hwreg", AsmToken::LParen))
7777     return ParseStatus::NoMatch;
7778 
7779   // The register may be specified by name or using a numeric code
7780   HwReg.Loc = getLoc();
7781   if (isToken(AsmToken::Identifier) &&
7782       (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7783     HwReg.IsSymbolic = true;
7784     lex(); // skip register name
7785   } else if (!parseExpr(HwReg.Val, "a register name")) {
7786     return ParseStatus::Failure;
7787   }
7788 
7789   if (trySkipToken(AsmToken::RParen))
7790     return ParseStatus::Success;
7791 
7792   // Parse optional params.
7793   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7794     return ParseStatus::Failure;
7795 
7796   Offset.Loc = getLoc();
7797   if (!parseExpr(Offset.Val))
7798     return ParseStatus::Failure;
7799 
7800   if (!skipToken(AsmToken::Comma, "expected a comma"))
7801     return ParseStatus::Failure;
7802 
7803   Width.Loc = getLoc();
7804   if (!parseExpr(Width.Val) ||
7805       !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7806     return ParseStatus::Failure;
7807 
7808   return ParseStatus::Success;
7809 }
7810 
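// Illustrative examples (assumed syntax, not from the upstream source):
// parseHwreg() accepts the hwreg macro, a structured immediate, or a plain
// 16-bit immediate, e.g.
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32}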
7811 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7812   using namespace llvm::AMDGPU::Hwreg;
7813 
7814   int64_t ImmVal = 0;
7815   SMLoc Loc = getLoc();
7816 
7817   StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7818                           HwregId::Default);
7819   StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7820                            HwregOffset::Default);
7821   struct : StructuredOpField {
7822     using StructuredOpField::StructuredOpField;
7823     bool validate(AMDGPUAsmParser &Parser) const override {
7824       if (!isUIntN(Width, Val - 1))
7825         return Error(Parser, "only values from 1 to 32 are legal");
7826       return true;
7827     }
7828   } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7829   ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7830 
7831   if (Res.isNoMatch())
7832     Res = parseHwregFunc(HwReg, Offset, Width);
7833 
7834   if (Res.isSuccess()) {
7835     if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7836       return ParseStatus::Failure;
7837     ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7838   }
7839 
7840   if (Res.isNoMatch() &&
7841       parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7842     Res = ParseStatus::Success;
7843 
7844   if (!Res.isSuccess())
7845     return ParseStatus::Failure;
7846 
7847   if (!isUInt<16>(ImmVal))
7848     return Error(Loc, "invalid immediate: only 16-bit values are legal");
7849   Operands.push_back(
7850       AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7851   return ParseStatus::Success;
7852 }
7853 
7854 bool AMDGPUOperand::isHwreg() const {
7855   return isImmTy(ImmTyHwreg);
7856 }
7857 
7858 //===----------------------------------------------------------------------===//
7859 // sendmsg
7860 //===----------------------------------------------------------------------===//
7861 
7862 bool
7863 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7864                                   OperandInfoTy &Op,
7865                                   OperandInfoTy &Stream) {
7866   using namespace llvm::AMDGPU::SendMsg;
7867 
7868   Msg.Loc = getLoc();
7869   if (isToken(AsmToken::Identifier) &&
7870       (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7871     Msg.IsSymbolic = true;
7872     lex(); // skip message name
7873   } else if (!parseExpr(Msg.Val, "a message name")) {
7874     return false;
7875   }
7876 
7877   if (trySkipToken(AsmToken::Comma)) {
7878     Op.IsDefined = true;
7879     Op.Loc = getLoc();
7880     if (isToken(AsmToken::Identifier) &&
7881         (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7882             OPR_ID_UNKNOWN) {
7883       lex(); // skip operation name
7884     } else if (!parseExpr(Op.Val, "an operation name")) {
7885       return false;
7886     }
7887 
7888     if (trySkipToken(AsmToken::Comma)) {
7889       Stream.IsDefined = true;
7890       Stream.Loc = getLoc();
7891       if (!parseExpr(Stream.Val))
7892         return false;
7893     }
7894   }
7895 
7896   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7897 }
7898 
7899 bool
7900 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7901                                  const OperandInfoTy &Op,
7902                                  const OperandInfoTy &Stream) {
7903   using namespace llvm::AMDGPU::SendMsg;
7904 
7905   // Validation strictness depends on whether the message is specified
7906   // in a symbolic or in a numeric form. In the latter case,
7907   // only the possibility of encoding is checked.
7908   bool Strict = Msg.IsSymbolic;
7909 
7910   if (Strict) {
7911     if (Msg.Val == OPR_ID_UNSUPPORTED) {
7912       Error(Msg.Loc, "specified message id is not supported on this GPU");
7913       return false;
7914     }
7915   } else {
7916     if (!isValidMsgId(Msg.Val, getSTI())) {
7917       Error(Msg.Loc, "invalid message id");
7918       return false;
7919     }
7920   }
7921   if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7922     if (Op.IsDefined) {
7923       Error(Op.Loc, "message does not support operations");
7924     } else {
7925       Error(Msg.Loc, "missing message operation");
7926     }
7927     return false;
7928   }
7929   if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7930     if (Op.Val == OPR_ID_UNSUPPORTED)
7931       Error(Op.Loc, "specified operation id is not supported on this GPU");
7932     else
7933       Error(Op.Loc, "invalid operation id");
7934     return false;
7935   }
7936   if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7937       Stream.IsDefined) {
7938     Error(Stream.Loc, "message operation does not support streams");
7939     return false;
7940   }
7941   if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7942     Error(Stream.Loc, "invalid message stream id");
7943     return false;
7944   }
7945   return true;
7946 }
7947 
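// Illustrative example (assumed syntax, not from the upstream source):
// parseSendMsg() accepts the sendmsg macro, with an optional operation and
// stream id, or a plain 16-bit immediate, e.g.
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)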
7948 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7949   using namespace llvm::AMDGPU::SendMsg;
7950 
7951   int64_t ImmVal = 0;
7952   SMLoc Loc = getLoc();
7953 
7954   if (trySkipId("sendmsg", AsmToken::LParen)) {
7955     OperandInfoTy Msg(OPR_ID_UNKNOWN);
7956     OperandInfoTy Op(OP_NONE_);
7957     OperandInfoTy Stream(STREAM_ID_NONE_);
7958     if (parseSendMsgBody(Msg, Op, Stream) &&
7959         validateSendMsg(Msg, Op, Stream)) {
7960       ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7961     } else {
7962       return ParseStatus::Failure;
7963     }
7964   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7965     if (ImmVal < 0 || !isUInt<16>(ImmVal))
7966       return Error(Loc, "invalid immediate: only 16-bit values are legal");
7967   } else {
7968     return ParseStatus::Failure;
7969   }
7970 
7971   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7972   return ParseStatus::Success;
7973 }
7974 
7975 bool AMDGPUOperand::isSendMsg() const {
7976   return isImmTy(ImmTySendMsg);
7977 }
7978 
7979 //===----------------------------------------------------------------------===//
7980 // v_interp
7981 //===----------------------------------------------------------------------===//
7982 
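// Illustrative example (assumed syntax, not from the upstream source):
//   v_interp_p1_f32 v0, v1, attr0.x
// parseInterpSlot() handles the p10/p20/p0 slot operand and parseInterpAttr()
// handles "attr<N>.<chan>" operands, where <chan> is one of x, y, z or w.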
7983 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7984   StringRef Str;
7985   SMLoc S = getLoc();
7986 
7987   if (!parseId(Str))
7988     return ParseStatus::NoMatch;
7989 
7990   int Slot = StringSwitch<int>(Str)
7991     .Case("p10", 0)
7992     .Case("p20", 1)
7993     .Case("p0", 2)
7994     .Default(-1);
7995 
7996   if (Slot == -1)
7997     return Error(S, "invalid interpolation slot");
7998 
7999   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8000                                               AMDGPUOperand::ImmTyInterpSlot));
8001   return ParseStatus::Success;
8002 }
8003 
8004 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8005   StringRef Str;
8006   SMLoc S = getLoc();
8007 
8008   if (!parseId(Str))
8009     return ParseStatus::NoMatch;
8010 
8011   if (!Str.starts_with("attr"))
8012     return Error(S, "invalid interpolation attribute");
8013 
8014   StringRef Chan = Str.take_back(2);
8015   int AttrChan = StringSwitch<int>(Chan)
8016     .Case(".x", 0)
8017     .Case(".y", 1)
8018     .Case(".z", 2)
8019     .Case(".w", 3)
8020     .Default(-1);
8021   if (AttrChan == -1)
8022     return Error(S, "invalid or missing interpolation attribute channel");
8023 
8024   Str = Str.drop_back(2).drop_front(4);
8025 
8026   uint8_t Attr;
8027   if (Str.getAsInteger(10, Attr))
8028     return Error(S, "invalid or missing interpolation attribute number");
8029 
8030   if (Attr > 32)
8031     return Error(S, "out of bounds interpolation attribute number");
8032 
8033   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8034 
8035   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8036                                               AMDGPUOperand::ImmTyInterpAttr));
8037   Operands.push_back(AMDGPUOperand::CreateImm(
8038       this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8039   return ParseStatus::Success;
8040 }
8041 
8042 //===----------------------------------------------------------------------===//
8043 // exp
8044 //===----------------------------------------------------------------------===//
8045 
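// Illustrative example (assumed syntax, not from the upstream source):
//   exp mrt0 v0, v1, v2, v3 done
// parseExpTgt() maps a target name such as mrt0, mrtz, null, pos0 or param0
// to its numeric id and rejects targets unsupported on the current GPU.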
8046 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8047   using namespace llvm::AMDGPU::Exp;
8048 
8049   StringRef Str;
8050   SMLoc S = getLoc();
8051 
8052   if (!parseId(Str))
8053     return ParseStatus::NoMatch;
8054 
8055   unsigned Id = getTgtId(Str);
8056   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8057     return Error(S, (Id == ET_INVALID)
8058                         ? "invalid exp target"
8059                         : "exp target is not supported on this GPU");
8060 
8061   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8062                                               AMDGPUOperand::ImmTyExpTgt));
8063   return ParseStatus::Success;
8064 }
8065 
8066 //===----------------------------------------------------------------------===//
8067 // parser helpers
8068 //===----------------------------------------------------------------------===//
8069 
8070 bool
8071 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8072   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8073 }
8074 
8075 bool
8076 AMDGPUAsmParser::isId(const StringRef Id) const {
8077   return isId(getToken(), Id);
8078 }
8079 
8080 bool
8081 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8082   return getTokenKind() == Kind;
8083 }
8084 
8085 StringRef AMDGPUAsmParser::getId() const {
8086   return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8087 }
8088 
8089 bool
8090 AMDGPUAsmParser::trySkipId(const StringRef Id) {
8091   if (isId(Id)) {
8092     lex();
8093     return true;
8094   }
8095   return false;
8096 }
8097 
8098 bool
8099 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8100   if (isToken(AsmToken::Identifier)) {
8101     StringRef Tok = getTokenStr();
8102     if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8103       lex();
8104       return true;
8105     }
8106   }
8107   return false;
8108 }
8109 
8110 bool
8111 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8112   if (isId(Id) && peekToken().is(Kind)) {
8113     lex();
8114     lex();
8115     return true;
8116   }
8117   return false;
8118 }
8119 
8120 bool
8121 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8122   if (isToken(Kind)) {
8123     lex();
8124     return true;
8125   }
8126   return false;
8127 }
8128 
8129 bool
8130 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8131                            const StringRef ErrMsg) {
8132   if (!trySkipToken(Kind)) {
8133     Error(getLoc(), ErrMsg);
8134     return false;
8135   }
8136   return true;
8137 }
8138 
8139 bool
8140 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8141   SMLoc S = getLoc();
8142 
8143   const MCExpr *Expr;
8144   if (Parser.parseExpression(Expr))
8145     return false;
8146 
8147   if (Expr->evaluateAsAbsolute(Imm))
8148     return true;
8149 
8150   if (Expected.empty()) {
8151     Error(S, "expected absolute expression");
8152   } else {
8153     Error(S, Twine("expected ", Expected) +
8154              Twine(" or an absolute expression"));
8155   }
8156   return false;
8157 }
8158 
8159 bool
8160 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8161   SMLoc S = getLoc();
8162 
8163   const MCExpr *Expr;
8164   if (Parser.parseExpression(Expr))
8165     return false;
8166 
8167   int64_t IntVal;
8168   if (Expr->evaluateAsAbsolute(IntVal)) {
8169     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8170   } else {
8171     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8172   }
8173   return true;
8174 }
8175 
8176 bool
8177 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8178   if (isToken(AsmToken::String)) {
8179     Val = getToken().getStringContents();
8180     lex();
8181     return true;
8182   }
8183   Error(getLoc(), ErrMsg);
8184   return false;
8185 }
8186 
8187 bool
8188 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8189   if (isToken(AsmToken::Identifier)) {
8190     Val = getTokenStr();
8191     lex();
8192     return true;
8193   }
8194   if (!ErrMsg.empty())
8195     Error(getLoc(), ErrMsg);
8196   return false;
8197 }
8198 
8199 AsmToken
8200 AMDGPUAsmParser::getToken() const {
8201   return Parser.getTok();
8202 }
8203 
8204 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8205   return isToken(AsmToken::EndOfStatement)
8206              ? getToken()
8207              : getLexer().peekTok(ShouldSkipSpace);
8208 }
8209 
8210 void
8211 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8212   auto TokCount = getLexer().peekTokens(Tokens);
8213 
8214   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8215     Tokens[Idx] = AsmToken(AsmToken::Error, "");
8216 }
8217 
8218 AsmToken::TokenKind
8219 AMDGPUAsmParser::getTokenKind() const {
8220   return getLexer().getKind();
8221 }
8222 
8223 SMLoc
8224 AMDGPUAsmParser::getLoc() const {
8225   return getToken().getLoc();
8226 }
8227 
8228 StringRef
8229 AMDGPUAsmParser::getTokenStr() const {
8230   return getToken().getString();
8231 }
8232 
8233 void
8234 AMDGPUAsmParser::lex() {
8235   Parser.Lex();
8236 }
8237 
8238 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8239   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8240 }
8241 
8242 SMLoc
8243 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8244                                const OperandVector &Operands) const {
8245   for (unsigned i = Operands.size() - 1; i > 0; --i) {
8246     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8247     if (Test(Op))
8248       return Op.getStartLoc();
8249   }
8250   return getInstLoc(Operands);
8251 }
8252 
8253 SMLoc
8254 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8255                            const OperandVector &Operands) const {
8256   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8257   return getOperandLoc(Test, Operands);
8258 }
8259 
8260 SMLoc AMDGPUAsmParser::getRegLoc(MCRegister Reg,
8261                                  const OperandVector &Operands) const {
8262   auto Test = [=](const AMDGPUOperand& Op) {
8263     return Op.isRegKind() && Op.getReg() == Reg;
8264   };
8265   return getOperandLoc(Test, Operands);
8266 }
8267 
8268 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
8269                                  bool SearchMandatoryLiterals) const {
8270   auto Test = [](const AMDGPUOperand& Op) {
8271     return Op.IsImmKindLiteral() || Op.isExpr();
8272   };
8273   SMLoc Loc = getOperandLoc(Test, Operands);
8274   if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
8275     Loc = getMandatoryLitLoc(Operands);
8276   return Loc;
8277 }
8278 
8279 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
8280   auto Test = [](const AMDGPUOperand &Op) {
8281     return Op.IsImmKindMandatoryLiteral();
8282   };
8283   return getOperandLoc(Test, Operands);
8284 }
8285 
8286 SMLoc
8287 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
8288   auto Test = [](const AMDGPUOperand& Op) {
8289     return Op.isImmKindConst();
8290   };
8291   return getOperandLoc(Test, Operands);
8292 }
8293 
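// Illustrative note (not from the upstream source): parseStructuredOpFields()
// consumes a brace-enclosed, comma-separated list of "name: value" pairs,
// e.g. {id: 1, offset: 0, size: 32}; the set of legal field names is supplied
// by the caller.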
8294 ParseStatus
8295 AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8296   if (!trySkipToken(AsmToken::LCurly))
8297     return ParseStatus::NoMatch;
8298 
8299   bool First = true;
8300   while (!trySkipToken(AsmToken::RCurly)) {
8301     if (!First &&
8302         !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8303       return ParseStatus::Failure;
8304 
8305     StringRef Id = getTokenStr();
8306     SMLoc IdLoc = getLoc();
8307     if (!skipToken(AsmToken::Identifier, "field name expected") ||
8308         !skipToken(AsmToken::Colon, "colon expected"))
8309       return ParseStatus::Failure;
8310 
8311     const auto *I =
8312         find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8313     if (I == Fields.end())
8314       return Error(IdLoc, "unknown field");
8315     if ((*I)->IsDefined)
8316       return Error(IdLoc, "duplicate field");
8317 
8318     // TODO: Support symbolic values.
8319     (*I)->Loc = getLoc();
8320     if (!parseExpr((*I)->Val))
8321       return ParseStatus::Failure;
8322     (*I)->IsDefined = true;
8323 
8324     First = false;
8325   }
8326   return ParseStatus::Success;
8327 }
8328 
8329 bool AMDGPUAsmParser::validateStructuredOpFields(
8330     ArrayRef<const StructuredOpField *> Fields) {
8331   return all_of(Fields, [this](const StructuredOpField *F) {
8332     return F->validate(*this);
8333   });
8334 }
8335 
8336 //===----------------------------------------------------------------------===//
8337 // swizzle
8338 //===----------------------------------------------------------------------===//
8339 
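// Illustrative examples (assumed syntax, not from the upstream source):
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BITMASK_PERM, "01pi0")
// All swizzle macros below are encoded into the instruction's 16-bit offset.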
8340 LLVM_READNONE
8341 static unsigned
8342 encodeBitmaskPerm(const unsigned AndMask,
8343                   const unsigned OrMask,
8344                   const unsigned XorMask) {
8345   using namespace llvm::AMDGPU::Swizzle;
8346 
8347   return BITMASK_PERM_ENC |
8348          (AndMask << BITMASK_AND_SHIFT) |
8349          (OrMask  << BITMASK_OR_SHIFT)  |
8350          (XorMask << BITMASK_XOR_SHIFT);
8351 }
8352 
8353 bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8354                                           const unsigned MaxVal,
8355                                           const Twine &ErrMsg, SMLoc &Loc) {
8356   if (!skipToken(AsmToken::Comma, "expected a comma")) {
8357     return false;
8358   }
8359   Loc = getLoc();
8360   if (!parseExpr(Op)) {
8361     return false;
8362   }
8363   if (Op < MinVal || Op > MaxVal) {
8364     Error(Loc, ErrMsg);
8365     return false;
8366   }
8367 
8368   return true;
8369 }
8370 
8371 bool
8372 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8373                                       const unsigned MinVal,
8374                                       const unsigned MaxVal,
8375                                       const StringRef ErrMsg) {
8376   SMLoc Loc;
8377   for (unsigned i = 0; i < OpNum; ++i) {
8378     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8379       return false;
8380   }
8381 
8382   return true;
8383 }
8384 
8385 bool
8386 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8387   using namespace llvm::AMDGPU::Swizzle;
8388 
8389   int64_t Lane[LANE_NUM];
8390   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8391                            "expected a 2-bit lane id")) {
8392     Imm = QUAD_PERM_ENC;
8393     for (unsigned I = 0; I < LANE_NUM; ++I) {
8394       Imm |= Lane[I] << (LANE_SHIFT * I);
8395     }
8396     return true;
8397   }
8398   return false;
8399 }
8400 
8401 bool
8402 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8403   using namespace llvm::AMDGPU::Swizzle;
8404 
8405   SMLoc Loc;
8406   int64_t GroupSize;
8407   int64_t LaneIdx;
8408 
8409   if (!parseSwizzleOperand(GroupSize,
8410                            2, 32,
8411                            "group size must be in the interval [2,32]",
8412                            Loc)) {
8413     return false;
8414   }
8415   if (!isPowerOf2_64(GroupSize)) {
8416     Error(Loc, "group size must be a power of two");
8417     return false;
8418   }
8419   if (parseSwizzleOperand(LaneIdx,
8420                           0, GroupSize - 1,
8421                           "lane id must be in the interval [0,group size - 1]",
8422                           Loc)) {
8423     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8424     return true;
8425   }
8426   return false;
8427 }
8428 
8429 bool
8430 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8431   using namespace llvm::AMDGPU::Swizzle;
8432 
8433   SMLoc Loc;
8434   int64_t GroupSize;
8435 
8436   if (!parseSwizzleOperand(GroupSize,
8437                            2, 32,
8438                            "group size must be in the interval [2,32]",
8439                            Loc)) {
8440     return false;
8441   }
8442   if (!isPowerOf2_64(GroupSize)) {
8443     Error(Loc, "group size must be a power of two");
8444     return false;
8445   }
8446 
8447   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8448   return true;
8449 }
8450 
8451 bool
8452 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8453   using namespace llvm::AMDGPU::Swizzle;
8454 
8455   SMLoc Loc;
8456   int64_t GroupSize;
8457 
8458   if (!parseSwizzleOperand(GroupSize,
8459                            1, 16,
8460                            "group size must be in the interval [1,16]",
8461                            Loc)) {
8462     return false;
8463   }
8464   if (!isPowerOf2_64(GroupSize)) {
8465     Error(Loc, "group size must be a power of two");
8466     return false;
8467   }
8468 
8469   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8470   return true;
8471 }
8472 
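// Illustrative note (derived from the switch below, not from the upstream
// source): the 5-character control string is processed MSB first, one
// character per lane-id bit: '0' forces the bit to 0, '1' forces it to 1,
// 'p' preserves it, and 'i' inverts it. E.g. "00pp1" forces the two high
// bits to 0, preserves the next two, and forces the lowest bit to 1.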
8473 bool
8474 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8475   using namespace llvm::AMDGPU::Swizzle;
8476 
8477   if (!skipToken(AsmToken::Comma, "expected a comma")) {
8478     return false;
8479   }
8480 
8481   StringRef Ctl;
8482   SMLoc StrLoc = getLoc();
8483   if (!parseString(Ctl)) {
8484     return false;
8485   }
8486   if (Ctl.size() != BITMASK_WIDTH) {
8487     Error(StrLoc, "expected a 5-character mask");
8488     return false;
8489   }
8490 
8491   unsigned AndMask = 0;
8492   unsigned OrMask = 0;
8493   unsigned XorMask = 0;
8494 
8495   for (size_t i = 0; i < Ctl.size(); ++i) {
8496     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8497     switch(Ctl[i]) {
8498     default:
8499       Error(StrLoc, "invalid mask");
8500       return false;
8501     case '0':
8502       break;
8503     case '1':
8504       OrMask |= Mask;
8505       break;
8506     case 'p':
8507       AndMask |= Mask;
8508       break;
8509     case 'i':
8510       AndMask |= Mask;
8511       XorMask |= Mask;
8512       break;
8513     }
8514   }
8515 
8516   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8517   return true;
8518 }
8519 
8520 bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8521   using namespace llvm::AMDGPU::Swizzle;
8522 
8523   if (!AMDGPU::isGFX9Plus(getSTI())) {
8524     Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8525     return false;
8526   }
8527 
8528   int64_t Swizzle;
8529   SMLoc Loc;
8530   if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8531                            "FFT swizzle must be in the interval [0," +
8532                                Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8533                            Loc))
8534     return false;
8535 
8536   Imm = FFT_MODE_ENC | Swizzle;
8537   return true;
8538 }
8539 
8540 bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8541   using namespace llvm::AMDGPU::Swizzle;
8542 
8543   if (!AMDGPU::isGFX9Plus(getSTI())) {
8544     Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8545     return false;
8546   }
8547 
8548   SMLoc Loc;
8549   int64_t Direction;
8550 
8551   if (!parseSwizzleOperand(Direction, 0, 1,
8552                            "direction must be 0 (left) or 1 (right)", Loc))
8553     return false;
8554 
8555   int64_t RotateSize;
8556   if (!parseSwizzleOperand(
8557           RotateSize, 0, ROTATE_MAX_SIZE,
8558           "number of threads to rotate must be in the interval [0," +
8559               Twine(ROTATE_MAX_SIZE) + Twine(']'),
8560           Loc))
8561     return false;
8562 
8563   Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8564         (RotateSize << ROTATE_SIZE_SHIFT);
8565   return true;
8566 }
8567 
8568 bool
8569 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8570 
8571   SMLoc OffsetLoc = getLoc();
8572 
8573   if (!parseExpr(Imm, "a swizzle macro")) {
8574     return false;
8575   }
8576   if (!isUInt<16>(Imm)) {
8577     Error(OffsetLoc, "expected a 16-bit offset");
8578     return false;
8579   }
8580   return true;
8581 }
8582 
8583 bool
8584 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8585   using namespace llvm::AMDGPU::Swizzle;
8586 
8587   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8588 
8589     SMLoc ModeLoc = getLoc();
8590     bool Ok = false;
8591 
8592     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8593       Ok = parseSwizzleQuadPerm(Imm);
8594     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8595       Ok = parseSwizzleBitmaskPerm(Imm);
8596     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8597       Ok = parseSwizzleBroadcast(Imm);
8598     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8599       Ok = parseSwizzleSwap(Imm);
8600     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8601       Ok = parseSwizzleReverse(Imm);
8602     } else if (trySkipId(IdSymbolic[ID_FFT])) {
8603       Ok = parseSwizzleFFT(Imm);
8604     } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8605       Ok = parseSwizzleRotate(Imm);
8606     } else {
8607       Error(ModeLoc, "expected a swizzle mode");
8608     }
8609 
8610     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8611   }
8612 
8613   return false;
8614 }
8615 
8616 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8617   SMLoc S = getLoc();
8618   int64_t Imm = 0;
8619 
8620   if (trySkipId("offset")) {
8621 
8622     bool Ok = false;
8623     if (skipToken(AsmToken::Colon, "expected a colon")) {
8624       if (trySkipId("swizzle")) {
8625         Ok = parseSwizzleMacro(Imm);
8626       } else {
8627         Ok = parseSwizzleOffset(Imm);
8628       }
8629     }
8630 
8631     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8632 
8633     return Ok ? ParseStatus::Success : ParseStatus::Failure;
8634   }
8635   return ParseStatus::NoMatch;
8636 }
8637 
8638 bool
8639 AMDGPUOperand::isSwizzle() const {
8640   return isImmTy(ImmTySwizzle);
8641 }
8642 
8643 //===----------------------------------------------------------------------===//
8644 // VGPR Index Mode
8645 //===----------------------------------------------------------------------===//
8646 
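// Illustrative example (assumed syntax, not from the upstream source):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// parseGPRIdxMacro() ORs together the named modes; a bare 4-bit immediate
// is also accepted.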
8647 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8648 
8649   using namespace llvm::AMDGPU::VGPRIndexMode;
8650 
8651   if (trySkipToken(AsmToken::RParen)) {
8652     return OFF;
8653   }
8654 
8655   int64_t Imm = 0;
8656 
8657   while (true) {
8658     unsigned Mode = 0;
8659     SMLoc S = getLoc();
8660 
8661     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8662       if (trySkipId(IdSymbolic[ModeId])) {
8663         Mode = 1 << ModeId;
8664         break;
8665       }
8666     }
8667 
8668     if (Mode == 0) {
8669       Error(S, (Imm == 0)?
8670                "expected a VGPR index mode or a closing parenthesis" :
8671                "expected a VGPR index mode");
8672       return UNDEF;
8673     }
8674 
8675     if (Imm & Mode) {
8676       Error(S, "duplicate VGPR index mode");
8677       return UNDEF;
8678     }
8679     Imm |= Mode;
8680 
8681     if (trySkipToken(AsmToken::RParen))
8682       break;
8683     if (!skipToken(AsmToken::Comma,
8684                    "expected a comma or a closing parenthesis"))
8685       return UNDEF;
8686   }
8687 
8688   return Imm;
8689 }
8690 
8691 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8692 
8693   using namespace llvm::AMDGPU::VGPRIndexMode;
8694 
8695   int64_t Imm = 0;
8696   SMLoc S = getLoc();
8697 
8698   if (trySkipId("gpr_idx", AsmToken::LParen)) {
8699     Imm = parseGPRIdxMacro();
8700     if (Imm == UNDEF)
8701       return ParseStatus::Failure;
8702   } else {
8703     if (getParser().parseAbsoluteExpression(Imm))
8704       return ParseStatus::Failure;
8705     if (Imm < 0 || !isUInt<4>(Imm))
8706       return Error(S, "invalid immediate: only 4-bit values are legal");
8707   }
8708 
8709   Operands.push_back(
8710       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8711   return ParseStatus::Success;
8712 }
8713 
8714 bool AMDGPUOperand::isGPRIdxMode() const {
8715   return isImmTy(ImmTyGprIdxMode);
8716 }
8717 
8718 //===----------------------------------------------------------------------===//
8719 // sopp branch targets
8720 //===----------------------------------------------------------------------===//
8721 
8722 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8723 
8724   // Make sure we are not parsing something
8725   // that looks like a label or an expression but is not.
8726   // This will improve error messages.
8727   if (isRegister() || isModifier())
8728     return ParseStatus::NoMatch;
8729 
8730   if (!parseExpr(Operands))
8731     return ParseStatus::Failure;
8732 
8733   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8734   assert(Opr.isImm() || Opr.isExpr());
8735   SMLoc Loc = Opr.getStartLoc();
8736 
8737   // Currently we do not support arbitrary expressions as branch targets.
8738   // Only labels and absolute expressions are accepted.
8739   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8740     Error(Loc, "expected an absolute expression or a label");
8741   } else if (Opr.isImm() && !Opr.isS16Imm()) {
8742     Error(Loc, "expected a 16-bit signed jump offset");
8743   }
8744 
8745   return ParseStatus::Success;
8746 }
8747 
8748 //===----------------------------------------------------------------------===//
8749 // Boolean holding registers
8750 //===----------------------------------------------------------------------===//
8751 
8752 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8753   return parseReg(Operands);
8754 }
8755 
8756 //===----------------------------------------------------------------------===//
8757 // mubuf
8758 //===----------------------------------------------------------------------===//
8759 
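// Illustrative example (assumed syntax, not from the upstream source):
//   buffer_load_dword v0, v1, s[4:7], s0 offen offset:16
// cvtMubufImpl() emits register and immediate operands in MCInst order; for
// atomic-with-return opcodes it inserts a tied source for the destination.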
8760 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8761                                    const OperandVector &Operands,
8762                                    bool IsAtomic) {
8763   OptionalImmIndexMap OptionalIdx;
8764   unsigned FirstOperandIdx = 1;
8765   bool IsAtomicReturn = false;
8766 
8767   if (IsAtomic) {
8768     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
8769                       SIInstrFlags::IsAtomicRet;
8770   }
8771 
8772   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8773     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8774 
8775     // Add the register arguments
8776     if (Op.isReg()) {
8777       Op.addRegOperands(Inst, 1);
8778       // Insert a tied src for atomic return dst.
8779       // This cannot be postponed as subsequent calls to
8780       // addImmOperands rely on the correct number of MC operands.
8781       if (IsAtomicReturn && i == FirstOperandIdx)
8782         Op.addRegOperands(Inst, 1);
8783       continue;
8784     }
8785 
8786     // Handle the case where soffset is an immediate
8787     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8788       Op.addImmOperands(Inst, 1);
8789       continue;
8790     }
8791 
8792     // Handle tokens like 'offen' which are sometimes hard-coded into the
8793     // asm string.  There are no MCInst operands for these.
8794     if (Op.isToken()) {
8795       continue;
8796     }
8797     assert(Op.isImm());
8798 
8799     // Handle optional arguments
8800     OptionalIdx[Op.getImmTy()] = i;
8801   }
8802 
8803   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8804   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8805 }
8806 
8807 //===----------------------------------------------------------------------===//
8808 // smrd
8809 //===----------------------------------------------------------------------===//
8810 
8811 bool AMDGPUOperand::isSMRDOffset8() const {
8812   return isImmLiteral() && isUInt<8>(getImm());
8813 }
8814 
8815 bool AMDGPUOperand::isSMEMOffset() const {
8816   // Offset range is checked later by validator.
8817   return isImmLiteral();
8818 }
8819 
8820 bool AMDGPUOperand::isSMRDLiteralOffset() const {
8821   // 32-bit literals are only supported on CI and we only want to use them
8822   // when the offset is > 8 bits.
8823   return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8824 }
8825 
8826 //===----------------------------------------------------------------------===//
8827 // vop3
8828 //===----------------------------------------------------------------------===//
8829 
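// Output-modifier (omod) conversions. The mapping implemented below is:
// mul:1 -> 0, mul:2 -> 1, mul:4 -> 2, and div:2 -> 3 (div:1, like mul:1,
// denotes the identity encoding 0).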
8830 static bool ConvertOmodMul(int64_t &Mul) {
8831   if (Mul != 1 && Mul != 2 && Mul != 4)
8832     return false;
8833 
8834   Mul >>= 1;
8835   return true;
8836 }
8837 
8838 static bool ConvertOmodDiv(int64_t &Div) {
8839   if (Div == 1) {
8840     Div = 0;
8841     return true;
8842   }
8843 
8844   if (Div == 2) {
8845     Div = 3;
8846     return true;
8847   }
8848 
8849   return false;
8850 }
8851 
8852 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8853 // This is intentional and ensures compatibility with sp3.
8854 // See bug 35397 for details.
8855 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8856   if (BoundCtrl == 0 || BoundCtrl == 1) {
8857     if (!isGFX11Plus())
8858       BoundCtrl = 1;
8859     return true;
8860   }
8861   return false;
8862 }
8863 
8864 void AMDGPUAsmParser::onBeginOfFile() {
8865   if (!getParser().getStreamer().getTargetStreamer() ||
8866       getSTI().getTargetTriple().getArch() == Triple::r600)
8867     return;
8868 
8869   if (!getTargetStreamer().getTargetID())
8870     getTargetStreamer().initializeTargetID(getSTI(),
8871                                            getSTI().getFeatureString());
8872 
8873   if (isHsaAbi(getSTI()))
8874     getTargetStreamer().EmitDirectiveAMDGCNTarget();
8875 }
8876 
8877 /// Parse AMDGPU specific expressions.
8878 ///
8879 ///  expr ::= or(expr, ...) |
8880 ///           max(expr, ...)
8881 ///
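///  e.g. (illustrative): max(sym_a, sym_b, 16), where each argument may be
///  an arbitrary sub-expression.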
8882 bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8883   using AGVK = AMDGPUMCExpr::VariantKind;
8884 
8885   if (isToken(AsmToken::Identifier)) {
8886     StringRef TokenId = getTokenStr();
8887     AGVK VK = StringSwitch<AGVK>(TokenId)
8888                   .Case("max", AGVK::AGVK_Max)
8889                   .Case("or", AGVK::AGVK_Or)
8890                   .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
8891                   .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
8892                   .Case("alignto", AGVK::AGVK_AlignTo)
8893                   .Case("occupancy", AGVK::AGVK_Occupancy)
8894                   .Default(AGVK::AGVK_None);
8895 
8896     if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8897       SmallVector<const MCExpr *, 4> Exprs;
8898       uint64_t CommaCount = 0;
8899       lex(); // Eat the identifier ('or', 'max', 'occupancy', etc.).
8900       lex(); // Eat '('
8901       while (true) {
8902         if (trySkipToken(AsmToken::RParen)) {
8903           if (Exprs.empty()) {
8904             Error(getToken().getLoc(),
8905                   "empty " + Twine(TokenId) + " expression");
8906             return true;
8907           }
8908           if (CommaCount + 1 != Exprs.size()) {
8909             Error(getToken().getLoc(),
8910                   "mismatch of commas in " + Twine(TokenId) + " expression");
8911             return true;
8912           }
8913           Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
8914           return false;
8915         }
8916         const MCExpr *Expr;
8917         if (getParser().parseExpression(Expr, EndLoc))
8918           return true;
8919         Exprs.push_back(Expr);
8920         bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8921         if (LastTokenWasComma)
8922           CommaCount++;
8923         if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8924           Error(getToken().getLoc(),
8925                 "unexpected token in " + Twine(TokenId) + " expression");
8926           return true;
8927         }
8928       }
8929     }
8930   }
8931   return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8932 }
8933 
8934 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8935   StringRef Name = getTokenStr();
8936   if (Name == "mul") {
8937     return parseIntWithPrefix("mul", Operands,
8938                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8939   }
8940 
8941   if (Name == "div") {
8942     return parseIntWithPrefix("div", Operands,
8943                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8944   }
8945 
8946   return ParseStatus::NoMatch;
8947 }
8948 
8949 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8950 // the number of src operands present, then copies that bit into src0_modifiers.
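// E.g. (illustrative) for a two-source instruction, DST_OP_SEL is bit 2 of
// the op_sel immediate.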
8951 static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8952   int Opc = Inst.getOpcode();
8953   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8954   if (OpSelIdx == -1)
8955     return;
8956 
8957   int SrcNum;
8958   const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
8959                                 AMDGPU::OpName::src2};
8960   for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8961        ++SrcNum)
8962     ;
8963   assert(SrcNum > 0);
8964 
8965   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8966 
8967   int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8968   if (DstIdx == -1)
8969     return;
8970 
8971   const MCOperand &DstOp = Inst.getOperand(DstIdx);
8972   int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8973   uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8974   if (DstOp.isReg() &&
8975       MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8976     if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
8977       ModVal |= SISrcMods::DST_OP_SEL;
8978   } else {
8979     if ((OpSel & (1 << SrcNum)) != 0)
8980       ModVal |= SISrcMods::DST_OP_SEL;
8981   }
8982   Inst.getOperand(ModIdx).setImm(ModVal);
8983 }
8984 
8985 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8986                                    const OperandVector &Operands) {
8987   cvtVOP3P(Inst, Operands);
8988   cvtVOP3DstOpSelOnly(Inst, *getMRI());
8989 }
8990 
8991 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8992                                    OptionalImmIndexMap &OptionalIdx) {
8993   cvtVOP3P(Inst, Operands, OptionalIdx);
8994   cvtVOP3DstOpSelOnly(Inst, *getMRI());
8995 }
8996 
8997 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8998   return
8999       // 1. This operand is an input modifiers operand
9000       Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9001       // 2. This is not the last operand
9002       && Desc.NumOperands > (OpNum + 1)
9003       // 3. The next operand is a register class
9004       && Desc.operands()[OpNum + 1].RegClass != -1
9005       // 4. The next operand is not tied to any other operand
9006       && Desc.getOperandConstraint(OpNum + 1,
9007                                    MCOI::OperandConstraint::TIED_TO) == -1;
9008 }
9009 
9010 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9011 {
9012   OptionalImmIndexMap OptionalIdx;
9013   unsigned Opc = Inst.getOpcode();
9014 
9015   unsigned I = 1;
9016   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9017   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9018     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9019   }
9020 
9021   for (unsigned E = Operands.size(); I != E; ++I) {
9022     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9023     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9024       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9025     } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9026                Op.isInterpAttrChan()) {
9027       Inst.addOperand(MCOperand::createImm(Op.getImm()));
9028     } else if (Op.isImmModifier()) {
9029       OptionalIdx[Op.getImmTy()] = I;
9030     } else {
9031       llvm_unreachable("unhandled operand type");
9032     }
9033   }
9034 
9035   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9036     addOptionalImmOperand(Inst, Operands, OptionalIdx,
9037                           AMDGPUOperand::ImmTyHigh);
9038 
9039   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9040     addOptionalImmOperand(Inst, Operands, OptionalIdx,
9041                           AMDGPUOperand::ImmTyClamp);
9042 
9043   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9044     addOptionalImmOperand(Inst, Operands, OptionalIdx,
9045                           AMDGPUOperand::ImmTyOModSI);
9046 }
9047 
9048 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9049 {
9050   OptionalImmIndexMap OptionalIdx;
9051   unsigned Opc = Inst.getOpcode();
9052 
9053   unsigned I = 1;
9054   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9055   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9056     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9057   }
9058 
9059   for (unsigned E = Operands.size(); I != E; ++I) {
9060     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9061     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9062       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9063     } else if (Op.isImmModifier()) {
9064       OptionalIdx[Op.getImmTy()] = I;
9065     } else {
9066       llvm_unreachable("unhandled operand type");
9067     }
9068   }
9069 
9070   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9071 
9072   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9073   if (OpSelIdx != -1)
9074     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9075 
9076   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9077 
9078   if (OpSelIdx == -1)
9079     return;
9080 
9081   const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9082                                 AMDGPU::OpName::src2};
9083   const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9084                                    AMDGPU::OpName::src1_modifiers,
9085                                    AMDGPU::OpName::src2_modifiers};
9086 
9087   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9088 
9089   for (int J = 0; J < 3; ++J) {
9090     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9091     if (OpIdx == -1)
9092       break;
9093 
9094     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9095     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9096 
9097     if ((OpSel & (1 << J)) != 0)
9098       ModVal |= SISrcMods::OP_SEL_0;
9099     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
9100         (OpSel & (1 << 3)) != 0)
9101       ModVal |= SISrcMods::DST_OP_SEL;
9102 
9103     Inst.getOperand(ModIdx).setImm(ModVal);
9104   }
9105 }
9106 void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9107                                     const OperandVector &Operands) {
9108   OptionalImmIndexMap OptionalIdx;
9109   unsigned Opc = Inst.getOpcode();
9110   unsigned I = 1;
9111   int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9112 
9113   const MCInstrDesc &Desc = MII.get(Opc);
9114 
9115   for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9116     static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9117 
9118   for (unsigned E = Operands.size(); I != E; ++I) {
9119     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9120     int NumOperands = Inst.getNumOperands();
9121     // The order of operands in the MCInst differs from the parsed order.
9122     // Add dummy cbsz and blgp operands at the corresponding MCInst operand
9123     // indices so that scale values are parsed correctly.
9124     if (NumOperands == CbszOpIdx) {
9125       Inst.addOperand(MCOperand::createImm(0));
9126       Inst.addOperand(MCOperand::createImm(0));
9127     }
9128     if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9129       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9130     } else if (Op.isImmModifier()) {
9131       OptionalIdx[Op.getImmTy()] = I;
9132     } else {
9133       Op.addRegOrImmOperands(Inst, 1);
9134     }
9135   }
9136 
9137   // Insert CBSZ and BLGP operands for F8F6F4 variants
9138   auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9139   if (CbszIdx != OptionalIdx.end()) {
9140     int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9141     Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9142   }
9143 
9144   int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9145   auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9146   if (BlgpIdx != OptionalIdx.end()) {
9147     int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9148     Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9149   }
9150 
9151   // Add dummy src_modifiers
9152   Inst.addOperand(MCOperand::createImm(0));
9153   Inst.addOperand(MCOperand::createImm(0));
9154 
9155   // Handle op_sel fields
9156 
9157   unsigned OpSel = 0;
9158   auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9159   if (OpselIdx != OptionalIdx.end()) {
9160     OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9161                 .getImm();
9162   }
9163 
9164   unsigned OpSelHi = 0;
9165   auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9166   if (OpselHiIdx != OptionalIdx.end()) {
9167     OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9168                   .getImm();
9169   }
9170   const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9171                                    AMDGPU::OpName::src1_modifiers};
9172 
9173   for (unsigned J = 0; J < 2; ++J) {
9174     unsigned ModVal = 0;
9175     if (OpSel & (1 << J))
9176       ModVal |= SISrcMods::OP_SEL_0;
9177     if (OpSelHi & (1 << J))
9178       ModVal |= SISrcMods::OP_SEL_1;
9179 
9180     const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9181     Inst.getOperand(ModIdx).setImm(ModVal);
9182   }
9183 }
9184 
9185 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9186                               OptionalImmIndexMap &OptionalIdx) {
9187   unsigned Opc = Inst.getOpcode();
9188 
9189   unsigned I = 1;
9190   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9191   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9192     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9193   }
9194 
9195   for (unsigned E = Operands.size(); I != E; ++I) {
9196     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9197     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9198       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9199     } else if (Op.isImmModifier()) {
9200       OptionalIdx[Op.getImmTy()] = I;
9201     } else {
9202       Op.addRegOrImmOperands(Inst, 1);
9203     }
9204   }
9205 
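  // For byte_sel instructions, add the tied vdst_in copy of the dst register
  // (when present) before the optional byte_sel immediate.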
9206   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9207     if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9208       Inst.addOperand(Inst.getOperand(0));
9209     addOptionalImmOperand(Inst, Operands, OptionalIdx,
9210                           AMDGPUOperand::ImmTyByteSel);
9211   }
9212 
9213   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9214     addOptionalImmOperand(Inst, Operands, OptionalIdx,
9215                           AMDGPUOperand::ImmTyClamp);
9216 
9217   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9218     addOptionalImmOperand(Inst, Operands, OptionalIdx,
9219                           AMDGPUOperand::ImmTyOModSI);
9220 
9221   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9222   // they have a src2 register operand that is tied to the dst operand.
9223   // The assembler does not allow modifiers on this operand, so
9224   // src2_modifiers must be 0.
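  // e.g. "v_mac_f32 v0, v1, v2" encodes src2 as another use of v0.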
9225   if (isMAC(Opc)) {
9226     auto *it = Inst.begin();
9227     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9228     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9229     ++it;
9230     // Copy the operand to ensure it's not invalidated when Inst grows.
9231     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9232   }
9233 }
9234 
9235 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9236   OptionalImmIndexMap OptionalIdx;
9237   cvtVOP3(Inst, Operands, OptionalIdx);
9238 }
9239 
9240 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9241                                OptionalImmIndexMap &OptIdx) {
9242   const int Opc = Inst.getOpcode();
9243   const MCInstrDesc &Desc = MII.get(Opc);
9244 
9245   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9246 
9247   if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9248       Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9249       Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9250       Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9251       Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9252       Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9253     Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9254     Inst.addOperand(Inst.getOperand(0));
9255   }
9256 
9257   // Adding the vdst_in operand is already handled for these DPP
9258   // instructions in cvtVOP3DPP.
9259   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9260       !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9261         Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9262         Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9263         Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9264         Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9265         Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9266         Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9267         Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9268         Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9269         Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9270         Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9271         Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
9272     Inst.addOperand(Inst.getOperand(0));
9273   }
9274 
9275   int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9276   if (BitOp3Idx != -1) {
9277     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9278   }
9279 
9280   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
9281   // instruction, and then figure out where to actually put the modifiers.
9282 
9283   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9284   if (OpSelIdx != -1) {
9285     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9286   }
9287 
9288   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9289   if (OpSelHiIdx != -1) {
9290     int DefaultVal = IsPacked ? -1 : 0;
9291     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9292                           DefaultVal);
9293   }
9294 
9295   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9296     addOptionalImmOperand(Inst, Operands, OptIdx,
9297                           AMDGPUOperand::ImmTyMatrixAReuse, 0);
9298 
9299   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9300     addOptionalImmOperand(Inst, Operands, OptIdx,
9301                           AMDGPUOperand::ImmTyMatrixBReuse, 0);
9302 
9303   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9304   if (NegLoIdx != -1)
9305     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9306 
9307   int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9308   if (NegHiIdx != -1)
9309     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9310 
9311   const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9312                                 AMDGPU::OpName::src2};
9313   const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9314                                    AMDGPU::OpName::src1_modifiers,
9315                                    AMDGPU::OpName::src2_modifiers};
9316 
9317   unsigned OpSel = 0;
9318   unsigned OpSelHi = 0;
9319   unsigned NegLo = 0;
9320   unsigned NegHi = 0;
9321 
9322   if (OpSelIdx != -1)
9323     OpSel = Inst.getOperand(OpSelIdx).getImm();
9324 
9325   if (OpSelHiIdx != -1)
9326     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9327 
9328   if (NegLoIdx != -1)
9329     NegLo = Inst.getOperand(NegLoIdx).getImm();
9330 
9331   if (NegHiIdx != -1)
9332     NegHi = Inst.getOperand(NegHiIdx).getImm();
9333 
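  // Fold bit J of op_sel/op_sel_hi/neg_lo/neg_hi into the corresponding
  // srcJ_modifiers operand. If a source is a true 16-bit VGPR, its hi/lo
  // half comes from the register itself rather than the parsed op_sel bit.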
9334   for (int J = 0; J < 3; ++J) {
9335     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9336     if (OpIdx == -1)
9337       break;
9338 
9339     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9340 
9341     if (ModIdx == -1)
9342       continue;
9343 
9344     uint32_t ModVal = 0;
9345 
9346     const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9347     if (SrcOp.isReg() && getMRI()
9348                              ->getRegClass(AMDGPU::VGPR_16RegClassID)
9349                              .contains(SrcOp.getReg())) {
9350       bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9351       if (VGPRSuffixIsHi)
9352         ModVal |= SISrcMods::OP_SEL_0;
9353     } else {
9354       if ((OpSel & (1 << J)) != 0)
9355         ModVal |= SISrcMods::OP_SEL_0;
9356     }
9357 
9358     if ((OpSelHi & (1 << J)) != 0)
9359       ModVal |= SISrcMods::OP_SEL_1;
9360 
9361     if ((NegLo & (1 << J)) != 0)
9362       ModVal |= SISrcMods::NEG;
9363 
9364     if ((NegHi & (1 << J)) != 0)
9365       ModVal |= SISrcMods::NEG_HI;
9366 
9367     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9368   }
9369 }
9370 
9371 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9372   OptionalImmIndexMap OptIdx;
9373   cvtVOP3(Inst, Operands, OptIdx);
9374   cvtVOP3P(Inst, Operands, OptIdx);
9375 }
9376 
9377 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9378                                   unsigned i, unsigned Opc,
9379                                   AMDGPU::OpName OpName) {
9380   if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9381     ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9382   else
9383     ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9384 }
9385 
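// Convert parsed SWMMAC operands into MCInst operands. The dst register is
// added a second time as the tied srcTiedDef, and whichever index_key width
// (8, 16, or 32 bits) the opcode declares is emitted as an optional operand.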
9386 void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9387   unsigned Opc = Inst.getOpcode();
9388 
9389   ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9390   addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9391   addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9392   ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9393   ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9394 
9395   OptionalImmIndexMap OptIdx;
9396   for (unsigned i = 5; i < Operands.size(); ++i) {
9397     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9398     OptIdx[Op.getImmTy()] = i;
9399   }
9400 
9401   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9402     addOptionalImmOperand(Inst, Operands, OptIdx,
9403                           AMDGPUOperand::ImmTyIndexKey8bit);
9404 
9405   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9406     addOptionalImmOperand(Inst, Operands, OptIdx,
9407                           AMDGPUOperand::ImmTyIndexKey16bit);
9408 
9409   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9410     addOptionalImmOperand(Inst, Operands, OptIdx,
9411                           AMDGPUOperand::ImmTyIndexKey32bit);
9412 
9413   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9414     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9415 
9416   cvtVOP3P(Inst, Operands, OptIdx);
9417 }
9418 
9419 //===----------------------------------------------------------------------===//
9420 // VOPD
9421 //===----------------------------------------------------------------------===//
9422 
9423 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9424   if (!hasVOPD(getSTI()))
9425     return ParseStatus::NoMatch;
9426 
9427   if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9428     SMLoc S = getLoc();
9429     lex();
9430     lex();
9431     Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9432     SMLoc OpYLoc = getLoc();
9433     StringRef OpYName;
9434     if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9435       Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9436       return ParseStatus::Success;
9437     }
9438     return Error(OpYLoc, "expected a VOPDY instruction after ::");
9439   }
9440   return ParseStatus::NoMatch;
9441 }
9442 
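// A VOPD instruction encodes two VALU operations (OpX and OpY) joined by a
// "::" token, e.g. (illustrative GFX11+ syntax):
//   v_dual_mul_f32 v0, v1, v2 :: v_dual_add_f32 v3, v4, v5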
9443 // Create VOPD MCInst operands using parsed assembler operands.
9444 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9445   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9446 
9447   auto addOp = [&](uint16_t ParsedOprIdx) {
9448     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9449     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9450       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9451       return;
9452     }
9453     if (Op.isReg()) {
9454       Op.addRegOperands(Inst, 1);
9455       return;
9456     }
9457     if (Op.isImm()) {
9458       Op.addImmOperands(Inst, 1);
9459       return;
9460     }
9461     llvm_unreachable("Unhandled operand type in cvtVOPD");
9462   };
9463 
9464   const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9465 
9466   // MCInst operands are ordered as follows:
9467   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9468 
9469   for (auto CompIdx : VOPD::COMPONENTS) {
9470     addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9471   }
9472 
9473   for (auto CompIdx : VOPD::COMPONENTS) {
9474     const auto &CInfo = InstInfo[CompIdx];
9475     auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9476     for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9477       addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9478     if (CInfo.hasSrc2Acc())
9479       addOp(CInfo.getIndexOfDstInParsedOperands());
9480   }
9481 
9482   int BitOp3Idx =
9483       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9484   if (BitOp3Idx != -1) {
9485     OptionalImmIndexMap OptIdx;
9486     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9487     if (Op.isImm())
9488       OptIdx[Op.getImmTy()] = Operands.size() - 1;
9489 
9490     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9491   }
9492 }
9493 
9494 //===----------------------------------------------------------------------===//
9495 // dpp
9496 //===----------------------------------------------------------------------===//
9497 
9498 bool AMDGPUOperand::isDPP8() const {
9499   return isImmTy(ImmTyDPP8);
9500 }
9501 
9502 bool AMDGPUOperand::isDPPCtrl() const {
9503   using namespace AMDGPU::DPP;
9504 
9505   bool Result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9506   if (Result) {
9507     int64_t Imm = getImm();
9508     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9509            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9510            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9511            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9512            (Imm == DppCtrl::WAVE_SHL1) ||
9513            (Imm == DppCtrl::WAVE_ROL1) ||
9514            (Imm == DppCtrl::WAVE_SHR1) ||
9515            (Imm == DppCtrl::WAVE_ROR1) ||
9516            (Imm == DppCtrl::ROW_MIRROR) ||
9517            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9518            (Imm == DppCtrl::BCAST15) ||
9519            (Imm == DppCtrl::BCAST31) ||
9520            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9521            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9522   }
9523   return false;
9524 }
9525 
9526 //===----------------------------------------------------------------------===//
9527 // mAI
9528 //===----------------------------------------------------------------------===//
9529 
9530 bool AMDGPUOperand::isBLGP() const {
9531   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9532 }
9533 
9534 bool AMDGPUOperand::isS16Imm() const {
9535   return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9536 }
9537 
9538 bool AMDGPUOperand::isU16Imm() const {
9539   return isImmLiteral() && isUInt<16>(getImm());
9540 }
9541 
9542 //===----------------------------------------------------------------------===//
9543 // dim
9544 //===----------------------------------------------------------------------===//
9545 
9546 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9547   // We want to allow "dim:1D" etc.,
9548   // but the initial 1 is tokenized as an integer.
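  // Both "dim:2D" and "dim:SQ_RSRC_IMG_2D" forms are accepted.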
9549   std::string Token;
9550   if (isToken(AsmToken::Integer)) {
9551     SMLoc Loc = getToken().getEndLoc();
9552     Token = std::string(getTokenStr());
9553     lex();
9554     if (getLoc() != Loc)
9555       return false;
9556   }
9557 
9558   StringRef Suffix;
9559   if (!parseId(Suffix))
9560     return false;
9561   Token += Suffix;
9562 
9563   StringRef DimId = Token;
9564   DimId.consume_front("SQ_RSRC_IMG_");
9565 
9566   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9567   if (!DimInfo)
9568     return false;
9569 
9570   Encoding = DimInfo->Encoding;
9571   return true;
9572 }
9573 
9574 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9575   if (!isGFX10Plus())
9576     return ParseStatus::NoMatch;
9577 
9578   SMLoc S = getLoc();
9579 
9580   if (!trySkipId("dim", AsmToken::Colon))
9581     return ParseStatus::NoMatch;
9582 
9583   unsigned Encoding;
9584   SMLoc Loc = getLoc();
9585   if (!parseDimId(Encoding))
9586     return Error(Loc, "invalid dim value");
9587 
9588   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9589                                               AMDGPUOperand::ImmTyDim));
9590   return ParseStatus::Success;
9591 }
9592 
9593 //===----------------------------------------------------------------------===//
9594 // dpp
9595 //===----------------------------------------------------------------------===//
9596 
9597 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9598   SMLoc S = getLoc();
9599 
9600   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9601     return ParseStatus::NoMatch;
9602 
9603   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
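  // e.g. dpp8:[7,6,5,4,3,2,1,0] reverses the lanes within each group of 8.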
9604 
9605   int64_t Sels[8];
9606 
9607   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9608     return ParseStatus::Failure;
9609 
9610   for (size_t i = 0; i < 8; ++i) {
9611     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9612       return ParseStatus::Failure;
9613 
9614     SMLoc Loc = getLoc();
9615     if (getParser().parseAbsoluteExpression(Sels[i]))
9616       return ParseStatus::Failure;
9617     if (Sels[i] < 0 || Sels[i] > 7)
9618       return Error(Loc, "expected a 3-bit value");
9619   }
9620 
9621   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9622     return ParseStatus::Failure;
9623 
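  // Pack the eight 3-bit lane selectors into a single 24-bit immediate.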
9624   unsigned DPP8 = 0;
9625   for (size_t i = 0; i < 8; ++i)
9626     DPP8 |= (Sels[i] << (i * 3));
9627 
9628   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9629   return ParseStatus::Success;
9630 }
9631 
9632 bool
9633 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9634                                     const OperandVector &Operands) {
9635   if (Ctrl == "row_newbcast")
9636     return isGFX90A();
9637 
9638   if (Ctrl == "row_share" ||
9639       Ctrl == "row_xmask")
9640     return isGFX10Plus();
9641 
9642   if (Ctrl == "wave_shl" ||
9643       Ctrl == "wave_shr" ||
9644       Ctrl == "wave_rol" ||
9645       Ctrl == "wave_ror" ||
9646       Ctrl == "row_bcast")
9647     return isVI() || isGFX9();
9648 
9649   return Ctrl == "row_mirror" ||
9650          Ctrl == "row_half_mirror" ||
9651          Ctrl == "quad_perm" ||
9652          Ctrl == "row_shl" ||
9653          Ctrl == "row_shr" ||
9654          Ctrl == "row_ror";
9655 }
9656 
9657 int64_t
9658 AMDGPUAsmParser::parseDPPCtrlPerm() {
9659   // quad_perm:[%d,%d,%d,%d]
9660 
9661   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9662     return -1;
9663 
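  // Pack the four 2-bit lane selectors into an 8-bit value.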
9664   int64_t Val = 0;
9665   for (int i = 0; i < 4; ++i) {
9666     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9667       return -1;
9668 
9669     int64_t Temp;
9670     SMLoc Loc = getLoc();
9671     if (getParser().parseAbsoluteExpression(Temp))
9672       return -1;
9673     if (Temp < 0 || Temp > 3) {
9674       Error(Loc, "expected a 2-bit value");
9675       return -1;
9676     }
9677 
9678     Val += (Temp << (i * 2));
9679   }
9680 
9681   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9682     return -1;
9683 
9684   return Val;
9685 }
9686 
9687 int64_t
9688 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9689   using namespace AMDGPU::DPP;
9690 
9691   // sel:%d
9692 
9693   int64_t Val;
9694   SMLoc Loc = getLoc();
9695 
9696   if (getParser().parseAbsoluteExpression(Val))
9697     return -1;
9698 
9699   struct DppCtrlCheck {
9700     int64_t Ctrl;
9701     int Lo;
9702     int Hi;
9703   };
9704 
9705   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9706     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
9707     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
9708     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
9709     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
9710     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
9711     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
9712     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
9713     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9714     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9715     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9716     .Default({-1, 0, 0});
9717 
9718   bool Valid;
9719   if (Check.Ctrl == -1) {
9720     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9721     Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9722   } else {
9723     Valid = Check.Lo <= Val && Val <= Check.Hi;
9724     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9725   }
9726 
9727   if (!Valid) {
9728     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9729     return -1;
9730   }
9731 
9732   return Val;
9733 }
9734 
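// Accepted DPP control forms (illustrative):
//   row_mirror, row_half_mirror, quad_perm:[0,1,2,3],
//   row_shl:1 ... row_ror:15, row_bcast:15|31 (VI/GFX9),
//   row_share:0..15, row_xmask:0..15 (GFX10+), row_newbcast:0..15 (GFX90A)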
9735 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9736   using namespace AMDGPU::DPP;
9737 
9738   if (!isToken(AsmToken::Identifier) ||
9739       !isSupportedDPPCtrl(getTokenStr(), Operands))
9740     return ParseStatus::NoMatch;
9741 
9742   SMLoc S = getLoc();
9743   int64_t Val = -1;
9744   StringRef Ctrl;
9745 
9746   parseId(Ctrl);
9747 
9748   if (Ctrl == "row_mirror") {
9749     Val = DppCtrl::ROW_MIRROR;
9750   } else if (Ctrl == "row_half_mirror") {
9751     Val = DppCtrl::ROW_HALF_MIRROR;
9752   } else {
9753     if (skipToken(AsmToken::Colon, "expected a colon")) {
9754       if (Ctrl == "quad_perm") {
9755         Val = parseDPPCtrlPerm();
9756       } else {
9757         Val = parseDPPCtrlSel(Ctrl);
9758       }
9759     }
9760   }
9761 
9762   if (Val == -1)
9763     return ParseStatus::Failure;
9764 
9765   Operands.push_back(
9766     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9767   return ParseStatus::Success;
9768 }
9769 
9770 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9771                                  bool IsDPP8) {
9772   OptionalImmIndexMap OptionalIdx;
9773   unsigned Opc = Inst.getOpcode();
9774   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9775 
9776   // MAC instructions are special because they have an 'old' operand that is
9777   // not tied to the dst operand (but is assumed to be). They also have a
9778   // dummy, unused src2_modifiers operand.
9779   int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9780   int Src2ModIdx =
9781       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9782   bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9783                Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9784 
9785   unsigned I = 1;
9786   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9787     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9788   }
9789 
9790   int Fi = 0;
9791   int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9792   bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9793                         Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9794                         Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9795                         Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9796 
9797   for (unsigned E = Operands.size(); I != E; ++I) {
9798 
9799     if (IsMAC) {
9800       int NumOperands = Inst.getNumOperands();
9801       if (OldIdx == NumOperands) {
9802         // Handle old operand
9803         constexpr int DST_IDX = 0;
9804         Inst.addOperand(Inst.getOperand(DST_IDX));
9805       } else if (Src2ModIdx == NumOperands) {
9806         // Add unused dummy src2_modifiers
9807         Inst.addOperand(MCOperand::createImm(0));
9808       }
9809     }
9810 
9811     if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9812       Inst.addOperand(Inst.getOperand(0));
9813     }
9814 
9815     if (IsVOP3CvtSrDpp) {
9816       if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9817         Inst.addOperand(MCOperand::createImm(0));
9818         Inst.addOperand(MCOperand::createReg(MCRegister()));
9819       }
9820     }
9821 
9822     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9823                                             MCOI::TIED_TO);
9824     if (TiedTo != -1) {
9825       assert((unsigned)TiedTo < Inst.getNumOperands());
9826       // handle tied old or src2 for MAC instructions
9827       Inst.addOperand(Inst.getOperand(TiedTo));
9828     }
9829     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9830     // Add the register arguments
9831     if (IsDPP8 && Op.isDppFI()) {
9832       Fi = Op.getImm();
9833     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9834       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9835     } else if (Op.isReg()) {
9836       Op.addRegOperands(Inst, 1);
9837     } else if (Op.isImm() &&
9838                Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9839       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9840       Op.addImmOperands(Inst, 1);
9841     } else if (Op.isImm()) {
9842       OptionalIdx[Op.getImmTy()] = I;
9843     } else {
9844       llvm_unreachable("unhandled operand type");
9845     }
9846   }
9847 
9848   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
9849     addOptionalImmOperand(Inst, Operands, OptionalIdx,
9850                           AMDGPUOperand::ImmTyClamp);
9851 
9852   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
9853     addOptionalImmOperand(Inst, Operands, OptionalIdx,
9854                           AMDGPUOperand::ImmTyByteSel);
9855 
9856   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9857     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9858 
9859   if (Desc.TSFlags & SIInstrFlags::VOP3P)
9860     cvtVOP3P(Inst, Operands, OptionalIdx);
9861   else if (Desc.TSFlags & SIInstrFlags::VOP3)
9862     cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9863   else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9864     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9865   }
9866 
9867   if (IsDPP8) {
9868     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9869     using namespace llvm::AMDGPU::DPP;
9870     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
9871   } else {
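    // Defaults: dpp_ctrl 0xe4 is quad_perm:[0,1,2,3] (the identity
    // permutation); row/bank masks default to 0xf (all rows/banks enabled).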
9872     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9873     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9874     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9875     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9876 
9877     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9878       addOptionalImmOperand(Inst, Operands, OptionalIdx,
9879                             AMDGPUOperand::ImmTyDppFI);
9880   }
9881 }
9882 
9883 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9884   OptionalImmIndexMap OptionalIdx;
9885 
9886   unsigned I = 1;
9887   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9888   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9889     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9890   }
9891 
9892   int Fi = 0;
9893   for (unsigned E = Operands.size(); I != E; ++I) {
9894     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9895                                             MCOI::TIED_TO);
9896     if (TiedTo != -1) {
9897       assert((unsigned)TiedTo < Inst.getNumOperands());
9898       // handle tied old or src2 for MAC instructions
9899       Inst.addOperand(Inst.getOperand(TiedTo));
9900     }
9901     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9902     // Add the register arguments
9903     if (Op.isReg() && validateVccOperand(Op.getReg())) {
9904       // VOP2b DPP (v_add_u32, v_sub_u32, ...) uses the "vcc" token.
9905       // Skip it.
9906       continue;
9907     }
9908 
9909     if (IsDPP8) {
9910       if (Op.isDPP8()) {
9911         Op.addImmOperands(Inst, 1);
9912       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9913         Op.addRegWithFPInputModsOperands(Inst, 2);
9914       } else if (Op.isDppFI()) {
9915         Fi = Op.getImm();
9916       } else if (Op.isReg()) {
9917         Op.addRegOperands(Inst, 1);
9918       } else {
9919         llvm_unreachable("Invalid operand type");
9920       }
9921     } else {
9922       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9923         Op.addRegWithFPInputModsOperands(Inst, 2);
9924       } else if (Op.isReg()) {
9925         Op.addRegOperands(Inst, 1);
9926       } else if (Op.isDPPCtrl()) {
9927         Op.addImmOperands(Inst, 1);
9928       } else if (Op.isImm()) {
9929         // Handle optional arguments
9930         OptionalIdx[Op.getImmTy()] = I;
9931       } else {
9932         llvm_unreachable("Invalid operand type");
9933       }
9934     }
9935   }
9936 
9937   if (IsDPP8) {
9938     using namespace llvm::AMDGPU::DPP;
9939     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
9940   } else {
9941     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9942     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9943     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9944     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9945       addOptionalImmOperand(Inst, Operands, OptionalIdx,
9946                             AMDGPUOperand::ImmTyDppFI);
9947     }
9948   }
9949 }
9950 
9951 //===----------------------------------------------------------------------===//
9952 // sdwa
9953 //===----------------------------------------------------------------------===//
9954 
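// An SDWA selector picks a sub-dword slice of a 32-bit operand, e.g.
// (illustrative VI syntax):
//   v_add_f32_sdwa v0, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PAD
//       src0_sel:WORD_1 src1_sel:DWORD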
9955 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9956                                           StringRef Prefix,
9957                                           AMDGPUOperand::ImmTy Type) {
9958   return parseStringOrIntWithPrefix(
9959       Operands, Prefix,
9960       {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
9961       Type);
9962 }
9963 
9964 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9965   return parseStringOrIntWithPrefix(
9966       Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
9967       AMDGPUOperand::ImmTySDWADstUnused);
9968 }
9969 
9970 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9971   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9972 }
9973 
9974 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9975   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9976 }
9977 
9978 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9979   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9980 }
9981 
9982 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9983   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9984 }
9985 
9986 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9987   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9988 }
9989 
9990 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9991                               uint64_t BasicInstType,
9992                               bool SkipDstVcc,
9993                               bool SkipSrcVcc) {
9994   using namespace llvm::AMDGPU::SDWA;
9995 
9996   OptionalImmIndexMap OptionalIdx;
9997   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9998   bool SkippedVcc = false;
9999 
10000   unsigned I = 1;
10001   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10002   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10003     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10004   }
10005 
10006   for (unsigned E = Operands.size(); I != E; ++I) {
10007     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10008     if (SkipVcc && !SkippedVcc && Op.isReg() &&
10009         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10010       // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
10011       // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10012       // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
10013       // Skip VCC only if we did not skip it on the previous iteration.
10014       // Note that src0 and src1 occupy 2 slots each because of modifiers.
10015       if (BasicInstType == SIInstrFlags::VOP2 &&
10016           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10017            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10018         SkippedVcc = true;
10019         continue;
10020       }
10021       if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10022         SkippedVcc = true;
10023         continue;
10024       }
10025     }
10026     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10027       Op.addRegOrImmWithInputModsOperands(Inst, 2);
10028     } else if (Op.isImm()) {
10029       // Handle optional arguments
10030       OptionalIdx[Op.getImmTy()] = I;
10031     } else {
10032       llvm_unreachable("Invalid operand type");
10033     }
10034     SkippedVcc = false;
10035   }
10036 
10037   const unsigned Opc = Inst.getOpcode();
10038   if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10039       Opc != AMDGPU::V_NOP_sdwa_vi) {
10040     // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
10041     switch (BasicInstType) {
10042     case SIInstrFlags::VOP1:
10043       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10044         addOptionalImmOperand(Inst, Operands, OptionalIdx,
10045                               AMDGPUOperand::ImmTyClamp, 0);
10046 
10047       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10048         addOptionalImmOperand(Inst, Operands, OptionalIdx,
10049                               AMDGPUOperand::ImmTyOModSI, 0);
10050 
10051       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10052         addOptionalImmOperand(Inst, Operands, OptionalIdx,
10053                               AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10054 
10055       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10056         addOptionalImmOperand(Inst, Operands, OptionalIdx,
10057                               AMDGPUOperand::ImmTySDWADstUnused,
10058                               DstUnused::UNUSED_PRESERVE);
10059 
10060       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10061       break;
10062 
10063     case SIInstrFlags::VOP2:
10064       addOptionalImmOperand(Inst, Operands, OptionalIdx,
10065                             AMDGPUOperand::ImmTyClamp, 0);
10066 
10067       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10068         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10069 
10070       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10071       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10072       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10073       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10074       break;
10075 
10076     case SIInstrFlags::VOPC:
10077       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10078         addOptionalImmOperand(Inst, Operands, OptionalIdx,
10079                               AMDGPUOperand::ImmTyClamp, 0);
10080       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10081       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10082       break;
10083 
10084     default:
10085       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10086     }
10087   }
10088 
10089   // Special case v_mac_{f16, f32}:
10090   // they have a src2 register operand that is tied to the dst operand.
10091   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10092       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
10093     auto *it = Inst.begin();
10094     std::advance(
10095       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10096     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10097   }
10098 }
10099 
10100 /// Force static initialization.
10101 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10102 LLVMInitializeAMDGPUAsmParser() {
10103   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
10104   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
10105 }
10106 
10107 #define GET_REGISTER_MATCHER
10108 #define GET_MATCHER_IMPLEMENTATION
10109 #define GET_MNEMONIC_SPELL_CHECKER
10110 #define GET_MNEMONIC_CHECKER
10111 #include "AMDGPUGenAsmMatcher.inc"
10112 
10113 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10114                                                 unsigned MCK) {
10115   switch (MCK) {
10116   case MCK_addr64:
10117     return parseTokenOp("addr64", Operands);
10118   case MCK_done:
10119     return parseTokenOp("done", Operands);
10120   case MCK_idxen:
10121     return parseTokenOp("idxen", Operands);
10122   case MCK_lds:
10123     return parseTokenOp("lds", Operands);
10124   case MCK_offen:
10125     return parseTokenOp("offen", Operands);
10126   case MCK_off:
10127     return parseTokenOp("off", Operands);
10128   case MCK_row_95_en:
10129     return parseTokenOp("row_en", Operands);
10130   case MCK_gds:
10131     return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10132   case MCK_tfe:
10133     return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10134   }
10135   return tryCustomParseOperand(Operands, MCK);
10136 }
10137 
10138 // This function should be defined after the auto-generated include so that
10139 // the MatchClassKind enum is defined.
10140 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10141                                                      unsigned Kind) {
10142   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10143   // But MatchInstructionImpl() expects to see a token and fails to validate
10144   // the operand. This method checks whether we were given an immediate
10145   // operand where the matcher expects the corresponding token.
10146   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10147   switch (Kind) {
10148   case MCK_addr64:
10149     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10150   case MCK_gds:
10151     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10152   case MCK_lds:
10153     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10154   case MCK_idxen:
10155     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10156   case MCK_offen:
10157     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10158   case MCK_tfe:
10159     return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10160   case MCK_SSrc_b32:
10161     // When operands have expression values, they will return true for isToken,
10162     // because it is not possible to distinguish between a token and an
10163     // expression at parse time. MatchInstructionImpl() will always try to
10164     // match an operand as a token, when isToken returns true, and when the
10165     // name of the expression is not a valid token, the match will fail,
10166     // so we need to handle it here.
10167     return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10168   case MCK_SSrc_f32:
10169     return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10170   case MCK_SOPPBrTarget:
10171     return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10172   case MCK_VReg32OrOff:
10173     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10174   case MCK_InterpSlot:
10175     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10176   case MCK_InterpAttr:
10177     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10178   case MCK_InterpAttrChan:
10179     return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10180   case MCK_SReg_64:
10181   case MCK_SReg_64_XEXEC:
10182     // Null is defined as a 32-bit register, but it should also be
10183     // accepted for 64-bit and larger operands. The following code
10184     // enables it for SReg_64 and larger operands used as sources and
10185     // destinations. The remaining source operands are handled in
10186     // isInlinableImm.
10187   case MCK_SReg_96:
10188   case MCK_SReg_128:
10189   case MCK_SReg_256:
10190   case MCK_SReg_512:
10191     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10192   default:
10193     return Match_InvalidOperand;
10194   }
10195 }
10196 
10197 //===----------------------------------------------------------------------===//
10198 // endpgm
10199 //===----------------------------------------------------------------------===//
10200 
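// s_endpgm takes an optional 16-bit immediate that defaults to 0 when
// omitted, e.g. "s_endpgm 7" (illustrative).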
10201 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10202   SMLoc S = getLoc();
10203   int64_t Imm = 0;
10204 
10205   if (!parseExpr(Imm)) {
10206     // The operand is optional, if not present default to 0
10207     Imm = 0;
10208   }
10209 
10210   if (!isUInt<16>(Imm))
10211     return Error(S, "expected a 16-bit value");
10212 
10213   Operands.push_back(
10214       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10215   return ParseStatus::Success;
10216 }
10217 
10218 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10219 
10220 //===----------------------------------------------------------------------===//
10221 // Split Barrier
10222 //===----------------------------------------------------------------------===//
10223 
10224 bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
10225