xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (revision 5b56413d04e608379c9a306373554a8e4d321bc0)
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/CodeGen/MachineValueType.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCExpr.h"
28 #include "llvm/MC/MCInst.h"
29 #include "llvm/MC/MCInstrDesc.h"
30 #include "llvm/MC/MCParser/MCAsmLexer.h"
31 #include "llvm/MC/MCParser/MCAsmParser.h"
32 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
33 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/AMDGPUMetadata.h"
37 #include "llvm/Support/AMDHSAKernelDescriptor.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/TargetParser/TargetParser.h"
41 #include <optional>
42 
43 using namespace llvm;
44 using namespace llvm::AMDGPU;
45 using namespace llvm::amdhsa;
46 
47 namespace {
48 
49 class AMDGPUAsmParser;
50 
// Categories used to classify a register. IS_UNKNOWN is the first enumerator,
// so it is the value a zero-initialised RegisterKind holds.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
52 
53 //===----------------------------------------------------------------------===//
54 // Operand
55 //===----------------------------------------------------------------------===//
56 
57 class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the operand payload: it records whether this operand is
  // a token, an immediate, a register or an expression. The is*/get* accessors
  // check Kind before touching the matching payload member.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;
64 
65   SMLoc StartLoc, EndLoc;
66   const AMDGPUAsmParser *AsmParser;
67 
68 public:
  /// Creates an operand of kind \p Kind_ associated with \p AsmParser_.
  /// Only the kind tag and the parser pointer are set here; the payload
  /// (token / immediate / register / expression) is not initialised by this
  /// constructor.
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}
71 
72   using Ptr = std::unique_ptr<AMDGPUOperand>;
73 
74   struct Modifiers {
75     bool Abs = false;
76     bool Neg = false;
77     bool Sext = false;
78     bool Lit = false;
79 
80     bool hasFPModifiers() const { return Abs || Neg; }
81     bool hasIntModifiers() const { return Sext; }
82     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
83 
84     int64_t getFPModifiersOperand() const {
85       int64_t Operand = 0;
86       Operand |= Abs ? SISrcMods::ABS : 0u;
87       Operand |= Neg ? SISrcMods::NEG : 0u;
88       return Operand;
89     }
90 
91     int64_t getIntModifiersOperand() const {
92       int64_t Operand = 0;
93       Operand |= Sext ? SISrcMods::SEXT : 0u;
94       return Operand;
95     }
96 
97     int64_t getModifiersOperand() const {
98       assert(!(hasFPModifiers() && hasIntModifiers())
99            && "fp and int modifiers should not be used simultaneously");
100       if (hasFPModifiers()) {
101         return getFPModifiersOperand();
102       } else if (hasIntModifiers()) {
103         return getIntModifiersOperand();
104       } else {
105         return 0;
106       }
107     }
108 
109     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
110   };
111 
  // Tag describing what an immediate operand stands for.
  //
  // ImmTyNone marks a plain immediate value (see isImmLiteral()); every other
  // enumerator marks a specific named operand (see isImmModifier()). The tag
  // is stored in ImmOp::Type and queried through isImmTy().
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
  };
174 
  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report the location of mandatory operands only for
  // VOPD, when both OpX and OpY have a KImm and there are no other literals.
  //
  // The kind is kept in ImmOp::Kind and is updated through the const
  // setImmKind*() methods.
  enum ImmKindTy {
    ImmKindTyNone,             // Set by setImmKindNone().
    ImmKindTyLiteral,          // Regular literal.
    ImmKindTyMandatoryLiteral, // Mandatory literal (KImm).
    ImmKindTyConst,            // Set by setImmKindConst().
  };
188 
189 private:
  // Payload of a Token operand: a pointer/length pair that getToken() turns
  // into a StringRef. The characters are referenced, not copied.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };
194 
  // Payload of an Immediate operand.
  struct ImmOp {
    int64_t Val;   // Value returned by getImm() and replaced by setImm().
    ImmTy Type;    // What the immediate represents; ImmTyNone for a plain value.
    bool IsFPImm;  // NOTE(review): presumably set when the value was written as
                   // a floating-point literal - confirm where ImmOp is filled in.
    mutable ImmKindTy Kind; // mutable so the const setImmKind*() methods can
                            // update it.
    Modifiers Mods; // Source modifiers applied to the immediate.
  };
202 
  // Payload of a Register operand.
  struct RegOp {
    unsigned RegNo; // Register number returned by getReg().
    Modifiers Mods; // Source modifiers applied to the register.
  };
207 
  // Operand payload. Kind selects the active member: Token -> Tok,
  // Immediate -> Imm, Register -> Reg, Expression -> Expr.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };
214 
215 public:
216   bool isToken() const override { return Kind == Token; }
217 
218   bool isSymbolRefExpr() const {
219     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
220   }
221 
222   bool isImm() const override {
223     return Kind == Immediate;
224   }
225 
  // The setImmKind*() methods record the ImmKindTy of an immediate operand.
  // They are const because ImmOp::Kind is declared mutable, and each asserts
  // that the operand really is an immediate.

  /// Marks the immediate as ImmKindTyNone.
  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  /// Marks the immediate as a regular literal.
  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  /// Marks the immediate as a mandatory literal (KImm).
  void setImmKindMandatoryLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  /// Marks the immediate as ImmKindTyConst.
  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }
245 
246   bool IsImmKindLiteral() const {
247     return isImm() && Imm.Kind == ImmKindTyLiteral;
248   }
249 
250   bool IsImmKindMandatoryLiteral() const {
251     return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
252   }
253 
254   bool isImmKindConst() const {
255     return isImm() && Imm.Kind == ImmKindTyConst;
256   }
257 
258   bool isInlinableImm(MVT type) const;
259   bool isLiteralImm(MVT type) const;
260 
261   bool isRegKind() const {
262     return Kind == Register;
263   }
264 
265   bool isReg() const override {
266     return isRegKind() && !hasModifiers();
267   }
268 
269   bool isRegOrInline(unsigned RCID, MVT type) const {
270     return isRegClass(RCID) || isInlinableImm(type);
271   }
272 
273   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
274     return isRegOrInline(RCID, type) || isLiteralImm(type);
275   }
276 
  // Operand-class predicates for sources that may carry input modifiers.
  //
  // isRegOrImmWith<T>InputMods: register of the given class, inlinable
  //   immediate, or literal immediate of value type <T>.
  // isRegOrInlineImmWith<T>InputMods: register of the given class or inlinable
  //   immediate of value type <T>; literals are not accepted.
  //
  // None of these checks whether modifiers are present. The "T16" variants
  // use the VS_16 register class instead of VS_32.

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }
324 
325 
326   bool isVReg() const {
327     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
328            isRegClass(AMDGPU::VReg_64RegClassID) ||
329            isRegClass(AMDGPU::VReg_96RegClassID) ||
330            isRegClass(AMDGPU::VReg_128RegClassID) ||
331            isRegClass(AMDGPU::VReg_160RegClassID) ||
332            isRegClass(AMDGPU::VReg_192RegClassID) ||
333            isRegClass(AMDGPU::VReg_256RegClassID) ||
334            isRegClass(AMDGPU::VReg_512RegClassID) ||
335            isRegClass(AMDGPU::VReg_1024RegClassID);
336   }
337 
338   bool isVReg32() const {
339     return isRegClass(AMDGPU::VGPR_32RegClassID);
340   }
341 
342   bool isVReg32OrOff() const {
343     return isOff() || isVReg32();
344   }
345 
346   bool isNull() const {
347     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
348   }
349 
350   bool isVRegWithInputMods() const;
351   template <bool IsFake16> bool isT16VRegWithInputMods() const;
352 
353   bool isSDWAOperand(MVT type) const;
354   bool isSDWAFP16Operand() const;
355   bool isSDWAFP32Operand() const;
356   bool isSDWAInt16Operand() const;
357   bool isSDWAInt32Operand() const;
358 
359   bool isImmTy(ImmTy ImmT) const {
360     return isImm() && Imm.Type == ImmT;
361   }
362 
363   template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
364 
365   bool isImmLiteral() const { return isImmTy(ImmTyNone); }
366 
367   bool isImmModifier() const {
368     return isImm() && Imm.Type != ImmTyNone;
369   }
370 
  // One predicate per named immediate operand: each is true when this operand
  // is an immediate carrying the corresponding ImmTy tag. Exceptions:
  //  - isOffset0 / isOffset1 also require the value to fit in 8 unsigned bits;
  //  - isFORMAT also requires the value to fit in 7 unsigned bits;
  //  - isFlatOffset accepts either ImmTyOffset or ImmTyInstOffset.
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
407 
408   bool isRegOrImm() const {
409     return isReg() || isImm();
410   }
411 
412   bool isRegClass(unsigned RCID) const;
413 
414   bool isInlineValue() const;
415 
416   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
417     return isRegOrInline(RCID, type) && !hasModifiers();
418   }
419 
  // isSCSrc<T>: unmodified operand that is either a register of the SReg_32
  // class (16/32-bit types) or the SReg_64 class (64-bit types), or an
  // immediate inlinable as value type <T>. The V2 variants simply reuse the
  // scalar 16-bit predicate.

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }
453 
454   bool isSSrcB32() const {
455     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
456   }
457 
458   bool isSSrcB16() const {
459     return isSCSrcB16() || isLiteralImm(MVT::i16);
460   }
461 
462   bool isSSrcV2B16() const {
463     llvm_unreachable("cannot happen");
464     return isSSrcB16();
465   }
466 
467   bool isSSrcB64() const {
468     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
469     // See isVSrc64().
470     return isSCSrcB64() || isLiteralImm(MVT::i64);
471   }
472 
473   bool isSSrcF32() const {
474     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
475   }
476 
477   bool isSSrcF64() const {
478     return isSCSrcB64() || isLiteralImm(MVT::f64);
479   }
480 
481   bool isSSrcF16() const {
482     return isSCSrcB16() || isLiteralImm(MVT::f16);
483   }
484 
485   bool isSSrcV2F16() const {
486     llvm_unreachable("cannot happen");
487     return isSSrcF16();
488   }
489 
490   bool isSSrcV2FP32() const {
491     llvm_unreachable("cannot happen");
492     return isSSrcF32();
493   }
494 
495   bool isSCSrcV2FP32() const {
496     llvm_unreachable("cannot happen");
497     return isSCSrcF32();
498   }
499 
500   bool isSSrcV2INT32() const {
501     llvm_unreachable("cannot happen");
502     return isSSrcB32();
503   }
504 
505   bool isSCSrcV2INT32() const {
506     llvm_unreachable("cannot happen");
507     return isSCSrcB32();
508   }
509 
510   bool isSSrcOrLdsB32() const {
511     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
512            isLiteralImm(MVT::i32) || isExpr();
513   }
514 
  // isVCSrc<T>: unmodified operand that is either a register of the named
  // VS_* class or an immediate inlinable as value type <T>.
  //  - plain 16/32-bit forms use VS_32, 64-bit forms use VS_64;
  //  - "T" forms use VS_16, "T..._Lo128" forms use VS_16_Lo128;
  //  - "Fake16..._Lo128" forms use VS_32_Lo128;
  //  - V2 forms reuse the scalar 16-bit predicate.

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcTB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }
570 
571   bool isVSrcB32() const {
572     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
573   }
574 
575   bool isVSrcB64() const {
576     return isVCSrcF64() || isLiteralImm(MVT::i64);
577   }
578 
579   bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
580 
581   bool isVSrcTB16_Lo128() const {
582     return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
583   }
584 
585   bool isVSrcFake16B16_Lo128() const {
586     return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
587   }
588 
589   bool isVSrcB16() const {
590     return isVCSrcB16() || isLiteralImm(MVT::i16);
591   }
592 
593   bool isVSrcV2B16() const {
594     return isVSrcB16() || isLiteralImm(MVT::v2i16);
595   }
596 
597   bool isVCSrcV2FP32() const {
598     return isVCSrcF64();
599   }
600 
601   bool isVSrcV2FP32() const {
602     return isVSrcF64() || isLiteralImm(MVT::v2f32);
603   }
604 
605   bool isVCSrcV2INT32() const {
606     return isVCSrcB64();
607   }
608 
609   bool isVSrcV2INT32() const {
610     return isVSrcB64() || isLiteralImm(MVT::v2i32);
611   }
612 
613   bool isVSrcF32() const {
614     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
615   }
616 
617   bool isVSrcF64() const {
618     return isVCSrcF64() || isLiteralImm(MVT::f64);
619   }
620 
621   bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
622 
623   bool isVSrcTF16_Lo128() const {
624     return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
625   }
626 
627   bool isVSrcFake16F16_Lo128() const {
628     return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
629   }
630 
631   bool isVSrcF16() const {
632     return isVCSrcF16() || isLiteralImm(MVT::f16);
633   }
634 
635   bool isVSrcV2F16() const {
636     return isVSrcF16() || isLiteralImm(MVT::v2f16);
637   }
638 
  // isVISrc<T>: unmodified operand that is either a VGPR_32-class register or
  // an immediate inlinable as value type <T>. isVISrcV2B16 reuses the i16
  // check; isVISrcV2F16 accepts either the f16 or the i32 check.

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }
662 
  // isVISrc_64<T>: unmodified operand that is either a VReg_64-class register
  // or an immediate inlinable as the listed value type. The V2FP32 / V2INT32
  // forms test inlinability with the element type (f32 / i32).

  bool isVISrc_64F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }
686 
  // isVISrc_256<T> / isVISrc_128<T>: unmodified operand that is either a
  // VReg_256 / VReg_128-class register or an immediate inlinable as the
  // listed value type. V2B16 reuses the i16 check; V2FP32 / V2INT32 test
  // inlinability with the element type (f32 / i32).

  bool isVISrc_256B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }
726 
  // isVISrc_512<T> / isVISrc_1024<T>: unmodified operand that is either a
  // VReg_512 / VReg_1024-class register or an immediate inlinable as the
  // listed value type. V2B16 reuses the i16 check; V2F16 accepts either the
  // f16 or the i32 check.

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }
774 
  // isAISrc<T>: unmodified operand that is either an AGPR_32-class register
  // or an immediate inlinable as value type <T>. isAISrcV2B16 reuses the i16
  // check; isAISrcV2F16 accepts either the f16 or the i32 check.

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }
798 
  // isAISrc_<N><T>: unmodified operand that is either an AReg_<N>-class
  // register or an immediate inlinable as value type <T>. V2B16 reuses the
  // i16 check; V2F16 accepts either the f16 or the i32 check. Two VReg_128
  // predicates (isVISrc_128F16 / isVISrc_128V2F16) also live in this run.

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }
846 
  // isAISrc_512<T> / isAISrc_1024<T>: unmodified operand that is either an
  // AReg_512 / AReg_1024-class register or an immediate inlinable as the
  // listed value type. V2B16 reuses the i16 check; V2F16 accepts either the
  // f16 or the i32 check.

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }
894 
  /// True when the operand is a valid literal immediate for f32.
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  /// True when the operand is a valid literal immediate for f16.
  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }
902 
903   bool isMem() const override {
904     return false;
905   }
906 
907   bool isExpr() const {
908     return Kind == Expression;
909   }
910 
911   bool isSOPPBrTarget() const { return isExpr() || isImm(); }
912 
913   bool isSWaitCnt() const;
914   bool isDepCtr() const;
915   bool isSDelayALU() const;
916   bool isHwreg() const;
917   bool isSendMsg() const;
918   bool isSplitBarrier() const;
919   bool isSwizzle() const;
920   bool isSMRDOffset8() const;
921   bool isSMEMOffset() const;
922   bool isSMRDLiteralOffset() const;
923   bool isDPP8() const;
924   bool isDPPCtrl() const;
925   bool isBLGP() const;
926   bool isCBSZ() const;
927   bool isABID() const;
928   bool isGPRIdxMode() const;
929   bool isS16Imm() const;
930   bool isU16Imm() const;
931   bool isEndpgm() const;
932   bool isWaitVDST() const;
933   bool isWaitEXP() const;
934   bool isWaitVAVDst() const;
935   bool isWaitVMVSrc() const;
936 
  /// Wraps \p P into a callable that applies it to this operand.
  ///
  /// std::bind stores its bound arguments by value, so the returned object
  /// holds a copy of *this made at the time of the call rather than a
  /// reference to it.
  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return std::bind(P, *this);
  }
940 
  // Payload accessors. Each one asserts that the operand is of the matching
  // kind before reading or writing the union member.

  /// Returns the token text. The StringRef refers to the stored pointer and
  /// length; no copy is made.
  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  /// Returns the immediate value.
  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  /// Replaces the immediate value; type, kind and modifiers are untouched.
  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  /// Returns the ImmTy tag of the immediate.
  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  /// Returns the register number. Only the kind is asserted, so this also
  /// works for registers that carry modifiers (for which isReg() is false).
  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  /// Source location where the operand starts.
  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  /// Source location where the operand ends.
  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  /// Source range built from the start and end locations.
  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }
977 
978   Modifiers getModifiers() const {
979     assert(isRegKind() || isImmTy(ImmTyNone));
980     return isRegKind() ? Reg.Mods : Imm.Mods;
981   }
982 
983   void setModifiers(Modifiers Mods) {
984     assert(isRegKind() || isImmTy(ImmTyNone));
985     if (isRegKind())
986       Reg.Mods = Mods;
987     else
988       Imm.Mods = Mods;
989   }
990 
  // Convenience wrappers over getModifiers(). They inherit its assertion, so
  // they may only be used on registers and plain (ImmTyNone) immediates.

  /// True when any FP or integer modifier is set on this operand.
  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  /// True when Abs or Neg is set on this operand.
  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  /// True when Sext is set on this operand.
  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }
1002 
1003   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1004 
1005   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1006 
1007   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1008 
1009   void addRegOperands(MCInst &Inst, unsigned N) const;
1010 
1011   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1012     if (isRegKind())
1013       addRegOperands(Inst, N);
1014     else
1015       addImmOperands(Inst, N);
1016   }
1017 
1018   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1019     Modifiers Mods = getModifiers();
1020     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1021     if (isRegKind()) {
1022       addRegOperands(Inst, N);
1023     } else {
1024       addImmOperands(Inst, N, false);
1025     }
1026   }
1027 
  /// FP flavour of addRegOrImmWithInputModsOperands(): asserts that no
  /// integer modifier (Sext) is set, then forwards.
  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  /// Integer flavour of addRegOrImmWithInputModsOperands(): asserts that no
  /// FP modifier (Abs/Neg) is set, then forwards.
  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }
1037 
1038   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1039     Modifiers Mods = getModifiers();
1040     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1041     assert(isRegKind());
1042     addRegOperands(Inst, N);
1043   }
1044 
  /// FP flavour of addRegWithInputModsOperands(): asserts that no integer
  /// modifier (Sext) is set, then forwards.
  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  /// Integer flavour of addRegWithInputModsOperands(): asserts that no FP
  /// modifier (Abs/Neg) is set, then forwards.
  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
1054 
1055   static void printImmTy(raw_ostream& OS, ImmTy Type) {
1056     // clang-format off
1057     switch (Type) {
1058     case ImmTyNone: OS << "None"; break;
1059     case ImmTyGDS: OS << "GDS"; break;
1060     case ImmTyLDS: OS << "LDS"; break;
1061     case ImmTyOffen: OS << "Offen"; break;
1062     case ImmTyIdxen: OS << "Idxen"; break;
1063     case ImmTyAddr64: OS << "Addr64"; break;
1064     case ImmTyOffset: OS << "Offset"; break;
1065     case ImmTyInstOffset: OS << "InstOffset"; break;
1066     case ImmTyOffset0: OS << "Offset0"; break;
1067     case ImmTyOffset1: OS << "Offset1"; break;
1068     case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1069     case ImmTyCPol: OS << "CPol"; break;
1070     case ImmTyIndexKey8bit: OS << "index_key"; break;
1071     case ImmTyIndexKey16bit: OS << "index_key"; break;
1072     case ImmTyTFE: OS << "TFE"; break;
1073     case ImmTyD16: OS << "D16"; break;
1074     case ImmTyFORMAT: OS << "FORMAT"; break;
1075     case ImmTyClampSI: OS << "ClampSI"; break;
1076     case ImmTyOModSI: OS << "OModSI"; break;
1077     case ImmTyDPP8: OS << "DPP8"; break;
1078     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1079     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1080     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1081     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1082     case ImmTyDppFI: OS << "DppFI"; break;
1083     case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1084     case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1085     case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1086     case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1087     case ImmTyDMask: OS << "DMask"; break;
1088     case ImmTyDim: OS << "Dim"; break;
1089     case ImmTyUNorm: OS << "UNorm"; break;
1090     case ImmTyDA: OS << "DA"; break;
1091     case ImmTyR128A16: OS << "R128A16"; break;
1092     case ImmTyA16: OS << "A16"; break;
1093     case ImmTyLWE: OS << "LWE"; break;
1094     case ImmTyOff: OS << "Off"; break;
1095     case ImmTyExpTgt: OS << "ExpTgt"; break;
1096     case ImmTyExpCompr: OS << "ExpCompr"; break;
1097     case ImmTyExpVM: OS << "ExpVM"; break;
1098     case ImmTyHwreg: OS << "Hwreg"; break;
1099     case ImmTySendMsg: OS << "SendMsg"; break;
1100     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1101     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1102     case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1103     case ImmTyOpSel: OS << "OpSel"; break;
1104     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1105     case ImmTyNegLo: OS << "NegLo"; break;
1106     case ImmTyNegHi: OS << "NegHi"; break;
1107     case ImmTySwizzle: OS << "Swizzle"; break;
1108     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1109     case ImmTyHigh: OS << "High"; break;
1110     case ImmTyBLGP: OS << "BLGP"; break;
1111     case ImmTyCBSZ: OS << "CBSZ"; break;
1112     case ImmTyABID: OS << "ABID"; break;
1113     case ImmTyEndpgm: OS << "Endpgm"; break;
1114     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1115     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1116     case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1117     case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1118     }
1119     // clang-format on
1120   }
1121 
1122   void print(raw_ostream &OS) const override {
1123     switch (Kind) {
1124     case Register:
1125       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1126       break;
1127     case Immediate:
1128       OS << '<' << getImm();
1129       if (getImmTy() != ImmTyNone) {
1130         OS << " type: "; printImmTy(OS, getImmTy());
1131       }
1132       OS << " mods: " << Imm.Mods << '>';
1133       break;
1134     case Token:
1135       OS << '\'' << getToken() << '\'';
1136       break;
1137     case Expression:
1138       OS << "<expr " << *Expr << '>';
1139       break;
1140     }
1141   }
1142 
1143   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1144                                       int64_t Val, SMLoc Loc,
1145                                       ImmTy Type = ImmTyNone,
1146                                       bool IsFPImm = false) {
1147     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1148     Op->Imm.Val = Val;
1149     Op->Imm.IsFPImm = IsFPImm;
1150     Op->Imm.Kind = ImmKindTyNone;
1151     Op->Imm.Type = Type;
1152     Op->Imm.Mods = Modifiers();
1153     Op->StartLoc = Loc;
1154     Op->EndLoc = Loc;
1155     return Op;
1156   }
1157 
1158   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1159                                         StringRef Str, SMLoc Loc,
1160                                         bool HasExplicitEncodingSize = true) {
1161     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1162     Res->Tok.Data = Str.data();
1163     Res->Tok.Length = Str.size();
1164     Res->StartLoc = Loc;
1165     Res->EndLoc = Loc;
1166     return Res;
1167   }
1168 
1169   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1170                                       unsigned RegNo, SMLoc S,
1171                                       SMLoc E) {
1172     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1173     Op->Reg.RegNo = RegNo;
1174     Op->Reg.Mods = Modifiers();
1175     Op->StartLoc = S;
1176     Op->EndLoc = E;
1177     return Op;
1178   }
1179 
1180   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1181                                        const class MCExpr *Expr, SMLoc S) {
1182     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1183     Op->Expr = Expr;
1184     Op->StartLoc = S;
1185     Op->EndLoc = S;
1186     return Op;
1187   }
1188 };
1189 
1190 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1191   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1192   return OS;
1193 }
1194 
1195 //===----------------------------------------------------------------------===//
1196 // AsmParser
1197 //===----------------------------------------------------------------------===//
1198 
1199 // Holds info related to the current kernel, e.g. count of SGPRs used.
1200 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1201 // .amdgpu_hsa_kernel or at EOF.
1202 class KernelScopeInfo {
1203   int SgprIndexUnusedMin = -1;
1204   int VgprIndexUnusedMin = -1;
1205   int AgprIndexUnusedMin = -1;
1206   MCContext *Ctx = nullptr;
1207   MCSubtargetInfo const *MSTI = nullptr;
1208 
1209   void usesSgprAt(int i) {
1210     if (i >= SgprIndexUnusedMin) {
1211       SgprIndexUnusedMin = ++i;
1212       if (Ctx) {
1213         MCSymbol* const Sym =
1214           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1215         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1216       }
1217     }
1218   }
1219 
1220   void usesVgprAt(int i) {
1221     if (i >= VgprIndexUnusedMin) {
1222       VgprIndexUnusedMin = ++i;
1223       if (Ctx) {
1224         MCSymbol* const Sym =
1225           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1226         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1227                                          VgprIndexUnusedMin);
1228         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1229       }
1230     }
1231   }
1232 
1233   void usesAgprAt(int i) {
1234     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1235     if (!hasMAIInsts(*MSTI))
1236       return;
1237 
1238     if (i >= AgprIndexUnusedMin) {
1239       AgprIndexUnusedMin = ++i;
1240       if (Ctx) {
1241         MCSymbol* const Sym =
1242           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1243         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1244 
1245         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1246         MCSymbol* const vSym =
1247           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1248         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1249                                          VgprIndexUnusedMin);
1250         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1251       }
1252     }
1253   }
1254 
1255 public:
1256   KernelScopeInfo() = default;
1257 
1258   void initialize(MCContext &Context) {
1259     Ctx = &Context;
1260     MSTI = Ctx->getSubtargetInfo();
1261 
1262     usesSgprAt(SgprIndexUnusedMin = -1);
1263     usesVgprAt(VgprIndexUnusedMin = -1);
1264     if (hasMAIInsts(*MSTI)) {
1265       usesAgprAt(AgprIndexUnusedMin = -1);
1266     }
1267   }
1268 
1269   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1270                     unsigned RegWidth) {
1271     switch (RegKind) {
1272     case IS_SGPR:
1273       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1274       break;
1275     case IS_AGPR:
1276       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1277       break;
1278     case IS_VGPR:
1279       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1280       break;
1281     default:
1282       break;
1283     }
1284   }
1285 };
1286 
1287 class AMDGPUAsmParser : public MCTargetAsmParser {
1288   MCAsmParser &Parser;
1289 
1290   unsigned ForcedEncodingSize = 0;
1291   bool ForcedDPP = false;
1292   bool ForcedSDWA = false;
1293   KernelScopeInfo KernelScope;
1294 
1295   /// @name Auto-generated Match Functions
1296   /// {
1297 
1298 #define GET_ASSEMBLER_HEADER
1299 #include "AMDGPUGenAsmMatcher.inc"
1300 
1301   /// }
1302 
1303 private:
1304   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1305   bool OutOfRangeError(SMRange Range);
1306   /// Calculate VGPR/SGPR blocks required for given target, reserved
1307   /// registers, and user-specified NextFreeXGPR values.
1308   ///
1309   /// \param Features [in] Target features, used for bug corrections.
1310   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1311   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1312   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1313   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1314   /// descriptor field, if valid.
1315   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1316   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1317   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1318   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1319   /// \param VGPRBlocks [out] Result VGPR block count.
1320   /// \param SGPRBlocks [out] Result SGPR block count.
1321   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1322                           bool FlatScrUsed, bool XNACKUsed,
1323                           std::optional<bool> EnableWavefrontSize32,
1324                           unsigned NextFreeVGPR, SMRange VGPRRange,
1325                           unsigned NextFreeSGPR, SMRange SGPRRange,
1326                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1327   bool ParseDirectiveAMDGCNTarget();
1328   bool ParseDirectiveAMDHSACodeObjectVersion();
1329   bool ParseDirectiveAMDHSAKernel();
1330   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1331   bool ParseDirectiveAMDKernelCodeT();
1332   // TODO: Possibly make subtargetHasRegister const.
1333   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1334   bool ParseDirectiveAMDGPUHsaKernel();
1335 
1336   bool ParseDirectiveISAVersion();
1337   bool ParseDirectiveHSAMetadata();
1338   bool ParseDirectivePALMetadataBegin();
1339   bool ParseDirectivePALMetadata();
1340   bool ParseDirectiveAMDGPULDS();
1341 
1342   /// Common code to parse out a block of text (typically YAML) between start and
1343   /// end directives.
1344   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1345                            const char *AssemblerDirectiveEnd,
1346                            std::string &CollectString);
1347 
1348   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1349                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1350   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1351                            unsigned &RegNum, unsigned &RegWidth,
1352                            bool RestoreOnFailure = false);
1353   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1354                            unsigned &RegNum, unsigned &RegWidth,
1355                            SmallVectorImpl<AsmToken> &Tokens);
1356   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1357                            unsigned &RegWidth,
1358                            SmallVectorImpl<AsmToken> &Tokens);
1359   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1360                            unsigned &RegWidth,
1361                            SmallVectorImpl<AsmToken> &Tokens);
1362   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1363                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1364   bool ParseRegRange(unsigned& Num, unsigned& Width);
1365   unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1366                          unsigned RegWidth, SMLoc Loc);
1367 
1368   bool isRegister();
1369   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1370   std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1371   void initializeGprCountSymbol(RegisterKind RegKind);
1372   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1373                              unsigned RegWidth);
1374   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1375                     bool IsAtomic);
1376 
1377 public:
1378   enum AMDGPUMatchResultTy {
1379     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1380   };
1381   enum OperandMode {
1382     OperandMode_Default,
1383     OperandMode_NSA,
1384   };
1385 
1386   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1387 
1388   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1389                const MCInstrInfo &MII,
1390                const MCTargetOptions &Options)
1391       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1392     MCAsmParserExtension::Initialize(Parser);
1393 
1394     if (getFeatureBits().none()) {
1395       // Set default features.
1396       copySTI().ToggleFeature("southern-islands");
1397     }
1398 
1399     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1400 
1401     {
1402       // TODO: make those pre-defined variables read-only.
1403       // Currently there is none suitable machinery in the core llvm-mc for this.
1404       // MCSymbol::isRedefinable is intended for another purpose, and
1405       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1406       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1407       MCContext &Ctx = getContext();
1408       if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1409         MCSymbol *Sym =
1410             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1411         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1412         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1413         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1414         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1415         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1416       } else {
1417         MCSymbol *Sym =
1418             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1419         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1420         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1421         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1422         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1423         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1424       }
1425       if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1426         initializeGprCountSymbol(IS_VGPR);
1427         initializeGprCountSymbol(IS_SGPR);
1428       } else
1429         KernelScope.initialize(getContext());
1430     }
1431   }
1432 
1433   bool hasMIMG_R128() const {
1434     return AMDGPU::hasMIMG_R128(getSTI());
1435   }
1436 
1437   bool hasPackedD16() const {
1438     return AMDGPU::hasPackedD16(getSTI());
1439   }
1440 
1441   bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1442 
1443   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1444 
1445   bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1446 
1447   bool isSI() const {
1448     return AMDGPU::isSI(getSTI());
1449   }
1450 
1451   bool isCI() const {
1452     return AMDGPU::isCI(getSTI());
1453   }
1454 
1455   bool isVI() const {
1456     return AMDGPU::isVI(getSTI());
1457   }
1458 
1459   bool isGFX9() const {
1460     return AMDGPU::isGFX9(getSTI());
1461   }
1462 
1463   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1464   bool isGFX90A() const {
1465     return AMDGPU::isGFX90A(getSTI());
1466   }
1467 
1468   bool isGFX940() const {
1469     return AMDGPU::isGFX940(getSTI());
1470   }
1471 
1472   bool isGFX9Plus() const {
1473     return AMDGPU::isGFX9Plus(getSTI());
1474   }
1475 
1476   bool isGFX10() const {
1477     return AMDGPU::isGFX10(getSTI());
1478   }
1479 
1480   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1481 
1482   bool isGFX11() const {
1483     return AMDGPU::isGFX11(getSTI());
1484   }
1485 
1486   bool isGFX11Plus() const {
1487     return AMDGPU::isGFX11Plus(getSTI());
1488   }
1489 
1490   bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1491 
1492   bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1493 
1494   bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1495 
1496   bool isGFX10_BEncoding() const {
1497     return AMDGPU::isGFX10_BEncoding(getSTI());
1498   }
1499 
1500   bool hasInv2PiInlineImm() const {
1501     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1502   }
1503 
1504   bool hasFlatOffsets() const {
1505     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1506   }
1507 
1508   bool hasArchitectedFlatScratch() const {
1509     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1510   }
1511 
1512   bool hasSGPR102_SGPR103() const {
1513     return !isVI() && !isGFX9();
1514   }
1515 
1516   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1517 
1518   bool hasIntClamp() const {
1519     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1520   }
1521 
1522   bool hasPartialNSAEncoding() const {
1523     return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1524   }
1525 
1526   unsigned getNSAMaxSize(bool HasSampler = false) const {
1527     return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1528   }
1529 
1530   unsigned getMaxNumUserSGPRs() const {
1531     return AMDGPU::getMaxNumUserSGPRs(getSTI());
1532   }
1533 
1534   bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1535 
1536   AMDGPUTargetStreamer &getTargetStreamer() {
1537     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1538     return static_cast<AMDGPUTargetStreamer &>(TS);
1539   }
1540 
1541   const MCRegisterInfo *getMRI() const {
1542     // We need this const_cast because for some reason getContext() is not const
1543     // in MCAsmParser.
1544     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1545   }
1546 
1547   const MCInstrInfo *getMII() const {
1548     return &MII;
1549   }
1550 
1551   const FeatureBitset &getFeatureBits() const {
1552     return getSTI().getFeatureBits();
1553   }
1554 
1555   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1556   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1557   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1558 
1559   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1560   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1561   bool isForcedDPP() const { return ForcedDPP; }
1562   bool isForcedSDWA() const { return ForcedSDWA; }
1563   ArrayRef<unsigned> getMatchedVariants() const;
1564   StringRef getMatchedVariantName() const;
1565 
1566   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1567   bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1568                      bool RestoreOnFailure);
1569   bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1570   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1571                                SMLoc &EndLoc) override;
1572   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1573   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1574                                       unsigned Kind) override;
1575   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1576                                OperandVector &Operands, MCStreamer &Out,
1577                                uint64_t &ErrorInfo,
1578                                bool MatchingInlineAsm) override;
1579   bool ParseDirective(AsmToken DirectiveID) override;
1580   ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1581                            OperandMode Mode = OperandMode_Default);
1582   StringRef parseMnemonicSuffix(StringRef Name);
1583   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1584                         SMLoc NameLoc, OperandVector &Operands) override;
1585   //bool ProcessInstruction(MCInst &Inst);
1586 
1587   ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1588 
1589   ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1590 
1591   ParseStatus
1592   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1593                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1594                      std::function<bool(int64_t &)> ConvertResult = nullptr);
1595 
1596   ParseStatus parseOperandArrayWithPrefix(
1597       const char *Prefix, OperandVector &Operands,
1598       AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1599       bool (*ConvertResult)(int64_t &) = nullptr);
1600 
1601   ParseStatus
1602   parseNamedBit(StringRef Name, OperandVector &Operands,
1603                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1604   unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1605   ParseStatus parseCPol(OperandVector &Operands);
1606   ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1607   ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1608   ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1609                                     SMLoc &StringLoc);
1610 
1611   bool isModifier();
1612   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1613   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1614   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1615   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1616   bool parseSP3NegModifier();
1617   ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1618                        bool HasLit = false);
1619   ParseStatus parseReg(OperandVector &Operands);
1620   ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1621                             bool HasLit = false);
1622   ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1623                                            bool AllowImm = true);
1624   ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1625                                             bool AllowImm = true);
1626   ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1627   ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1628   ParseStatus parseVReg32OrOff(OperandVector &Operands);
1629   ParseStatus tryParseIndexKey(OperandVector &Operands,
1630                                AMDGPUOperand::ImmTy ImmTy);
1631   ParseStatus parseIndexKey8bit(OperandVector &Operands);
1632   ParseStatus parseIndexKey16bit(OperandVector &Operands);
1633 
1634   ParseStatus parseDfmtNfmt(int64_t &Format);
1635   ParseStatus parseUfmt(int64_t &Format);
1636   ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1637                                        int64_t &Format);
1638   ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1639                                          int64_t &Format);
1640   ParseStatus parseFORMAT(OperandVector &Operands);
1641   ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1642   ParseStatus parseNumericFormat(int64_t &Format);
1643   ParseStatus parseFlatOffset(OperandVector &Operands);
1644   ParseStatus parseR128A16(OperandVector &Operands);
1645   ParseStatus parseBLGP(OperandVector &Operands);
1646   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1647   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1648 
1649   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1650 
1651   bool parseCnt(int64_t &IntVal);
1652   ParseStatus parseSWaitCnt(OperandVector &Operands);
1653 
1654   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1655   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1656   ParseStatus parseDepCtr(OperandVector &Operands);
1657 
1658   bool parseDelay(int64_t &Delay);
1659   ParseStatus parseSDelayALU(OperandVector &Operands);
1660 
1661   ParseStatus parseHwreg(OperandVector &Operands);
1662 
1663 private:
1664   struct OperandInfoTy {
1665     SMLoc Loc;
1666     int64_t Id;
1667     bool IsSymbolic = false;
1668     bool IsDefined = false;
1669 
1670     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1671   };
1672 
1673   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1674   bool validateSendMsg(const OperandInfoTy &Msg,
1675                        const OperandInfoTy &Op,
1676                        const OperandInfoTy &Stream);
1677 
1678   bool parseHwregBody(OperandInfoTy &HwReg,
1679                       OperandInfoTy &Offset,
1680                       OperandInfoTy &Width);
1681   bool validateHwreg(const OperandInfoTy &HwReg,
1682                      const OperandInfoTy &Offset,
1683                      const OperandInfoTy &Width);
1684 
1685   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1686   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1687   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1688 
1689   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1690                       const OperandVector &Operands) const;
1691   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1692   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1693   SMLoc getLitLoc(const OperandVector &Operands,
1694                   bool SearchMandatoryLiterals = false) const;
1695   SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1696   SMLoc getConstLoc(const OperandVector &Operands) const;
1697   SMLoc getInstLoc(const OperandVector &Operands) const;
1698 
1699   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1700   bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1701   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1702   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1703   bool validateSOPLiteral(const MCInst &Inst) const;
1704   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1705   bool validateVOPDRegBankConstraints(const MCInst &Inst,
1706                                       const OperandVector &Operands);
1707   bool validateIntClampSupported(const MCInst &Inst);
1708   bool validateMIMGAtomicDMask(const MCInst &Inst);
1709   bool validateMIMGGatherDMask(const MCInst &Inst);
1710   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1711   bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1712   bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1713   bool validateMIMGD16(const MCInst &Inst);
1714   bool validateMIMGMSAA(const MCInst &Inst);
1715   bool validateOpSel(const MCInst &Inst);
1716   bool validateNeg(const MCInst &Inst, int OpName);
1717   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1718   bool validateVccOperand(unsigned Reg) const;
1719   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1720   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1721   bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1722   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1723   bool validateAGPRLdSt(const MCInst &Inst) const;
1724   bool validateVGPRAlign(const MCInst &Inst) const;
1725   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1726   bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1727   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1728   bool validateDivScale(const MCInst &Inst);
1729   bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1730   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1731                              const SMLoc &IDLoc);
1732   bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1733                               const unsigned CPol);
1734   bool validateExeczVcczOperands(const OperandVector &Operands);
1735   bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1736   std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1737   unsigned getConstantBusLimit(unsigned Opcode) const;
1738   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1739   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1740   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1741 
1742   bool isSupportedMnemo(StringRef Mnemo,
1743                         const FeatureBitset &FBS);
1744   bool isSupportedMnemo(StringRef Mnemo,
1745                         const FeatureBitset &FBS,
1746                         ArrayRef<unsigned> Variants);
1747   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1748 
1749   bool isId(const StringRef Id) const;
1750   bool isId(const AsmToken &Token, const StringRef Id) const;
1751   bool isToken(const AsmToken::TokenKind Kind) const;
1752   StringRef getId() const;
1753   bool trySkipId(const StringRef Id);
1754   bool trySkipId(const StringRef Pref, const StringRef Id);
1755   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1756   bool trySkipToken(const AsmToken::TokenKind Kind);
1757   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1758   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1759   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1760 
1761   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1762   AsmToken::TokenKind getTokenKind() const;
1763   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1764   bool parseExpr(OperandVector &Operands);
1765   StringRef getTokenStr() const;
1766   AsmToken peekToken(bool ShouldSkipSpace = true);
1767   AsmToken getToken() const;
1768   SMLoc getLoc() const;
1769   void lex();
1770 
1771 public:
1772   void onBeginOfFile() override;
1773 
1774   ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1775 
1776   ParseStatus parseExpTgt(OperandVector &Operands);
1777   ParseStatus parseSendMsg(OperandVector &Operands);
1778   ParseStatus parseInterpSlot(OperandVector &Operands);
1779   ParseStatus parseInterpAttr(OperandVector &Operands);
1780   ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1781   ParseStatus parseBoolReg(OperandVector &Operands);
1782 
1783   bool parseSwizzleOperand(int64_t &Op,
1784                            const unsigned MinVal,
1785                            const unsigned MaxVal,
1786                            const StringRef ErrMsg,
1787                            SMLoc &Loc);
1788   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1789                             const unsigned MinVal,
1790                             const unsigned MaxVal,
1791                             const StringRef ErrMsg);
1792   ParseStatus parseSwizzle(OperandVector &Operands);
1793   bool parseSwizzleOffset(int64_t &Imm);
1794   bool parseSwizzleMacro(int64_t &Imm);
1795   bool parseSwizzleQuadPerm(int64_t &Imm);
1796   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1797   bool parseSwizzleBroadcast(int64_t &Imm);
1798   bool parseSwizzleSwap(int64_t &Imm);
1799   bool parseSwizzleReverse(int64_t &Imm);
1800 
1801   ParseStatus parseGPRIdxMode(OperandVector &Operands);
1802   int64_t parseGPRIdxMacro();
1803 
1804   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1805   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1806 
1807   ParseStatus parseOModSI(OperandVector &Operands);
1808 
1809   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1810                OptionalImmIndexMap &OptionalIdx);
1811   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1812   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1813   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1814   void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1815 
1816   void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1817   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1818                     OptionalImmIndexMap &OptionalIdx);
1819   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1820                 OptionalImmIndexMap &OptionalIdx);
1821 
1822   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1823   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1824 
1825   bool parseDimId(unsigned &Encoding);
1826   ParseStatus parseDim(OperandVector &Operands);
1827   bool convertDppBoundCtrl(int64_t &BoundCtrl);
1828   ParseStatus parseDPP8(OperandVector &Operands);
1829   ParseStatus parseDPPCtrl(OperandVector &Operands);
1830   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1831   int64_t parseDPPCtrlSel(StringRef Ctrl);
1832   int64_t parseDPPCtrlPerm();
1833   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1834   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1835     cvtDPP(Inst, Operands, true);
1836   }
1837   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1838                   bool IsDPP8 = false);
1839   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1840     cvtVOP3DPP(Inst, Operands, true);
1841   }
1842 
1843   ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1844                            AMDGPUOperand::ImmTy Type);
1845   ParseStatus parseSDWADstUnused(OperandVector &Operands);
1846   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1847   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1848   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1849   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1850   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1851   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1852                uint64_t BasicInstType,
1853                bool SkipDstVcc = false,
1854                bool SkipSrcVcc = false);
1855 
1856   ParseStatus parseEndpgm(OperandVector &Operands);
1857 
1858   ParseStatus parseVOPD(OperandVector &Operands);
1859 };
1860 
1861 } // end anonymous namespace
1862 
1863 // May be called with integer type with equivalent bitwidth.
1864 static const fltSemantics *getFltSemantics(unsigned Size) {
1865   switch (Size) {
1866   case 4:
1867     return &APFloat::IEEEsingle();
1868   case 8:
1869     return &APFloat::IEEEdouble();
1870   case 2:
1871     return &APFloat::IEEEhalf();
1872   default:
1873     llvm_unreachable("unsupported fp type");
1874   }
1875 }
1876 
1877 static const fltSemantics *getFltSemantics(MVT VT) {
1878   return getFltSemantics(VT.getSizeInBits() / 8);
1879 }
1880 
1881 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1882   switch (OperandType) {
1883   case AMDGPU::OPERAND_REG_IMM_INT32:
1884   case AMDGPU::OPERAND_REG_IMM_FP32:
1885   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1886   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1887   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1888   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1889   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1890   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1891   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1892   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1893   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1894   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1895   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1896   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1897   case AMDGPU::OPERAND_KIMM32:
1898   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1899     return &APFloat::IEEEsingle();
1900   case AMDGPU::OPERAND_REG_IMM_INT64:
1901   case AMDGPU::OPERAND_REG_IMM_FP64:
1902   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1903   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1904   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1905     return &APFloat::IEEEdouble();
1906   case AMDGPU::OPERAND_REG_IMM_INT16:
1907   case AMDGPU::OPERAND_REG_IMM_FP16:
1908   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1909   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1910   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1911   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1912   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1913   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1914   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1915   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1916   case AMDGPU::OPERAND_KIMM16:
1917     return &APFloat::IEEEhalf();
1918   default:
1919     llvm_unreachable("unsupported fp type");
1920   }
1921 }
1922 
1923 //===----------------------------------------------------------------------===//
1924 // Operand
1925 //===----------------------------------------------------------------------===//
1926 
1927 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1928   bool Lost;
1929 
1930   // Convert literal to single precision
1931   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1932                                                APFloat::rmNearestTiesToEven,
1933                                                &Lost);
1934   // We allow precision lost but not overflow or underflow
1935   if (Status != APFloat::opOK &&
1936       Lost &&
1937       ((Status & APFloat::opOverflow)  != 0 ||
1938        (Status & APFloat::opUnderflow) != 0)) {
1939     return false;
1940   }
1941 
1942   return true;
1943 }
1944 
1945 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1946   return isUIntN(Size, Val) || isIntN(Size, Val);
1947 }
1948 
1949 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1950   if (VT.getScalarType() == MVT::i16) {
1951     // FP immediate values are broken.
1952     return isInlinableIntLiteral(Val);
1953   }
1954 
1955   // f16/v2f16 operands work correctly for all values.
1956   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1957 }
1958 
1959 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1960 
1961   // This is a hack to enable named inline values like
1962   // shared_base with both 32-bit and 64-bit operands.
1963   // Note that these values are defined as
1964   // 32-bit operands only.
1965   if (isInlineValue()) {
1966     return true;
1967   }
1968 
1969   if (!isImmTy(ImmTyNone)) {
1970     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1971     return false;
1972   }
1973   // TODO: We should avoid using host float here. It would be better to
1974   // check the float bit values which is what a few other places do.
1975   // We've had bot failures before due to weird NaN support on mips hosts.
1976 
1977   APInt Literal(64, Imm.Val);
1978 
1979   if (Imm.IsFPImm) { // We got fp literal token
1980     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1981       return AMDGPU::isInlinableLiteral64(Imm.Val,
1982                                           AsmParser->hasInv2PiInlineImm());
1983     }
1984 
1985     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1986     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1987       return false;
1988 
1989     if (type.getScalarSizeInBits() == 16) {
1990       return isInlineableLiteralOp16(
1991         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1992         type, AsmParser->hasInv2PiInlineImm());
1993     }
1994 
1995     // Check if single precision literal is inlinable
1996     return AMDGPU::isInlinableLiteral32(
1997       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1998       AsmParser->hasInv2PiInlineImm());
1999   }
2000 
2001   // We got int literal token.
2002   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2003     return AMDGPU::isInlinableLiteral64(Imm.Val,
2004                                         AsmParser->hasInv2PiInlineImm());
2005   }
2006 
2007   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2008     return false;
2009   }
2010 
2011   if (type.getScalarSizeInBits() == 16) {
2012     return isInlineableLiteralOp16(
2013       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2014       type, AsmParser->hasInv2PiInlineImm());
2015   }
2016 
2017   return AMDGPU::isInlinableLiteral32(
2018     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2019     AsmParser->hasInv2PiInlineImm());
2020 }
2021 
2022 bool AMDGPUOperand::isLiteralImm(MVT type) const {
2023   // Check that this immediate can be added as literal
2024   if (!isImmTy(ImmTyNone)) {
2025     return false;
2026   }
2027 
2028   if (!Imm.IsFPImm) {
2029     // We got int literal token.
2030 
2031     if (type == MVT::f64 && hasFPModifiers()) {
2032       // Cannot apply fp modifiers to int literals preserving the same semantics
2033       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2034       // disable these cases.
2035       return false;
2036     }
2037 
2038     unsigned Size = type.getSizeInBits();
2039     if (Size == 64)
2040       Size = 32;
2041 
2042     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2043     // types.
2044     return isSafeTruncation(Imm.Val, Size);
2045   }
2046 
2047   // We got fp literal token
2048   if (type == MVT::f64) { // Expected 64-bit fp operand
2049     // We would set low 64-bits of literal to zeroes but we accept this literals
2050     return true;
2051   }
2052 
2053   if (type == MVT::i64) { // Expected 64-bit int operand
2054     // We don't allow fp literals in 64-bit integer instructions. It is
2055     // unclear how we should encode them.
2056     return false;
2057   }
2058 
2059   // We allow fp literals with f16x2 operands assuming that the specified
2060   // literal goes into the lower half and the upper half is zero. We also
2061   // require that the literal may be losslessly converted to f16.
2062   //
2063   // For i16x2 operands, we assume that the specified literal is encoded as a
2064   // single-precision float. This is pretty odd, but it matches SP3 and what
2065   // happens in hardware.
2066   MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
2067                      : (type == MVT::v2i16) ? MVT::f32
2068                      : (type == MVT::v2f32) ? MVT::f32
2069                                             : type;
2070 
2071   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2072   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2073 }
2074 
2075 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2076   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2077 }
2078 
2079 bool AMDGPUOperand::isVRegWithInputMods() const {
2080   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2081          // GFX90A allows DPP on 64-bit operands.
2082          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2083           AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2084 }
2085 
2086 template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2087   return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2088                              : AMDGPU::VGPR_16_Lo128RegClassID);
2089 }
2090 
2091 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2092   if (AsmParser->isVI())
2093     return isVReg32();
2094   else if (AsmParser->isGFX9Plus())
2095     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2096   else
2097     return false;
2098 }
2099 
2100 bool AMDGPUOperand::isSDWAFP16Operand() const {
2101   return isSDWAOperand(MVT::f16);
2102 }
2103 
2104 bool AMDGPUOperand::isSDWAFP32Operand() const {
2105   return isSDWAOperand(MVT::f32);
2106 }
2107 
2108 bool AMDGPUOperand::isSDWAInt16Operand() const {
2109   return isSDWAOperand(MVT::i16);
2110 }
2111 
2112 bool AMDGPUOperand::isSDWAInt32Operand() const {
2113   return isSDWAOperand(MVT::i32);
2114 }
2115 
2116 bool AMDGPUOperand::isBoolReg() const {
2117   auto FB = AsmParser->getFeatureBits();
2118   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2119                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2120 }
2121 
2122 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2123 {
2124   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2125   assert(Size == 2 || Size == 4 || Size == 8);
2126 
2127   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2128 
2129   if (Imm.Mods.Abs) {
2130     Val &= ~FpSignMask;
2131   }
2132   if (Imm.Mods.Neg) {
2133     Val ^= FpSignMask;
2134   }
2135 
2136   return Val;
2137 }
2138 
2139 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2140   if (isExpr()) {
2141     Inst.addOperand(MCOperand::createExpr(Expr));
2142     return;
2143   }
2144 
2145   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2146                              Inst.getNumOperands())) {
2147     addLiteralImmOperand(Inst, Imm.Val,
2148                          ApplyModifiers &
2149                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2150   } else {
2151     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2152     Inst.addOperand(MCOperand::createImm(Imm.Val));
2153     setImmKindNone();
2154   }
2155 }
2156 
/// Append \p Val to \p Inst as an immediate for the instruction's next
/// operand slot, which must be a source operand that accepts literals.
///
/// When \p ApplyModifiers is set, this operand's abs/neg modifiers are first
/// folded into \p Val (treated as 8 bytes wide for fp literal tokens, and as
/// the operand's size otherwise).
///
/// The value is then encoded according to the operand type of the slot:
///  - For fp literal tokens (Imm.IsFPImm), \p Val holds the bits of a double.
///    64-bit operands take it directly (inline constant) or with the low
///    32 bits cleared (literal, fp operands only); all other operand types
///    get the value converted to the semantics chosen by getOpFltSemantics.
///  - For integer literal tokens, the value is added unchanged when it is an
///    inline constant, and truncated/shifted to the literal width otherwise.
///
/// Most paths also record the kind of immediate that was produced through
/// the setImmKind* helpers.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  // Index of the operand slot about to be filled.
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // FP literal tokens are held as double bits, so modifiers act on the
    // 64-bit sign bit; integer tokens use the operand's own size.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
          Val &= 0xffffffff00000000u;
        }

        Inst.addOperand(MCOperand::createImm(Val));
        setImmKindLiteral();
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32:
    case AMDGPU::OPERAND_KIMM32:
    case AMDGPU::OPERAND_KIMM16:
    case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal from double to the semantics required by the
      // operand type (half or single precision).
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
        setImmKindMandatoryLiteral();
      } else {
        setImmKindLiteral();
      }
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    // Not an inline constant: keep only the low 32 bits as the literal.
    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    // Not an inline constant: fp operands place the value in the high
    // 32 bits, integer operands keep only the low 32 bits.
    Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
                                                    : Lo_32(Val);

    Inst.addOperand(MCOperand::createImm(Val));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    // Not an inline constant: keep only the low 16 bits as the literal.
    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    // These operand types are expected to reach this point only with
    // inline constants. NOTE(review): unlike the other cases, no
    // setImmKind* helper is called here - confirm this is intended.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  case AMDGPU::OPERAND_KIMM32:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
    setImmKindMandatoryLiteral();
    return;
  case AMDGPU::OPERAND_KIMM16:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
    setImmKindMandatoryLiteral();
    return;
  default:
    llvm_unreachable("invalid operand size");
  }
}
2346 
2347 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2348   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2349 }
2350 
2351 bool AMDGPUOperand::isInlineValue() const {
2352   return isRegKind() && ::isInlineValue(getReg());
2353 }
2354 
2355 //===----------------------------------------------------------------------===//
2356 // AsmParser
2357 //===----------------------------------------------------------------------===//
2358 
2359 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2360   if (Is == IS_VGPR) {
2361     switch (RegWidth) {
2362       default: return -1;
2363       case 32:
2364         return AMDGPU::VGPR_32RegClassID;
2365       case 64:
2366         return AMDGPU::VReg_64RegClassID;
2367       case 96:
2368         return AMDGPU::VReg_96RegClassID;
2369       case 128:
2370         return AMDGPU::VReg_128RegClassID;
2371       case 160:
2372         return AMDGPU::VReg_160RegClassID;
2373       case 192:
2374         return AMDGPU::VReg_192RegClassID;
2375       case 224:
2376         return AMDGPU::VReg_224RegClassID;
2377       case 256:
2378         return AMDGPU::VReg_256RegClassID;
2379       case 288:
2380         return AMDGPU::VReg_288RegClassID;
2381       case 320:
2382         return AMDGPU::VReg_320RegClassID;
2383       case 352:
2384         return AMDGPU::VReg_352RegClassID;
2385       case 384:
2386         return AMDGPU::VReg_384RegClassID;
2387       case 512:
2388         return AMDGPU::VReg_512RegClassID;
2389       case 1024:
2390         return AMDGPU::VReg_1024RegClassID;
2391     }
2392   } else if (Is == IS_TTMP) {
2393     switch (RegWidth) {
2394       default: return -1;
2395       case 32:
2396         return AMDGPU::TTMP_32RegClassID;
2397       case 64:
2398         return AMDGPU::TTMP_64RegClassID;
2399       case 128:
2400         return AMDGPU::TTMP_128RegClassID;
2401       case 256:
2402         return AMDGPU::TTMP_256RegClassID;
2403       case 512:
2404         return AMDGPU::TTMP_512RegClassID;
2405     }
2406   } else if (Is == IS_SGPR) {
2407     switch (RegWidth) {
2408       default: return -1;
2409       case 32:
2410         return AMDGPU::SGPR_32RegClassID;
2411       case 64:
2412         return AMDGPU::SGPR_64RegClassID;
2413       case 96:
2414         return AMDGPU::SGPR_96RegClassID;
2415       case 128:
2416         return AMDGPU::SGPR_128RegClassID;
2417       case 160:
2418         return AMDGPU::SGPR_160RegClassID;
2419       case 192:
2420         return AMDGPU::SGPR_192RegClassID;
2421       case 224:
2422         return AMDGPU::SGPR_224RegClassID;
2423       case 256:
2424         return AMDGPU::SGPR_256RegClassID;
2425       case 288:
2426         return AMDGPU::SGPR_288RegClassID;
2427       case 320:
2428         return AMDGPU::SGPR_320RegClassID;
2429       case 352:
2430         return AMDGPU::SGPR_352RegClassID;
2431       case 384:
2432         return AMDGPU::SGPR_384RegClassID;
2433       case 512:
2434         return AMDGPU::SGPR_512RegClassID;
2435     }
2436   } else if (Is == IS_AGPR) {
2437     switch (RegWidth) {
2438       default: return -1;
2439       case 32:
2440         return AMDGPU::AGPR_32RegClassID;
2441       case 64:
2442         return AMDGPU::AReg_64RegClassID;
2443       case 96:
2444         return AMDGPU::AReg_96RegClassID;
2445       case 128:
2446         return AMDGPU::AReg_128RegClassID;
2447       case 160:
2448         return AMDGPU::AReg_160RegClassID;
2449       case 192:
2450         return AMDGPU::AReg_192RegClassID;
2451       case 224:
2452         return AMDGPU::AReg_224RegClassID;
2453       case 256:
2454         return AMDGPU::AReg_256RegClassID;
2455       case 288:
2456         return AMDGPU::AReg_288RegClassID;
2457       case 320:
2458         return AMDGPU::AReg_320RegClassID;
2459       case 352:
2460         return AMDGPU::AReg_352RegClassID;
2461       case 384:
2462         return AMDGPU::AReg_384RegClassID;
2463       case 512:
2464         return AMDGPU::AReg_512RegClassID;
2465       case 1024:
2466         return AMDGPU::AReg_1024RegClassID;
2467     }
2468   }
2469   return -1;
2470 }
2471 
2472 static unsigned getSpecialRegForName(StringRef RegName) {
2473   return StringSwitch<unsigned>(RegName)
2474     .Case("exec", AMDGPU::EXEC)
2475     .Case("vcc", AMDGPU::VCC)
2476     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2477     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2478     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2479     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2480     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2481     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2482     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2483     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2484     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2485     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2486     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2487     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2488     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2489     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2490     .Case("m0", AMDGPU::M0)
2491     .Case("vccz", AMDGPU::SRC_VCCZ)
2492     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2493     .Case("execz", AMDGPU::SRC_EXECZ)
2494     .Case("src_execz", AMDGPU::SRC_EXECZ)
2495     .Case("scc", AMDGPU::SRC_SCC)
2496     .Case("src_scc", AMDGPU::SRC_SCC)
2497     .Case("tba", AMDGPU::TBA)
2498     .Case("tma", AMDGPU::TMA)
2499     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2500     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2501     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2502     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2503     .Case("vcc_lo", AMDGPU::VCC_LO)
2504     .Case("vcc_hi", AMDGPU::VCC_HI)
2505     .Case("exec_lo", AMDGPU::EXEC_LO)
2506     .Case("exec_hi", AMDGPU::EXEC_HI)
2507     .Case("tma_lo", AMDGPU::TMA_LO)
2508     .Case("tma_hi", AMDGPU::TMA_HI)
2509     .Case("tba_lo", AMDGPU::TBA_LO)
2510     .Case("tba_hi", AMDGPU::TBA_HI)
2511     .Case("pc", AMDGPU::PC_REG)
2512     .Case("null", AMDGPU::SGPR_NULL)
2513     .Default(AMDGPU::NoRegister);
2514 }
2515 
2516 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2517                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2518   auto R = parseRegister();
2519   if (!R) return true;
2520   assert(R->isReg());
2521   RegNo = R->getReg();
2522   StartLoc = R->getStartLoc();
2523   EndLoc = R->getEndLoc();
2524   return false;
2525 }
2526 
2527 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2528                                     SMLoc &EndLoc) {
2529   return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2530 }
2531 
2532 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2533                                               SMLoc &EndLoc) {
2534   bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2535   bool PendingErrors = getParser().hasPendingError();
2536   getParser().clearPendingErrors();
2537   if (PendingErrors)
2538     return ParseStatus::Failure;
2539   if (Result)
2540     return ParseStatus::NoMatch;
2541   return ParseStatus::Success;
2542 }
2543 
2544 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2545                                             RegisterKind RegKind, unsigned Reg1,
2546                                             SMLoc Loc) {
2547   switch (RegKind) {
2548   case IS_SPECIAL:
2549     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2550       Reg = AMDGPU::EXEC;
2551       RegWidth = 64;
2552       return true;
2553     }
2554     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2555       Reg = AMDGPU::FLAT_SCR;
2556       RegWidth = 64;
2557       return true;
2558     }
2559     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2560       Reg = AMDGPU::XNACK_MASK;
2561       RegWidth = 64;
2562       return true;
2563     }
2564     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2565       Reg = AMDGPU::VCC;
2566       RegWidth = 64;
2567       return true;
2568     }
2569     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2570       Reg = AMDGPU::TBA;
2571       RegWidth = 64;
2572       return true;
2573     }
2574     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2575       Reg = AMDGPU::TMA;
2576       RegWidth = 64;
2577       return true;
2578     }
2579     Error(Loc, "register does not fit in the list");
2580     return false;
2581   case IS_VGPR:
2582   case IS_SGPR:
2583   case IS_AGPR:
2584   case IS_TTMP:
2585     if (Reg1 != Reg + RegWidth / 32) {
2586       Error(Loc, "registers in a list must have consecutive indices");
2587       return false;
2588     }
2589     RegWidth += 32;
2590     return true;
2591   default:
2592     llvm_unreachable("unexpected register kind");
2593   }
2594 }
2595 
2596 struct RegInfo {
2597   StringLiteral Name;
2598   RegisterKind Kind;
2599 };
2600 
2601 static constexpr RegInfo RegularRegisters[] = {
2602   {{"v"},    IS_VGPR},
2603   {{"s"},    IS_SGPR},
2604   {{"ttmp"}, IS_TTMP},
2605   {{"acc"},  IS_AGPR},
2606   {{"a"},    IS_AGPR},
2607 };
2608 
2609 static bool isRegularReg(RegisterKind Kind) {
2610   return Kind == IS_VGPR ||
2611          Kind == IS_SGPR ||
2612          Kind == IS_TTMP ||
2613          Kind == IS_AGPR;
2614 }
2615 
2616 static const RegInfo* getRegularRegInfo(StringRef Str) {
2617   for (const RegInfo &Reg : RegularRegisters)
2618     if (Str.starts_with(Reg.Name))
2619       return &Reg;
2620   return nullptr;
2621 }
2622 
2623 static bool getRegNum(StringRef Str, unsigned& Num) {
2624   return !Str.getAsInteger(10, Num);
2625 }
2626 
2627 bool
2628 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2629                             const AsmToken &NextToken) const {
2630 
2631   // A list of consecutive registers: [s0,s1,s2,s3]
2632   if (Token.is(AsmToken::LBrac))
2633     return true;
2634 
2635   if (!Token.is(AsmToken::Identifier))
2636     return false;
2637 
2638   // A single register like s0 or a range of registers like s[0:1]
2639 
2640   StringRef Str = Token.getString();
2641   const RegInfo *Reg = getRegularRegInfo(Str);
2642   if (Reg) {
2643     StringRef RegName = Reg->Name;
2644     StringRef RegSuffix = Str.substr(RegName.size());
2645     if (!RegSuffix.empty()) {
2646       RegSuffix.consume_back(".l");
2647       RegSuffix.consume_back(".h");
2648       unsigned Num;
2649       // A single register with an index: rXX
2650       if (getRegNum(RegSuffix, Num))
2651         return true;
2652     } else {
2653       // A range of registers: r[XX:YY].
2654       if (NextToken.is(AsmToken::LBrac))
2655         return true;
2656     }
2657   }
2658 
2659   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2660 }
2661 
2662 bool
2663 AMDGPUAsmParser::isRegister()
2664 {
2665   return isRegister(getToken(), peekToken());
2666 }
2667 
2668 unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2669                                         unsigned SubReg, unsigned RegWidth,
2670                                         SMLoc Loc) {
2671   assert(isRegularReg(RegKind));
2672 
2673   unsigned AlignSize = 1;
2674   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2675     // SGPR and TTMP registers must be aligned.
2676     // Max required alignment is 4 dwords.
2677     AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2678   }
2679 
2680   if (RegNum % AlignSize != 0) {
2681     Error(Loc, "invalid register alignment");
2682     return AMDGPU::NoRegister;
2683   }
2684 
2685   unsigned RegIdx = RegNum / AlignSize;
2686   int RCID = getRegClass(RegKind, RegWidth);
2687   if (RCID == -1) {
2688     Error(Loc, "invalid or unsupported register size");
2689     return AMDGPU::NoRegister;
2690   }
2691 
2692   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2693   const MCRegisterClass RC = TRI->getRegClass(RCID);
2694   if (RegIdx >= RC.getNumRegs()) {
2695     Error(Loc, "register index is out of range");
2696     return AMDGPU::NoRegister;
2697   }
2698 
2699   unsigned Reg = RC.getRegister(RegIdx);
2700 
2701   if (SubReg) {
2702     Reg = TRI->getSubReg(Reg, SubReg);
2703 
2704     // Currently all regular registers have their .l and .h subregisters, so
2705     // we should never need to generate an error here.
2706     assert(Reg && "Invalid subregister!");
2707   }
2708 
2709   return Reg;
2710 }
2711 
2712 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2713   int64_t RegLo, RegHi;
2714   if (!skipToken(AsmToken::LBrac, "missing register index"))
2715     return false;
2716 
2717   SMLoc FirstIdxLoc = getLoc();
2718   SMLoc SecondIdxLoc;
2719 
2720   if (!parseExpr(RegLo))
2721     return false;
2722 
2723   if (trySkipToken(AsmToken::Colon)) {
2724     SecondIdxLoc = getLoc();
2725     if (!parseExpr(RegHi))
2726       return false;
2727   } else {
2728     RegHi = RegLo;
2729   }
2730 
2731   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2732     return false;
2733 
2734   if (!isUInt<32>(RegLo)) {
2735     Error(FirstIdxLoc, "invalid register index");
2736     return false;
2737   }
2738 
2739   if (!isUInt<32>(RegHi)) {
2740     Error(SecondIdxLoc, "invalid register index");
2741     return false;
2742   }
2743 
2744   if (RegLo > RegHi) {
2745     Error(FirstIdxLoc, "first register index should not exceed second index");
2746     return false;
2747   }
2748 
2749   Num = static_cast<unsigned>(RegLo);
2750   RegWidth = 32 * ((RegHi - RegLo) + 1);
2751   return true;
2752 }
2753 
2754 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2755                                           unsigned &RegNum, unsigned &RegWidth,
2756                                           SmallVectorImpl<AsmToken> &Tokens) {
2757   assert(isToken(AsmToken::Identifier));
2758   unsigned Reg = getSpecialRegForName(getTokenStr());
2759   if (Reg) {
2760     RegNum = 0;
2761     RegWidth = 32;
2762     RegKind = IS_SPECIAL;
2763     Tokens.push_back(getToken());
2764     lex(); // skip register name
2765   }
2766   return Reg;
2767 }
2768 
2769 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2770                                           unsigned &RegNum, unsigned &RegWidth,
2771                                           SmallVectorImpl<AsmToken> &Tokens) {
2772   assert(isToken(AsmToken::Identifier));
2773   StringRef RegName = getTokenStr();
2774   auto Loc = getLoc();
2775 
2776   const RegInfo *RI = getRegularRegInfo(RegName);
2777   if (!RI) {
2778     Error(Loc, "invalid register name");
2779     return AMDGPU::NoRegister;
2780   }
2781 
2782   Tokens.push_back(getToken());
2783   lex(); // skip register name
2784 
2785   RegKind = RI->Kind;
2786   StringRef RegSuffix = RegName.substr(RI->Name.size());
2787   unsigned SubReg = NoSubRegister;
2788   if (!RegSuffix.empty()) {
2789     // We don't know the opcode till we are done parsing, so we don't know if
2790     // registers should be 16 or 32 bit. It is therefore mandatory to put .l or
2791     // .h to correctly specify 16 bit registers. We also can't determine class
2792     // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16.
2793     if (RegSuffix.consume_back(".l"))
2794       SubReg = AMDGPU::lo16;
2795     else if (RegSuffix.consume_back(".h"))
2796       SubReg = AMDGPU::hi16;
2797 
2798     // Single 32-bit register: vXX.
2799     if (!getRegNum(RegSuffix, RegNum)) {
2800       Error(Loc, "invalid register index");
2801       return AMDGPU::NoRegister;
2802     }
2803     RegWidth = 32;
2804   } else {
2805     // Range of registers: v[XX:YY]. ":YY" is optional.
2806     if (!ParseRegRange(RegNum, RegWidth))
2807       return AMDGPU::NoRegister;
2808   }
2809 
2810   return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2811 }
2812 
2813 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2814                                        unsigned &RegWidth,
2815                                        SmallVectorImpl<AsmToken> &Tokens) {
2816   unsigned Reg = AMDGPU::NoRegister;
2817   auto ListLoc = getLoc();
2818 
2819   if (!skipToken(AsmToken::LBrac,
2820                  "expected a register or a list of registers")) {
2821     return AMDGPU::NoRegister;
2822   }
2823 
2824   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2825 
2826   auto Loc = getLoc();
2827   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2828     return AMDGPU::NoRegister;
2829   if (RegWidth != 32) {
2830     Error(Loc, "expected a single 32-bit register");
2831     return AMDGPU::NoRegister;
2832   }
2833 
2834   for (; trySkipToken(AsmToken::Comma); ) {
2835     RegisterKind NextRegKind;
2836     unsigned NextReg, NextRegNum, NextRegWidth;
2837     Loc = getLoc();
2838 
2839     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2840                              NextRegNum, NextRegWidth,
2841                              Tokens)) {
2842       return AMDGPU::NoRegister;
2843     }
2844     if (NextRegWidth != 32) {
2845       Error(Loc, "expected a single 32-bit register");
2846       return AMDGPU::NoRegister;
2847     }
2848     if (NextRegKind != RegKind) {
2849       Error(Loc, "registers in a list must be of the same kind");
2850       return AMDGPU::NoRegister;
2851     }
2852     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2853       return AMDGPU::NoRegister;
2854   }
2855 
2856   if (!skipToken(AsmToken::RBrac,
2857                  "expected a comma or a closing square bracket")) {
2858     return AMDGPU::NoRegister;
2859   }
2860 
2861   if (isRegularReg(RegKind))
2862     Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
2863 
2864   return Reg;
2865 }
2866 
2867 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2868                                           unsigned &RegNum, unsigned &RegWidth,
2869                                           SmallVectorImpl<AsmToken> &Tokens) {
2870   auto Loc = getLoc();
2871   Reg = AMDGPU::NoRegister;
2872 
2873   if (isToken(AsmToken::Identifier)) {
2874     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2875     if (Reg == AMDGPU::NoRegister)
2876       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2877   } else {
2878     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2879   }
2880 
2881   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2882   if (Reg == AMDGPU::NoRegister) {
2883     assert(Parser.hasPendingError());
2884     return false;
2885   }
2886 
2887   if (!subtargetHasRegister(*TRI, Reg)) {
2888     if (Reg == AMDGPU::SGPR_NULL) {
2889       Error(Loc, "'null' operand is not supported on this GPU");
2890     } else {
2891       Error(Loc, "register not available on this GPU");
2892     }
2893     return false;
2894   }
2895 
2896   return true;
2897 }
2898 
2899 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2900                                           unsigned &RegNum, unsigned &RegWidth,
2901                                           bool RestoreOnFailure /*=false*/) {
2902   Reg = AMDGPU::NoRegister;
2903 
2904   SmallVector<AsmToken, 1> Tokens;
2905   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2906     if (RestoreOnFailure) {
2907       while (!Tokens.empty()) {
2908         getLexer().UnLex(Tokens.pop_back_val());
2909       }
2910     }
2911     return true;
2912   }
2913   return false;
2914 }
2915 
2916 std::optional<StringRef>
2917 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2918   switch (RegKind) {
2919   case IS_VGPR:
2920     return StringRef(".amdgcn.next_free_vgpr");
2921   case IS_SGPR:
2922     return StringRef(".amdgcn.next_free_sgpr");
2923   default:
2924     return std::nullopt;
2925   }
2926 }
2927 
2928 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2929   auto SymbolName = getGprCountSymbolName(RegKind);
2930   assert(SymbolName && "initializing invalid register kind");
2931   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2932   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2933 }
2934 
2935 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2936                                             unsigned DwordRegIndex,
2937                                             unsigned RegWidth) {
2938   // Symbols are only defined for GCN targets
2939   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2940     return true;
2941 
2942   auto SymbolName = getGprCountSymbolName(RegKind);
2943   if (!SymbolName)
2944     return true;
2945   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2946 
2947   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2948   int64_t OldCount;
2949 
2950   if (!Sym->isVariable())
2951     return !Error(getLoc(),
2952                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2953   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2954     return !Error(
2955         getLoc(),
2956         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2957 
2958   if (OldCount <= NewMax)
2959     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2960 
2961   return true;
2962 }
2963 
2964 std::unique_ptr<AMDGPUOperand>
2965 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2966   const auto &Tok = getToken();
2967   SMLoc StartLoc = Tok.getLoc();
2968   SMLoc EndLoc = Tok.getEndLoc();
2969   RegisterKind RegKind;
2970   unsigned Reg, RegNum, RegWidth;
2971 
2972   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2973     return nullptr;
2974   }
2975   if (isHsaAbi(getSTI())) {
2976     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2977       return nullptr;
2978   } else
2979     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2980   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2981 }
2982 
2983 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
2984                                       bool HasSP3AbsModifier, bool HasLit) {
2985   // TODO: add syntactic sugar for 1/(2*PI)
2986 
2987   if (isRegister())
2988     return ParseStatus::NoMatch;
2989   assert(!isModifier());
2990 
2991   if (!HasLit) {
2992     HasLit = trySkipId("lit");
2993     if (HasLit) {
2994       if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
2995         return ParseStatus::Failure;
2996       ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
2997       if (S.isSuccess() &&
2998           !skipToken(AsmToken::RParen, "expected closing parentheses"))
2999         return ParseStatus::Failure;
3000       return S;
3001     }
3002   }
3003 
3004   const auto& Tok = getToken();
3005   const auto& NextTok = peekToken();
3006   bool IsReal = Tok.is(AsmToken::Real);
3007   SMLoc S = getLoc();
3008   bool Negate = false;
3009 
3010   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3011     lex();
3012     IsReal = true;
3013     Negate = true;
3014   }
3015 
3016   AMDGPUOperand::Modifiers Mods;
3017   Mods.Lit = HasLit;
3018 
3019   if (IsReal) {
3020     // Floating-point expressions are not supported.
3021     // Can only allow floating-point literals with an
3022     // optional sign.
3023 
3024     StringRef Num = getTokenStr();
3025     lex();
3026 
3027     APFloat RealVal(APFloat::IEEEdouble());
3028     auto roundMode = APFloat::rmNearestTiesToEven;
3029     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3030       return ParseStatus::Failure;
3031     if (Negate)
3032       RealVal.changeSign();
3033 
3034     Operands.push_back(
3035       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3036                                AMDGPUOperand::ImmTyNone, true));
3037     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3038     Op.setModifiers(Mods);
3039 
3040     return ParseStatus::Success;
3041 
3042   } else {
3043     int64_t IntVal;
3044     const MCExpr *Expr;
3045     SMLoc S = getLoc();
3046 
3047     if (HasSP3AbsModifier) {
3048       // This is a workaround for handling expressions
3049       // as arguments of SP3 'abs' modifier, for example:
3050       //     |1.0|
3051       //     |-1|
3052       //     |1+x|
3053       // This syntax is not compatible with syntax of standard
3054       // MC expressions (due to the trailing '|').
3055       SMLoc EndLoc;
3056       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3057         return ParseStatus::Failure;
3058     } else {
3059       if (Parser.parseExpression(Expr))
3060         return ParseStatus::Failure;
3061     }
3062 
3063     if (Expr->evaluateAsAbsolute(IntVal)) {
3064       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3065       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3066       Op.setModifiers(Mods);
3067     } else {
3068       if (HasLit)
3069         return ParseStatus::NoMatch;
3070       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3071     }
3072 
3073     return ParseStatus::Success;
3074   }
3075 
3076   return ParseStatus::NoMatch;
3077 }
3078 
3079 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3080   if (!isRegister())
3081     return ParseStatus::NoMatch;
3082 
3083   if (auto R = parseRegister()) {
3084     assert(R->isReg());
3085     Operands.push_back(std::move(R));
3086     return ParseStatus::Success;
3087   }
3088   return ParseStatus::Failure;
3089 }
3090 
3091 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3092                                            bool HasSP3AbsMod, bool HasLit) {
3093   ParseStatus Res = parseReg(Operands);
3094   if (!Res.isNoMatch())
3095     return Res;
3096   if (isModifier())
3097     return ParseStatus::NoMatch;
3098   return parseImm(Operands, HasSP3AbsMod, HasLit);
3099 }
3100 
3101 bool
3102 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3103   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3104     const auto &str = Token.getString();
3105     return str == "abs" || str == "neg" || str == "sext";
3106   }
3107   return false;
3108 }
3109 
3110 bool
3111 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3112   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3113 }
3114 
3115 bool
3116 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3117   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3118 }
3119 
3120 bool
3121 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3122   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3123 }
3124 
3125 // Check if this is an operand modifier or an opcode modifier
3126 // which may look like an expression but it is not. We should
3127 // avoid parsing these modifiers as expressions. Currently
3128 // recognized sequences are:
3129 //   |...|
3130 //   abs(...)
3131 //   neg(...)
3132 //   sext(...)
3133 //   -reg
3134 //   -|...|
3135 //   -abs(...)
3136 //   name:...
3137 //
3138 bool
3139 AMDGPUAsmParser::isModifier() {
3140 
3141   AsmToken Tok = getToken();
3142   AsmToken NextToken[2];
3143   peekTokens(NextToken);
3144 
3145   return isOperandModifier(Tok, NextToken[0]) ||
3146          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3147          isOpcodeModifierWithVal(Tok, NextToken[0]);
3148 }
3149 
3150 // Check if the current token is an SP3 'neg' modifier.
3151 // Currently this modifier is allowed in the following context:
3152 //
3153 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3154 // 2. Before an 'abs' modifier: -abs(...)
3155 // 3. Before an SP3 'abs' modifier: -|...|
3156 //
3157 // In all other cases "-" is handled as a part
3158 // of an expression that follows the sign.
3159 //
3160 // Note: When "-" is followed by an integer literal,
3161 // this is interpreted as integer negation rather
3162 // than a floating-point NEG modifier applied to N.
3163 // Beside being contr-intuitive, such use of floating-point
3164 // NEG modifier would have resulted in different meaning
3165 // of integer literals used with VOP1/2/C and VOP3,
3166 // for example:
3167 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3168 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3169 // Negative fp literals with preceding "-" are
3170 // handled likewise for uniformity
3171 //
3172 bool
3173 AMDGPUAsmParser::parseSP3NegModifier() {
3174 
3175   AsmToken NextToken[2];
3176   peekTokens(NextToken);
3177 
3178   if (isToken(AsmToken::Minus) &&
3179       (isRegister(NextToken[0], NextToken[1]) ||
3180        NextToken[0].is(AsmToken::Pipe) ||
3181        isId(NextToken[0], "abs"))) {
3182     lex();
3183     return true;
3184   }
3185 
3186   return false;
3187 }
3188 
3189 ParseStatus
3190 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3191                                               bool AllowImm) {
3192   bool Neg, SP3Neg;
3193   bool Abs, SP3Abs;
3194   bool Lit;
3195   SMLoc Loc;
3196 
3197   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3198   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3199     return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3200 
3201   SP3Neg = parseSP3NegModifier();
3202 
3203   Loc = getLoc();
3204   Neg = trySkipId("neg");
3205   if (Neg && SP3Neg)
3206     return Error(Loc, "expected register or immediate");
3207   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3208     return ParseStatus::Failure;
3209 
3210   Abs = trySkipId("abs");
3211   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3212     return ParseStatus::Failure;
3213 
3214   Lit = trySkipId("lit");
3215   if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3216     return ParseStatus::Failure;
3217 
3218   Loc = getLoc();
3219   SP3Abs = trySkipToken(AsmToken::Pipe);
3220   if (Abs && SP3Abs)
3221     return Error(Loc, "expected register or immediate");
3222 
3223   ParseStatus Res;
3224   if (AllowImm) {
3225     Res = parseRegOrImm(Operands, SP3Abs, Lit);
3226   } else {
3227     Res = parseReg(Operands);
3228   }
3229   if (!Res.isSuccess())
3230     return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3231 
3232   if (Lit && !Operands.back()->isImm())
3233     Error(Loc, "expected immediate with lit modifier");
3234 
3235   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3236     return ParseStatus::Failure;
3237   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3238     return ParseStatus::Failure;
3239   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3240     return ParseStatus::Failure;
3241   if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3242     return ParseStatus::Failure;
3243 
3244   AMDGPUOperand::Modifiers Mods;
3245   Mods.Abs = Abs || SP3Abs;
3246   Mods.Neg = Neg || SP3Neg;
3247   Mods.Lit = Lit;
3248 
3249   if (Mods.hasFPModifiers() || Lit) {
3250     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3251     if (Op.isExpr())
3252       return Error(Op.getStartLoc(), "expected an absolute expression");
3253     Op.setModifiers(Mods);
3254   }
3255   return ParseStatus::Success;
3256 }
3257 
3258 ParseStatus
3259 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3260                                                bool AllowImm) {
3261   bool Sext = trySkipId("sext");
3262   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3263     return ParseStatus::Failure;
3264 
3265   ParseStatus Res;
3266   if (AllowImm) {
3267     Res = parseRegOrImm(Operands);
3268   } else {
3269     Res = parseReg(Operands);
3270   }
3271   if (!Res.isSuccess())
3272     return Sext ? ParseStatus::Failure : Res;
3273 
3274   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3275     return ParseStatus::Failure;
3276 
3277   AMDGPUOperand::Modifiers Mods;
3278   Mods.Sext = Sext;
3279 
3280   if (Mods.hasIntModifiers()) {
3281     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3282     if (Op.isExpr())
3283       return Error(Op.getStartLoc(), "expected an absolute expression");
3284     Op.setModifiers(Mods);
3285   }
3286 
3287   return ParseStatus::Success;
3288 }
3289 
3290 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3291   return parseRegOrImmWithFPInputMods(Operands, false);
3292 }
3293 
3294 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3295   return parseRegOrImmWithIntInputMods(Operands, false);
3296 }
3297 
3298 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3299   auto Loc = getLoc();
3300   if (trySkipId("off")) {
3301     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3302                                                 AMDGPUOperand::ImmTyOff, false));
3303     return ParseStatus::Success;
3304   }
3305 
3306   if (!isRegister())
3307     return ParseStatus::NoMatch;
3308 
3309   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3310   if (Reg) {
3311     Operands.push_back(std::move(Reg));
3312     return ParseStatus::Success;
3313   }
3314 
3315   return ParseStatus::Failure;
3316 }
3317 
3318 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3319   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3320 
3321   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3322       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3323       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3324       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3325     return Match_InvalidOperand;
3326 
3327   if ((TSFlags & SIInstrFlags::VOP3) &&
3328       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3329       getForcedEncodingSize() != 64)
3330     return Match_PreferE32;
3331 
3332   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3333       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3334     // v_mac_f32/16 allow only dst_sel == DWORD;
3335     auto OpNum =
3336         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3337     const auto &Op = Inst.getOperand(OpNum);
3338     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3339       return Match_InvalidOperand;
3340     }
3341   }
3342 
3343   return Match_Success;
3344 }
3345 
3346 static ArrayRef<unsigned> getAllVariants() {
3347   static const unsigned Variants[] = {
3348     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3349     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3350     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3351   };
3352 
3353   return ArrayRef(Variants);
3354 }
3355 
3356 // What asm variants we should check
3357 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3358   if (isForcedDPP() && isForcedVOP3()) {
3359     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3360     return ArrayRef(Variants);
3361   }
3362   if (getForcedEncodingSize() == 32) {
3363     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3364     return ArrayRef(Variants);
3365   }
3366 
3367   if (isForcedVOP3()) {
3368     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3369     return ArrayRef(Variants);
3370   }
3371 
3372   if (isForcedSDWA()) {
3373     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3374                                         AMDGPUAsmVariants::SDWA9};
3375     return ArrayRef(Variants);
3376   }
3377 
3378   if (isForcedDPP()) {
3379     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3380     return ArrayRef(Variants);
3381   }
3382 
3383   return getAllVariants();
3384 }
3385 
3386 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3387   if (isForcedDPP() && isForcedVOP3())
3388     return "e64_dpp";
3389 
3390   if (getForcedEncodingSize() == 32)
3391     return "e32";
3392 
3393   if (isForcedVOP3())
3394     return "e64";
3395 
3396   if (isForcedSDWA())
3397     return "sdwa";
3398 
3399   if (isForcedDPP())
3400     return "dpp";
3401 
3402   return "";
3403 }
3404 
3405 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3406   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3407   for (MCPhysReg Reg : Desc.implicit_uses()) {
3408     switch (Reg) {
3409     case AMDGPU::FLAT_SCR:
3410     case AMDGPU::VCC:
3411     case AMDGPU::VCC_LO:
3412     case AMDGPU::VCC_HI:
3413     case AMDGPU::M0:
3414       return Reg;
3415     default:
3416       break;
3417     }
3418   }
3419   return AMDGPU::NoRegister;
3420 }
3421 
3422 // NB: This code is correct only when used to check constant
3423 // bus limitations because GFX7 support no f16 inline constants.
3424 // Note that there are no cases when a GFX7 opcode violates
3425 // constant bus limitations due to the use of an f16 constant.
3426 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3427                                        unsigned OpIdx) const {
3428   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3429 
3430   if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3431       AMDGPU::isKImmOperand(Desc, OpIdx)) {
3432     return false;
3433   }
3434 
3435   const MCOperand &MO = Inst.getOperand(OpIdx);
3436 
3437   int64_t Val = MO.getImm();
3438   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3439 
3440   switch (OpSize) { // expected operand size
3441   case 8:
3442     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3443   case 4:
3444     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3445   case 2: {
3446     const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3447     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3448         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3449         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3450       return AMDGPU::isInlinableIntLiteral(Val);
3451 
3452     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3453         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3454         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3455       return AMDGPU::isInlinableLiteralV2I16(Val);
3456 
3457     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3458         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3459         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3460       return AMDGPU::isInlinableLiteralV2F16(Val);
3461 
3462     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3463   }
3464   default:
3465     llvm_unreachable("invalid operand size");
3466   }
3467 }
3468 
3469 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3470   if (!isGFX10Plus())
3471     return 1;
3472 
3473   switch (Opcode) {
3474   // 64-bit shift instructions can use only one scalar value input
3475   case AMDGPU::V_LSHLREV_B64_e64:
3476   case AMDGPU::V_LSHLREV_B64_gfx10:
3477   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3478   case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3479   case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3480   case AMDGPU::V_LSHRREV_B64_e64:
3481   case AMDGPU::V_LSHRREV_B64_gfx10:
3482   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3483   case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3484   case AMDGPU::V_ASHRREV_I64_e64:
3485   case AMDGPU::V_ASHRREV_I64_gfx10:
3486   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3487   case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3488   case AMDGPU::V_LSHL_B64_e64:
3489   case AMDGPU::V_LSHR_B64_e64:
3490   case AMDGPU::V_ASHR_I64_e64:
3491     return 1;
3492   default:
3493     return 2;
3494   }
3495 }
3496 
3497 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3498 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3499 
3500 // Get regular operand indices in the same order as specified
3501 // in the instruction (but append mandatory literals to the end).
3502 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3503                                            bool AddMandatoryLiterals = false) {
3504 
3505   int16_t ImmIdx =
3506       AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3507 
3508   if (isVOPD(Opcode)) {
3509     int16_t ImmDeferredIdx =
3510         AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3511                              : -1;
3512 
3513     return {getNamedOperandIdx(Opcode, OpName::src0X),
3514             getNamedOperandIdx(Opcode, OpName::vsrc1X),
3515             getNamedOperandIdx(Opcode, OpName::src0Y),
3516             getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3517             ImmDeferredIdx,
3518             ImmIdx};
3519   }
3520 
3521   return {getNamedOperandIdx(Opcode, OpName::src0),
3522           getNamedOperandIdx(Opcode, OpName::src1),
3523           getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3524 }
3525 
3526 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3527   const MCOperand &MO = Inst.getOperand(OpIdx);
3528   if (MO.isImm()) {
3529     return !isInlineConstant(Inst, OpIdx);
3530   } else if (MO.isReg()) {
3531     auto Reg = MO.getReg();
3532     if (!Reg) {
3533       return false;
3534     }
3535     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3536     auto PReg = mc2PseudoReg(Reg);
3537     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3538   } else {
3539     return true;
3540   }
3541 }
3542 
3543 bool AMDGPUAsmParser::validateConstantBusLimitations(
3544     const MCInst &Inst, const OperandVector &Operands) {
3545   const unsigned Opcode = Inst.getOpcode();
3546   const MCInstrDesc &Desc = MII.get(Opcode);
3547   unsigned LastSGPR = AMDGPU::NoRegister;
3548   unsigned ConstantBusUseCount = 0;
3549   unsigned NumLiterals = 0;
3550   unsigned LiteralSize;
3551 
3552   if (!(Desc.TSFlags &
3553         (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3554          SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3555       !isVOPD(Opcode))
3556     return true;
3557 
3558   // Check special imm operands (used by madmk, etc)
3559   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3560     ++NumLiterals;
3561     LiteralSize = 4;
3562   }
3563 
3564   SmallDenseSet<unsigned> SGPRsUsed;
3565   unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3566   if (SGPRUsed != AMDGPU::NoRegister) {
3567     SGPRsUsed.insert(SGPRUsed);
3568     ++ConstantBusUseCount;
3569   }
3570 
3571   OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3572 
3573   for (int OpIdx : OpIndices) {
3574     if (OpIdx == -1)
3575       continue;
3576 
3577     const MCOperand &MO = Inst.getOperand(OpIdx);
3578     if (usesConstantBus(Inst, OpIdx)) {
3579       if (MO.isReg()) {
3580         LastSGPR = mc2PseudoReg(MO.getReg());
3581         // Pairs of registers with a partial intersections like these
3582         //   s0, s[0:1]
3583         //   flat_scratch_lo, flat_scratch
3584         //   flat_scratch_lo, flat_scratch_hi
3585         // are theoretically valid but they are disabled anyway.
3586         // Note that this code mimics SIInstrInfo::verifyInstruction
3587         if (SGPRsUsed.insert(LastSGPR).second) {
3588           ++ConstantBusUseCount;
3589         }
3590       } else { // Expression or a literal
3591 
3592         if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3593           continue; // special operand like VINTERP attr_chan
3594 
3595         // An instruction may use only one literal.
3596         // This has been validated on the previous step.
3597         // See validateVOPLiteral.
3598         // This literal may be used as more than one operand.
3599         // If all these operands are of the same size,
3600         // this literal counts as one scalar value.
3601         // Otherwise it counts as 2 scalar values.
3602         // See "GFX10 Shader Programming", section 3.6.2.3.
3603 
3604         unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3605         if (Size < 4)
3606           Size = 4;
3607 
3608         if (NumLiterals == 0) {
3609           NumLiterals = 1;
3610           LiteralSize = Size;
3611         } else if (LiteralSize != Size) {
3612           NumLiterals = 2;
3613         }
3614       }
3615     }
3616   }
3617   ConstantBusUseCount += NumLiterals;
3618 
3619   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3620     return true;
3621 
3622   SMLoc LitLoc = getLitLoc(Operands);
3623   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3624   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3625   Error(Loc, "invalid operand (violates constant bus restrictions)");
3626   return false;
3627 }
3628 
3629 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3630     const MCInst &Inst, const OperandVector &Operands) {
3631 
3632   const unsigned Opcode = Inst.getOpcode();
3633   if (!isVOPD(Opcode))
3634     return true;
3635 
3636   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3637 
3638   auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3639     const MCOperand &Opr = Inst.getOperand(OperandIdx);
3640     return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3641                ? Opr.getReg()
3642                : MCRegister::NoRegister;
3643   };
3644 
3645   // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
3646   bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3647 
3648   const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3649   auto InvalidCompOprIdx =
3650       InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3651   if (!InvalidCompOprIdx)
3652     return true;
3653 
3654   auto CompOprIdx = *InvalidCompOprIdx;
3655   auto ParsedIdx =
3656       std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3657                InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3658   assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3659 
3660   auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3661   if (CompOprIdx == VOPD::Component::DST) {
3662     Error(Loc, "one dst register must be even and the other odd");
3663   } else {
3664     auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3665     Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3666                    " operands must use different VGPR banks");
3667   }
3668 
3669   return false;
3670 }
3671 
3672 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3673 
3674   const unsigned Opc = Inst.getOpcode();
3675   const MCInstrDesc &Desc = MII.get(Opc);
3676 
3677   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3678     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3679     assert(ClampIdx != -1);
3680     return Inst.getOperand(ClampIdx).getImm() == 0;
3681   }
3682 
3683   return true;
3684 }
3685 
3686 constexpr uint64_t MIMGFlags =
3687     SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3688 
3689 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3690                                            const SMLoc &IDLoc) {
3691 
3692   const unsigned Opc = Inst.getOpcode();
3693   const MCInstrDesc &Desc = MII.get(Opc);
3694 
3695   if ((Desc.TSFlags & MIMGFlags) == 0)
3696     return true;
3697 
3698   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3699   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3700   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3701 
3702   assert(VDataIdx != -1);
3703 
3704   if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3705     return true;
3706 
3707   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3708   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3709   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3710   if (DMask == 0)
3711     DMask = 1;
3712 
3713   bool IsPackedD16 = false;
3714   unsigned DataSize =
3715       (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3716   if (hasPackedD16()) {
3717     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3718     IsPackedD16 = D16Idx >= 0;
3719     if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3720       DataSize = (DataSize + 1) / 2;
3721   }
3722 
3723   if ((VDataSize / 4) == DataSize + TFESize)
3724     return true;
3725 
3726   StringRef Modifiers;
3727   if (isGFX90A())
3728     Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3729   else
3730     Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3731 
3732   Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3733   return false;
3734 }
3735 
3736 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3737                                            const SMLoc &IDLoc) {
3738   const unsigned Opc = Inst.getOpcode();
3739   const MCInstrDesc &Desc = MII.get(Opc);
3740 
3741   if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3742     return true;
3743 
3744   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3745 
3746   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3747       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3748   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3749   int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
3750                                                      : AMDGPU::OpName::rsrc;
3751   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3752   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3753   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3754 
3755   assert(VAddr0Idx != -1);
3756   assert(SrsrcIdx != -1);
3757   assert(SrsrcIdx > VAddr0Idx);
3758 
3759   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3760   if (BaseOpcode->BVH) {
3761     if (IsA16 == BaseOpcode->A16)
3762       return true;
3763     Error(IDLoc, "image address size does not match a16");
3764     return false;
3765   }
3766 
3767   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3768   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3769   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3770   unsigned ActualAddrSize =
3771       IsNSA ? SrsrcIdx - VAddr0Idx
3772             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3773 
3774   unsigned ExpectedAddrSize =
3775       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3776 
3777   if (IsNSA) {
3778     if (hasPartialNSAEncoding() &&
3779         ExpectedAddrSize >
3780             getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3781       int VAddrLastIdx = SrsrcIdx - 1;
3782       unsigned VAddrLastSize =
3783           AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3784 
3785       ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3786     }
3787   } else {
3788     if (ExpectedAddrSize > 12)
3789       ExpectedAddrSize = 16;
3790 
3791     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3792     // This provides backward compatibility for assembly created
3793     // before 160b/192b/224b types were directly supported.
3794     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3795       return true;
3796   }
3797 
3798   if (ActualAddrSize == ExpectedAddrSize)
3799     return true;
3800 
3801   Error(IDLoc, "image address size does not match dim and a16");
3802   return false;
3803 }
3804 
3805 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3806 
3807   const unsigned Opc = Inst.getOpcode();
3808   const MCInstrDesc &Desc = MII.get(Opc);
3809 
3810   if ((Desc.TSFlags & MIMGFlags) == 0)
3811     return true;
3812   if (!Desc.mayLoad() || !Desc.mayStore())
3813     return true; // Not atomic
3814 
3815   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3816   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3817 
3818   // This is an incomplete check because image_atomic_cmpswap
3819   // may only use 0x3 and 0xf while other atomic operations
3820   // may use 0x1 and 0x3. However these limitations are
3821   // verified when we check that dmask matches dst size.
3822   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3823 }
3824 
3825 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3826 
3827   const unsigned Opc = Inst.getOpcode();
3828   const MCInstrDesc &Desc = MII.get(Opc);
3829 
3830   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3831     return true;
3832 
3833   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3834   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3835 
3836   // GATHER4 instructions use dmask in a different fashion compared to
3837   // other MIMG instructions. The only useful DMASK values are
3838   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3839   // (red,red,red,red) etc.) The ISA document doesn't mention
3840   // this.
3841   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3842 }
3843 
3844 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3845   const unsigned Opc = Inst.getOpcode();
3846   const MCInstrDesc &Desc = MII.get(Opc);
3847 
3848   if ((Desc.TSFlags & MIMGFlags) == 0)
3849     return true;
3850 
3851   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3852   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3853       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3854 
3855   if (!BaseOpcode->MSAA)
3856     return true;
3857 
3858   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3859   assert(DimIdx != -1);
3860 
3861   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3862   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3863 
3864   return DimInfo->MSAA;
3865 }
3866 
3867 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3868 {
3869   switch (Opcode) {
3870   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3871   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3872   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3873     return true;
3874   default:
3875     return false;
3876   }
3877 }
3878 
3879 // movrels* opcodes should only allow VGPRS as src0.
3880 // This is specified in .td description for vop1/vop3,
3881 // but sdwa is handled differently. See isSDWAOperand.
3882 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3883                                       const OperandVector &Operands) {
3884 
3885   const unsigned Opc = Inst.getOpcode();
3886   const MCInstrDesc &Desc = MII.get(Opc);
3887 
3888   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3889     return true;
3890 
3891   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3892   assert(Src0Idx != -1);
3893 
3894   SMLoc ErrLoc;
3895   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3896   if (Src0.isReg()) {
3897     auto Reg = mc2PseudoReg(Src0.getReg());
3898     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3899     if (!isSGPR(Reg, TRI))
3900       return true;
3901     ErrLoc = getRegLoc(Reg, Operands);
3902   } else {
3903     ErrLoc = getConstLoc(Operands);
3904   }
3905 
3906   Error(ErrLoc, "source operand must be a VGPR");
3907   return false;
3908 }
3909 
3910 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3911                                           const OperandVector &Operands) {
3912 
3913   const unsigned Opc = Inst.getOpcode();
3914 
3915   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3916     return true;
3917 
3918   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3919   assert(Src0Idx != -1);
3920 
3921   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3922   if (!Src0.isReg())
3923     return true;
3924 
3925   auto Reg = mc2PseudoReg(Src0.getReg());
3926   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3927   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3928     Error(getRegLoc(Reg, Operands),
3929           "source operand must be either a VGPR or an inline constant");
3930     return false;
3931   }
3932 
3933   return true;
3934 }
3935 
3936 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3937                                       const OperandVector &Operands) {
3938   unsigned Opcode = Inst.getOpcode();
3939   const MCInstrDesc &Desc = MII.get(Opcode);
3940 
3941   if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3942       !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3943     return true;
3944 
3945   const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3946   if (Src2Idx == -1)
3947     return true;
3948 
3949   if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3950     Error(getConstLoc(Operands),
3951           "inline constants are not allowed for this operand");
3952     return false;
3953   }
3954 
3955   return true;
3956 }
3957 
3958 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3959                                    const OperandVector &Operands) {
3960   const unsigned Opc = Inst.getOpcode();
3961   const MCInstrDesc &Desc = MII.get(Opc);
3962 
3963   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3964     return true;
3965 
3966   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3967   if (Src2Idx == -1)
3968     return true;
3969 
3970   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3971   if (!Src2.isReg())
3972     return true;
3973 
3974   MCRegister Src2Reg = Src2.getReg();
3975   MCRegister DstReg = Inst.getOperand(0).getReg();
3976   if (Src2Reg == DstReg)
3977     return true;
3978 
3979   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3980   if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3981     return true;
3982 
3983   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3984     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3985           "source 2 operand must not partially overlap with dst");
3986     return false;
3987   }
3988 
3989   return true;
3990 }
3991 
3992 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3993   switch (Inst.getOpcode()) {
3994   default:
3995     return true;
3996   case V_DIV_SCALE_F32_gfx6_gfx7:
3997   case V_DIV_SCALE_F32_vi:
3998   case V_DIV_SCALE_F32_gfx10:
3999   case V_DIV_SCALE_F64_gfx6_gfx7:
4000   case V_DIV_SCALE_F64_vi:
4001   case V_DIV_SCALE_F64_gfx10:
4002     break;
4003   }
4004 
4005   // TODO: Check that src0 = src1 or src2.
4006 
4007   for (auto Name : {AMDGPU::OpName::src0_modifiers,
4008                     AMDGPU::OpName::src2_modifiers,
4009                     AMDGPU::OpName::src2_modifiers}) {
4010     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4011             .getImm() &
4012         SISrcMods::ABS) {
4013       return false;
4014     }
4015   }
4016 
4017   return true;
4018 }
4019 
4020 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4021 
4022   const unsigned Opc = Inst.getOpcode();
4023   const MCInstrDesc &Desc = MII.get(Opc);
4024 
4025   if ((Desc.TSFlags & MIMGFlags) == 0)
4026     return true;
4027 
4028   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4029   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4030     if (isCI() || isSI())
4031       return false;
4032   }
4033 
4034   return true;
4035 }
4036 
4037 static bool IsRevOpcode(const unsigned Opcode)
4038 {
4039   switch (Opcode) {
4040   case AMDGPU::V_SUBREV_F32_e32:
4041   case AMDGPU::V_SUBREV_F32_e64:
4042   case AMDGPU::V_SUBREV_F32_e32_gfx10:
4043   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4044   case AMDGPU::V_SUBREV_F32_e32_vi:
4045   case AMDGPU::V_SUBREV_F32_e64_gfx10:
4046   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4047   case AMDGPU::V_SUBREV_F32_e64_vi:
4048 
4049   case AMDGPU::V_SUBREV_CO_U32_e32:
4050   case AMDGPU::V_SUBREV_CO_U32_e64:
4051   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4052   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4053 
4054   case AMDGPU::V_SUBBREV_U32_e32:
4055   case AMDGPU::V_SUBBREV_U32_e64:
4056   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4057   case AMDGPU::V_SUBBREV_U32_e32_vi:
4058   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4059   case AMDGPU::V_SUBBREV_U32_e64_vi:
4060 
4061   case AMDGPU::V_SUBREV_U32_e32:
4062   case AMDGPU::V_SUBREV_U32_e64:
4063   case AMDGPU::V_SUBREV_U32_e32_gfx9:
4064   case AMDGPU::V_SUBREV_U32_e32_vi:
4065   case AMDGPU::V_SUBREV_U32_e64_gfx9:
4066   case AMDGPU::V_SUBREV_U32_e64_vi:
4067 
4068   case AMDGPU::V_SUBREV_F16_e32:
4069   case AMDGPU::V_SUBREV_F16_e64:
4070   case AMDGPU::V_SUBREV_F16_e32_gfx10:
4071   case AMDGPU::V_SUBREV_F16_e32_vi:
4072   case AMDGPU::V_SUBREV_F16_e64_gfx10:
4073   case AMDGPU::V_SUBREV_F16_e64_vi:
4074 
4075   case AMDGPU::V_SUBREV_U16_e32:
4076   case AMDGPU::V_SUBREV_U16_e64:
4077   case AMDGPU::V_SUBREV_U16_e32_vi:
4078   case AMDGPU::V_SUBREV_U16_e64_vi:
4079 
4080   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4081   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4082   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4083 
4084   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4085   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4086 
4087   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4088   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4089 
4090   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4091   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4092 
4093   case AMDGPU::V_LSHRREV_B32_e32:
4094   case AMDGPU::V_LSHRREV_B32_e64:
4095   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4096   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4097   case AMDGPU::V_LSHRREV_B32_e32_vi:
4098   case AMDGPU::V_LSHRREV_B32_e64_vi:
4099   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4100   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4101 
4102   case AMDGPU::V_ASHRREV_I32_e32:
4103   case AMDGPU::V_ASHRREV_I32_e64:
4104   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4105   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4106   case AMDGPU::V_ASHRREV_I32_e32_vi:
4107   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4108   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4109   case AMDGPU::V_ASHRREV_I32_e64_vi:
4110 
4111   case AMDGPU::V_LSHLREV_B32_e32:
4112   case AMDGPU::V_LSHLREV_B32_e64:
4113   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4114   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4115   case AMDGPU::V_LSHLREV_B32_e32_vi:
4116   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4117   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4118   case AMDGPU::V_LSHLREV_B32_e64_vi:
4119 
4120   case AMDGPU::V_LSHLREV_B16_e32:
4121   case AMDGPU::V_LSHLREV_B16_e64:
4122   case AMDGPU::V_LSHLREV_B16_e32_vi:
4123   case AMDGPU::V_LSHLREV_B16_e64_vi:
4124   case AMDGPU::V_LSHLREV_B16_gfx10:
4125 
4126   case AMDGPU::V_LSHRREV_B16_e32:
4127   case AMDGPU::V_LSHRREV_B16_e64:
4128   case AMDGPU::V_LSHRREV_B16_e32_vi:
4129   case AMDGPU::V_LSHRREV_B16_e64_vi:
4130   case AMDGPU::V_LSHRREV_B16_gfx10:
4131 
4132   case AMDGPU::V_ASHRREV_I16_e32:
4133   case AMDGPU::V_ASHRREV_I16_e64:
4134   case AMDGPU::V_ASHRREV_I16_e32_vi:
4135   case AMDGPU::V_ASHRREV_I16_e64_vi:
4136   case AMDGPU::V_ASHRREV_I16_gfx10:
4137 
4138   case AMDGPU::V_LSHLREV_B64_e64:
4139   case AMDGPU::V_LSHLREV_B64_gfx10:
4140   case AMDGPU::V_LSHLREV_B64_vi:
4141 
4142   case AMDGPU::V_LSHRREV_B64_e64:
4143   case AMDGPU::V_LSHRREV_B64_gfx10:
4144   case AMDGPU::V_LSHRREV_B64_vi:
4145 
4146   case AMDGPU::V_ASHRREV_I64_e64:
4147   case AMDGPU::V_ASHRREV_I64_gfx10:
4148   case AMDGPU::V_ASHRREV_I64_vi:
4149 
4150   case AMDGPU::V_PK_LSHLREV_B16:
4151   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4152   case AMDGPU::V_PK_LSHLREV_B16_vi:
4153 
4154   case AMDGPU::V_PK_LSHRREV_B16:
4155   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4156   case AMDGPU::V_PK_LSHRREV_B16_vi:
4157   case AMDGPU::V_PK_ASHRREV_I16:
4158   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4159   case AMDGPU::V_PK_ASHRREV_I16_vi:
4160     return true;
4161   default:
4162     return false;
4163   }
4164 }
4165 
4166 std::optional<StringRef>
4167 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4168 
4169   using namespace SIInstrFlags;
4170   const unsigned Opcode = Inst.getOpcode();
4171   const MCInstrDesc &Desc = MII.get(Opcode);
4172 
4173   // lds_direct register is defined so that it can be used
4174   // with 9-bit operands only. Ignore encodings which do not accept these.
4175   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4176   if ((Desc.TSFlags & Enc) == 0)
4177     return std::nullopt;
4178 
4179   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4180     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4181     if (SrcIdx == -1)
4182       break;
4183     const auto &Src = Inst.getOperand(SrcIdx);
4184     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4185 
4186       if (isGFX90A() || isGFX11Plus())
4187         return StringRef("lds_direct is not supported on this GPU");
4188 
4189       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4190         return StringRef("lds_direct cannot be used with this instruction");
4191 
4192       if (SrcName != OpName::src0)
4193         return StringRef("lds_direct may be used as src0 only");
4194     }
4195   }
4196 
4197   return std::nullopt;
4198 }
4199 
4200 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4201   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4202     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4203     if (Op.isFlatOffset())
4204       return Op.getStartLoc();
4205   }
4206   return getLoc();
4207 }
4208 
4209 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4210                                      const OperandVector &Operands) {
4211   auto Opcode = Inst.getOpcode();
4212   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4213   if (OpNum == -1)
4214     return true;
4215 
4216   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4217   if ((TSFlags & SIInstrFlags::FLAT))
4218     return validateFlatOffset(Inst, Operands);
4219 
4220   if ((TSFlags & SIInstrFlags::SMRD))
4221     return validateSMEMOffset(Inst, Operands);
4222 
4223   const auto &Op = Inst.getOperand(OpNum);
4224   if (isGFX12Plus() &&
4225       (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4226     const unsigned OffsetSize = 24;
4227     if (!isIntN(OffsetSize, Op.getImm())) {
4228       Error(getFlatOffsetLoc(Operands),
4229             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4230       return false;
4231     }
4232   } else {
4233     const unsigned OffsetSize = 16;
4234     if (!isUIntN(OffsetSize, Op.getImm())) {
4235       Error(getFlatOffsetLoc(Operands),
4236             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4237       return false;
4238     }
4239   }
4240   return true;
4241 }
4242 
4243 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4244                                          const OperandVector &Operands) {
4245   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4246   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4247     return true;
4248 
4249   auto Opcode = Inst.getOpcode();
4250   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4251   assert(OpNum != -1);
4252 
4253   const auto &Op = Inst.getOperand(OpNum);
4254   if (!hasFlatOffsets() && Op.getImm() != 0) {
4255     Error(getFlatOffsetLoc(Operands),
4256           "flat offset modifier is not supported on this GPU");
4257     return false;
4258   }
4259 
4260   // For pre-GFX12 FLAT instructions the offset must be positive;
4261   // MSB is ignored and forced to zero.
4262   unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4263   bool AllowNegative =
4264       (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4265       isGFX12Plus();
4266   if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4267     Error(getFlatOffsetLoc(Operands),
4268           Twine("expected a ") +
4269               (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4270                              : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4271     return false;
4272   }
4273 
4274   return true;
4275 }
4276 
4277 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4278   // Start with second operand because SMEM Offset cannot be dst or src0.
4279   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4280     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4281     if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4282       return Op.getStartLoc();
4283   }
4284   return getLoc();
4285 }
4286 
4287 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4288                                          const OperandVector &Operands) {
4289   if (isCI() || isSI())
4290     return true;
4291 
4292   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4293   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4294     return true;
4295 
4296   auto Opcode = Inst.getOpcode();
4297   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4298   if (OpNum == -1)
4299     return true;
4300 
4301   const auto &Op = Inst.getOperand(OpNum);
4302   if (!Op.isImm())
4303     return true;
4304 
4305   uint64_t Offset = Op.getImm();
4306   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4307   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4308       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4309     return true;
4310 
4311   Error(getSMEMOffsetLoc(Operands),
4312         isGFX12Plus()          ? "expected a 24-bit signed offset"
4313         : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4314                                : "expected a 21-bit signed offset");
4315 
4316   return false;
4317 }
4318 
4319 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4320   unsigned Opcode = Inst.getOpcode();
4321   const MCInstrDesc &Desc = MII.get(Opcode);
4322   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4323     return true;
4324 
4325   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4326   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4327 
4328   const int OpIndices[] = { Src0Idx, Src1Idx };
4329 
4330   unsigned NumExprs = 0;
4331   unsigned NumLiterals = 0;
4332   uint32_t LiteralValue;
4333 
4334   for (int OpIdx : OpIndices) {
4335     if (OpIdx == -1) break;
4336 
4337     const MCOperand &MO = Inst.getOperand(OpIdx);
4338     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4339     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4340       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4341         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4342         if (NumLiterals == 0 || LiteralValue != Value) {
4343           LiteralValue = Value;
4344           ++NumLiterals;
4345         }
4346       } else if (MO.isExpr()) {
4347         ++NumExprs;
4348       }
4349     }
4350   }
4351 
4352   return NumLiterals + NumExprs <= 1;
4353 }
4354 
4355 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4356   const unsigned Opc = Inst.getOpcode();
4357   if (isPermlane16(Opc)) {
4358     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4359     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4360 
4361     if (OpSel & ~3)
4362       return false;
4363   }
4364 
4365   uint64_t TSFlags = MII.get(Opc).TSFlags;
4366 
4367   if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4368     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4369     if (OpSelIdx != -1) {
4370       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4371         return false;
4372     }
4373     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4374     if (OpSelHiIdx != -1) {
4375       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4376         return false;
4377     }
4378   }
4379 
4380   // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4381   if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4382       (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4383     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4384     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4385     if (OpSel & 3)
4386       return false;
4387   }
4388 
4389   return true;
4390 }
4391 
4392 bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4393   assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4394 
4395   const unsigned Opc = Inst.getOpcode();
4396   uint64_t TSFlags = MII.get(Opc).TSFlags;
4397 
4398   // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4399   // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4400   // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4401   // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4402   if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4403       !(TSFlags & SIInstrFlags::IsSWMMAC))
4404     return true;
4405 
4406   int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4407   if (NegIdx == -1)
4408     return true;
4409 
4410   unsigned Neg = Inst.getOperand(NegIdx).getImm();
4411 
4412   // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
4413   // on some src operands but not allowed on other.
4414   // It is convenient that such instructions don't have src_modifiers operand
4415   // for src operands that don't allow neg because they also don't allow opsel.
4416 
4417   int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4418                     AMDGPU::OpName::src1_modifiers,
4419                     AMDGPU::OpName::src2_modifiers};
4420 
4421   for (unsigned i = 0; i < 3; ++i) {
4422     if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4423       if (Neg & (1 << i))
4424         return false;
4425     }
4426   }
4427 
4428   return true;
4429 }
4430 
4431 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4432                                   const OperandVector &Operands) {
4433   const unsigned Opc = Inst.getOpcode();
4434   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4435   if (DppCtrlIdx >= 0) {
4436     unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4437 
4438     if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4439         AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4440       // DP ALU DPP is supported for row_newbcast only on GFX9*
4441       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4442       Error(S, "DP ALU dpp only supports row_newbcast");
4443       return false;
4444     }
4445   }
4446 
4447   int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4448   bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4449 
4450   if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4451     int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4452     if (Src1Idx >= 0) {
4453       const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4454       const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4455       if (Src1.isImm() ||
4456           (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
4457         AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
4458         Error(Op.getStartLoc(), "invalid operand for instruction");
4459         return false;
4460       }
4461     }
4462   }
4463 
4464   return true;
4465 }
4466 
4467 // Check if VCC register matches wavefront size
4468 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4469   auto FB = getFeatureBits();
4470   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4471     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4472 }
4473 
4474 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4475 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4476                                          const OperandVector &Operands) {
4477   unsigned Opcode = Inst.getOpcode();
4478   const MCInstrDesc &Desc = MII.get(Opcode);
4479   bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4480   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4481       !HasMandatoryLiteral && !isVOPD(Opcode))
4482     return true;
4483 
4484   OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4485 
4486   unsigned NumExprs = 0;
4487   unsigned NumLiterals = 0;
4488   uint32_t LiteralValue;
4489 
4490   for (int OpIdx : OpIndices) {
4491     if (OpIdx == -1)
4492       continue;
4493 
4494     const MCOperand &MO = Inst.getOperand(OpIdx);
4495     if (!MO.isImm() && !MO.isExpr())
4496       continue;
4497     if (!isSISrcOperand(Desc, OpIdx))
4498       continue;
4499 
4500     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4501       uint64_t Value = static_cast<uint64_t>(MO.getImm());
4502       bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4503                     AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4504       bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4505 
4506       if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4507         Error(getLitLoc(Operands), "invalid operand for instruction");
4508         return false;
4509       }
4510 
4511       if (IsFP64 && IsValid32Op)
4512         Value = Hi_32(Value);
4513 
4514       if (NumLiterals == 0 || LiteralValue != Value) {
4515         LiteralValue = Value;
4516         ++NumLiterals;
4517       }
4518     } else if (MO.isExpr()) {
4519       ++NumExprs;
4520     }
4521   }
4522   NumLiterals += NumExprs;
4523 
4524   if (!NumLiterals)
4525     return true;
4526 
4527   if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4528     Error(getLitLoc(Operands), "literal operands are not supported");
4529     return false;
4530   }
4531 
4532   if (NumLiterals > 1) {
4533     Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4534     return false;
4535   }
4536 
4537   return true;
4538 }
4539 
4540 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4541 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4542                          const MCRegisterInfo *MRI) {
4543   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4544   if (OpIdx < 0)
4545     return -1;
4546 
4547   const MCOperand &Op = Inst.getOperand(OpIdx);
4548   if (!Op.isReg())
4549     return -1;
4550 
4551   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4552   auto Reg = Sub ? Sub : Op.getReg();
4553   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4554   return AGPR32.contains(Reg) ? 1 : 0;
4555 }
4556 
4557 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4558   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4559   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4560                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4561                   SIInstrFlags::DS)) == 0)
4562     return true;
4563 
4564   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4565                                                       : AMDGPU::OpName::vdata;
4566 
4567   const MCRegisterInfo *MRI = getMRI();
4568   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4569   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4570 
4571   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4572     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4573     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4574       return false;
4575   }
4576 
4577   auto FB = getFeatureBits();
4578   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4579     if (DataAreg < 0 || DstAreg < 0)
4580       return true;
4581     return DstAreg == DataAreg;
4582   }
4583 
4584   return DstAreg < 1 && DataAreg < 1;
4585 }
4586 
4587 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4588   auto FB = getFeatureBits();
4589   if (!FB[AMDGPU::FeatureGFX90AInsts])
4590     return true;
4591 
4592   const MCRegisterInfo *MRI = getMRI();
4593   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4594   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4595   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4596     const MCOperand &Op = Inst.getOperand(I);
4597     if (!Op.isReg())
4598       continue;
4599 
4600     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4601     if (!Sub)
4602       continue;
4603 
4604     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4605       return false;
4606     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4607       return false;
4608   }
4609 
4610   return true;
4611 }
4612 
4613 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4614   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4615     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4616     if (Op.isBLGP())
4617       return Op.getStartLoc();
4618   }
4619   return SMLoc();
4620 }
4621 
4622 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4623                                    const OperandVector &Operands) {
4624   unsigned Opc = Inst.getOpcode();
4625   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4626   if (BlgpIdx == -1)
4627     return true;
4628   SMLoc BLGPLoc = getBLGPLoc(Operands);
4629   if (!BLGPLoc.isValid())
4630     return true;
4631   bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4632   auto FB = getFeatureBits();
4633   bool UsesNeg = false;
4634   if (FB[AMDGPU::FeatureGFX940Insts]) {
4635     switch (Opc) {
4636     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4637     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4638     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4639     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4640       UsesNeg = true;
4641     }
4642   }
4643 
4644   if (IsNeg == UsesNeg)
4645     return true;
4646 
4647   Error(BLGPLoc,
4648         UsesNeg ? "invalid modifier: blgp is not supported"
4649                 : "invalid modifier: neg is not supported");
4650 
4651   return false;
4652 }
4653 
4654 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4655                                       const OperandVector &Operands) {
4656   if (!isGFX11Plus())
4657     return true;
4658 
4659   unsigned Opc = Inst.getOpcode();
4660   if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4661       Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4662       Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4663       Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4664     return true;
4665 
4666   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4667   assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4668   auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4669   if (Reg == AMDGPU::SGPR_NULL)
4670     return true;
4671 
4672   SMLoc RegLoc = getRegLoc(Reg, Operands);
4673   Error(RegLoc, "src0 must be null");
4674   return false;
4675 }
4676 
4677 bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4678                                  const OperandVector &Operands) {
4679   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4680   if ((TSFlags & SIInstrFlags::DS) == 0)
4681     return true;
4682   if (TSFlags & SIInstrFlags::GWS)
4683     return validateGWS(Inst, Operands);
4684   // Only validate GDS for non-GWS instructions.
4685   if (hasGDS())
4686     return true;
4687   int GDSIdx =
4688       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4689   if (GDSIdx < 0)
4690     return true;
4691   unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4692   if (GDS) {
4693     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4694     Error(S, "gds modifier is not supported on this GPU");
4695     return false;
4696   }
4697   return true;
4698 }
4699 
4700 // gfx90a has an undocumented limitation:
4701 // DS_GWS opcodes must use even aligned registers.
4702 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4703                                   const OperandVector &Operands) {
4704   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4705     return true;
4706 
4707   int Opc = Inst.getOpcode();
4708   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4709       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4710     return true;
4711 
4712   const MCRegisterInfo *MRI = getMRI();
4713   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4714   int Data0Pos =
4715       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4716   assert(Data0Pos != -1);
4717   auto Reg = Inst.getOperand(Data0Pos).getReg();
4718   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4719   if (RegIdx & 1) {
4720     SMLoc RegLoc = getRegLoc(Reg, Operands);
4721     Error(RegLoc, "vgpr must be even aligned");
4722     return false;
4723   }
4724 
4725   return true;
4726 }
4727 
4728 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4729                                             const OperandVector &Operands,
4730                                             const SMLoc &IDLoc) {
4731   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4732                                            AMDGPU::OpName::cpol);
4733   if (CPolPos == -1)
4734     return true;
4735 
4736   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4737 
4738   if (isGFX12Plus())
4739     return validateTHAndScopeBits(Inst, Operands, CPol);
4740 
4741   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4742   if (TSFlags & SIInstrFlags::SMRD) {
4743     if (CPol && (isSI() || isCI())) {
4744       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4745       Error(S, "cache policy is not supported for SMRD instructions");
4746       return false;
4747     }
4748     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4749       Error(IDLoc, "invalid cache policy for SMEM instruction");
4750       return false;
4751     }
4752   }
4753 
4754   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4755     const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4756                                       SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4757                                       SIInstrFlags::FLAT;
4758     if (!(TSFlags & AllowSCCModifier)) {
4759       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4760       StringRef CStr(S.getPointer());
4761       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4762       Error(S,
4763             "scc modifier is not supported for this instruction on this GPU");
4764       return false;
4765     }
4766   }
4767 
4768   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4769     return true;
4770 
4771   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4772     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4773       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4774                               : "instruction must use glc");
4775       return false;
4776     }
4777   } else {
4778     if (CPol & CPol::GLC) {
4779       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4780       StringRef CStr(S.getPointer());
4781       S = SMLoc::getFromPointer(
4782           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4783       Error(S, isGFX940() ? "instruction must not use sc0"
4784                           : "instruction must not use glc");
4785       return false;
4786     }
4787   }
4788 
4789   return true;
4790 }
4791 
4792 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4793                                              const OperandVector &Operands,
4794                                              const unsigned CPol) {
4795   const unsigned TH = CPol & AMDGPU::CPol::TH;
4796   const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4797 
4798   const unsigned Opcode = Inst.getOpcode();
4799   const MCInstrDesc &TID = MII.get(Opcode);
4800 
4801   auto PrintError = [&](StringRef Msg) {
4802     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4803     Error(S, Msg);
4804     return false;
4805   };
4806 
4807   if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4808       (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
4809       (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
4810     return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4811 
4812   if (TH == 0)
4813     return true;
4814 
4815   if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4816       ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4817        (TH == AMDGPU::CPol::TH_NT_HT)))
4818     return PrintError("invalid th value for SMEM instruction");
4819 
4820   if (TH == AMDGPU::CPol::TH_BYPASS) {
4821     if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
4822          CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
4823         (Scope == AMDGPU::CPol::SCOPE_SYS &&
4824          !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
4825       return PrintError("scope and th combination is not valid");
4826   }
4827 
4828   bool IsStore = TID.mayStore();
4829   bool IsAtomic =
4830       TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
4831 
4832   if (IsAtomic) {
4833     if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
4834       return PrintError("invalid th value for atomic instructions");
4835   } else if (IsStore) {
4836     if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
4837       return PrintError("invalid th value for store instructions");
4838   } else {
4839     if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
4840       return PrintError("invalid th value for load instructions");
4841   }
4842 
4843   return true;
4844 }
4845 
4846 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4847   if (!isGFX11Plus())
4848     return true;
4849   for (auto &Operand : Operands) {
4850     if (!Operand->isReg())
4851       continue;
4852     unsigned Reg = Operand->getReg();
4853     if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4854       Error(getRegLoc(Reg, Operands),
4855             "execz and vccz are not supported on this GPU");
4856       return false;
4857     }
4858   }
4859   return true;
4860 }
4861 
4862 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4863                                   const OperandVector &Operands) {
4864   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4865   if (Desc.mayStore() &&
4866       (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4867     SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4868     if (Loc != getInstLoc(Operands)) {
4869       Error(Loc, "TFE modifier has no meaning for store instructions");
4870       return false;
4871     }
4872   }
4873 
4874   return true;
4875 }
4876 
4877 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4878                                           const SMLoc &IDLoc,
4879                                           const OperandVector &Operands) {
4880   if (auto ErrMsg = validateLdsDirect(Inst)) {
4881     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4882     return false;
4883   }
4884   if (!validateSOPLiteral(Inst)) {
4885     Error(getLitLoc(Operands),
4886       "only one unique literal operand is allowed");
4887     return false;
4888   }
4889   if (!validateVOPLiteral(Inst, Operands)) {
4890     return false;
4891   }
4892   if (!validateConstantBusLimitations(Inst, Operands)) {
4893     return false;
4894   }
4895   if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4896     return false;
4897   }
4898   if (!validateIntClampSupported(Inst)) {
4899     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4900       "integer clamping is not supported on this GPU");
4901     return false;
4902   }
4903   if (!validateOpSel(Inst)) {
4904     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4905       "invalid op_sel operand");
4906     return false;
4907   }
4908   if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
4909     Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
4910           "invalid neg_lo operand");
4911     return false;
4912   }
4913   if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
4914     Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
4915           "invalid neg_hi operand");
4916     return false;
4917   }
4918   if (!validateDPP(Inst, Operands)) {
4919     return false;
4920   }
4921   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4922   if (!validateMIMGD16(Inst)) {
4923     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4924       "d16 modifier is not supported on this GPU");
4925     return false;
4926   }
4927   if (!validateMIMGMSAA(Inst)) {
4928     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4929           "invalid dim; must be MSAA type");
4930     return false;
4931   }
4932   if (!validateMIMGDataSize(Inst, IDLoc)) {
4933     return false;
4934   }
4935   if (!validateMIMGAddrSize(Inst, IDLoc))
4936     return false;
4937   if (!validateMIMGAtomicDMask(Inst)) {
4938     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4939       "invalid atomic image dmask");
4940     return false;
4941   }
4942   if (!validateMIMGGatherDMask(Inst)) {
4943     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4944       "invalid image_gather dmask: only one bit must be set");
4945     return false;
4946   }
4947   if (!validateMovrels(Inst, Operands)) {
4948     return false;
4949   }
4950   if (!validateOffset(Inst, Operands)) {
4951     return false;
4952   }
4953   if (!validateMAIAccWrite(Inst, Operands)) {
4954     return false;
4955   }
4956   if (!validateMAISrc2(Inst, Operands)) {
4957     return false;
4958   }
4959   if (!validateMFMA(Inst, Operands)) {
4960     return false;
4961   }
4962   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4963     return false;
4964   }
4965 
4966   if (!validateAGPRLdSt(Inst)) {
4967     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4968     ? "invalid register class: data and dst should be all VGPR or AGPR"
4969     : "invalid register class: agpr loads and stores not supported on this GPU"
4970     );
4971     return false;
4972   }
4973   if (!validateVGPRAlign(Inst)) {
4974     Error(IDLoc,
4975       "invalid register class: vgpr tuples must be 64 bit aligned");
4976     return false;
4977   }
4978   if (!validateDS(Inst, Operands)) {
4979     return false;
4980   }
4981 
4982   if (!validateBLGP(Inst, Operands)) {
4983     return false;
4984   }
4985 
4986   if (!validateDivScale(Inst)) {
4987     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4988     return false;
4989   }
4990   if (!validateWaitCnt(Inst, Operands)) {
4991     return false;
4992   }
4993   if (!validateExeczVcczOperands(Operands)) {
4994     return false;
4995   }
4996   if (!validateTFE(Inst, Operands)) {
4997     return false;
4998   }
4999 
5000   return true;
5001 }
5002 
5003 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5004                                             const FeatureBitset &FBS,
5005                                             unsigned VariantID = 0);
5006 
5007 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5008                                 const FeatureBitset &AvailableFeatures,
5009                                 unsigned VariantID);
5010 
5011 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5012                                        const FeatureBitset &FBS) {
5013   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5014 }
5015 
5016 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5017                                        const FeatureBitset &FBS,
5018                                        ArrayRef<unsigned> Variants) {
5019   for (auto Variant : Variants) {
5020     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5021       return true;
5022   }
5023 
5024   return false;
5025 }
5026 
5027 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5028                                                   const SMLoc &IDLoc) {
5029   FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5030 
5031   // Check if requested instruction variant is supported.
5032   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5033     return false;
5034 
5035   // This instruction is not supported.
5036   // Clear any other pending errors because they are no longer relevant.
5037   getParser().clearPendingErrors();
5038 
5039   // Requested instruction variant is not supported.
5040   // Check if any other variants are supported.
5041   StringRef VariantName = getMatchedVariantName();
5042   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5043     return Error(IDLoc,
5044                  Twine(VariantName,
5045                        " variant of this instruction is not supported"));
5046   }
5047 
5048   // Check if this instruction may be used with a different wavesize.
5049   if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5050       !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5051 
5052     FeatureBitset FeaturesWS32 = getFeatureBits();
5053     FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5054         .flip(AMDGPU::FeatureWavefrontSize32);
5055     FeatureBitset AvailableFeaturesWS32 =
5056         ComputeAvailableFeatures(FeaturesWS32);
5057 
5058     if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5059       return Error(IDLoc, "instruction requires wavesize=32");
5060   }
5061 
5062   // Finally check if this instruction is supported on any other GPU.
5063   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5064     return Error(IDLoc, "instruction not supported on this GPU");
5065   }
5066 
5067   // Instruction not supported on any GPU. Probably a typo.
5068   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5069   return Error(IDLoc, "invalid instruction" + Suggestion);
5070 }
5071 
5072 static bool isInvalidVOPDY(const OperandVector &Operands,
5073                            uint64_t InvalidOprIdx) {
5074   assert(InvalidOprIdx < Operands.size());
5075   const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5076   if (Op.isToken() && InvalidOprIdx > 1) {
5077     const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5078     return PrevOp.isToken() && PrevOp.getToken() == "::";
5079   }
5080   return false;
5081 }
5082 
5083 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5084                                               OperandVector &Operands,
5085                                               MCStreamer &Out,
5086                                               uint64_t &ErrorInfo,
5087                                               bool MatchingInlineAsm) {
5088   MCInst Inst;
5089   unsigned Result = Match_Success;
5090   for (auto Variant : getMatchedVariants()) {
5091     uint64_t EI;
5092     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5093                                   Variant);
5094     // We order match statuses from least to most specific. We use most specific
5095     // status as resulting
5096     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
5097     if ((R == Match_Success) ||
5098         (R == Match_PreferE32) ||
5099         (R == Match_MissingFeature && Result != Match_PreferE32) ||
5100         (R == Match_InvalidOperand && Result != Match_MissingFeature
5101                                    && Result != Match_PreferE32) ||
5102         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
5103                                    && Result != Match_MissingFeature
5104                                    && Result != Match_PreferE32)) {
5105       Result = R;
5106       ErrorInfo = EI;
5107     }
5108     if (R == Match_Success)
5109       break;
5110   }
5111 
5112   if (Result == Match_Success) {
5113     if (!validateInstruction(Inst, IDLoc, Operands)) {
5114       return true;
5115     }
5116     Inst.setLoc(IDLoc);
5117     Out.emitInstruction(Inst, getSTI());
5118     return false;
5119   }
5120 
5121   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5122   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5123     return true;
5124   }
5125 
5126   switch (Result) {
5127   default: break;
5128   case Match_MissingFeature:
5129     // It has been verified that the specified instruction
5130     // mnemonic is valid. A match was found but it requires
5131     // features which are not supported on this GPU.
5132     return Error(IDLoc, "operands are not valid for this GPU or mode");
5133 
5134   case Match_InvalidOperand: {
5135     SMLoc ErrorLoc = IDLoc;
5136     if (ErrorInfo != ~0ULL) {
5137       if (ErrorInfo >= Operands.size()) {
5138         return Error(IDLoc, "too few operands for instruction");
5139       }
5140       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5141       if (ErrorLoc == SMLoc())
5142         ErrorLoc = IDLoc;
5143 
5144       if (isInvalidVOPDY(Operands, ErrorInfo))
5145         return Error(ErrorLoc, "invalid VOPDY instruction");
5146     }
5147     return Error(ErrorLoc, "invalid operand for instruction");
5148   }
5149 
5150   case Match_PreferE32:
5151     return Error(IDLoc, "internal error: instruction without _e64 suffix "
5152                         "should be encoded as e32");
5153   case Match_MnemonicFail:
5154     llvm_unreachable("Invalid instructions should have been handled already");
5155   }
5156   llvm_unreachable("Implement any new match types added!");
5157 }
5158 
5159 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5160   int64_t Tmp = -1;
5161   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5162     return true;
5163   }
5164   if (getParser().parseAbsoluteExpression(Tmp)) {
5165     return true;
5166   }
5167   Ret = static_cast<uint32_t>(Tmp);
5168   return false;
5169 }
5170 
5171 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5172   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5173     return TokError("directive only supported for amdgcn architecture");
5174 
5175   std::string TargetIDDirective;
5176   SMLoc TargetStart = getTok().getLoc();
5177   if (getParser().parseEscapedString(TargetIDDirective))
5178     return true;
5179 
5180   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5181   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5182     return getParser().Error(TargetRange.Start,
5183         (Twine(".amdgcn_target directive's target id ") +
5184          Twine(TargetIDDirective) +
5185          Twine(" does not match the specified target id ") +
5186          Twine(getTargetStreamer().getTargetID()->toString())).str());
5187 
5188   return false;
5189 }
5190 
5191 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5192   return Error(Range.Start, "value out of range", Range);
5193 }
5194 
5195 bool AMDGPUAsmParser::calculateGPRBlocks(
5196     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
5197     bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5198     unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
5199     SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
5200   // TODO(scott.linder): These calculations are duplicated from
5201   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5202   IsaVersion Version = getIsaVersion(getSTI().getCPU());
5203 
5204   unsigned NumVGPRs = NextFreeVGPR;
5205   unsigned NumSGPRs = NextFreeSGPR;
5206 
5207   if (Version.Major >= 10)
5208     NumSGPRs = 0;
5209   else {
5210     unsigned MaxAddressableNumSGPRs =
5211         IsaInfo::getAddressableNumSGPRs(&getSTI());
5212 
5213     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
5214         NumSGPRs > MaxAddressableNumSGPRs)
5215       return OutOfRangeError(SGPRRange);
5216 
5217     NumSGPRs +=
5218         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5219 
5220     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5221         NumSGPRs > MaxAddressableNumSGPRs)
5222       return OutOfRangeError(SGPRRange);
5223 
5224     if (Features.test(FeatureSGPRInitBug))
5225       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
5226   }
5227 
5228   VGPRBlocks =
5229       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
5230   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5231 
5232   return false;
5233 }
5234 
5235 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5236   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5237     return TokError("directive only supported for amdgcn architecture");
5238 
5239   if (!isHsaAbi(getSTI()))
5240     return TokError("directive only supported for amdhsa OS");
5241 
5242   StringRef KernelName;
5243   if (getParser().parseIdentifier(KernelName))
5244     return true;
5245 
5246   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
5247 
5248   StringSet<> Seen;
5249 
5250   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5251 
5252   SMRange VGPRRange;
5253   uint64_t NextFreeVGPR = 0;
5254   uint64_t AccumOffset = 0;
5255   uint64_t SharedVGPRCount = 0;
5256   uint64_t PreloadLength = 0;
5257   uint64_t PreloadOffset = 0;
5258   SMRange SGPRRange;
5259   uint64_t NextFreeSGPR = 0;
5260 
5261   // Count the number of user SGPRs implied from the enabled feature bits.
5262   unsigned ImpliedUserSGPRCount = 0;
5263 
5264   // Track if the asm explicitly contains the directive for the user SGPR
5265   // count.
5266   std::optional<unsigned> ExplicitUserSGPRCount;
5267   bool ReserveVCC = true;
5268   bool ReserveFlatScr = true;
5269   std::optional<bool> EnableWavefrontSize32;
5270 
5271   while (true) {
5272     while (trySkipToken(AsmToken::EndOfStatement));
5273 
5274     StringRef ID;
5275     SMRange IDRange = getTok().getLocRange();
5276     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5277       return true;
5278 
5279     if (ID == ".end_amdhsa_kernel")
5280       break;
5281 
5282     if (!Seen.insert(ID).second)
5283       return TokError(".amdhsa_ directives cannot be repeated");
5284 
5285     SMLoc ValStart = getLoc();
5286     int64_t IVal;
5287     if (getParser().parseAbsoluteExpression(IVal))
5288       return true;
5289     SMLoc ValEnd = getLoc();
5290     SMRange ValRange = SMRange(ValStart, ValEnd);
5291 
5292     if (IVal < 0)
5293       return OutOfRangeError(ValRange);
5294 
5295     uint64_t Val = IVal;
5296 
5297 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
5298   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
5299     return OutOfRangeError(RANGE);                                             \
5300   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
5301 
5302     if (ID == ".amdhsa_group_segment_fixed_size") {
5303       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5304         return OutOfRangeError(ValRange);
5305       KD.group_segment_fixed_size = Val;
5306     } else if (ID == ".amdhsa_private_segment_fixed_size") {
5307       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5308         return OutOfRangeError(ValRange);
5309       KD.private_segment_fixed_size = Val;
5310     } else if (ID == ".amdhsa_kernarg_size") {
5311       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5312         return OutOfRangeError(ValRange);
5313       KD.kernarg_size = Val;
5314     } else if (ID == ".amdhsa_user_sgpr_count") {
5315       ExplicitUserSGPRCount = Val;
5316     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5317       if (hasArchitectedFlatScratch())
5318         return Error(IDRange.Start,
5319                      "directive is not supported with architected flat scratch",
5320                      IDRange);
5321       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5322                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5323                        Val, ValRange);
5324       if (Val)
5325         ImpliedUserSGPRCount += 4;
5326     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5327       if (!hasKernargPreload())
5328         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5329 
5330       if (Val > getMaxNumUserSGPRs())
5331         return OutOfRangeError(ValRange);
5332       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
5333                        ValRange);
5334       if (Val) {
5335         ImpliedUserSGPRCount += Val;
5336         PreloadLength = Val;
5337       }
5338     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5339       if (!hasKernargPreload())
5340         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5341 
5342       if (Val >= 1024)
5343         return OutOfRangeError(ValRange);
5344       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
5345                        ValRange);
5346       if (Val)
5347         PreloadOffset = Val;
5348     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5349       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5350                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5351                        ValRange);
5352       if (Val)
5353         ImpliedUserSGPRCount += 2;
5354     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5355       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5356                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5357                        ValRange);
5358       if (Val)
5359         ImpliedUserSGPRCount += 2;
5360     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5361       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5362                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5363                        Val, ValRange);
5364       if (Val)
5365         ImpliedUserSGPRCount += 2;
5366     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5367       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5368                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5369                        ValRange);
5370       if (Val)
5371         ImpliedUserSGPRCount += 2;
5372     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5373       if (hasArchitectedFlatScratch())
5374         return Error(IDRange.Start,
5375                      "directive is not supported with architected flat scratch",
5376                      IDRange);
5377       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5378                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5379                        ValRange);
5380       if (Val)
5381         ImpliedUserSGPRCount += 2;
5382     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5383       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5384                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5385                        Val, ValRange);
5386       if (Val)
5387         ImpliedUserSGPRCount += 1;
5388     } else if (ID == ".amdhsa_wavefront_size32") {
5389       if (IVersion.Major < 10)
5390         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5391       EnableWavefrontSize32 = Val;
5392       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5393                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5394                        Val, ValRange);
5395     } else if (ID == ".amdhsa_uses_dynamic_stack") {
5396       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5397                        KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5398     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5399       if (hasArchitectedFlatScratch())
5400         return Error(IDRange.Start,
5401                      "directive is not supported with architected flat scratch",
5402                      IDRange);
5403       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5404                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5405     } else if (ID == ".amdhsa_enable_private_segment") {
5406       if (!hasArchitectedFlatScratch())
5407         return Error(
5408             IDRange.Start,
5409             "directive is not supported without architected flat scratch",
5410             IDRange);
5411       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5412                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5413     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5414       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5415                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5416                        ValRange);
5417     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5418       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5419                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5420                        ValRange);
5421     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5422       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5423                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5424                        ValRange);
5425     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5426       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5427                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5428                        ValRange);
5429     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5430       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5431                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5432                        ValRange);
5433     } else if (ID == ".amdhsa_next_free_vgpr") {
5434       VGPRRange = ValRange;
5435       NextFreeVGPR = Val;
5436     } else if (ID == ".amdhsa_next_free_sgpr") {
5437       SGPRRange = ValRange;
5438       NextFreeSGPR = Val;
5439     } else if (ID == ".amdhsa_accum_offset") {
5440       if (!isGFX90A())
5441         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5442       AccumOffset = Val;
5443     } else if (ID == ".amdhsa_reserve_vcc") {
5444       if (!isUInt<1>(Val))
5445         return OutOfRangeError(ValRange);
5446       ReserveVCC = Val;
5447     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5448       if (IVersion.Major < 7)
5449         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5450       if (hasArchitectedFlatScratch())
5451         return Error(IDRange.Start,
5452                      "directive is not supported with architected flat scratch",
5453                      IDRange);
5454       if (!isUInt<1>(Val))
5455         return OutOfRangeError(ValRange);
5456       ReserveFlatScr = Val;
5457     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5458       if (IVersion.Major < 8)
5459         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5460       if (!isUInt<1>(Val))
5461         return OutOfRangeError(ValRange);
5462       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5463         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5464                                  IDRange);
5465     } else if (ID == ".amdhsa_float_round_mode_32") {
5466       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5467                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5468     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5469       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5470                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5471     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5472       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5473                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5474     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5475       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5476                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5477                        ValRange);
5478     } else if (ID == ".amdhsa_dx10_clamp") {
5479       if (IVersion.Major >= 12)
5480         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5481       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5482                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
5483                        ValRange);
5484     } else if (ID == ".amdhsa_ieee_mode") {
5485       if (IVersion.Major >= 12)
5486         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5487       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5488                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
5489                        ValRange);
5490     } else if (ID == ".amdhsa_fp16_overflow") {
5491       if (IVersion.Major < 9)
5492         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5493       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
5494                        ValRange);
5495     } else if (ID == ".amdhsa_tg_split") {
5496       if (!isGFX90A())
5497         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5498       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5499                        ValRange);
5500     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5501       if (IVersion.Major < 10)
5502         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5503       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
5504                        ValRange);
5505     } else if (ID == ".amdhsa_memory_ordered") {
5506       if (IVersion.Major < 10)
5507         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5508       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
5509                        ValRange);
5510     } else if (ID == ".amdhsa_forward_progress") {
5511       if (IVersion.Major < 10)
5512         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5513       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
5514                        ValRange);
5515     } else if (ID == ".amdhsa_shared_vgpr_count") {
5516       if (IVersion.Major < 10 || IVersion.Major >= 12)
5517         return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5518                      IDRange);
5519       SharedVGPRCount = Val;
5520       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5521                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val,
5522                        ValRange);
5523     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5524       PARSE_BITS_ENTRY(
5525           KD.compute_pgm_rsrc2,
5526           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5527           ValRange);
5528     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5529       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5530                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5531                        Val, ValRange);
5532     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5533       PARSE_BITS_ENTRY(
5534           KD.compute_pgm_rsrc2,
5535           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5536           ValRange);
5537     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5538       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5539                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5540                        Val, ValRange);
5541     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5542       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5543                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5544                        Val, ValRange);
5545     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5546       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5547                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5548                        Val, ValRange);
5549     } else if (ID == ".amdhsa_exception_int_div_zero") {
5550       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5551                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5552                        Val, ValRange);
5553     } else if (ID == ".amdhsa_round_robin_scheduling") {
5554       if (IVersion.Major < 12)
5555         return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5556       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5557                        COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
5558                        ValRange);
5559     } else {
5560       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5561     }
5562 
5563 #undef PARSE_BITS_ENTRY
5564   }
5565 
5566   if (!Seen.contains(".amdhsa_next_free_vgpr"))
5567     return TokError(".amdhsa_next_free_vgpr directive is required");
5568 
5569   if (!Seen.contains(".amdhsa_next_free_sgpr"))
5570     return TokError(".amdhsa_next_free_sgpr directive is required");
5571 
5572   unsigned VGPRBlocks;
5573   unsigned SGPRBlocks;
5574   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5575                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5576                          EnableWavefrontSize32, NextFreeVGPR,
5577                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5578                          SGPRBlocks))
5579     return true;
5580 
5581   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5582           VGPRBlocks))
5583     return OutOfRangeError(VGPRRange);
5584   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5585                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5586 
5587   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5588           SGPRBlocks))
5589     return OutOfRangeError(SGPRRange);
5590   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5591                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5592                   SGPRBlocks);
5593 
5594   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5595     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5596                     "enabled user SGPRs");
5597 
5598   unsigned UserSGPRCount =
5599       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5600 
5601   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5602     return TokError("too many user SGPRs enabled");
5603   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5604                   UserSGPRCount);
5605 
5606   if (PreloadLength && KD.kernarg_size &&
5607       (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
5608     return TokError("Kernarg preload length + offset is larger than the "
5609                     "kernarg segment size");
5610 
5611   if (isGFX90A()) {
5612     if (!Seen.contains(".amdhsa_accum_offset"))
5613       return TokError(".amdhsa_accum_offset directive is required");
5614     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5615       return TokError("accum_offset should be in range [4..256] in "
5616                       "increments of 4");
5617     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5618       return TokError("accum_offset exceeds total VGPR allocation");
5619     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5620                     (AccumOffset / 4 - 1));
5621   }
5622 
5623   if (IVersion.Major >= 10 && IVersion.Major < 12) {
5624     // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5625     if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5626       return TokError("shared_vgpr_count directive not valid on "
5627                       "wavefront size 32");
5628     }
5629     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5630       return TokError("shared_vgpr_count*2 + "
5631                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5632                       "exceed 63\n");
5633     }
5634   }
5635 
5636   getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5637                                                  NextFreeVGPR, NextFreeSGPR,
5638                                                  ReserveVCC, ReserveFlatScr);
5639   return false;
5640 }
5641 
5642 bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5643   uint32_t Version;
5644   if (ParseAsAbsoluteExpression(Version))
5645     return true;
5646 
5647   getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5648   return false;
5649 }
5650 
5651 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5652                                                amd_kernel_code_t &Header) {
5653   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5654   // assembly for backwards compatibility.
5655   if (ID == "max_scratch_backing_memory_byte_size") {
5656     Parser.eatToEndOfStatement();
5657     return false;
5658   }
5659 
5660   SmallString<40> ErrStr;
5661   raw_svector_ostream Err(ErrStr);
5662   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5663     return TokError(Err.str());
5664   }
5665   Lex();
5666 
5667   if (ID == "enable_dx10_clamp") {
5668     if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
5669         isGFX12Plus())
5670       return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
5671   }
5672 
5673   if (ID == "enable_ieee_mode") {
5674     if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
5675         isGFX12Plus())
5676       return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
5677   }
5678 
5679   if (ID == "enable_wavefront_size32") {
5680     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5681       if (!isGFX10Plus())
5682         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5683       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5684         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5685     } else {
5686       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5687         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5688     }
5689   }
5690 
5691   if (ID == "wavefront_size") {
5692     if (Header.wavefront_size == 5) {
5693       if (!isGFX10Plus())
5694         return TokError("wavefront_size=5 is only allowed on GFX10+");
5695       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5696         return TokError("wavefront_size=5 requires +WavefrontSize32");
5697     } else if (Header.wavefront_size == 6) {
5698       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5699         return TokError("wavefront_size=6 requires +WavefrontSize64");
5700     }
5701   }
5702 
5703   if (ID == "enable_wgp_mode") {
5704     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5705         !isGFX10Plus())
5706       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5707   }
5708 
5709   if (ID == "enable_mem_ordered") {
5710     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5711         !isGFX10Plus())
5712       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5713   }
5714 
5715   if (ID == "enable_fwd_progress") {
5716     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5717         !isGFX10Plus())
5718       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5719   }
5720 
5721   return false;
5722 }
5723 
5724 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5725   amd_kernel_code_t Header;
5726   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5727 
5728   while (true) {
5729     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5730     // will set the current token to EndOfStatement.
5731     while(trySkipToken(AsmToken::EndOfStatement));
5732 
5733     StringRef ID;
5734     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5735       return true;
5736 
5737     if (ID == ".end_amd_kernel_code_t")
5738       break;
5739 
5740     if (ParseAMDKernelCodeTValue(ID, Header))
5741       return true;
5742   }
5743 
5744   getTargetStreamer().EmitAMDKernelCodeT(Header);
5745 
5746   return false;
5747 }
5748 
5749 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5750   StringRef KernelName;
5751   if (!parseId(KernelName, "expected symbol name"))
5752     return true;
5753 
5754   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5755                                            ELF::STT_AMDGPU_HSA_KERNEL);
5756 
5757   KernelScope.initialize(getContext());
5758   return false;
5759 }
5760 
5761 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5762   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5763     return Error(getLoc(),
5764                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5765                  "architectures");
5766   }
5767 
5768   auto TargetIDDirective = getLexer().getTok().getStringContents();
5769   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5770     return Error(getParser().getTok().getLoc(), "target id must match options");
5771 
5772   getTargetStreamer().EmitISAVersion();
5773   Lex();
5774 
5775   return false;
5776 }
5777 
5778 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5779   assert(isHsaAbi(getSTI()));
5780 
5781   std::string HSAMetadataString;
5782   if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
5783                           HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
5784     return true;
5785 
5786   if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5787     return Error(getLoc(), "invalid HSA metadata");
5788 
5789   return false;
5790 }
5791 
5792 /// Common code to parse out a block of text (typically YAML) between start and
5793 /// end directives.
5794 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5795                                           const char *AssemblerDirectiveEnd,
5796                                           std::string &CollectString) {
5797 
5798   raw_string_ostream CollectStream(CollectString);
5799 
5800   getLexer().setSkipSpace(false);
5801 
5802   bool FoundEnd = false;
5803   while (!isToken(AsmToken::Eof)) {
5804     while (isToken(AsmToken::Space)) {
5805       CollectStream << getTokenStr();
5806       Lex();
5807     }
5808 
5809     if (trySkipId(AssemblerDirectiveEnd)) {
5810       FoundEnd = true;
5811       break;
5812     }
5813 
5814     CollectStream << Parser.parseStringToEndOfStatement()
5815                   << getContext().getAsmInfo()->getSeparatorString();
5816 
5817     Parser.eatToEndOfStatement();
5818   }
5819 
5820   getLexer().setSkipSpace(true);
5821 
5822   if (isToken(AsmToken::Eof) && !FoundEnd) {
5823     return TokError(Twine("expected directive ") +
5824                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5825   }
5826 
5827   CollectStream.flush();
5828   return false;
5829 }
5830 
5831 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5832 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5833   std::string String;
5834   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5835                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5836     return true;
5837 
5838   auto PALMetadata = getTargetStreamer().getPALMetadata();
5839   if (!PALMetadata->setFromString(String))
5840     return Error(getLoc(), "invalid PAL metadata");
5841   return false;
5842 }
5843 
5844 /// Parse the assembler directive for old linear-format PAL metadata.
5845 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5846   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5847     return Error(getLoc(),
5848                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5849                  "not available on non-amdpal OSes")).str());
5850   }
5851 
5852   auto PALMetadata = getTargetStreamer().getPALMetadata();
5853   PALMetadata->setLegacy();
5854   for (;;) {
5855     uint32_t Key, Value;
5856     if (ParseAsAbsoluteExpression(Key)) {
5857       return TokError(Twine("invalid value in ") +
5858                       Twine(PALMD::AssemblerDirective));
5859     }
5860     if (!trySkipToken(AsmToken::Comma)) {
5861       return TokError(Twine("expected an even number of values in ") +
5862                       Twine(PALMD::AssemblerDirective));
5863     }
5864     if (ParseAsAbsoluteExpression(Value)) {
5865       return TokError(Twine("invalid value in ") +
5866                       Twine(PALMD::AssemblerDirective));
5867     }
5868     PALMetadata->setRegister(Key, Value);
5869     if (!trySkipToken(AsmToken::Comma))
5870       break;
5871   }
5872   return false;
5873 }
5874 
5875 /// ParseDirectiveAMDGPULDS
5876 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5877 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5878   if (getParser().checkForValidSection())
5879     return true;
5880 
5881   StringRef Name;
5882   SMLoc NameLoc = getLoc();
5883   if (getParser().parseIdentifier(Name))
5884     return TokError("expected identifier in directive");
5885 
5886   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5887   if (getParser().parseComma())
5888     return true;
5889 
5890   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5891 
5892   int64_t Size;
5893   SMLoc SizeLoc = getLoc();
5894   if (getParser().parseAbsoluteExpression(Size))
5895     return true;
5896   if (Size < 0)
5897     return Error(SizeLoc, "size must be non-negative");
5898   if (Size > LocalMemorySize)
5899     return Error(SizeLoc, "size is too large");
5900 
5901   int64_t Alignment = 4;
5902   if (trySkipToken(AsmToken::Comma)) {
5903     SMLoc AlignLoc = getLoc();
5904     if (getParser().parseAbsoluteExpression(Alignment))
5905       return true;
5906     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5907       return Error(AlignLoc, "alignment must be a power of two");
5908 
5909     // Alignment larger than the size of LDS is possible in theory, as long
5910     // as the linker manages to place to symbol at address 0, but we do want
5911     // to make sure the alignment fits nicely into a 32-bit integer.
5912     if (Alignment >= 1u << 31)
5913       return Error(AlignLoc, "alignment is too large");
5914   }
5915 
5916   if (parseEOL())
5917     return true;
5918 
5919   Symbol->redefineIfPossible();
5920   if (!Symbol->isUndefined())
5921     return Error(NameLoc, "invalid symbol redefinition");
5922 
5923   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5924   return false;
5925 }
5926 
5927 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5928   StringRef IDVal = DirectiveID.getString();
5929 
5930   if (isHsaAbi(getSTI())) {
5931     if (IDVal == ".amdhsa_kernel")
5932      return ParseDirectiveAMDHSAKernel();
5933 
5934     if (IDVal == ".amdhsa_code_object_version")
5935       return ParseDirectiveAMDHSACodeObjectVersion();
5936 
5937     // TODO: Restructure/combine with PAL metadata directive.
5938     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5939       return ParseDirectiveHSAMetadata();
5940   } else {
5941     if (IDVal == ".amd_kernel_code_t")
5942       return ParseDirectiveAMDKernelCodeT();
5943 
5944     if (IDVal == ".amdgpu_hsa_kernel")
5945       return ParseDirectiveAMDGPUHsaKernel();
5946 
5947     if (IDVal == ".amd_amdgpu_isa")
5948       return ParseDirectiveISAVersion();
5949 
5950     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
5951       return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
5952                               Twine(" directive is "
5953                                     "not available on non-amdhsa OSes"))
5954                                  .str());
5955     }
5956   }
5957 
5958   if (IDVal == ".amdgcn_target")
5959     return ParseDirectiveAMDGCNTarget();
5960 
5961   if (IDVal == ".amdgpu_lds")
5962     return ParseDirectiveAMDGPULDS();
5963 
5964   if (IDVal == PALMD::AssemblerDirectiveBegin)
5965     return ParseDirectivePALMetadataBegin();
5966 
5967   if (IDVal == PALMD::AssemblerDirective)
5968     return ParseDirectivePALMetadata();
5969 
5970   return true;
5971 }
5972 
5973 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5974                                            unsigned RegNo) {
5975 
5976   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5977     return isGFX9Plus();
5978 
5979   // GFX10+ has 2 more SGPRs 104 and 105.
5980   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5981     return hasSGPR104_SGPR105();
5982 
5983   switch (RegNo) {
5984   case AMDGPU::SRC_SHARED_BASE_LO:
5985   case AMDGPU::SRC_SHARED_BASE:
5986   case AMDGPU::SRC_SHARED_LIMIT_LO:
5987   case AMDGPU::SRC_SHARED_LIMIT:
5988   case AMDGPU::SRC_PRIVATE_BASE_LO:
5989   case AMDGPU::SRC_PRIVATE_BASE:
5990   case AMDGPU::SRC_PRIVATE_LIMIT_LO:
5991   case AMDGPU::SRC_PRIVATE_LIMIT:
5992     return isGFX9Plus();
5993   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5994     return isGFX9Plus() && !isGFX11Plus();
5995   case AMDGPU::TBA:
5996   case AMDGPU::TBA_LO:
5997   case AMDGPU::TBA_HI:
5998   case AMDGPU::TMA:
5999   case AMDGPU::TMA_LO:
6000   case AMDGPU::TMA_HI:
6001     return !isGFX9Plus();
6002   case AMDGPU::XNACK_MASK:
6003   case AMDGPU::XNACK_MASK_LO:
6004   case AMDGPU::XNACK_MASK_HI:
6005     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6006   case AMDGPU::SGPR_NULL:
6007     return isGFX10Plus();
6008   default:
6009     break;
6010   }
6011 
6012   if (isCI())
6013     return true;
6014 
6015   if (isSI() || isGFX10Plus()) {
6016     // No flat_scr on SI.
6017     // On GFX10Plus flat scratch is not a valid register operand and can only be
6018     // accessed with s_setreg/s_getreg.
6019     switch (RegNo) {
6020     case AMDGPU::FLAT_SCR:
6021     case AMDGPU::FLAT_SCR_LO:
6022     case AMDGPU::FLAT_SCR_HI:
6023       return false;
6024     default:
6025       return true;
6026     }
6027   }
6028 
6029   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6030   // SI/CI have.
6031   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6032     return hasSGPR102_SGPR103();
6033 
6034   return true;
6035 }
6036 
6037 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6038                                           StringRef Mnemonic,
6039                                           OperandMode Mode) {
6040   ParseStatus Res = parseVOPD(Operands);
6041   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6042     return Res;
6043 
6044   // Try to parse with a custom parser
6045   Res = MatchOperandParserImpl(Operands, Mnemonic);
6046 
6047   // If we successfully parsed the operand or if there as an error parsing,
6048   // we are done.
6049   //
6050   // If we are parsing after we reach EndOfStatement then this means we
6051   // are appending default values to the Operands list.  This is only done
6052   // by custom parser, so we shouldn't continue on to the generic parsing.
6053   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6054     return Res;
6055 
6056   SMLoc RBraceLoc;
6057   SMLoc LBraceLoc = getLoc();
6058   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6059     unsigned Prefix = Operands.size();
6060 
6061     for (;;) {
6062       auto Loc = getLoc();
6063       Res = parseReg(Operands);
6064       if (Res.isNoMatch())
6065         Error(Loc, "expected a register");
6066       if (!Res.isSuccess())
6067         return ParseStatus::Failure;
6068 
6069       RBraceLoc = getLoc();
6070       if (trySkipToken(AsmToken::RBrac))
6071         break;
6072 
6073       if (!skipToken(AsmToken::Comma,
6074                      "expected a comma or a closing square bracket"))
6075         return ParseStatus::Failure;
6076     }
6077 
6078     if (Operands.size() - Prefix > 1) {
6079       Operands.insert(Operands.begin() + Prefix,
6080                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6081       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6082     }
6083 
6084     return ParseStatus::Success;
6085   }
6086 
6087   return parseRegOrImm(Operands);
6088 }
6089 
6090 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6091   // Clear any forced encodings from the previous instruction.
6092   setForcedEncodingSize(0);
6093   setForcedDPP(false);
6094   setForcedSDWA(false);
6095 
6096   if (Name.ends_with("_e64_dpp")) {
6097     setForcedDPP(true);
6098     setForcedEncodingSize(64);
6099     return Name.substr(0, Name.size() - 8);
6100   } else if (Name.ends_with("_e64")) {
6101     setForcedEncodingSize(64);
6102     return Name.substr(0, Name.size() - 4);
6103   } else if (Name.ends_with("_e32")) {
6104     setForcedEncodingSize(32);
6105     return Name.substr(0, Name.size() - 4);
6106   } else if (Name.ends_with("_dpp")) {
6107     setForcedDPP(true);
6108     return Name.substr(0, Name.size() - 4);
6109   } else if (Name.ends_with("_sdwa")) {
6110     setForcedSDWA(true);
6111     return Name.substr(0, Name.size() - 5);
6112   }
6113   return Name;
6114 }
6115 
6116 static void applyMnemonicAliases(StringRef &Mnemonic,
6117                                  const FeatureBitset &Features,
6118                                  unsigned VariantID);
6119 
6120 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6121                                        StringRef Name,
6122                                        SMLoc NameLoc, OperandVector &Operands) {
6123   // Add the instruction mnemonic
6124   Name = parseMnemonicSuffix(Name);
6125 
6126   // If the target architecture uses MnemonicAlias, call it here to parse
6127   // operands correctly.
6128   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6129 
6130   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6131 
6132   bool IsMIMG = Name.starts_with("image_");
6133 
6134   while (!trySkipToken(AsmToken::EndOfStatement)) {
6135     OperandMode Mode = OperandMode_Default;
6136     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6137       Mode = OperandMode_NSA;
6138     ParseStatus Res = parseOperand(Operands, Name, Mode);
6139 
6140     if (!Res.isSuccess()) {
6141       checkUnsupportedInstruction(Name, NameLoc);
6142       if (!Parser.hasPendingError()) {
6143         // FIXME: use real operand location rather than the current location.
6144         StringRef Msg = Res.isFailure() ? "failed parsing operand."
6145                                         : "not a valid operand.";
6146         Error(getLoc(), Msg);
6147       }
6148       while (!trySkipToken(AsmToken::EndOfStatement)) {
6149         lex();
6150       }
6151       return true;
6152     }
6153 
6154     // Eat the comma or space if there is one.
6155     trySkipToken(AsmToken::Comma);
6156   }
6157 
6158   return false;
6159 }
6160 
6161 //===----------------------------------------------------------------------===//
6162 // Utility functions
6163 //===----------------------------------------------------------------------===//
6164 
6165 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6166                                           OperandVector &Operands) {
6167   SMLoc S = getLoc();
6168   if (!trySkipId(Name))
6169     return ParseStatus::NoMatch;
6170 
6171   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6172   return ParseStatus::Success;
6173 }
6174 
6175 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6176                                                 int64_t &IntVal) {
6177 
6178   if (!trySkipId(Prefix, AsmToken::Colon))
6179     return ParseStatus::NoMatch;
6180 
6181   return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6182 }
6183 
6184 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6185     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6186     std::function<bool(int64_t &)> ConvertResult) {
6187   SMLoc S = getLoc();
6188   int64_t Value = 0;
6189 
6190   ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6191   if (!Res.isSuccess())
6192     return Res;
6193 
6194   if (ConvertResult && !ConvertResult(Value)) {
6195     Error(S, "invalid " + StringRef(Prefix) + " value.");
6196   }
6197 
6198   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6199   return ParseStatus::Success;
6200 }
6201 
6202 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6203     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6204     bool (*ConvertResult)(int64_t &)) {
6205   SMLoc S = getLoc();
6206   if (!trySkipId(Prefix, AsmToken::Colon))
6207     return ParseStatus::NoMatch;
6208 
6209   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6210     return ParseStatus::Failure;
6211 
6212   unsigned Val = 0;
6213   const unsigned MaxSize = 4;
6214 
6215   // FIXME: How to verify the number of elements matches the number of src
6216   // operands?
6217   for (int I = 0; ; ++I) {
6218     int64_t Op;
6219     SMLoc Loc = getLoc();
6220     if (!parseExpr(Op))
6221       return ParseStatus::Failure;
6222 
6223     if (Op != 0 && Op != 1)
6224       return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6225 
6226     Val |= (Op << I);
6227 
6228     if (trySkipToken(AsmToken::RBrac))
6229       break;
6230 
6231     if (I + 1 == MaxSize)
6232       return Error(getLoc(), "expected a closing square bracket");
6233 
6234     if (!skipToken(AsmToken::Comma, "expected a comma"))
6235       return ParseStatus::Failure;
6236   }
6237 
6238   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6239   return ParseStatus::Success;
6240 }
6241 
6242 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6243                                            OperandVector &Operands,
6244                                            AMDGPUOperand::ImmTy ImmTy) {
6245   int64_t Bit;
6246   SMLoc S = getLoc();
6247 
6248   if (trySkipId(Name)) {
6249     Bit = 1;
6250   } else if (trySkipId("no", Name)) {
6251     Bit = 0;
6252   } else {
6253     return ParseStatus::NoMatch;
6254   }
6255 
6256   if (Name == "r128" && !hasMIMG_R128())
6257     return Error(S, "r128 modifier is not supported on this GPU");
6258   if (Name == "a16" && !hasA16())
6259     return Error(S, "a16 modifier is not supported on this GPU");
6260 
6261   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6262     ImmTy = AMDGPUOperand::ImmTyR128A16;
6263 
6264   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6265   return ParseStatus::Success;
6266 }
6267 
6268 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6269                                       bool &Disabling) const {
6270   Disabling = Id.consume_front("no");
6271 
6272   if (isGFX940() && !Mnemo.starts_with("s_")) {
6273     return StringSwitch<unsigned>(Id)
6274         .Case("nt", AMDGPU::CPol::NT)
6275         .Case("sc0", AMDGPU::CPol::SC0)
6276         .Case("sc1", AMDGPU::CPol::SC1)
6277         .Default(0);
6278   }
6279 
6280   return StringSwitch<unsigned>(Id)
6281       .Case("dlc", AMDGPU::CPol::DLC)
6282       .Case("glc", AMDGPU::CPol::GLC)
6283       .Case("scc", AMDGPU::CPol::SCC)
6284       .Case("slc", AMDGPU::CPol::SLC)
6285       .Default(0);
6286 }
6287 
6288 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6289   if (isGFX12Plus()) {
6290     SMLoc StringLoc = getLoc();
6291 
6292     int64_t CPolVal = 0;
6293     ParseStatus ResTH = ParseStatus::NoMatch;
6294     ParseStatus ResScope = ParseStatus::NoMatch;
6295 
6296     for (;;) {
6297       if (ResTH.isNoMatch()) {
6298         int64_t TH;
6299         ResTH = parseTH(Operands, TH);
6300         if (ResTH.isFailure())
6301           return ResTH;
6302         if (ResTH.isSuccess()) {
6303           CPolVal |= TH;
6304           continue;
6305         }
6306       }
6307 
6308       if (ResScope.isNoMatch()) {
6309         int64_t Scope;
6310         ResScope = parseScope(Operands, Scope);
6311         if (ResScope.isFailure())
6312           return ResScope;
6313         if (ResScope.isSuccess()) {
6314           CPolVal |= Scope;
6315           continue;
6316         }
6317       }
6318 
6319       break;
6320     }
6321 
6322     if (ResTH.isNoMatch() && ResScope.isNoMatch())
6323       return ParseStatus::NoMatch;
6324 
6325     Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6326                                                 AMDGPUOperand::ImmTyCPol));
6327     return ParseStatus::Success;
6328   }
6329 
6330   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6331   SMLoc OpLoc = getLoc();
6332   unsigned Enabled = 0, Seen = 0;
6333   for (;;) {
6334     SMLoc S = getLoc();
6335     bool Disabling;
6336     unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6337     if (!CPol)
6338       break;
6339 
6340     lex();
6341 
6342     if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6343       return Error(S, "dlc modifier is not supported on this GPU");
6344 
6345     if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6346       return Error(S, "scc modifier is not supported on this GPU");
6347 
6348     if (Seen & CPol)
6349       return Error(S, "duplicate cache policy modifier");
6350 
6351     if (!Disabling)
6352       Enabled |= CPol;
6353 
6354     Seen |= CPol;
6355   }
6356 
6357   if (!Seen)
6358     return ParseStatus::NoMatch;
6359 
6360   Operands.push_back(
6361       AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6362   return ParseStatus::Success;
6363 }
6364 
6365 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6366                                         int64_t &Scope) {
6367   Scope = AMDGPU::CPol::SCOPE_CU; // default;
6368 
6369   StringRef Value;
6370   SMLoc StringLoc;
6371   ParseStatus Res;
6372 
6373   Res = parseStringWithPrefix("scope", Value, StringLoc);
6374   if (!Res.isSuccess())
6375     return Res;
6376 
6377   Scope = StringSwitch<int64_t>(Value)
6378               .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6379               .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6380               .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6381               .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6382               .Default(0xffffffff);
6383 
6384   if (Scope == 0xffffffff)
6385     return Error(StringLoc, "invalid scope value");
6386 
6387   return ParseStatus::Success;
6388 }
6389 
6390 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6391   TH = AMDGPU::CPol::TH_RT; // default
6392 
6393   StringRef Value;
6394   SMLoc StringLoc;
6395   ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6396   if (!Res.isSuccess())
6397     return Res;
6398 
6399   if (Value == "TH_DEFAULT")
6400     TH = AMDGPU::CPol::TH_RT;
6401   else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6402            Value == "TH_LOAD_NT_WB") {
6403     return Error(StringLoc, "invalid th value");
6404   } else if (Value.starts_with("TH_ATOMIC_")) {
6405     Value = Value.drop_front(10);
6406     TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6407   } else if (Value.starts_with("TH_LOAD_")) {
6408     Value = Value.drop_front(8);
6409     TH = AMDGPU::CPol::TH_TYPE_LOAD;
6410   } else if (Value.starts_with("TH_STORE_")) {
6411     Value = Value.drop_front(9);
6412     TH = AMDGPU::CPol::TH_TYPE_STORE;
6413   } else {
6414     return Error(StringLoc, "invalid th value");
6415   }
6416 
6417   if (Value == "BYPASS")
6418     TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6419 
6420   if (TH != 0) {
6421     if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
6422       TH |= StringSwitch<int64_t>(Value)
6423                 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6424                 .Case("RT", AMDGPU::CPol::TH_RT)
6425                 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6426                 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
6427                 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6428                                        AMDGPU::CPol::TH_ATOMIC_RETURN)
6429                 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
6430                 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6431                                         AMDGPU::CPol::TH_ATOMIC_NT)
6432                 .Default(0xffffffff);
6433     else
6434       TH |= StringSwitch<int64_t>(Value)
6435                 .Case("RT", AMDGPU::CPol::TH_RT)
6436                 .Case("NT", AMDGPU::CPol::TH_NT)
6437                 .Case("HT", AMDGPU::CPol::TH_HT)
6438                 .Case("LU", AMDGPU::CPol::TH_LU)
6439                 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6440                 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6441                 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6442                 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6443                 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6444                 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6445                 .Default(0xffffffff);
6446   }
6447 
6448   if (TH == 0xffffffff)
6449     return Error(StringLoc, "invalid th value");
6450 
6451   return ParseStatus::Success;
6452 }
6453 
6454 static void addOptionalImmOperand(
6455   MCInst& Inst, const OperandVector& Operands,
6456   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6457   AMDGPUOperand::ImmTy ImmT,
6458   int64_t Default = 0) {
6459   auto i = OptionalIdx.find(ImmT);
6460   if (i != OptionalIdx.end()) {
6461     unsigned Idx = i->second;
6462     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6463   } else {
6464     Inst.addOperand(MCOperand::createImm(Default));
6465   }
6466 }
6467 
6468 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6469                                                    StringRef &Value,
6470                                                    SMLoc &StringLoc) {
6471   if (!trySkipId(Prefix, AsmToken::Colon))
6472     return ParseStatus::NoMatch;
6473 
6474   StringLoc = getLoc();
6475   return parseId(Value, "expected an identifier") ? ParseStatus::Success
6476                                                   : ParseStatus::Failure;
6477 }
6478 
6479 //===----------------------------------------------------------------------===//
6480 // MTBUF format
6481 //===----------------------------------------------------------------------===//
6482 
6483 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6484                                   int64_t MaxVal,
6485                                   int64_t &Fmt) {
6486   int64_t Val;
6487   SMLoc Loc = getLoc();
6488 
6489   auto Res = parseIntWithPrefix(Pref, Val);
6490   if (Res.isFailure())
6491     return false;
6492   if (Res.isNoMatch())
6493     return true;
6494 
6495   if (Val < 0 || Val > MaxVal) {
6496     Error(Loc, Twine("out of range ", StringRef(Pref)));
6497     return false;
6498   }
6499 
6500   Fmt = Val;
6501   return true;
6502 }
6503 
6504 ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6505                                               AMDGPUOperand::ImmTy ImmTy) {
6506   const char *Pref = "index_key";
6507   int64_t ImmVal = 0;
6508   SMLoc Loc = getLoc();
6509   auto Res = parseIntWithPrefix(Pref, ImmVal);
6510   if (!Res.isSuccess())
6511     return Res;
6512 
6513   if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6514     return Error(Loc, Twine("out of range ", StringRef(Pref)));
6515 
6516   if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6517     return Error(Loc, Twine("out of range ", StringRef(Pref)));
6518 
6519   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6520   return ParseStatus::Success;
6521 }
6522 
6523 ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6524   return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6525 }
6526 
6527 ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6528   return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6529 }
6530 
6531 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6532 // values to live in a joint format operand in the MCInst encoding.
6533 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6534   using namespace llvm::AMDGPU::MTBUFFormat;
6535 
6536   int64_t Dfmt = DFMT_UNDEF;
6537   int64_t Nfmt = NFMT_UNDEF;
6538 
6539   // dfmt and nfmt can appear in either order, and each is optional.
6540   for (int I = 0; I < 2; ++I) {
6541     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6542       return ParseStatus::Failure;
6543 
6544     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6545       return ParseStatus::Failure;
6546 
6547     // Skip optional comma between dfmt/nfmt
6548     // but guard against 2 commas following each other.
6549     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6550         !peekToken().is(AsmToken::Comma)) {
6551       trySkipToken(AsmToken::Comma);
6552     }
6553   }
6554 
6555   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6556     return ParseStatus::NoMatch;
6557 
6558   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6559   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6560 
6561   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6562   return ParseStatus::Success;
6563 }
6564 
6565 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6566   using namespace llvm::AMDGPU::MTBUFFormat;
6567 
6568   int64_t Fmt = UFMT_UNDEF;
6569 
6570   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6571     return ParseStatus::Failure;
6572 
6573   if (Fmt == UFMT_UNDEF)
6574     return ParseStatus::NoMatch;
6575 
6576   Format = Fmt;
6577   return ParseStatus::Success;
6578 }
6579 
6580 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6581                                     int64_t &Nfmt,
6582                                     StringRef FormatStr,
6583                                     SMLoc Loc) {
6584   using namespace llvm::AMDGPU::MTBUFFormat;
6585   int64_t Format;
6586 
6587   Format = getDfmt(FormatStr);
6588   if (Format != DFMT_UNDEF) {
6589     Dfmt = Format;
6590     return true;
6591   }
6592 
6593   Format = getNfmt(FormatStr, getSTI());
6594   if (Format != NFMT_UNDEF) {
6595     Nfmt = Format;
6596     return true;
6597   }
6598 
6599   Error(Loc, "unsupported format");
6600   return false;
6601 }
6602 
6603 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6604                                                       SMLoc FormatLoc,
6605                                                       int64_t &Format) {
6606   using namespace llvm::AMDGPU::MTBUFFormat;
6607 
6608   int64_t Dfmt = DFMT_UNDEF;
6609   int64_t Nfmt = NFMT_UNDEF;
6610   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6611     return ParseStatus::Failure;
6612 
6613   if (trySkipToken(AsmToken::Comma)) {
6614     StringRef Str;
6615     SMLoc Loc = getLoc();
6616     if (!parseId(Str, "expected a format string") ||
6617         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6618       return ParseStatus::Failure;
6619     if (Dfmt == DFMT_UNDEF)
6620       return Error(Loc, "duplicate numeric format");
6621     if (Nfmt == NFMT_UNDEF)
6622       return Error(Loc, "duplicate data format");
6623   }
6624 
6625   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6626   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6627 
6628   if (isGFX10Plus()) {
6629     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6630     if (Ufmt == UFMT_UNDEF)
6631       return Error(FormatLoc, "unsupported format");
6632     Format = Ufmt;
6633   } else {
6634     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6635   }
6636 
6637   return ParseStatus::Success;
6638 }
6639 
6640 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6641                                                         SMLoc Loc,
6642                                                         int64_t &Format) {
6643   using namespace llvm::AMDGPU::MTBUFFormat;
6644 
6645   auto Id = getUnifiedFormat(FormatStr, getSTI());
6646   if (Id == UFMT_UNDEF)
6647     return ParseStatus::NoMatch;
6648 
6649   if (!isGFX10Plus())
6650     return Error(Loc, "unified format is not supported on this GPU");
6651 
6652   Format = Id;
6653   return ParseStatus::Success;
6654 }
6655 
6656 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6657   using namespace llvm::AMDGPU::MTBUFFormat;
6658   SMLoc Loc = getLoc();
6659 
6660   if (!parseExpr(Format))
6661     return ParseStatus::Failure;
6662   if (!isValidFormatEncoding(Format, getSTI()))
6663     return Error(Loc, "out of range format");
6664 
6665   return ParseStatus::Success;
6666 }
6667 
6668 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6669   using namespace llvm::AMDGPU::MTBUFFormat;
6670 
6671   if (!trySkipId("format", AsmToken::Colon))
6672     return ParseStatus::NoMatch;
6673 
6674   if (trySkipToken(AsmToken::LBrac)) {
6675     StringRef FormatStr;
6676     SMLoc Loc = getLoc();
6677     if (!parseId(FormatStr, "expected a format string"))
6678       return ParseStatus::Failure;
6679 
6680     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6681     if (Res.isNoMatch())
6682       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6683     if (!Res.isSuccess())
6684       return Res;
6685 
6686     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6687       return ParseStatus::Failure;
6688 
6689     return ParseStatus::Success;
6690   }
6691 
6692   return parseNumericFormat(Format);
6693 }
6694 
6695 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6696   using namespace llvm::AMDGPU::MTBUFFormat;
6697 
6698   int64_t Format = getDefaultFormatEncoding(getSTI());
6699   ParseStatus Res;
6700   SMLoc Loc = getLoc();
6701 
6702   // Parse legacy format syntax.
6703   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6704   if (Res.isFailure())
6705     return Res;
6706 
6707   bool FormatFound = Res.isSuccess();
6708 
6709   Operands.push_back(
6710     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6711 
6712   if (FormatFound)
6713     trySkipToken(AsmToken::Comma);
6714 
6715   if (isToken(AsmToken::EndOfStatement)) {
6716     // We are expecting an soffset operand,
6717     // but let matcher handle the error.
6718     return ParseStatus::Success;
6719   }
6720 
6721   // Parse soffset.
6722   Res = parseRegOrImm(Operands);
6723   if (!Res.isSuccess())
6724     return Res;
6725 
6726   trySkipToken(AsmToken::Comma);
6727 
6728   if (!FormatFound) {
6729     Res = parseSymbolicOrNumericFormat(Format);
6730     if (Res.isFailure())
6731       return Res;
6732     if (Res.isSuccess()) {
6733       auto Size = Operands.size();
6734       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6735       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6736       Op.setImm(Format);
6737     }
6738     return ParseStatus::Success;
6739   }
6740 
6741   if (isId("format") && peekToken().is(AsmToken::Colon))
6742     return Error(getLoc(), "duplicate format");
6743   return ParseStatus::Success;
6744 }
6745 
6746 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6747   ParseStatus Res =
6748       parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6749   if (Res.isNoMatch()) {
6750     Res = parseIntWithPrefix("inst_offset", Operands,
6751                              AMDGPUOperand::ImmTyInstOffset);
6752   }
6753   return Res;
6754 }
6755 
6756 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6757   ParseStatus Res =
6758       parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6759   if (Res.isNoMatch())
6760     Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6761   return Res;
6762 }
6763 
6764 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6765   ParseStatus Res =
6766       parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6767   if (Res.isNoMatch()) {
6768     Res =
6769         parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
6770   }
6771   return Res;
6772 }
6773 
6774 //===----------------------------------------------------------------------===//
6775 // Exp
6776 //===----------------------------------------------------------------------===//
6777 
6778 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6779   OptionalImmIndexMap OptionalIdx;
6780 
6781   unsigned OperandIdx[4];
6782   unsigned EnMask = 0;
6783   int SrcIdx = 0;
6784 
6785   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6786     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6787 
6788     // Add the register arguments
6789     if (Op.isReg()) {
6790       assert(SrcIdx < 4);
6791       OperandIdx[SrcIdx] = Inst.size();
6792       Op.addRegOperands(Inst, 1);
6793       ++SrcIdx;
6794       continue;
6795     }
6796 
6797     if (Op.isOff()) {
6798       assert(SrcIdx < 4);
6799       OperandIdx[SrcIdx] = Inst.size();
6800       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6801       ++SrcIdx;
6802       continue;
6803     }
6804 
6805     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6806       Op.addImmOperands(Inst, 1);
6807       continue;
6808     }
6809 
6810     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6811       continue;
6812 
6813     // Handle optional arguments
6814     OptionalIdx[Op.getImmTy()] = i;
6815   }
6816 
6817   assert(SrcIdx == 4);
6818 
6819   bool Compr = false;
6820   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6821     Compr = true;
6822     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6823     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6824     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6825   }
6826 
6827   for (auto i = 0; i < SrcIdx; ++i) {
6828     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6829       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6830     }
6831   }
6832 
6833   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6834   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6835 
6836   Inst.addOperand(MCOperand::createImm(EnMask));
6837 }
6838 
6839 //===----------------------------------------------------------------------===//
6840 // s_waitcnt
6841 //===----------------------------------------------------------------------===//
6842 
6843 static bool
6844 encodeCnt(
6845   const AMDGPU::IsaVersion ISA,
6846   int64_t &IntVal,
6847   int64_t CntVal,
6848   bool Saturate,
6849   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6850   unsigned (*decode)(const IsaVersion &Version, unsigned))
6851 {
6852   bool Failed = false;
6853 
6854   IntVal = encode(ISA, IntVal, CntVal);
6855   if (CntVal != decode(ISA, IntVal)) {
6856     if (Saturate) {
6857       IntVal = encode(ISA, IntVal, -1);
6858     } else {
6859       Failed = true;
6860     }
6861   }
6862   return Failed;
6863 }
6864 
6865 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6866 
6867   SMLoc CntLoc = getLoc();
6868   StringRef CntName = getTokenStr();
6869 
6870   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6871       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6872     return false;
6873 
6874   int64_t CntVal;
6875   SMLoc ValLoc = getLoc();
6876   if (!parseExpr(CntVal))
6877     return false;
6878 
6879   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6880 
6881   bool Failed = true;
6882   bool Sat = CntName.ends_with("_sat");
6883 
6884   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6885     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6886   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6887     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6888   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6889     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6890   } else {
6891     Error(CntLoc, "invalid counter name " + CntName);
6892     return false;
6893   }
6894 
6895   if (Failed) {
6896     Error(ValLoc, "too large value for " + CntName);
6897     return false;
6898   }
6899 
6900   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6901     return false;
6902 
6903   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6904     if (isToken(AsmToken::EndOfStatement)) {
6905       Error(getLoc(), "expected a counter name");
6906       return false;
6907     }
6908   }
6909 
6910   return true;
6911 }
6912 
6913 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
6914   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6915   int64_t Waitcnt = getWaitcntBitMask(ISA);
6916   SMLoc S = getLoc();
6917 
6918   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6919     while (!isToken(AsmToken::EndOfStatement)) {
6920       if (!parseCnt(Waitcnt))
6921         return ParseStatus::Failure;
6922     }
6923   } else {
6924     if (!parseExpr(Waitcnt))
6925       return ParseStatus::Failure;
6926   }
6927 
6928   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6929   return ParseStatus::Success;
6930 }
6931 
6932 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6933   SMLoc FieldLoc = getLoc();
6934   StringRef FieldName = getTokenStr();
6935   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6936       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6937     return false;
6938 
6939   SMLoc ValueLoc = getLoc();
6940   StringRef ValueName = getTokenStr();
6941   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6942       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6943     return false;
6944 
6945   unsigned Shift;
6946   if (FieldName == "instid0") {
6947     Shift = 0;
6948   } else if (FieldName == "instskip") {
6949     Shift = 4;
6950   } else if (FieldName == "instid1") {
6951     Shift = 7;
6952   } else {
6953     Error(FieldLoc, "invalid field name " + FieldName);
6954     return false;
6955   }
6956 
6957   int Value;
6958   if (Shift == 4) {
6959     // Parse values for instskip.
6960     Value = StringSwitch<int>(ValueName)
6961                 .Case("SAME", 0)
6962                 .Case("NEXT", 1)
6963                 .Case("SKIP_1", 2)
6964                 .Case("SKIP_2", 3)
6965                 .Case("SKIP_3", 4)
6966                 .Case("SKIP_4", 5)
6967                 .Default(-1);
6968   } else {
6969     // Parse values for instid0 and instid1.
6970     Value = StringSwitch<int>(ValueName)
6971                 .Case("NO_DEP", 0)
6972                 .Case("VALU_DEP_1", 1)
6973                 .Case("VALU_DEP_2", 2)
6974                 .Case("VALU_DEP_3", 3)
6975                 .Case("VALU_DEP_4", 4)
6976                 .Case("TRANS32_DEP_1", 5)
6977                 .Case("TRANS32_DEP_2", 6)
6978                 .Case("TRANS32_DEP_3", 7)
6979                 .Case("FMA_ACCUM_CYCLE_1", 8)
6980                 .Case("SALU_CYCLE_1", 9)
6981                 .Case("SALU_CYCLE_2", 10)
6982                 .Case("SALU_CYCLE_3", 11)
6983                 .Default(-1);
6984   }
6985   if (Value < 0) {
6986     Error(ValueLoc, "invalid value name " + ValueName);
6987     return false;
6988   }
6989 
6990   Delay |= Value << Shift;
6991   return true;
6992 }
6993 
6994 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
6995   int64_t Delay = 0;
6996   SMLoc S = getLoc();
6997 
6998   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6999     do {
7000       if (!parseDelay(Delay))
7001         return ParseStatus::Failure;
7002     } while (trySkipToken(AsmToken::Pipe));
7003   } else {
7004     if (!parseExpr(Delay))
7005       return ParseStatus::Failure;
7006   }
7007 
7008   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7009   return ParseStatus::Success;
7010 }
7011 
7012 bool
7013 AMDGPUOperand::isSWaitCnt() const {
7014   return isImm();
7015 }
7016 
7017 bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7018 
7019 //===----------------------------------------------------------------------===//
7020 // DepCtr
7021 //===----------------------------------------------------------------------===//
7022 
7023 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7024                                   StringRef DepCtrName) {
7025   switch (ErrorId) {
7026   case OPR_ID_UNKNOWN:
7027     Error(Loc, Twine("invalid counter name ", DepCtrName));
7028     return;
7029   case OPR_ID_UNSUPPORTED:
7030     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7031     return;
7032   case OPR_ID_DUPLICATE:
7033     Error(Loc, Twine("duplicate counter name ", DepCtrName));
7034     return;
7035   case OPR_VAL_INVALID:
7036     Error(Loc, Twine("invalid value for ", DepCtrName));
7037     return;
7038   default:
7039     assert(false);
7040   }
7041 }
7042 
7043 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7044 
7045   using namespace llvm::AMDGPU::DepCtr;
7046 
7047   SMLoc DepCtrLoc = getLoc();
7048   StringRef DepCtrName = getTokenStr();
7049 
7050   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7051       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7052     return false;
7053 
7054   int64_t ExprVal;
7055   if (!parseExpr(ExprVal))
7056     return false;
7057 
7058   unsigned PrevOprMask = UsedOprMask;
7059   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7060 
7061   if (CntVal < 0) {
7062     depCtrError(DepCtrLoc, CntVal, DepCtrName);
7063     return false;
7064   }
7065 
7066   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7067     return false;
7068 
7069   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7070     if (isToken(AsmToken::EndOfStatement)) {
7071       Error(getLoc(), "expected a counter name");
7072       return false;
7073     }
7074   }
7075 
7076   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7077   DepCtr = (DepCtr & ~CntValMask) | CntVal;
7078   return true;
7079 }
7080 
7081 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7082   using namespace llvm::AMDGPU::DepCtr;
7083 
7084   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7085   SMLoc Loc = getLoc();
7086 
7087   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7088     unsigned UsedOprMask = 0;
7089     while (!isToken(AsmToken::EndOfStatement)) {
7090       if (!parseDepCtr(DepCtr, UsedOprMask))
7091         return ParseStatus::Failure;
7092     }
7093   } else {
7094     if (!parseExpr(DepCtr))
7095       return ParseStatus::Failure;
7096   }
7097 
7098   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7099   return ParseStatus::Success;
7100 }
7101 
7102 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7103 
7104 //===----------------------------------------------------------------------===//
7105 // hwreg
7106 //===----------------------------------------------------------------------===//
7107 
7108 bool
7109 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
7110                                 OperandInfoTy &Offset,
7111                                 OperandInfoTy &Width) {
7112   using namespace llvm::AMDGPU::Hwreg;
7113 
7114   // The register may be specified by name or using a numeric code
7115   HwReg.Loc = getLoc();
7116   if (isToken(AsmToken::Identifier) &&
7117       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7118     HwReg.IsSymbolic = true;
7119     lex(); // skip register name
7120   } else if (!parseExpr(HwReg.Id, "a register name")) {
7121     return false;
7122   }
7123 
7124   if (trySkipToken(AsmToken::RParen))
7125     return true;
7126 
7127   // parse optional params
7128   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7129     return false;
7130 
7131   Offset.Loc = getLoc();
7132   if (!parseExpr(Offset.Id))
7133     return false;
7134 
7135   if (!skipToken(AsmToken::Comma, "expected a comma"))
7136     return false;
7137 
7138   Width.Loc = getLoc();
7139   return parseExpr(Width.Id) &&
7140          skipToken(AsmToken::RParen, "expected a closing parenthesis");
7141 }
7142 
7143 bool
7144 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
7145                                const OperandInfoTy &Offset,
7146                                const OperandInfoTy &Width) {
7147 
7148   using namespace llvm::AMDGPU::Hwreg;
7149 
7150   if (HwReg.IsSymbolic) {
7151     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
7152       Error(HwReg.Loc,
7153             "specified hardware register is not supported on this GPU");
7154       return false;
7155     }
7156   } else {
7157     if (!isValidHwreg(HwReg.Id)) {
7158       Error(HwReg.Loc,
7159             "invalid code of hardware register: only 6-bit values are legal");
7160       return false;
7161     }
7162   }
7163   if (!isValidHwregOffset(Offset.Id)) {
7164     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
7165     return false;
7166   }
7167   if (!isValidHwregWidth(Width.Id)) {
7168     Error(Width.Loc,
7169           "invalid bitfield width: only values from 1 to 32 are legal");
7170     return false;
7171   }
7172   return true;
7173 }
7174 
7175 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7176   using namespace llvm::AMDGPU::Hwreg;
7177 
7178   int64_t ImmVal = 0;
7179   SMLoc Loc = getLoc();
7180 
7181   if (trySkipId("hwreg", AsmToken::LParen)) {
7182     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
7183     OperandInfoTy Offset(OFFSET_DEFAULT_);
7184     OperandInfoTy Width(WIDTH_DEFAULT_);
7185     if (parseHwregBody(HwReg, Offset, Width) &&
7186         validateHwreg(HwReg, Offset, Width)) {
7187       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
7188     } else {
7189       return ParseStatus::Failure;
7190     }
7191   } else if (parseExpr(ImmVal, "a hwreg macro")) {
7192     if (ImmVal < 0 || !isUInt<16>(ImmVal))
7193       return Error(Loc, "invalid immediate: only 16-bit values are legal");
7194   } else {
7195     return ParseStatus::Failure;
7196   }
7197 
7198   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7199   return ParseStatus::Success;
7200 }
7201 
7202 bool AMDGPUOperand::isHwreg() const {
7203   return isImmTy(ImmTyHwreg);
7204 }
7205 
7206 //===----------------------------------------------------------------------===//
7207 // sendmsg
7208 //===----------------------------------------------------------------------===//
7209 
7210 bool
7211 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7212                                   OperandInfoTy &Op,
7213                                   OperandInfoTy &Stream) {
7214   using namespace llvm::AMDGPU::SendMsg;
7215 
7216   Msg.Loc = getLoc();
7217   if (isToken(AsmToken::Identifier) &&
7218       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7219     Msg.IsSymbolic = true;
7220     lex(); // skip message name
7221   } else if (!parseExpr(Msg.Id, "a message name")) {
7222     return false;
7223   }
7224 
7225   if (trySkipToken(AsmToken::Comma)) {
7226     Op.IsDefined = true;
7227     Op.Loc = getLoc();
7228     if (isToken(AsmToken::Identifier) &&
7229         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
7230       lex(); // skip operation name
7231     } else if (!parseExpr(Op.Id, "an operation name")) {
7232       return false;
7233     }
7234 
7235     if (trySkipToken(AsmToken::Comma)) {
7236       Stream.IsDefined = true;
7237       Stream.Loc = getLoc();
7238       if (!parseExpr(Stream.Id))
7239         return false;
7240     }
7241   }
7242 
7243   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7244 }
7245 
7246 bool
7247 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7248                                  const OperandInfoTy &Op,
7249                                  const OperandInfoTy &Stream) {
7250   using namespace llvm::AMDGPU::SendMsg;
7251 
7252   // Validation strictness depends on whether message is specified
7253   // in a symbolic or in a numeric form. In the latter case
7254   // only encoding possibility is checked.
7255   bool Strict = Msg.IsSymbolic;
7256 
7257   if (Strict) {
7258     if (Msg.Id == OPR_ID_UNSUPPORTED) {
7259       Error(Msg.Loc, "specified message id is not supported on this GPU");
7260       return false;
7261     }
7262   } else {
7263     if (!isValidMsgId(Msg.Id, getSTI())) {
7264       Error(Msg.Loc, "invalid message id");
7265       return false;
7266     }
7267   }
7268   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
7269     if (Op.IsDefined) {
7270       Error(Op.Loc, "message does not support operations");
7271     } else {
7272       Error(Msg.Loc, "missing message operation");
7273     }
7274     return false;
7275   }
7276   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
7277     Error(Op.Loc, "invalid operation id");
7278     return false;
7279   }
7280   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
7281       Stream.IsDefined) {
7282     Error(Stream.Loc, "message operation does not support streams");
7283     return false;
7284   }
7285   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
7286     Error(Stream.Loc, "invalid message stream id");
7287     return false;
7288   }
7289   return true;
7290 }
7291 
7292 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7293   using namespace llvm::AMDGPU::SendMsg;
7294 
7295   int64_t ImmVal = 0;
7296   SMLoc Loc = getLoc();
7297 
7298   if (trySkipId("sendmsg", AsmToken::LParen)) {
7299     OperandInfoTy Msg(OPR_ID_UNKNOWN);
7300     OperandInfoTy Op(OP_NONE_);
7301     OperandInfoTy Stream(STREAM_ID_NONE_);
7302     if (parseSendMsgBody(Msg, Op, Stream) &&
7303         validateSendMsg(Msg, Op, Stream)) {
7304       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
7305     } else {
7306       return ParseStatus::Failure;
7307     }
7308   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7309     if (ImmVal < 0 || !isUInt<16>(ImmVal))
7310       return Error(Loc, "invalid immediate: only 16-bit values are legal");
7311   } else {
7312     return ParseStatus::Failure;
7313   }
7314 
7315   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7316   return ParseStatus::Success;
7317 }
7318 
7319 bool AMDGPUOperand::isSendMsg() const {
7320   return isImmTy(ImmTySendMsg);
7321 }
7322 
7323 //===----------------------------------------------------------------------===//
7324 // v_interp
7325 //===----------------------------------------------------------------------===//
7326 
7327 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7328   StringRef Str;
7329   SMLoc S = getLoc();
7330 
7331   if (!parseId(Str))
7332     return ParseStatus::NoMatch;
7333 
7334   int Slot = StringSwitch<int>(Str)
7335     .Case("p10", 0)
7336     .Case("p20", 1)
7337     .Case("p0", 2)
7338     .Default(-1);
7339 
7340   if (Slot == -1)
7341     return Error(S, "invalid interpolation slot");
7342 
7343   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7344                                               AMDGPUOperand::ImmTyInterpSlot));
7345   return ParseStatus::Success;
7346 }
7347 
7348 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7349   StringRef Str;
7350   SMLoc S = getLoc();
7351 
7352   if (!parseId(Str))
7353     return ParseStatus::NoMatch;
7354 
7355   if (!Str.starts_with("attr"))
7356     return Error(S, "invalid interpolation attribute");
7357 
7358   StringRef Chan = Str.take_back(2);
7359   int AttrChan = StringSwitch<int>(Chan)
7360     .Case(".x", 0)
7361     .Case(".y", 1)
7362     .Case(".z", 2)
7363     .Case(".w", 3)
7364     .Default(-1);
7365   if (AttrChan == -1)
7366     return Error(S, "invalid or missing interpolation attribute channel");
7367 
7368   Str = Str.drop_back(2).drop_front(4);
7369 
7370   uint8_t Attr;
7371   if (Str.getAsInteger(10, Attr))
7372     return Error(S, "invalid or missing interpolation attribute number");
7373 
7374   if (Attr > 32)
7375     return Error(S, "out of bounds interpolation attribute number");
7376 
7377   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7378 
7379   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7380                                               AMDGPUOperand::ImmTyInterpAttr));
7381   Operands.push_back(AMDGPUOperand::CreateImm(
7382       this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7383   return ParseStatus::Success;
7384 }
7385 
7386 //===----------------------------------------------------------------------===//
7387 // exp
7388 //===----------------------------------------------------------------------===//
7389 
7390 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7391   using namespace llvm::AMDGPU::Exp;
7392 
7393   StringRef Str;
7394   SMLoc S = getLoc();
7395 
7396   if (!parseId(Str))
7397     return ParseStatus::NoMatch;
7398 
7399   unsigned Id = getTgtId(Str);
7400   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7401     return Error(S, (Id == ET_INVALID)
7402                         ? "invalid exp target"
7403                         : "exp target is not supported on this GPU");
7404 
7405   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7406                                               AMDGPUOperand::ImmTyExpTgt));
7407   return ParseStatus::Success;
7408 }
7409 
7410 //===----------------------------------------------------------------------===//
7411 // parser helpers
7412 //===----------------------------------------------------------------------===//
7413 
7414 bool
7415 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7416   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7417 }
7418 
7419 bool
7420 AMDGPUAsmParser::isId(const StringRef Id) const {
7421   return isId(getToken(), Id);
7422 }
7423 
7424 bool
7425 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7426   return getTokenKind() == Kind;
7427 }
7428 
7429 StringRef AMDGPUAsmParser::getId() const {
7430   return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7431 }
7432 
7433 bool
7434 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7435   if (isId(Id)) {
7436     lex();
7437     return true;
7438   }
7439   return false;
7440 }
7441 
7442 bool
7443 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7444   if (isToken(AsmToken::Identifier)) {
7445     StringRef Tok = getTokenStr();
7446     if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7447       lex();
7448       return true;
7449     }
7450   }
7451   return false;
7452 }
7453 
7454 bool
7455 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7456   if (isId(Id) && peekToken().is(Kind)) {
7457     lex();
7458     lex();
7459     return true;
7460   }
7461   return false;
7462 }
7463 
7464 bool
7465 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7466   if (isToken(Kind)) {
7467     lex();
7468     return true;
7469   }
7470   return false;
7471 }
7472 
7473 bool
7474 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7475                            const StringRef ErrMsg) {
7476   if (!trySkipToken(Kind)) {
7477     Error(getLoc(), ErrMsg);
7478     return false;
7479   }
7480   return true;
7481 }
7482 
7483 bool
7484 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7485   SMLoc S = getLoc();
7486 
7487   const MCExpr *Expr;
7488   if (Parser.parseExpression(Expr))
7489     return false;
7490 
7491   if (Expr->evaluateAsAbsolute(Imm))
7492     return true;
7493 
7494   if (Expected.empty()) {
7495     Error(S, "expected absolute expression");
7496   } else {
7497     Error(S, Twine("expected ", Expected) +
7498              Twine(" or an absolute expression"));
7499   }
7500   return false;
7501 }
7502 
7503 bool
7504 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7505   SMLoc S = getLoc();
7506 
7507   const MCExpr *Expr;
7508   if (Parser.parseExpression(Expr))
7509     return false;
7510 
7511   int64_t IntVal;
7512   if (Expr->evaluateAsAbsolute(IntVal)) {
7513     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7514   } else {
7515     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7516   }
7517   return true;
7518 }
7519 
7520 bool
7521 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7522   if (isToken(AsmToken::String)) {
7523     Val = getToken().getStringContents();
7524     lex();
7525     return true;
7526   } else {
7527     Error(getLoc(), ErrMsg);
7528     return false;
7529   }
7530 }
7531 
7532 bool
7533 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7534   if (isToken(AsmToken::Identifier)) {
7535     Val = getTokenStr();
7536     lex();
7537     return true;
7538   } else {
7539     if (!ErrMsg.empty())
7540       Error(getLoc(), ErrMsg);
7541     return false;
7542   }
7543 }
7544 
7545 AsmToken
7546 AMDGPUAsmParser::getToken() const {
7547   return Parser.getTok();
7548 }
7549 
7550 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7551   return isToken(AsmToken::EndOfStatement)
7552              ? getToken()
7553              : getLexer().peekTok(ShouldSkipSpace);
7554 }
7555 
7556 void
7557 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7558   auto TokCount = getLexer().peekTokens(Tokens);
7559 
7560   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7561     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7562 }
7563 
7564 AsmToken::TokenKind
7565 AMDGPUAsmParser::getTokenKind() const {
7566   return getLexer().getKind();
7567 }
7568 
7569 SMLoc
7570 AMDGPUAsmParser::getLoc() const {
7571   return getToken().getLoc();
7572 }
7573 
7574 StringRef
7575 AMDGPUAsmParser::getTokenStr() const {
7576   return getToken().getString();
7577 }
7578 
7579 void
7580 AMDGPUAsmParser::lex() {
7581   Parser.Lex();
7582 }
7583 
7584 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7585   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7586 }
7587 
7588 SMLoc
7589 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7590                                const OperandVector &Operands) const {
7591   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7592     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7593     if (Test(Op))
7594       return Op.getStartLoc();
7595   }
7596   return getInstLoc(Operands);
7597 }
7598 
7599 SMLoc
7600 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7601                            const OperandVector &Operands) const {
7602   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7603   return getOperandLoc(Test, Operands);
7604 }
7605 
7606 SMLoc
7607 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7608                            const OperandVector &Operands) const {
7609   auto Test = [=](const AMDGPUOperand& Op) {
7610     return Op.isRegKind() && Op.getReg() == Reg;
7611   };
7612   return getOperandLoc(Test, Operands);
7613 }
7614 
7615 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7616                                  bool SearchMandatoryLiterals) const {
7617   auto Test = [](const AMDGPUOperand& Op) {
7618     return Op.IsImmKindLiteral() || Op.isExpr();
7619   };
7620   SMLoc Loc = getOperandLoc(Test, Operands);
7621   if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7622     Loc = getMandatoryLitLoc(Operands);
7623   return Loc;
7624 }
7625 
7626 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7627   auto Test = [](const AMDGPUOperand &Op) {
7628     return Op.IsImmKindMandatoryLiteral();
7629   };
7630   return getOperandLoc(Test, Operands);
7631 }
7632 
7633 SMLoc
7634 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7635   auto Test = [](const AMDGPUOperand& Op) {
7636     return Op.isImmKindConst();
7637   };
7638   return getOperandLoc(Test, Operands);
7639 }
7640 
7641 //===----------------------------------------------------------------------===//
7642 // swizzle
7643 //===----------------------------------------------------------------------===//
7644 
7645 LLVM_READNONE
7646 static unsigned
7647 encodeBitmaskPerm(const unsigned AndMask,
7648                   const unsigned OrMask,
7649                   const unsigned XorMask) {
7650   using namespace llvm::AMDGPU::Swizzle;
7651 
7652   return BITMASK_PERM_ENC |
7653          (AndMask << BITMASK_AND_SHIFT) |
7654          (OrMask  << BITMASK_OR_SHIFT)  |
7655          (XorMask << BITMASK_XOR_SHIFT);
7656 }
7657 
7658 bool
7659 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7660                                      const unsigned MinVal,
7661                                      const unsigned MaxVal,
7662                                      const StringRef ErrMsg,
7663                                      SMLoc &Loc) {
7664   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7665     return false;
7666   }
7667   Loc = getLoc();
7668   if (!parseExpr(Op)) {
7669     return false;
7670   }
7671   if (Op < MinVal || Op > MaxVal) {
7672     Error(Loc, ErrMsg);
7673     return false;
7674   }
7675 
7676   return true;
7677 }
7678 
7679 bool
7680 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7681                                       const unsigned MinVal,
7682                                       const unsigned MaxVal,
7683                                       const StringRef ErrMsg) {
7684   SMLoc Loc;
7685   for (unsigned i = 0; i < OpNum; ++i) {
7686     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7687       return false;
7688   }
7689 
7690   return true;
7691 }
7692 
7693 bool
7694 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7695   using namespace llvm::AMDGPU::Swizzle;
7696 
7697   int64_t Lane[LANE_NUM];
7698   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7699                            "expected a 2-bit lane id")) {
7700     Imm = QUAD_PERM_ENC;
7701     for (unsigned I = 0; I < LANE_NUM; ++I) {
7702       Imm |= Lane[I] << (LANE_SHIFT * I);
7703     }
7704     return true;
7705   }
7706   return false;
7707 }
7708 
7709 bool
7710 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7711   using namespace llvm::AMDGPU::Swizzle;
7712 
7713   SMLoc Loc;
7714   int64_t GroupSize;
7715   int64_t LaneIdx;
7716 
7717   if (!parseSwizzleOperand(GroupSize,
7718                            2, 32,
7719                            "group size must be in the interval [2,32]",
7720                            Loc)) {
7721     return false;
7722   }
7723   if (!isPowerOf2_64(GroupSize)) {
7724     Error(Loc, "group size must be a power of two");
7725     return false;
7726   }
7727   if (parseSwizzleOperand(LaneIdx,
7728                           0, GroupSize - 1,
7729                           "lane id must be in the interval [0,group size - 1]",
7730                           Loc)) {
7731     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7732     return true;
7733   }
7734   return false;
7735 }
7736 
7737 bool
7738 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7739   using namespace llvm::AMDGPU::Swizzle;
7740 
7741   SMLoc Loc;
7742   int64_t GroupSize;
7743 
7744   if (!parseSwizzleOperand(GroupSize,
7745                            2, 32,
7746                            "group size must be in the interval [2,32]",
7747                            Loc)) {
7748     return false;
7749   }
7750   if (!isPowerOf2_64(GroupSize)) {
7751     Error(Loc, "group size must be a power of two");
7752     return false;
7753   }
7754 
7755   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7756   return true;
7757 }
7758 
7759 bool
7760 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7761   using namespace llvm::AMDGPU::Swizzle;
7762 
7763   SMLoc Loc;
7764   int64_t GroupSize;
7765 
7766   if (!parseSwizzleOperand(GroupSize,
7767                            1, 16,
7768                            "group size must be in the interval [1,16]",
7769                            Loc)) {
7770     return false;
7771   }
7772   if (!isPowerOf2_64(GroupSize)) {
7773     Error(Loc, "group size must be a power of two");
7774     return false;
7775   }
7776 
7777   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7778   return true;
7779 }
7780 
7781 bool
7782 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7783   using namespace llvm::AMDGPU::Swizzle;
7784 
7785   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7786     return false;
7787   }
7788 
7789   StringRef Ctl;
7790   SMLoc StrLoc = getLoc();
7791   if (!parseString(Ctl)) {
7792     return false;
7793   }
7794   if (Ctl.size() != BITMASK_WIDTH) {
7795     Error(StrLoc, "expected a 5-character mask");
7796     return false;
7797   }
7798 
7799   unsigned AndMask = 0;
7800   unsigned OrMask = 0;
7801   unsigned XorMask = 0;
7802 
7803   for (size_t i = 0; i < Ctl.size(); ++i) {
7804     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7805     switch(Ctl[i]) {
7806     default:
7807       Error(StrLoc, "invalid mask");
7808       return false;
7809     case '0':
7810       break;
7811     case '1':
7812       OrMask |= Mask;
7813       break;
7814     case 'p':
7815       AndMask |= Mask;
7816       break;
7817     case 'i':
7818       AndMask |= Mask;
7819       XorMask |= Mask;
7820       break;
7821     }
7822   }
7823 
7824   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7825   return true;
7826 }
7827 
7828 bool
7829 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7830 
7831   SMLoc OffsetLoc = getLoc();
7832 
7833   if (!parseExpr(Imm, "a swizzle macro")) {
7834     return false;
7835   }
7836   if (!isUInt<16>(Imm)) {
7837     Error(OffsetLoc, "expected a 16-bit offset");
7838     return false;
7839   }
7840   return true;
7841 }
7842 
7843 bool
7844 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7845   using namespace llvm::AMDGPU::Swizzle;
7846 
7847   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
7848 
7849     SMLoc ModeLoc = getLoc();
7850     bool Ok = false;
7851 
7852     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7853       Ok = parseSwizzleQuadPerm(Imm);
7854     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7855       Ok = parseSwizzleBitmaskPerm(Imm);
7856     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7857       Ok = parseSwizzleBroadcast(Imm);
7858     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7859       Ok = parseSwizzleSwap(Imm);
7860     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7861       Ok = parseSwizzleReverse(Imm);
7862     } else {
7863       Error(ModeLoc, "expected a swizzle mode");
7864     }
7865 
7866     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
7867   }
7868 
7869   return false;
7870 }
7871 
7872 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
7873   SMLoc S = getLoc();
7874   int64_t Imm = 0;
7875 
7876   if (trySkipId("offset")) {
7877 
7878     bool Ok = false;
7879     if (skipToken(AsmToken::Colon, "expected a colon")) {
7880       if (trySkipId("swizzle")) {
7881         Ok = parseSwizzleMacro(Imm);
7882       } else {
7883         Ok = parseSwizzleOffset(Imm);
7884       }
7885     }
7886 
7887     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7888 
7889     return Ok ? ParseStatus::Success : ParseStatus::Failure;
7890   }
7891   return ParseStatus::NoMatch;
7892 }
7893 
7894 bool
7895 AMDGPUOperand::isSwizzle() const {
7896   return isImmTy(ImmTySwizzle);
7897 }
7898 
7899 //===----------------------------------------------------------------------===//
7900 // VGPR Index Mode
7901 //===----------------------------------------------------------------------===//
7902 
7903 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7904 
7905   using namespace llvm::AMDGPU::VGPRIndexMode;
7906 
7907   if (trySkipToken(AsmToken::RParen)) {
7908     return OFF;
7909   }
7910 
7911   int64_t Imm = 0;
7912 
7913   while (true) {
7914     unsigned Mode = 0;
7915     SMLoc S = getLoc();
7916 
7917     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7918       if (trySkipId(IdSymbolic[ModeId])) {
7919         Mode = 1 << ModeId;
7920         break;
7921       }
7922     }
7923 
7924     if (Mode == 0) {
7925       Error(S, (Imm == 0)?
7926                "expected a VGPR index mode or a closing parenthesis" :
7927                "expected a VGPR index mode");
7928       return UNDEF;
7929     }
7930 
7931     if (Imm & Mode) {
7932       Error(S, "duplicate VGPR index mode");
7933       return UNDEF;
7934     }
7935     Imm |= Mode;
7936 
7937     if (trySkipToken(AsmToken::RParen))
7938       break;
7939     if (!skipToken(AsmToken::Comma,
7940                    "expected a comma or a closing parenthesis"))
7941       return UNDEF;
7942   }
7943 
7944   return Imm;
7945 }
7946 
7947 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7948 
7949   using namespace llvm::AMDGPU::VGPRIndexMode;
7950 
7951   int64_t Imm = 0;
7952   SMLoc S = getLoc();
7953 
7954   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7955     Imm = parseGPRIdxMacro();
7956     if (Imm == UNDEF)
7957       return ParseStatus::Failure;
7958   } else {
7959     if (getParser().parseAbsoluteExpression(Imm))
7960       return ParseStatus::Failure;
7961     if (Imm < 0 || !isUInt<4>(Imm))
7962       return Error(S, "invalid immediate: only 4-bit values are legal");
7963   }
7964 
7965   Operands.push_back(
7966       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7967   return ParseStatus::Success;
7968 }
7969 
7970 bool AMDGPUOperand::isGPRIdxMode() const {
7971   return isImmTy(ImmTyGprIdxMode);
7972 }
7973 
7974 //===----------------------------------------------------------------------===//
7975 // sopp branch targets
7976 //===----------------------------------------------------------------------===//
7977 
7978 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
7979 
7980   // Make sure we are not parsing something
7981   // that looks like a label or an expression but is not.
7982   // This will improve error messages.
7983   if (isRegister() || isModifier())
7984     return ParseStatus::NoMatch;
7985 
7986   if (!parseExpr(Operands))
7987     return ParseStatus::Failure;
7988 
7989   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7990   assert(Opr.isImm() || Opr.isExpr());
7991   SMLoc Loc = Opr.getStartLoc();
7992 
7993   // Currently we do not support arbitrary expressions as branch targets.
7994   // Only labels and absolute expressions are accepted.
7995   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7996     Error(Loc, "expected an absolute expression or a label");
7997   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7998     Error(Loc, "expected a 16-bit signed jump offset");
7999   }
8000 
8001   return ParseStatus::Success;
8002 }
8003 
8004 //===----------------------------------------------------------------------===//
8005 // Boolean holding registers
8006 //===----------------------------------------------------------------------===//
8007 
8008 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8009   return parseReg(Operands);
8010 }
8011 
8012 //===----------------------------------------------------------------------===//
8013 // mubuf
8014 //===----------------------------------------------------------------------===//
8015 
// Converts parsed MUBUF operands into MCInst operands.
//
// Parsed operand 0 is skipped (iteration starts at FirstOperandIdx == 1).
// Registers and untyped immediates are appended to Inst in source order,
// tokens are ignored, and typed immediates are remembered in OptionalIdx and
// emitted at the end as offset followed by cpol (default 0).
//
// When IsAtomic is set, the GLC bit of the first cpol operand found selects
// between the returning and non-returning form: without GLC the opcode is
// replaced by the result of getAtomicNoRetOp (unless that is -1). Whether a
// tied source must be inserted is then taken from the IsAtomicRet flag of the
// final opcode.
8016 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8017                                    const OperandVector &Operands,
8018                                    bool IsAtomic) {
8019   OptionalImmIndexMap OptionalIdx;
8020   unsigned FirstOperandIdx = 1;
8021   bool IsAtomicReturn = false;
8022 
8023   if (IsAtomic) {
       // Look at the first cpol operand only; its GLC bit requests a return
       // value.
8024     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8025       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8026       if (!Op.isCPol())
8027         continue;
8028       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
8029       break;
8030     }
8031 
8032     if (!IsAtomicReturn) {
8033       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
8034       if (NewOpc != -1)
8035         Inst.setOpcode(NewOpc);
8036     }
8037 
       // Re-derive the flag from the (possibly replaced) opcode.
8038     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
8039                       SIInstrFlags::IsAtomicRet;
8040   }
8041 
8042   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8043     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8044 
8045     // Add the register arguments
8046     if (Op.isReg()) {
8047       Op.addRegOperands(Inst, 1);
8048       // Insert a tied src for atomic return dst.
8049       // This cannot be postponed as subsequent calls to
8050       // addImmOperands rely on correct number of MC operands.
8051       if (IsAtomicReturn && i == FirstOperandIdx)
8052         Op.addRegOperands(Inst, 1);
8053       continue;
8054     }
8055 
8056     // Handle the case where soffset is an immediate
8057     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8058       Op.addImmOperands(Inst, 1);
8059       continue;
8060     }
8061 
8062     // Handle tokens like 'offen' which are sometimes hard-coded into the
8063     // asm string.  There are no MCInst operands for these.
8064     if (Op.isToken()) {
8065       continue;
8066     }
8067     assert(Op.isImm());
8068 
8069     // Handle optional arguments
8070     OptionalIdx[Op.getImmTy()] = i;
8071   }
8072 
     // Optional operands are emitted in a fixed order regardless of the order
     // in which they were written.
8073   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8074   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8075 }
8076 
8077 //===----------------------------------------------------------------------===//
8078 // smrd
8079 //===----------------------------------------------------------------------===//
8080 
8081 bool AMDGPUOperand::isSMRDOffset8() const {
8082   return isImmLiteral() && isUInt<8>(getImm());
8083 }
8084 
8085 bool AMDGPUOperand::isSMEMOffset() const {
8086   // Offset range is checked later by validator.
8087   return isImmLiteral();
8088 }
8089 
8090 bool AMDGPUOperand::isSMRDLiteralOffset() const {
8091   // 32-bit literals are only supported on CI and we only want to use them
8092   // when the offset is > 8-bits.
8093   return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8094 }
8095 
8096 //===----------------------------------------------------------------------===//
8097 // vop3
8098 //===----------------------------------------------------------------------===//
8099 
// Converts a "mul:N" value in place: 1 -> 0, 2 -> 1, 4 -> 2.
// Returns false and leaves Mul untouched for any other value.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
8107 
// Converts a "div:N" value in place: 1 -> 0, 2 -> 3.
// Returns false and leaves Div untouched for any other value.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
8121 
8122 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8123 // This is intentional and ensures compatibility with sp3.
8124 // See bug 35397 for details.
8125 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8126   if (BoundCtrl == 0 || BoundCtrl == 1) {
8127     if (!isGFX11Plus())
8128       BoundCtrl = 1;
8129     return true;
8130   }
8131   return false;
8132 }
8133 
8134 void AMDGPUAsmParser::onBeginOfFile() {
8135   if (!getParser().getStreamer().getTargetStreamer() ||
8136       getSTI().getTargetTriple().getArch() == Triple::r600)
8137     return;
8138 
8139   if (!getTargetStreamer().getTargetID())
8140     getTargetStreamer().initializeTargetID(getSTI(),
8141                                            getSTI().getFeatureString());
8142 
8143   if (isHsaAbi(getSTI()))
8144     getTargetStreamer().EmitDirectiveAMDGCNTarget();
8145 }
8146 
8147 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8148   StringRef Name = getTokenStr();
8149   if (Name == "mul") {
8150     return parseIntWithPrefix("mul", Operands,
8151                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8152   }
8153 
8154   if (Name == "div") {
8155     return parseIntWithPrefix("div", Operands,
8156                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8157   }
8158 
8159   return ParseStatus::NoMatch;
8160 }
8161 
8162 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8163 // the number of src operands present, then copies that bit into src0_modifiers.
8164 void cvtVOP3DstOpSelOnly(MCInst &Inst) {
8165   int Opc = Inst.getOpcode();
8166   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8167   if (OpSelIdx == -1)
8168     return;
8169 
8170   int SrcNum;
8171   const int Ops[] = { AMDGPU::OpName::src0,
8172                       AMDGPU::OpName::src1,
8173                       AMDGPU::OpName::src2 };
8174   for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8175        ++SrcNum)
8176     ;
8177   assert(SrcNum > 0);
8178 
8179   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8180 
8181   if ((OpSel & (1 << SrcNum)) != 0) {
8182     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8183     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8184     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8185   }
8186 }
8187 
8188 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8189                                    const OperandVector &Operands) {
8190   cvtVOP3P(Inst, Operands);
8191   cvtVOP3DstOpSelOnly(Inst);
8192 }
8193 
8194 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8195                                    OptionalImmIndexMap &OptionalIdx) {
8196   cvtVOP3P(Inst, Operands, OptionalIdx);
8197   cvtVOP3DstOpSelOnly(Inst);
8198 }
8199 
8200 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8201   return
8202       // 1. This operand is input modifiers
8203       Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8204       // 2. This is not last operand
8205       && Desc.NumOperands > (OpNum + 1)
8206       // 3. Next operand is register class
8207       && Desc.operands()[OpNum + 1].RegClass != -1
8208       // 4. Next register is not tied to any other operand
8209       && Desc.getOperandConstraint(OpNum + 1,
8210                                    MCOI::OperandConstraint::TIED_TO) == -1;
8211 }
8212 
8213 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8214 {
8215   OptionalImmIndexMap OptionalIdx;
8216   unsigned Opc = Inst.getOpcode();
8217 
8218   unsigned I = 1;
8219   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8220   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8221     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8222   }
8223 
8224   for (unsigned E = Operands.size(); I != E; ++I) {
8225     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8226     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8227       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8228     } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8229                Op.isInterpAttrChan()) {
8230       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8231     } else if (Op.isImmModifier()) {
8232       OptionalIdx[Op.getImmTy()] = I;
8233     } else {
8234       llvm_unreachable("unhandled operand type");
8235     }
8236   }
8237 
8238   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8239     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8240                           AMDGPUOperand::ImmTyHigh);
8241 
8242   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8243     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8244                           AMDGPUOperand::ImmTyClampSI);
8245 
8246   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8247     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8248                           AMDGPUOperand::ImmTyOModSI);
8249 }
8250 
// Converts parsed operands of a VINTERP instruction into MCInst operands.
//
// Defs are emitted first, then each remaining parsed operand is either emitted
// as a (modifiers, source) pair or recorded as an optional modifier. Optional
// operands are appended in the order clamp, op_sel (only if the opcode has
// it), wait_exp. Finally, if the opcode has op_sel, its bits are folded into
// the per-source modifier operands: bit J sets OP_SEL_0 on source J and bit 3
// sets DST_OP_SEL on src0_modifiers.
8251 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8252 {
8253   OptionalImmIndexMap OptionalIdx;
8254   unsigned Opc = Inst.getOpcode();
8255 
     // Parsed operand 0 is skipped; the defs follow it.
8256   unsigned I = 1;
8257   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8258   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8259     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8260   }
8261 
8262   for (unsigned E = Operands.size(); I != E; ++I) {
8263     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
       // The next MCInst slot (current operand count) decides how this parsed
       // operand is consumed.
8264     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8265       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8266     } else if (Op.isImmModifier()) {
8267       OptionalIdx[Op.getImmTy()] = I;
8268     } else {
8269       llvm_unreachable("unhandled operand type");
8270     }
8271   }
8272 
8273   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8274 
8275   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8276   if (OpSelIdx != -1)
8277     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8278 
8279   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8280 
     // Nothing to fold into the source modifiers without an op_sel operand.
8281   if (OpSelIdx == -1)
8282     return;
8283 
8284   const int Ops[] = { AMDGPU::OpName::src0,
8285                       AMDGPU::OpName::src1,
8286                       AMDGPU::OpName::src2 };
8287   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8288                          AMDGPU::OpName::src1_modifiers,
8289                          AMDGPU::OpName::src2_modifiers };
8290 
8291   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8292 
8293   for (int J = 0; J < 3; ++J) {
8294     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
       // Stop at the first source operand the opcode does not have.
8295     if (OpIdx == -1)
8296       break;
8297 
       // NOTE(review): ModIdx is used without a -1 check; this presumably
       // relies on every source of these opcodes having a modifiers operand.
8298     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8299     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8300 
8301     if ((OpSel & (1 << J)) != 0)
8302       ModVal |= SISrcMods::OP_SEL_0;
       // The destination bit (bit 3) is carried by src0_modifiers.
8303     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8304         (OpSel & (1 << 3)) != 0)
8305       ModVal |= SISrcMods::DST_OP_SEL;
8306 
8307     Inst.getOperand(ModIdx).setImm(ModVal);
8308   }
8309 }
8310 
// Converts parsed operands of a VOP3 instruction into MCInst operands.
//
// Defs are emitted first. Each remaining parsed operand is emitted as a
// (modifiers, source) pair, recorded in OptionalIdx if it is an optional
// modifier, or emitted as a plain register/immediate. clamp and omod are then
// appended, each only if the opcode has that named operand. OptionalIdx is an
// in/out map so that callers can consume further optional operands afterwards.
8311 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8312                               OptionalImmIndexMap &OptionalIdx) {
8313   unsigned Opc = Inst.getOpcode();
8314 
     // Parsed operand 0 is skipped; the defs follow it.
8315   unsigned I = 1;
8316   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8317   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8318     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8319   }
8320 
8321   for (unsigned E = Operands.size(); I != E; ++I) {
8322     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
       // The next MCInst slot (current operand count) decides how this parsed
       // operand is consumed.
8323     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8324       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8325     } else if (Op.isImmModifier()) {
8326       OptionalIdx[Op.getImmTy()] = I;
8327     } else if (Op.isRegOrImm()) {
8328       Op.addRegOrImmOperands(Inst, 1);
8329     } else {
8330       llvm_unreachable("unhandled operand type");
8331     }
8332   }
8333 
8334   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8335     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8336                           AMDGPUOperand::ImmTyClampSI);
8337 
8338   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8339     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8340                           AMDGPUOperand::ImmTyOModSI);
8341 
8342   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8343   // it has src2 register operand that is tied to dst operand
8344   // we don't allow modifiers for this operand in assembler so src2_modifiers
8345   // should be 0.
8346   if (isMAC(Opc)) {
       // Insert at the src2_modifiers position: first a zero modifiers
       // immediate, then (right after it) a copy of the dst operand as src2.
8347     auto it = Inst.begin();
8348     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8349     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8350     ++it;
8351     // Copy the operand to ensure it's not invalidated when Inst grows.
8352     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8353   }
8354 }
8355 
8356 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8357   OptionalImmIndexMap OptionalIdx;
8358   cvtVOP3(Inst, Operands, OptionalIdx);
8359 }
8360 
// Finishes the conversion of a VOP3P-style instruction whose regular operands
// have already been added to Inst (OptIdx holds the optional operands seen).
//
// Steps, in order:
//  1. For the listed V_CVT_SR_* opcodes, append a zero src2_modifiers
//     placeholder and a copy of operand 0.
//  2. If the opcode has vdst_in (and is not one of the listed DPP opcodes,
//     which are handled in cvtVOP3DPP), append a copy of operand 0.
//  3. Append op_sel, op_sel_hi, neg_lo and neg_hi, each only if the opcode
//     has that named operand. op_sel_hi defaults to -1 for packed
//     instructions and 0 otherwise.
//  4. Fold bit J of each of those operands into the modifiers operand of
//     source J (OP_SEL_0, OP_SEL_1, NEG, NEG_HI respectively).
8361 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8362                                OptionalImmIndexMap &OptIdx) {
8363   const int Opc = Inst.getOpcode();
8364   const MCInstrDesc &Desc = MII.get(Opc);
8365 
8366   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8367 
8368   if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8369       Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8370       Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 ||
8371       Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) {
8372     Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8373     Inst.addOperand(Inst.getOperand(0));
8374   }
8375 
8376   // Adding vdst_in operand is already covered for these DPP instructions in
8377   // cvtVOP3DPP.
8378   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8379       !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8380         Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8381         Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8382         Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) {
8383     assert(!IsPacked);
8384     Inst.addOperand(Inst.getOperand(0));
8385   }
8386 
8387   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8388   // instruction, and then figure out where to actually put the modifiers
8389 
8390   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8391   if (OpSelIdx != -1) {
8392     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8393   }
8394 
8395   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8396   if (OpSelHiIdx != -1) {
       // Packed instructions default to all op_sel_hi bits set.
8397     int DefaultVal = IsPacked ? -1 : 0;
8398     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8399                           DefaultVal);
8400   }
8401 
8402   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8403   if (NegLoIdx != -1)
8404     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8405 
8406   int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8407   if (NegHiIdx != -1)
8408     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8409 
8410   const int Ops[] = { AMDGPU::OpName::src0,
8411                       AMDGPU::OpName::src1,
8412                       AMDGPU::OpName::src2 };
8413   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8414                          AMDGPU::OpName::src1_modifiers,
8415                          AMDGPU::OpName::src2_modifiers };
8416 
     // Read back the values that were just appended; operands the opcode does
     // not have contribute 0.
8417   unsigned OpSel = 0;
8418   unsigned OpSelHi = 0;
8419   unsigned NegLo = 0;
8420   unsigned NegHi = 0;
8421 
8422   if (OpSelIdx != -1)
8423     OpSel = Inst.getOperand(OpSelIdx).getImm();
8424 
8425   if (OpSelHiIdx != -1)
8426     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8427 
8428   if (NegLoIdx != -1)
8429     NegLo = Inst.getOperand(NegLoIdx).getImm();
8430 
8431   if (NegHiIdx != -1)
8432     NegHi = Inst.getOperand(NegHiIdx).getImm();
8433 
8434   for (int J = 0; J < 3; ++J) {
8435     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
       // Stop at the first source operand the opcode does not have.
8436     if (OpIdx == -1)
8437       break;
8438 
8439     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8440 
       // A source without a modifiers operand is skipped, but later sources
       // are still examined.
8441     if (ModIdx == -1)
8442       continue;
8443 
8444     uint32_t ModVal = 0;
8445 
8446     if ((OpSel & (1 << J)) != 0)
8447       ModVal |= SISrcMods::OP_SEL_0;
8448 
8449     if ((OpSelHi & (1 << J)) != 0)
8450       ModVal |= SISrcMods::OP_SEL_1;
8451 
8452     if ((NegLo & (1 << J)) != 0)
8453       ModVal |= SISrcMods::NEG;
8454 
8455     if ((NegHi & (1 << J)) != 0)
8456       ModVal |= SISrcMods::NEG_HI;
8457 
       // OR into the existing value so modifiers set earlier are preserved.
8458     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8459   }
8460 }
8461 
8462 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8463   OptionalImmIndexMap OptIdx;
8464   cvtVOP3(Inst, Operands, OptIdx);
8465   cvtVOP3P(Inst, Operands, OptIdx);
8466 }
8467 
8468 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8469                                   unsigned i, unsigned Opc, unsigned OpName) {
8470   if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8471     ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8472   else
8473     ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8474 }
8475 
// Converts parsed SWMMAC operands into MCInst operands.
//
// The parsed operand layout is fixed: [1] dst, [2] src0, [3] src1, [4] src2,
// [5...] optional modifiers. The dst register is emitted twice, once as the
// def and once as the tied source. Optional operands are appended in the order
// index_key_8bit, index_key_16bit, clamp (each only if the opcode has it),
// and the remaining VOP3P modifiers are handled by cvtVOP3P.
8476 void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8477   unsigned Opc = Inst.getOpcode();
8478 
8479   ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8480   addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8481   addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8482   ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8483   ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8484 
     // Everything from index 5 on is recorded by immediate type.
8485   OptionalImmIndexMap OptIdx;
8486   for (unsigned i = 5; i < Operands.size(); ++i) {
8487     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8488     OptIdx[Op.getImmTy()] = i;
8489   }
8490 
8491   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8492     addOptionalImmOperand(Inst, Operands, OptIdx,
8493                           AMDGPUOperand::ImmTyIndexKey8bit);
8494 
8495   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8496     addOptionalImmOperand(Inst, Operands, OptIdx,
8497                           AMDGPUOperand::ImmTyIndexKey16bit);
8498 
8499   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8500     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
8501 
8502   cvtVOP3P(Inst, Operands, OptIdx);
8503 }
8504 
8505 //===----------------------------------------------------------------------===//
8506 // VOPD
8507 //===----------------------------------------------------------------------===//
8508 
8509 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8510   if (!hasVOPD(getSTI()))
8511     return ParseStatus::NoMatch;
8512 
8513   if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8514     SMLoc S = getLoc();
8515     lex();
8516     lex();
8517     Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8518     SMLoc OpYLoc = getLoc();
8519     StringRef OpYName;
8520     if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8521       Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8522       return ParseStatus::Success;
8523     }
8524     return Error(OpYLoc, "expected a VOPDY instruction after ::");
8525   }
8526   return ParseStatus::NoMatch;
8527 }
8528 
8529 // Create VOPD MCInst operands using parsed assembler operands.
8530 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8531   auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8532     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8533     if (Op.isReg()) {
8534       Op.addRegOperands(Inst, 1);
8535       return;
8536     }
8537     if (Op.isImm()) {
8538       Op.addImmOperands(Inst, 1);
8539       return;
8540     }
8541     llvm_unreachable("Unhandled operand type in cvtVOPD");
8542   };
8543 
8544   const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8545 
8546   // MCInst operands are ordered as follows:
8547   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8548 
8549   for (auto CompIdx : VOPD::COMPONENTS) {
8550     addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8551   }
8552 
8553   for (auto CompIdx : VOPD::COMPONENTS) {
8554     const auto &CInfo = InstInfo[CompIdx];
8555     auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8556     for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8557       addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8558     if (CInfo.hasSrc2Acc())
8559       addOp(CInfo.getIndexOfDstInParsedOperands());
8560   }
8561 }
8562 
8563 //===----------------------------------------------------------------------===//
8564 // dpp
8565 //===----------------------------------------------------------------------===//
8566 
8567 bool AMDGPUOperand::isDPP8() const {
8568   return isImmTy(ImmTyDPP8);
8569 }
8570 
8571 bool AMDGPUOperand::isDPPCtrl() const {
8572   using namespace AMDGPU::DPP;
8573 
8574   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8575   if (result) {
8576     int64_t Imm = getImm();
8577     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8578            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8579            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8580            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8581            (Imm == DppCtrl::WAVE_SHL1) ||
8582            (Imm == DppCtrl::WAVE_ROL1) ||
8583            (Imm == DppCtrl::WAVE_SHR1) ||
8584            (Imm == DppCtrl::WAVE_ROR1) ||
8585            (Imm == DppCtrl::ROW_MIRROR) ||
8586            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8587            (Imm == DppCtrl::BCAST15) ||
8588            (Imm == DppCtrl::BCAST31) ||
8589            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8590            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8591   }
8592   return false;
8593 }
8594 
8595 //===----------------------------------------------------------------------===//
8596 // mAI
8597 //===----------------------------------------------------------------------===//
8598 
8599 bool AMDGPUOperand::isBLGP() const {
8600   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8601 }
8602 
8603 bool AMDGPUOperand::isCBSZ() const {
8604   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8605 }
8606 
8607 bool AMDGPUOperand::isABID() const {
8608   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8609 }
8610 
8611 bool AMDGPUOperand::isS16Imm() const {
8612   return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8613 }
8614 
8615 bool AMDGPUOperand::isU16Imm() const {
8616   return isImmLiteral() && isUInt<16>(getImm());
8617 }
8618 
8619 //===----------------------------------------------------------------------===//
8620 // dim
8621 //===----------------------------------------------------------------------===//
8622 
8623 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8624   // We want to allow "dim:1D" etc.,
8625   // but the initial 1 is tokenized as an integer.
8626   std::string Token;
8627   if (isToken(AsmToken::Integer)) {
8628     SMLoc Loc = getToken().getEndLoc();
8629     Token = std::string(getTokenStr());
8630     lex();
8631     if (getLoc() != Loc)
8632       return false;
8633   }
8634 
8635   StringRef Suffix;
8636   if (!parseId(Suffix))
8637     return false;
8638   Token += Suffix;
8639 
8640   StringRef DimId = Token;
8641   if (DimId.starts_with("SQ_RSRC_IMG_"))
8642     DimId = DimId.drop_front(12);
8643 
8644   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8645   if (!DimInfo)
8646     return false;
8647 
8648   Encoding = DimInfo->Encoding;
8649   return true;
8650 }
8651 
8652 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8653   if (!isGFX10Plus())
8654     return ParseStatus::NoMatch;
8655 
8656   SMLoc S = getLoc();
8657 
8658   if (!trySkipId("dim", AsmToken::Colon))
8659     return ParseStatus::NoMatch;
8660 
8661   unsigned Encoding;
8662   SMLoc Loc = getLoc();
8663   if (!parseDimId(Encoding))
8664     return Error(Loc, "invalid dim value");
8665 
8666   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8667                                               AMDGPUOperand::ImmTyDim));
8668   return ParseStatus::Success;
8669 }
8670 
8671 //===----------------------------------------------------------------------===//
8672 // dpp
8673 //===----------------------------------------------------------------------===//
8674 
8675 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8676   SMLoc S = getLoc();
8677 
8678   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8679     return ParseStatus::NoMatch;
8680 
8681   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8682 
8683   int64_t Sels[8];
8684 
8685   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8686     return ParseStatus::Failure;
8687 
8688   for (size_t i = 0; i < 8; ++i) {
8689     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8690       return ParseStatus::Failure;
8691 
8692     SMLoc Loc = getLoc();
8693     if (getParser().parseAbsoluteExpression(Sels[i]))
8694       return ParseStatus::Failure;
8695     if (0 > Sels[i] || 7 < Sels[i])
8696       return Error(Loc, "expected a 3-bit value");
8697   }
8698 
8699   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8700     return ParseStatus::Failure;
8701 
8702   unsigned DPP8 = 0;
8703   for (size_t i = 0; i < 8; ++i)
8704     DPP8 |= (Sels[i] << (i * 3));
8705 
8706   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8707   return ParseStatus::Success;
8708 }
8709 
8710 bool
8711 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8712                                     const OperandVector &Operands) {
8713   if (Ctrl == "row_newbcast")
8714     return isGFX90A();
8715 
8716   if (Ctrl == "row_share" ||
8717       Ctrl == "row_xmask")
8718     return isGFX10Plus();
8719 
8720   if (Ctrl == "wave_shl" ||
8721       Ctrl == "wave_shr" ||
8722       Ctrl == "wave_rol" ||
8723       Ctrl == "wave_ror" ||
8724       Ctrl == "row_bcast")
8725     return isVI() || isGFX9();
8726 
8727   return Ctrl == "row_mirror" ||
8728          Ctrl == "row_half_mirror" ||
8729          Ctrl == "quad_perm" ||
8730          Ctrl == "row_shl" ||
8731          Ctrl == "row_shr" ||
8732          Ctrl == "row_ror";
8733 }
8734 
8735 int64_t
8736 AMDGPUAsmParser::parseDPPCtrlPerm() {
8737   // quad_perm:[%d,%d,%d,%d]
8738 
8739   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8740     return -1;
8741 
8742   int64_t Val = 0;
8743   for (int i = 0; i < 4; ++i) {
8744     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8745       return -1;
8746 
8747     int64_t Temp;
8748     SMLoc Loc = getLoc();
8749     if (getParser().parseAbsoluteExpression(Temp))
8750       return -1;
8751     if (Temp < 0 || Temp > 3) {
8752       Error(Loc, "expected a 2-bit value");
8753       return -1;
8754     }
8755 
8756     Val += (Temp << i * 2);
8757   }
8758 
8759   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8760     return -1;
8761 
8762   return Val;
8763 }
8764 
8765 int64_t
8766 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8767   using namespace AMDGPU::DPP;
8768 
8769   // sel:%d
8770 
8771   int64_t Val;
8772   SMLoc Loc = getLoc();
8773 
8774   if (getParser().parseAbsoluteExpression(Val))
8775     return -1;
8776 
8777   struct DppCtrlCheck {
8778     int64_t Ctrl;
8779     int Lo;
8780     int Hi;
8781   };
8782 
8783   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8784     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8785     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8786     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8787     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8788     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8789     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8790     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8791     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8792     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8793     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8794     .Default({-1, 0, 0});
8795 
8796   bool Valid;
8797   if (Check.Ctrl == -1) {
8798     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8799     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8800   } else {
8801     Valid = Check.Lo <= Val && Val <= Check.Hi;
8802     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8803   }
8804 
8805   if (!Valid) {
8806     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8807     return -1;
8808   }
8809 
8810   return Val;
8811 }
8812 
8813 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8814   using namespace AMDGPU::DPP;
8815 
8816   if (!isToken(AsmToken::Identifier) ||
8817       !isSupportedDPPCtrl(getTokenStr(), Operands))
8818     return ParseStatus::NoMatch;
8819 
8820   SMLoc S = getLoc();
8821   int64_t Val = -1;
8822   StringRef Ctrl;
8823 
8824   parseId(Ctrl);
8825 
8826   if (Ctrl == "row_mirror") {
8827     Val = DppCtrl::ROW_MIRROR;
8828   } else if (Ctrl == "row_half_mirror") {
8829     Val = DppCtrl::ROW_HALF_MIRROR;
8830   } else {
8831     if (skipToken(AsmToken::Colon, "expected a colon")) {
8832       if (Ctrl == "quad_perm") {
8833         Val = parseDPPCtrlPerm();
8834       } else {
8835         Val = parseDPPCtrlSel(Ctrl);
8836       }
8837     }
8838   }
8839 
8840   if (Val == -1)
8841     return ParseStatus::Failure;
8842 
8843   Operands.push_back(
8844     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8845   return ParseStatus::Success;
8846 }
8847 
8848 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
8849                                  bool IsDPP8) {
8850   OptionalImmIndexMap OptionalIdx;
8851   unsigned Opc = Inst.getOpcode();
8852   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8853 
8854   // MAC instructions are special because they have 'old'
8855   // operand which is not tied to dst (but assumed to be).
8856   // They also have dummy unused src2_modifiers.
8857   int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
8858   int Src2ModIdx =
8859       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
8860   bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
8861                Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
8862 
8863   unsigned I = 1;
8864   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8865     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8866   }
8867 
8868   int Fi = 0;
8869   for (unsigned E = Operands.size(); I != E; ++I) {
8870 
8871     if (IsMAC) {
8872       int NumOperands = Inst.getNumOperands();
8873       if (OldIdx == NumOperands) {
8874         // Handle old operand
8875         constexpr int DST_IDX = 0;
8876         Inst.addOperand(Inst.getOperand(DST_IDX));
8877       } else if (Src2ModIdx == NumOperands) {
8878         // Add unused dummy src2_modifiers
8879         Inst.addOperand(MCOperand::createImm(0));
8880       }
8881     }
8882 
8883     int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
8884     if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
8885       Inst.addOperand(Inst.getOperand(0));
8886     }
8887 
8888     bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
8889                           Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 ||
8890                           Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
8891                           Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12;
8892     if (IsVOP3CvtSrDpp) {
8893       if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
8894         Inst.addOperand(MCOperand::createImm(0));
8895         Inst.addOperand(MCOperand::createReg(0));
8896       }
8897     }
8898 
8899     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8900                                             MCOI::TIED_TO);
8901     if (TiedTo != -1) {
8902       assert((unsigned)TiedTo < Inst.getNumOperands());
8903       // handle tied old or src2 for MAC instructions
8904       Inst.addOperand(Inst.getOperand(TiedTo));
8905     }
8906     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8907     // Add the register arguments
8908     if (IsDPP8 && Op.isDppFI()) {
8909       Fi = Op.getImm();
8910     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8911       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8912     } else if (Op.isReg()) {
8913       Op.addRegOperands(Inst, 1);
8914     } else if (Op.isImm() &&
8915                Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
8916       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8917       Op.addImmOperands(Inst, 1);
8918     } else if (Op.isImm()) {
8919       OptionalIdx[Op.getImmTy()] = I;
8920     } else {
8921       llvm_unreachable("unhandled operand type");
8922     }
8923   }
8924   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8925     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8926 
8927   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8928     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8929 
8930   if (Desc.TSFlags & SIInstrFlags::VOP3P)
8931     cvtVOP3P(Inst, Operands, OptionalIdx);
8932   else if (Desc.TSFlags & SIInstrFlags::VOP3)
8933     cvtVOP3OpSel(Inst, Operands, OptionalIdx);
8934   else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
8935     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8936   }
8937 
8938   if (IsDPP8) {
8939     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8940     using namespace llvm::AMDGPU::DPP;
8941     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8942   } else {
8943     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8944     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8945     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8946     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8947 
8948     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
8949       addOptionalImmOperand(Inst, Operands, OptionalIdx,
8950                             AMDGPUOperand::ImmTyDppFI);
8951   }
8952 }
8953 
8954 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8955   OptionalImmIndexMap OptionalIdx;
8956 
8957   unsigned I = 1;
8958   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8959   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8960     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8961   }
8962 
8963   int Fi = 0;
8964   for (unsigned E = Operands.size(); I != E; ++I) {
8965     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8966                                             MCOI::TIED_TO);
8967     if (TiedTo != -1) {
8968       assert((unsigned)TiedTo < Inst.getNumOperands());
8969       // handle tied old or src2 for MAC instructions
8970       Inst.addOperand(Inst.getOperand(TiedTo));
8971     }
8972     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8973     // Add the register arguments
8974     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8975       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8976       // Skip it.
8977       continue;
8978     }
8979 
8980     if (IsDPP8) {
8981       if (Op.isDPP8()) {
8982         Op.addImmOperands(Inst, 1);
8983       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8984         Op.addRegWithFPInputModsOperands(Inst, 2);
8985       } else if (Op.isDppFI()) {
8986         Fi = Op.getImm();
8987       } else if (Op.isReg()) {
8988         Op.addRegOperands(Inst, 1);
8989       } else {
8990         llvm_unreachable("Invalid operand type");
8991       }
8992     } else {
8993       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8994         Op.addRegWithFPInputModsOperands(Inst, 2);
8995       } else if (Op.isReg()) {
8996         Op.addRegOperands(Inst, 1);
8997       } else if (Op.isDPPCtrl()) {
8998         Op.addImmOperands(Inst, 1);
8999       } else if (Op.isImm()) {
9000         // Handle optional arguments
9001         OptionalIdx[Op.getImmTy()] = I;
9002       } else {
9003         llvm_unreachable("Invalid operand type");
9004       }
9005     }
9006   }
9007 
9008   if (IsDPP8) {
9009     using namespace llvm::AMDGPU::DPP;
9010     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9011   } else {
9012     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9013     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9014     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9015     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9016       addOptionalImmOperand(Inst, Operands, OptionalIdx,
9017                             AMDGPUOperand::ImmTyDppFI);
9018     }
9019   }
9020 }
9021 
9022 //===----------------------------------------------------------------------===//
9023 // sdwa
9024 //===----------------------------------------------------------------------===//
9025 
9026 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9027                                           StringRef Prefix,
9028                                           AMDGPUOperand::ImmTy Type) {
9029   using namespace llvm::AMDGPU::SDWA;
9030 
9031   SMLoc S = getLoc();
9032   StringRef Value;
9033 
9034   SMLoc StringLoc;
9035   ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9036   if (!Res.isSuccess())
9037     return Res;
9038 
9039   int64_t Int;
9040   Int = StringSwitch<int64_t>(Value)
9041         .Case("BYTE_0", SdwaSel::BYTE_0)
9042         .Case("BYTE_1", SdwaSel::BYTE_1)
9043         .Case("BYTE_2", SdwaSel::BYTE_2)
9044         .Case("BYTE_3", SdwaSel::BYTE_3)
9045         .Case("WORD_0", SdwaSel::WORD_0)
9046         .Case("WORD_1", SdwaSel::WORD_1)
9047         .Case("DWORD", SdwaSel::DWORD)
9048         .Default(0xffffffff);
9049 
9050   if (Int == 0xffffffff)
9051     return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9052 
9053   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9054   return ParseStatus::Success;
9055 }
9056 
9057 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9058   using namespace llvm::AMDGPU::SDWA;
9059 
9060   SMLoc S = getLoc();
9061   StringRef Value;
9062 
9063   SMLoc StringLoc;
9064   ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9065   if (!Res.isSuccess())
9066     return Res;
9067 
9068   int64_t Int;
9069   Int = StringSwitch<int64_t>(Value)
9070         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9071         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9072         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9073         .Default(0xffffffff);
9074 
9075   if (Int == 0xffffffff)
9076     return Error(StringLoc, "invalid dst_unused value");
9077 
9078   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9079   return ParseStatus::Success;
9080 }
9081 
9082 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9083   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9084 }
9085 
9086 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9087   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9088 }
9089 
9090 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9091   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9092 }
9093 
9094 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9095   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9096 }
9097 
9098 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9099   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9100 }
9101 
9102 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9103                               uint64_t BasicInstType,
9104                               bool SkipDstVcc,
9105                               bool SkipSrcVcc) {
9106   using namespace llvm::AMDGPU::SDWA;
9107 
9108   OptionalImmIndexMap OptionalIdx;
9109   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9110   bool SkippedVcc = false;
9111 
9112   unsigned I = 1;
9113   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9114   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9115     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9116   }
9117 
9118   for (unsigned E = Operands.size(); I != E; ++I) {
9119     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9120     if (SkipVcc && !SkippedVcc && Op.isReg() &&
9121         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9122       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
9123       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9124       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9125       // Skip VCC only if we didn't skip it on previous iteration.
9126       // Note that src0 and src1 occupy 2 slots each because of modifiers.
9127       if (BasicInstType == SIInstrFlags::VOP2 &&
9128           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9129            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9130         SkippedVcc = true;
9131         continue;
9132       } else if (BasicInstType == SIInstrFlags::VOPC &&
9133                  Inst.getNumOperands() == 0) {
9134         SkippedVcc = true;
9135         continue;
9136       }
9137     }
9138     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9139       Op.addRegOrImmWithInputModsOperands(Inst, 2);
9140     } else if (Op.isImm()) {
9141       // Handle optional arguments
9142       OptionalIdx[Op.getImmTy()] = I;
9143     } else {
9144       llvm_unreachable("Invalid operand type");
9145     }
9146     SkippedVcc = false;
9147   }
9148 
9149   const unsigned Opc = Inst.getOpcode();
9150   if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9151       Opc != AMDGPU::V_NOP_sdwa_vi) {
9152     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
9153     switch (BasicInstType) {
9154     case SIInstrFlags::VOP1:
9155       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9156         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9157                               AMDGPUOperand::ImmTyClampSI, 0);
9158 
9159       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9160         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9161                               AMDGPUOperand::ImmTyOModSI, 0);
9162 
9163       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9164         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9165                               AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9166 
9167       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9168         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9169                               AMDGPUOperand::ImmTySDWADstUnused,
9170                               DstUnused::UNUSED_PRESERVE);
9171 
9172       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9173       break;
9174 
9175     case SIInstrFlags::VOP2:
9176       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9177 
9178       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9179         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9180 
9181       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9182       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9183       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9184       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9185       break;
9186 
9187     case SIInstrFlags::VOPC:
9188       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9189         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9190       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9191       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9192       break;
9193 
9194     default:
9195       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9196     }
9197   }
9198 
9199   // special case v_mac_{f16, f32}:
9200   // it has src2 register operand that is tied to dst operand
9201   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9202       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
9203     auto it = Inst.begin();
9204     std::advance(
9205       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9206     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9207   }
9208 }
9209 
9210 /// Force static initialization.
9211 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9212   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9213   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9214 }
9215 
9216 #define GET_REGISTER_MATCHER
9217 #define GET_MATCHER_IMPLEMENTATION
9218 #define GET_MNEMONIC_SPELL_CHECKER
9219 #define GET_MNEMONIC_CHECKER
9220 #include "AMDGPUGenAsmMatcher.inc"
9221 
9222 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9223                                                 unsigned MCK) {
9224   switch (MCK) {
9225   case MCK_addr64:
9226     return parseTokenOp("addr64", Operands);
9227   case MCK_done:
9228     return parseTokenOp("done", Operands);
9229   case MCK_idxen:
9230     return parseTokenOp("idxen", Operands);
9231   case MCK_lds:
9232     return parseTokenOp("lds", Operands);
9233   case MCK_offen:
9234     return parseTokenOp("offen", Operands);
9235   case MCK_off:
9236     return parseTokenOp("off", Operands);
9237   case MCK_row_95_en:
9238     return parseTokenOp("row_en", Operands);
9239   case MCK_gds:
9240     return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9241   case MCK_tfe:
9242     return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9243   }
9244   return tryCustomParseOperand(Operands, MCK);
9245 }
9246 
9247 // This function should be defined after auto-generated include so that we have
9248 // MatchClassKind enum defined
9249 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9250                                                      unsigned Kind) {
9251   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9252   // But MatchInstructionImpl() expects to meet token and fails to validate
9253   // operand. This method checks if we are given immediate operand but expect to
9254   // get corresponding token.
9255   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9256   switch (Kind) {
9257   case MCK_addr64:
9258     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9259   case MCK_gds:
9260     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9261   case MCK_lds:
9262     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9263   case MCK_idxen:
9264     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9265   case MCK_offen:
9266     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9267   case MCK_tfe:
9268     return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9269   case MCK_SSrcB32:
9270     // When operands have expression values, they will return true for isToken,
9271     // because it is not possible to distinguish between a token and an
9272     // expression at parse time. MatchInstructionImpl() will always try to
9273     // match an operand as a token, when isToken returns true, and when the
9274     // name of the expression is not a valid token, the match will fail,
9275     // so we need to handle it here.
9276     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9277   case MCK_SSrcF32:
9278     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9279   case MCK_SOPPBrTarget:
9280     return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9281   case MCK_VReg32OrOff:
9282     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9283   case MCK_InterpSlot:
9284     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9285   case MCK_InterpAttr:
9286     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9287   case MCK_InterpAttrChan:
9288     return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9289   case MCK_SReg_64:
9290   case MCK_SReg_64_XEXEC:
9291     // Null is defined as a 32-bit register but
9292     // it should also be enabled with 64-bit operands.
9293     // The following code enables it for SReg_64 operands
9294     // used as source and destination. Remaining source
9295     // operands are handled in isInlinableImm.
9296     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9297   default:
9298     return Match_InvalidOperand;
9299   }
9300 }
9301 
9302 //===----------------------------------------------------------------------===//
9303 // endpgm
9304 //===----------------------------------------------------------------------===//
9305 
9306 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9307   SMLoc S = getLoc();
9308   int64_t Imm = 0;
9309 
9310   if (!parseExpr(Imm)) {
9311     // The operand is optional, if not present default to 0
9312     Imm = 0;
9313   }
9314 
9315   if (!isUInt<16>(Imm))
9316     return Error(S, "expected a 16-bit value");
9317 
9318   Operands.push_back(
9319       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9320   return ParseStatus::Success;
9321 }
9322 
9323 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9324 
9325 //===----------------------------------------------------------------------===//
9326 // LDSDIR
9327 //===----------------------------------------------------------------------===//
9328 
9329 bool AMDGPUOperand::isWaitVDST() const {
9330   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9331 }
9332 
9333 bool AMDGPUOperand::isWaitVAVDst() const {
9334   return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
9335 }
9336 
9337 bool AMDGPUOperand::isWaitVMVSrc() const {
9338   return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
9339 }
9340 
9341 //===----------------------------------------------------------------------===//
9342 // VINTERP
9343 //===----------------------------------------------------------------------===//
9344 
9345 bool AMDGPUOperand::isWaitEXP() const {
9346   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9347 }
9348 
9349 //===----------------------------------------------------------------------===//
9350 // Split Barrier
9351 //===----------------------------------------------------------------------===//
9352 
9353 bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
9354