1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/CodeGen/MachineValueType.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCExpr.h"
28 #include "llvm/MC/MCInst.h"
29 #include "llvm/MC/MCInstrDesc.h"
30 #include "llvm/MC/MCParser/MCAsmLexer.h"
31 #include "llvm/MC/MCParser/MCAsmParser.h"
32 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
33 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/AMDGPUMetadata.h"
37 #include "llvm/Support/AMDHSAKernelDescriptor.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/TargetParser/TargetParser.h"
41 #include <optional>
42 
43 using namespace llvm;
44 using namespace llvm::AMDGPU;
45 using namespace llvm::amdhsa;
46 
47 namespace {
48 
49 class AMDGPUAsmParser;
50 
51 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
52 
53 //===----------------------------------------------------------------------===//
54 // Operand
55 //===----------------------------------------------------------------------===//
56 
57 class AMDGPUOperand : public MCParsedAsmOperand {
58   enum KindTy {
59     Token,
60     Immediate,
61     Register,
62     Expression
63   } Kind;
64 
65   SMLoc StartLoc, EndLoc;
66   const AMDGPUAsmParser *AsmParser;
67 
68 public:
69   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70       : Kind(Kind_), AsmParser(AsmParser_) {}
71 
72   using Ptr = std::unique_ptr<AMDGPUOperand>;
73 
74   struct Modifiers {
75     bool Abs = false;
76     bool Neg = false;
77     bool Sext = false;
78     bool Lit = false;
79 
80     bool hasFPModifiers() const { return Abs || Neg; }
81     bool hasIntModifiers() const { return Sext; }
82     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
83 
84     int64_t getFPModifiersOperand() const {
85       int64_t Operand = 0;
86       Operand |= Abs ? SISrcMods::ABS : 0u;
87       Operand |= Neg ? SISrcMods::NEG : 0u;
88       return Operand;
89     }
90 
91     int64_t getIntModifiersOperand() const {
92       int64_t Operand = 0;
93       Operand |= Sext ? SISrcMods::SEXT : 0u;
94       return Operand;
95     }
96 
97     int64_t getModifiersOperand() const {
98       assert(!(hasFPModifiers() && hasIntModifiers())
99            && "fp and int modifiers should not be used simultaneously");
100       if (hasFPModifiers()) {
101         return getFPModifiersOperand();
102       } else if (hasIntModifiers()) {
103         return getIntModifiersOperand();
104       } else {
105         return 0;
106       }
107     }
108 
109     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
110   };
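  // Informal sketch (not upstream documentation): for a floating-point source
  // written as "-|v0|" the parser sets Neg and Abs, and getModifiersOperand()
  // yields SISrcMods::NEG | SISrcMods::ABS; an integer source written as
  // "sext(v0)" sets Sext instead. As the assert above notes, the FP and
  // integer modifier groups are never combined on a single operand.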
111 
112   enum ImmTy {
113     ImmTyNone,
114     ImmTyGDS,
115     ImmTyLDS,
116     ImmTyOffen,
117     ImmTyIdxen,
118     ImmTyAddr64,
119     ImmTyOffset,
120     ImmTyInstOffset,
121     ImmTyOffset0,
122     ImmTyOffset1,
123     ImmTySMEMOffsetMod,
124     ImmTyCPol,
125     ImmTyTFE,
126     ImmTyD16,
127     ImmTyClampSI,
128     ImmTyOModSI,
129     ImmTySDWADstSel,
130     ImmTySDWASrc0Sel,
131     ImmTySDWASrc1Sel,
132     ImmTySDWADstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyInterpAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTyDPP8,
155     ImmTyDppCtrl,
156     ImmTyDppRowMask,
157     ImmTyDppBankMask,
158     ImmTyDppBoundCtrl,
159     ImmTyDppFI,
160     ImmTySwizzle,
161     ImmTyGprIdxMode,
162     ImmTyHigh,
163     ImmTyBLGP,
164     ImmTyCBSZ,
165     ImmTyABID,
166     ImmTyEndpgm,
167     ImmTyWaitVDST,
168     ImmTyWaitEXP,
169     ImmTyWaitVAVDst,
170     ImmTyWaitVMVSrc,
171   };
172 
173   // Immediate operand kind.
174   // It helps to identify the location of an offending operand after an error.
175   // Note that regular literals and mandatory literals (KImm) must be handled
176   // differently. When looking for an offending operand, we should usually
177   // ignore mandatory literals because they are part of the instruction and
178   // cannot be changed. Report location of mandatory operands only for VOPD,
179   // when both OpX and OpY have a KImm and there are no other literals.
180   enum ImmKindTy {
181     ImmKindTyNone,
182     ImmKindTyLiteral,
183     ImmKindTyMandatoryLiteral,
184     ImmKindTyConst,
185   };
186 
187 private:
188   struct TokOp {
189     const char *Data;
190     unsigned Length;
191   };
192 
193   struct ImmOp {
194     int64_t Val;
195     ImmTy Type;
196     bool IsFPImm;
197     mutable ImmKindTy Kind;
198     Modifiers Mods;
199   };
200 
201   struct RegOp {
202     unsigned RegNo;
203     Modifiers Mods;
204   };
205 
206   union {
207     TokOp Tok;
208     ImmOp Imm;
209     RegOp Reg;
210     const MCExpr *Expr;
211   };
212 
213 public:
214   bool isToken() const override { return Kind == Token; }
215 
216   bool isSymbolRefExpr() const {
217     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
218   }
219 
220   bool isImm() const override {
221     return Kind == Immediate;
222   }
223 
224   void setImmKindNone() const {
225     assert(isImm());
226     Imm.Kind = ImmKindTyNone;
227   }
228 
229   void setImmKindLiteral() const {
230     assert(isImm());
231     Imm.Kind = ImmKindTyLiteral;
232   }
233 
234   void setImmKindMandatoryLiteral() const {
235     assert(isImm());
236     Imm.Kind = ImmKindTyMandatoryLiteral;
237   }
238 
239   void setImmKindConst() const {
240     assert(isImm());
241     Imm.Kind = ImmKindTyConst;
242   }
243 
244   bool IsImmKindLiteral() const {
245     return isImm() && Imm.Kind == ImmKindTyLiteral;
246   }
247 
248   bool IsImmKindMandatoryLiteral() const {
249     return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
250   }
251 
252   bool isImmKindConst() const {
253     return isImm() && Imm.Kind == ImmKindTyConst;
254   }
255 
256   bool isInlinableImm(MVT type) const;
257   bool isLiteralImm(MVT type) const;
258 
259   bool isRegKind() const {
260     return Kind == Register;
261   }
262 
263   bool isReg() const override {
264     return isRegKind() && !hasModifiers();
265   }
266 
267   bool isRegOrInline(unsigned RCID, MVT type) const {
268     return isRegClass(RCID) || isInlinableImm(type);
269   }
270 
271   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
272     return isRegOrInline(RCID, type) || isLiteralImm(type);
273   }
274 
275   bool isRegOrImmWithInt16InputMods() const {
276     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
277   }
278 
279   bool isRegOrImmWithIntT16InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
281   }
282 
283   bool isRegOrImmWithInt32InputMods() const {
284     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
285   }
286 
287   bool isRegOrInlineImmWithInt16InputMods() const {
288     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
289   }
290 
291   bool isRegOrInlineImmWithInt32InputMods() const {
292     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
293   }
294 
295   bool isRegOrImmWithInt64InputMods() const {
296     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
297   }
298 
299   bool isRegOrImmWithFP16InputMods() const {
300     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
301   }
302 
303   bool isRegOrImmWithFPT16InputMods() const {
304     return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
305   }
306 
307   bool isRegOrImmWithFP32InputMods() const {
308     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
309   }
310 
311   bool isRegOrImmWithFP64InputMods() const {
312     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
313   }
314 
315   bool isRegOrInlineImmWithFP16InputMods() const {
316     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
317   }
318 
319   bool isRegOrInlineImmWithFP32InputMods() const {
320     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
321   }
322 
323 
324   bool isVReg() const {
325     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
326            isRegClass(AMDGPU::VReg_64RegClassID) ||
327            isRegClass(AMDGPU::VReg_96RegClassID) ||
328            isRegClass(AMDGPU::VReg_128RegClassID) ||
329            isRegClass(AMDGPU::VReg_160RegClassID) ||
330            isRegClass(AMDGPU::VReg_192RegClassID) ||
331            isRegClass(AMDGPU::VReg_256RegClassID) ||
332            isRegClass(AMDGPU::VReg_512RegClassID) ||
333            isRegClass(AMDGPU::VReg_1024RegClassID);
334   }
335 
336   bool isVReg32() const {
337     return isRegClass(AMDGPU::VGPR_32RegClassID);
338   }
339 
340   bool isVReg32OrOff() const {
341     return isOff() || isVReg32();
342   }
343 
344   bool isNull() const {
345     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
346   }
347 
348   bool isVRegWithInputMods() const;
349   bool isT16VRegWithInputMods() const;
350 
351   bool isSDWAOperand(MVT type) const;
352   bool isSDWAFP16Operand() const;
353   bool isSDWAFP32Operand() const;
354   bool isSDWAInt16Operand() const;
355   bool isSDWAInt32Operand() const;
356 
357   bool isImmTy(ImmTy ImmT) const {
358     return isImm() && Imm.Type == ImmT;
359   }
360 
361   template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
362 
363   bool isImmLiteral() const { return isImmTy(ImmTyNone); }
364 
365   bool isImmModifier() const {
366     return isImm() && Imm.Type != ImmTyNone;
367   }
368 
369   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
370   bool isDMask() const { return isImmTy(ImmTyDMask); }
371   bool isDim() const { return isImmTy(ImmTyDim); }
372   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
373   bool isOff() const { return isImmTy(ImmTyOff); }
374   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
375   bool isOffen() const { return isImmTy(ImmTyOffen); }
376   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
377   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
378   bool isOffset() const { return isImmTy(ImmTyOffset); }
379   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
380   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
381   bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
382   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
383   bool isGDS() const { return isImmTy(ImmTyGDS); }
384   bool isLDS() const { return isImmTy(ImmTyLDS); }
385   bool isCPol() const { return isImmTy(ImmTyCPol); }
386   bool isTFE() const { return isImmTy(ImmTyTFE); }
387   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
388   bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
389   bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
390   bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
391   bool isDppFI() const { return isImmTy(ImmTyDppFI); }
392   bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
393   bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
394   bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
395   bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
396   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
397   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
398   bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
399   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
400   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
401   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
402   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
403 
404   bool isRegOrImm() const {
405     return isReg() || isImm();
406   }
407 
408   bool isRegClass(unsigned RCID) const;
409 
410   bool isInlineValue() const;
411 
412   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
413     return isRegOrInline(RCID, type) && !hasModifiers();
414   }
415 
416   bool isSCSrcB16() const {
417     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
418   }
419 
420   bool isSCSrcV2B16() const {
421     return isSCSrcB16();
422   }
423 
424   bool isSCSrcB32() const {
425     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
426   }
427 
428   bool isSCSrcB64() const {
429     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
430   }
431 
432   bool isBoolReg() const;
433 
434   bool isSCSrcF16() const {
435     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
436   }
437 
438   bool isSCSrcV2F16() const {
439     return isSCSrcF16();
440   }
441 
442   bool isSCSrcF32() const {
443     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
444   }
445 
446   bool isSCSrcF64() const {
447     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
448   }
449 
450   bool isSSrcB32() const {
451     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
452   }
453 
454   bool isSSrcB16() const {
455     return isSCSrcB16() || isLiteralImm(MVT::i16);
456   }
457 
458   bool isSSrcV2B16() const {
459     llvm_unreachable("cannot happen");
460     return isSSrcB16();
461   }
462 
463   bool isSSrcB64() const {
464     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
465     // See isVSrc64().
466     return isSCSrcB64() || isLiteralImm(MVT::i64);
467   }
468 
469   bool isSSrcF32() const {
470     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
471   }
472 
473   bool isSSrcF64() const {
474     return isSCSrcB64() || isLiteralImm(MVT::f64);
475   }
476 
477   bool isSSrcF16() const {
478     return isSCSrcB16() || isLiteralImm(MVT::f16);
479   }
480 
481   bool isSSrcV2F16() const {
482     llvm_unreachable("cannot happen");
483     return isSSrcF16();
484   }
485 
486   bool isSSrcV2FP32() const {
487     llvm_unreachable("cannot happen");
488     return isSSrcF32();
489   }
490 
491   bool isSCSrcV2FP32() const {
492     llvm_unreachable("cannot happen");
493     return isSCSrcF32();
494   }
495 
496   bool isSSrcV2INT32() const {
497     llvm_unreachable("cannot happen");
498     return isSSrcB32();
499   }
500 
501   bool isSCSrcV2INT32() const {
502     llvm_unreachable("cannot happen");
503     return isSCSrcB32();
504   }
505 
506   bool isSSrcOrLdsB32() const {
507     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
508            isLiteralImm(MVT::i32) || isExpr();
509   }
510 
511   bool isVCSrcB32() const {
512     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
513   }
514 
515   bool isVCSrcB64() const {
516     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
517   }
518 
519   bool isVCSrcTB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
521   }
522 
523   bool isVCSrcTB16_Lo128() const {
524     return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
525   }
526 
527   bool isVCSrcFake16B16_Lo128() const {
528     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
529   }
530 
531   bool isVCSrcB16() const {
532     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
533   }
534 
535   bool isVCSrcV2B16() const {
536     return isVCSrcB16();
537   }
538 
539   bool isVCSrcF32() const {
540     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
541   }
542 
543   bool isVCSrcF64() const {
544     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
545   }
546 
547   bool isVCSrcTF16() const {
548     return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
549   }
550 
551   bool isVCSrcTF16_Lo128() const {
552     return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
553   }
554 
555   bool isVCSrcFake16F16_Lo128() const {
556     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
557   }
558 
559   bool isVCSrcF16() const {
560     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
561   }
562 
563   bool isVCSrcV2F16() const {
564     return isVCSrcF16();
565   }
566 
567   bool isVSrcB32() const {
568     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
569   }
570 
571   bool isVSrcB64() const {
572     return isVCSrcF64() || isLiteralImm(MVT::i64);
573   }
574 
575   bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
576 
577   bool isVSrcTB16_Lo128() const {
578     return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
579   }
580 
581   bool isVSrcFake16B16_Lo128() const {
582     return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
583   }
584 
585   bool isVSrcB16() const {
586     return isVCSrcB16() || isLiteralImm(MVT::i16);
587   }
588 
589   bool isVSrcV2B16() const {
590     return isVSrcB16() || isLiteralImm(MVT::v2i16);
591   }
592 
593   bool isVCSrcV2FP32() const {
594     return isVCSrcF64();
595   }
596 
597   bool isVSrcV2FP32() const {
598     return isVSrcF64() || isLiteralImm(MVT::v2f32);
599   }
600 
601   bool isVCSrcV2INT32() const {
602     return isVCSrcB64();
603   }
604 
605   bool isVSrcV2INT32() const {
606     return isVSrcB64() || isLiteralImm(MVT::v2i32);
607   }
608 
609   bool isVSrcF32() const {
610     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
611   }
612 
613   bool isVSrcF64() const {
614     return isVCSrcF64() || isLiteralImm(MVT::f64);
615   }
616 
617   bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
618 
619   bool isVSrcTF16_Lo128() const {
620     return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
621   }
622 
623   bool isVSrcFake16F16_Lo128() const {
624     return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
625   }
626 
627   bool isVSrcF16() const {
628     return isVCSrcF16() || isLiteralImm(MVT::f16);
629   }
630 
631   bool isVSrcV2F16() const {
632     return isVSrcF16() || isLiteralImm(MVT::v2f16);
633   }
634 
635   bool isVISrcB32() const {
636     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
637   }
638 
639   bool isVISrcB16() const {
640     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
641   }
642 
643   bool isVISrcV2B16() const {
644     return isVISrcB16();
645   }
646 
647   bool isVISrcF32() const {
648     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
649   }
650 
651   bool isVISrcF16() const {
652     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
653   }
654 
655   bool isVISrcV2F16() const {
656     return isVISrcF16() || isVISrcB32();
657   }
658 
659   bool isVISrc_64B64() const {
660     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
661   }
662 
663   bool isVISrc_64F64() const {
664     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
665   }
666 
667   bool isVISrc_64V2FP32() const {
668     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
669   }
670 
671   bool isVISrc_64V2INT32() const {
672     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
673   }
674 
675   bool isVISrc_256B64() const {
676     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
677   }
678 
679   bool isVISrc_256F64() const {
680     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
681   }
682 
683   bool isVISrc_128B16() const {
684     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
685   }
686 
687   bool isVISrc_128V2B16() const {
688     return isVISrc_128B16();
689   }
690 
691   bool isVISrc_128B32() const {
692     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
693   }
694 
695   bool isVISrc_128F32() const {
696     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
697   }
698 
699   bool isVISrc_256V2FP32() const {
700     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
701   }
702 
703   bool isVISrc_256V2INT32() const {
704     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
705   }
706 
707   bool isVISrc_512B32() const {
708     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
709   }
710 
711   bool isVISrc_512B16() const {
712     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
713   }
714 
715   bool isVISrc_512V2B16() const {
716     return isVISrc_512B16();
717   }
718 
719   bool isVISrc_512F32() const {
720     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
721   }
722 
723   bool isVISrc_512F16() const {
724     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
725   }
726 
727   bool isVISrc_512V2F16() const {
728     return isVISrc_512F16() || isVISrc_512B32();
729   }
730 
731   bool isVISrc_1024B32() const {
732     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
733   }
734 
735   bool isVISrc_1024B16() const {
736     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
737   }
738 
739   bool isVISrc_1024V2B16() const {
740     return isVISrc_1024B16();
741   }
742 
743   bool isVISrc_1024F32() const {
744     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
745   }
746 
747   bool isVISrc_1024F16() const {
748     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
749   }
750 
751   bool isVISrc_1024V2F16() const {
752     return isVISrc_1024F16() || isVISrc_1024B32();
753   }
754 
755   bool isAISrcB32() const {
756     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
757   }
758 
759   bool isAISrcB16() const {
760     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
761   }
762 
763   bool isAISrcV2B16() const {
764     return isAISrcB16();
765   }
766 
767   bool isAISrcF32() const {
768     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
769   }
770 
771   bool isAISrcF16() const {
772     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
773   }
774 
775   bool isAISrcV2F16() const {
776     return isAISrcF16() || isAISrcB32();
777   }
778 
779   bool isAISrc_64B64() const {
780     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
781   }
782 
783   bool isAISrc_64F64() const {
784     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
785   }
786 
787   bool isAISrc_128B32() const {
788     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
789   }
790 
791   bool isAISrc_128B16() const {
792     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
793   }
794 
795   bool isAISrc_128V2B16() const {
796     return isAISrc_128B16();
797   }
798 
799   bool isAISrc_128F32() const {
800     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
801   }
802 
803   bool isAISrc_128F16() const {
804     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
805   }
806 
807   bool isAISrc_128V2F16() const {
808     return isAISrc_128F16() || isAISrc_128B32();
809   }
810 
811   bool isVISrc_128F16() const {
812     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
813   }
814 
815   bool isVISrc_128V2F16() const {
816     return isVISrc_128F16() || isVISrc_128B32();
817   }
818 
819   bool isAISrc_256B64() const {
820     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
821   }
822 
823   bool isAISrc_256F64() const {
824     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
825   }
826 
827   bool isAISrc_512B32() const {
828     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
829   }
830 
831   bool isAISrc_512B16() const {
832     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
833   }
834 
835   bool isAISrc_512V2B16() const {
836     return isAISrc_512B16();
837   }
838 
839   bool isAISrc_512F32() const {
840     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
841   }
842 
843   bool isAISrc_512F16() const {
844     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
845   }
846 
847   bool isAISrc_512V2F16() const {
848     return isAISrc_512F16() || isAISrc_512B32();
849   }
850 
851   bool isAISrc_1024B32() const {
852     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
853   }
854 
855   bool isAISrc_1024B16() const {
856     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
857   }
858 
859   bool isAISrc_1024V2B16() const {
860     return isAISrc_1024B16();
861   }
862 
863   bool isAISrc_1024F32() const {
864     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
865   }
866 
867   bool isAISrc_1024F16() const {
868     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
869   }
870 
871   bool isAISrc_1024V2F16() const {
872     return isAISrc_1024F16() || isAISrc_1024B32();
873   }
874 
875   bool isKImmFP32() const {
876     return isLiteralImm(MVT::f32);
877   }
878 
879   bool isKImmFP16() const {
880     return isLiteralImm(MVT::f16);
881   }
882 
883   bool isMem() const override {
884     return false;
885   }
886 
887   bool isExpr() const {
888     return Kind == Expression;
889   }
890 
891   bool isSOPPBrTarget() const { return isExpr() || isImm(); }
892 
893   bool isSWaitCnt() const;
894   bool isDepCtr() const;
895   bool isSDelayALU() const;
896   bool isHwreg() const;
897   bool isSendMsg() const;
898   bool isSplitBarrier() const;
899   bool isSwizzle() const;
900   bool isSMRDOffset8() const;
901   bool isSMEMOffset() const;
902   bool isSMRDLiteralOffset() const;
903   bool isDPP8() const;
904   bool isDPPCtrl() const;
905   bool isBLGP() const;
906   bool isCBSZ() const;
907   bool isABID() const;
908   bool isGPRIdxMode() const;
909   bool isS16Imm() const;
910   bool isU16Imm() const;
911   bool isEndpgm() const;
912   bool isWaitVDST() const;
913   bool isWaitEXP() const;
914   bool isWaitVAVDst() const;
915   bool isWaitVMVSrc() const;
916 
917   auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
918     return std::bind(P, *this);
919   }
920 
921   StringRef getToken() const {
922     assert(isToken());
923     return StringRef(Tok.Data, Tok.Length);
924   }
925 
926   int64_t getImm() const {
927     assert(isImm());
928     return Imm.Val;
929   }
930 
931   void setImm(int64_t Val) {
932     assert(isImm());
933     Imm.Val = Val;
934   }
935 
936   ImmTy getImmTy() const {
937     assert(isImm());
938     return Imm.Type;
939   }
940 
941   unsigned getReg() const override {
942     assert(isRegKind());
943     return Reg.RegNo;
944   }
945 
946   SMLoc getStartLoc() const override {
947     return StartLoc;
948   }
949 
950   SMLoc getEndLoc() const override {
951     return EndLoc;
952   }
953 
954   SMRange getLocRange() const {
955     return SMRange(StartLoc, EndLoc);
956   }
957 
958   Modifiers getModifiers() const {
959     assert(isRegKind() || isImmTy(ImmTyNone));
960     return isRegKind() ? Reg.Mods : Imm.Mods;
961   }
962 
963   void setModifiers(Modifiers Mods) {
964     assert(isRegKind() || isImmTy(ImmTyNone));
965     if (isRegKind())
966       Reg.Mods = Mods;
967     else
968       Imm.Mods = Mods;
969   }
970 
971   bool hasModifiers() const {
972     return getModifiers().hasModifiers();
973   }
974 
975   bool hasFPModifiers() const {
976     return getModifiers().hasFPModifiers();
977   }
978 
979   bool hasIntModifiers() const {
980     return getModifiers().hasIntModifiers();
981   }
982 
983   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
984 
985   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
986 
987   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
988 
989   void addRegOperands(MCInst &Inst, unsigned N) const;
990 
991   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
992     if (isRegKind())
993       addRegOperands(Inst, N);
994     else
995       addImmOperands(Inst, N);
996   }
997 
998   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
999     Modifiers Mods = getModifiers();
1000     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1001     if (isRegKind()) {
1002       addRegOperands(Inst, N);
1003     } else {
1004       addImmOperands(Inst, N, false);
1005     }
1006   }
1007 
1008   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1009     assert(!hasIntModifiers());
1010     addRegOrImmWithInputModsOperands(Inst, N);
1011   }
1012 
1013   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1014     assert(!hasFPModifiers());
1015     addRegOrImmWithInputModsOperands(Inst, N);
1016   }
1017 
1018   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1019     Modifiers Mods = getModifiers();
1020     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1021     assert(isRegKind());
1022     addRegOperands(Inst, N);
1023   }
1024 
1025   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1026     assert(!hasIntModifiers());
1027     addRegWithInputModsOperands(Inst, N);
1028   }
1029 
1030   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1031     assert(!hasFPModifiers());
1032     addRegWithInputModsOperands(Inst, N);
1033   }
1034 
1035   static void printImmTy(raw_ostream& OS, ImmTy Type) {
1036     // clang-format off
1037     switch (Type) {
1038     case ImmTyNone: OS << "None"; break;
1039     case ImmTyGDS: OS << "GDS"; break;
1040     case ImmTyLDS: OS << "LDS"; break;
1041     case ImmTyOffen: OS << "Offen"; break;
1042     case ImmTyIdxen: OS << "Idxen"; break;
1043     case ImmTyAddr64: OS << "Addr64"; break;
1044     case ImmTyOffset: OS << "Offset"; break;
1045     case ImmTyInstOffset: OS << "InstOffset"; break;
1046     case ImmTyOffset0: OS << "Offset0"; break;
1047     case ImmTyOffset1: OS << "Offset1"; break;
1048     case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1049     case ImmTyCPol: OS << "CPol"; break;
1050     case ImmTyTFE: OS << "TFE"; break;
1051     case ImmTyD16: OS << "D16"; break;
1052     case ImmTyFORMAT: OS << "FORMAT"; break;
1053     case ImmTyClampSI: OS << "ClampSI"; break;
1054     case ImmTyOModSI: OS << "OModSI"; break;
1055     case ImmTyDPP8: OS << "DPP8"; break;
1056     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1057     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1058     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1059     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1060     case ImmTyDppFI: OS << "DppFI"; break;
1061     case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1062     case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1063     case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1064     case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1065     case ImmTyDMask: OS << "DMask"; break;
1066     case ImmTyDim: OS << "Dim"; break;
1067     case ImmTyUNorm: OS << "UNorm"; break;
1068     case ImmTyDA: OS << "DA"; break;
1069     case ImmTyR128A16: OS << "R128A16"; break;
1070     case ImmTyA16: OS << "A16"; break;
1071     case ImmTyLWE: OS << "LWE"; break;
1072     case ImmTyOff: OS << "Off"; break;
1073     case ImmTyExpTgt: OS << "ExpTgt"; break;
1074     case ImmTyExpCompr: OS << "ExpCompr"; break;
1075     case ImmTyExpVM: OS << "ExpVM"; break;
1076     case ImmTyHwreg: OS << "Hwreg"; break;
1077     case ImmTySendMsg: OS << "SendMsg"; break;
1078     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1079     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1080     case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1081     case ImmTyOpSel: OS << "OpSel"; break;
1082     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1083     case ImmTyNegLo: OS << "NegLo"; break;
1084     case ImmTyNegHi: OS << "NegHi"; break;
1085     case ImmTySwizzle: OS << "Swizzle"; break;
1086     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1087     case ImmTyHigh: OS << "High"; break;
1088     case ImmTyBLGP: OS << "BLGP"; break;
1089     case ImmTyCBSZ: OS << "CBSZ"; break;
1090     case ImmTyABID: OS << "ABID"; break;
1091     case ImmTyEndpgm: OS << "Endpgm"; break;
1092     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1093     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1094     case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1095     case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1096     }
1097     // clang-format on
1098   }
1099 
1100   void print(raw_ostream &OS) const override {
1101     switch (Kind) {
1102     case Register:
1103       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1104       break;
1105     case Immediate:
1106       OS << '<' << getImm();
1107       if (getImmTy() != ImmTyNone) {
1108         OS << " type: "; printImmTy(OS, getImmTy());
1109       }
1110       OS << " mods: " << Imm.Mods << '>';
1111       break;
1112     case Token:
1113       OS << '\'' << getToken() << '\'';
1114       break;
1115     case Expression:
1116       OS << "<expr " << *Expr << '>';
1117       break;
1118     }
1119   }
1120 
1121   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1122                                       int64_t Val, SMLoc Loc,
1123                                       ImmTy Type = ImmTyNone,
1124                                       bool IsFPImm = false) {
1125     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1126     Op->Imm.Val = Val;
1127     Op->Imm.IsFPImm = IsFPImm;
1128     Op->Imm.Kind = ImmKindTyNone;
1129     Op->Imm.Type = Type;
1130     Op->Imm.Mods = Modifiers();
1131     Op->StartLoc = Loc;
1132     Op->EndLoc = Loc;
1133     return Op;
1134   }
1135 
1136   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1137                                         StringRef Str, SMLoc Loc,
1138                                         bool HasExplicitEncodingSize = true) {
1139     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1140     Res->Tok.Data = Str.data();
1141     Res->Tok.Length = Str.size();
1142     Res->StartLoc = Loc;
1143     Res->EndLoc = Loc;
1144     return Res;
1145   }
1146 
1147   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1148                                       unsigned RegNo, SMLoc S,
1149                                       SMLoc E) {
1150     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1151     Op->Reg.RegNo = RegNo;
1152     Op->Reg.Mods = Modifiers();
1153     Op->StartLoc = S;
1154     Op->EndLoc = E;
1155     return Op;
1156   }
1157 
1158   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1159                                        const class MCExpr *Expr, SMLoc S) {
1160     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1161     Op->Expr = Expr;
1162     Op->StartLoc = S;
1163     Op->EndLoc = S;
1164     return Op;
1165   }
1166 };
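// Informal usage sketch (illustrative, not taken verbatim from this file): the
// parser builds operands with the factory helpers above. For example, a parsed
// offset modifier might be recorded as
//   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, Loc,
//                                               AMDGPUOperand::ImmTyOffset));
// and a register reference as
//   Operands.push_back(AMDGPUOperand::CreateReg(this, RegNo, StartLoc, EndLoc));
// The Tok/Imm/Reg/Expr payloads live in the union above, so each operand
// carries exactly one representation plus its source-location range.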
1167 
1168 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1169   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1170   return OS;
1171 }
1172 
1173 //===----------------------------------------------------------------------===//
1174 // AsmParser
1175 //===----------------------------------------------------------------------===//
1176 
1177 // Holds info related to the current kernel, e.g. count of SGPRs used.
1178 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
1179 // next .amdgpu_hsa_kernel directive or at EOF.
1180 class KernelScopeInfo {
1181   int SgprIndexUnusedMin = -1;
1182   int VgprIndexUnusedMin = -1;
1183   int AgprIndexUnusedMin = -1;
1184   MCContext *Ctx = nullptr;
1185   MCSubtargetInfo const *MSTI = nullptr;
1186 
1187   void usesSgprAt(int i) {
1188     if (i >= SgprIndexUnusedMin) {
1189       SgprIndexUnusedMin = ++i;
1190       if (Ctx) {
1191         MCSymbol* const Sym =
1192           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1193         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1194       }
1195     }
1196   }
1197 
1198   void usesVgprAt(int i) {
1199     if (i >= VgprIndexUnusedMin) {
1200       VgprIndexUnusedMin = ++i;
1201       if (Ctx) {
1202         MCSymbol* const Sym =
1203           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1204         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1205                                          VgprIndexUnusedMin);
1206         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1207       }
1208     }
1209   }
1210 
1211   void usesAgprAt(int i) {
1212     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1213     if (!hasMAIInsts(*MSTI))
1214       return;
1215 
1216     if (i >= AgprIndexUnusedMin) {
1217       AgprIndexUnusedMin = ++i;
1218       if (Ctx) {
1219         MCSymbol* const Sym =
1220           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1221         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1222 
1223         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1224         MCSymbol* const vSym =
1225           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1226         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1227                                          VgprIndexUnusedMin);
1228         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1229       }
1230     }
1231   }
1232 
1233 public:
1234   KernelScopeInfo() = default;
1235 
1236   void initialize(MCContext &Context) {
1237     Ctx = &Context;
1238     MSTI = Ctx->getSubtargetInfo();
1239 
1240     usesSgprAt(SgprIndexUnusedMin = -1);
1241     usesVgprAt(VgprIndexUnusedMin = -1);
1242     if (hasMAIInsts(*MSTI)) {
1243       usesAgprAt(AgprIndexUnusedMin = -1);
1244     }
1245   }
1246 
1247   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1248                     unsigned RegWidth) {
1249     switch (RegKind) {
1250     case IS_SGPR:
1251       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1252       break;
1253     case IS_AGPR:
1254       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1255       break;
1256     case IS_VGPR:
1257       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1258       break;
1259     default:
1260       break;
1261     }
1262   }
1263 };
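// Worked example (hedged, derived from the code above): parsing a reference to
// v[3:4] leads to usesRegister(IS_VGPR, /*DwordRegIndex=*/3, /*RegWidth=*/64),
// which calls usesVgprAt(3 + ceil(64/32) - 1) = usesVgprAt(4). That raises
// VgprIndexUnusedMin to 5 and rewrites the .kernel.vgpr_count symbol via
// getTotalNumVGPRs() from the current VGPR and AGPR high-water marks.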
1264 
1265 class AMDGPUAsmParser : public MCTargetAsmParser {
1266   MCAsmParser &Parser;
1267 
1268   unsigned ForcedEncodingSize = 0;
1269   bool ForcedDPP = false;
1270   bool ForcedSDWA = false;
1271   KernelScopeInfo KernelScope;
1272 
1273   /// @name Auto-generated Match Functions
1274   /// {
1275 
1276 #define GET_ASSEMBLER_HEADER
1277 #include "AMDGPUGenAsmMatcher.inc"
1278 
1279   /// }
1280 
1281 private:
1282   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1283   bool OutOfRangeError(SMRange Range);
1284   /// Calculate VGPR/SGPR blocks required for the given target, reserved
1285   /// registers, and user-specified NextFreeXGPR values.
1286   ///
1287   /// \param Features [in] Target features, used for bug corrections.
1288   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1289   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1290   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1291   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1292   /// descriptor field, if valid.
1293   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1294   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1295   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1296   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1297   /// \param VGPRBlocks [out] Result VGPR block count.
1298   /// \param SGPRBlocks [out] Result SGPR block count.
1299   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1300                           bool FlatScrUsed, bool XNACKUsed,
1301                           std::optional<bool> EnableWavefrontSize32,
1302                           unsigned NextFreeVGPR, SMRange VGPRRange,
1303                           unsigned NextFreeSGPR, SMRange SGPRRange,
1304                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1305   bool ParseDirectiveAMDGCNTarget();
1306   bool ParseDirectiveAMDHSAKernel();
1307   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1308   bool ParseDirectiveHSACodeObjectVersion();
1309   bool ParseDirectiveHSACodeObjectISA();
1310   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1311   bool ParseDirectiveAMDKernelCodeT();
1312   // TODO: Possibly make subtargetHasRegister const.
1313   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1314   bool ParseDirectiveAMDGPUHsaKernel();
1315 
1316   bool ParseDirectiveISAVersion();
1317   bool ParseDirectiveHSAMetadata();
1318   bool ParseDirectivePALMetadataBegin();
1319   bool ParseDirectivePALMetadata();
1320   bool ParseDirectiveAMDGPULDS();
1321 
1322   /// Common code to parse out a block of text (typically YAML) between start and
1323   /// end directives.
1324   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1325                            const char *AssemblerDirectiveEnd,
1326                            std::string &CollectString);
1327 
1328   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1329                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1330   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1331                            unsigned &RegNum, unsigned &RegWidth,
1332                            bool RestoreOnFailure = false);
1333   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1334                            unsigned &RegNum, unsigned &RegWidth,
1335                            SmallVectorImpl<AsmToken> &Tokens);
1336   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1337                            unsigned &RegWidth,
1338                            SmallVectorImpl<AsmToken> &Tokens);
1339   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1340                            unsigned &RegWidth,
1341                            SmallVectorImpl<AsmToken> &Tokens);
1342   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1343                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1344   bool ParseRegRange(unsigned& Num, unsigned& Width);
1345   unsigned getRegularReg(RegisterKind RegKind,
1346                          unsigned RegNum,
1347                          unsigned RegWidth,
1348                          SMLoc Loc);
1349 
1350   bool isRegister();
1351   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1352   std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1353   void initializeGprCountSymbol(RegisterKind RegKind);
1354   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1355                              unsigned RegWidth);
1356   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1357                     bool IsAtomic);
1358 
1359 public:
1360   enum AMDGPUMatchResultTy {
1361     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1362   };
1363   enum OperandMode {
1364     OperandMode_Default,
1365     OperandMode_NSA,
1366   };
1367 
1368   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1369 
1370   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1371                const MCInstrInfo &MII,
1372                const MCTargetOptions &Options)
1373       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1374     MCAsmParserExtension::Initialize(Parser);
1375 
1376     if (getFeatureBits().none()) {
1377       // Set default features.
1378       copySTI().ToggleFeature("southern-islands");
1379     }
1380 
1381     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1382 
1383     {
1384       // TODO: make those pre-defined variables read-only.
1385       // Currently there is no suitable machinery in core llvm-mc for this.
1386       // MCSymbol::isRedefinable is intended for another purpose, and
1387       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1388       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1389       MCContext &Ctx = getContext();
1390       if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1391         MCSymbol *Sym =
1392             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1393         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1394         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1395         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1396         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1397         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1398       } else {
1399         MCSymbol *Sym =
1400             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1401         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1402         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1403         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1404         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1405         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1406       }
1407       if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1408         initializeGprCountSymbol(IS_VGPR);
1409         initializeGprCountSymbol(IS_SGPR);
1410       } else
1411         KernelScope.initialize(getContext());
1412     }
1413   }
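  // Note (assumption, not stated in this file): the version symbols created in
  // the constructor above (.amdgcn.gfx_generation_* for the HSA ABI,
  // .option.machine_version_* otherwise) live in the MCContext symbol table,
  // so the assembly source being parsed can reference them in expressions; the
  // TODO above records that they are not yet protected against redefinition.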
1414 
1415   bool hasMIMG_R128() const {
1416     return AMDGPU::hasMIMG_R128(getSTI());
1417   }
1418 
1419   bool hasPackedD16() const {
1420     return AMDGPU::hasPackedD16(getSTI());
1421   }
1422 
1423   bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1424 
1425   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1426 
1427   bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1428 
1429   bool isSI() const {
1430     return AMDGPU::isSI(getSTI());
1431   }
1432 
1433   bool isCI() const {
1434     return AMDGPU::isCI(getSTI());
1435   }
1436 
1437   bool isVI() const {
1438     return AMDGPU::isVI(getSTI());
1439   }
1440 
1441   bool isGFX9() const {
1442     return AMDGPU::isGFX9(getSTI());
1443   }
1444 
1445   // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1446   bool isGFX90A() const {
1447     return AMDGPU::isGFX90A(getSTI());
1448   }
1449 
1450   bool isGFX940() const {
1451     return AMDGPU::isGFX940(getSTI());
1452   }
1453 
1454   bool isGFX9Plus() const {
1455     return AMDGPU::isGFX9Plus(getSTI());
1456   }
1457 
1458   bool isGFX10() const {
1459     return AMDGPU::isGFX10(getSTI());
1460   }
1461 
1462   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1463 
1464   bool isGFX11() const {
1465     return AMDGPU::isGFX11(getSTI());
1466   }
1467 
1468   bool isGFX11Plus() const {
1469     return AMDGPU::isGFX11Plus(getSTI());
1470   }
1471 
1472   bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1473 
1474   bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1475 
1476   bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1477 
1478   bool isGFX10_BEncoding() const {
1479     return AMDGPU::isGFX10_BEncoding(getSTI());
1480   }
1481 
1482   bool hasInv2PiInlineImm() const {
1483     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1484   }
1485 
1486   bool hasFlatOffsets() const {
1487     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1488   }
1489 
1490   bool hasArchitectedFlatScratch() const {
1491     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1492   }
1493 
1494   bool hasSGPR102_SGPR103() const {
1495     return !isVI() && !isGFX9();
1496   }
1497 
1498   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1499 
1500   bool hasIntClamp() const {
1501     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1502   }
1503 
1504   bool hasPartialNSAEncoding() const {
1505     return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1506   }
1507 
1508   unsigned getNSAMaxSize(bool HasSampler = false) const {
1509     return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1510   }
1511 
1512   unsigned getMaxNumUserSGPRs() const {
1513     return AMDGPU::getMaxNumUserSGPRs(getSTI());
1514   }
1515 
1516   bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1517 
1518   AMDGPUTargetStreamer &getTargetStreamer() {
1519     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1520     return static_cast<AMDGPUTargetStreamer &>(TS);
1521   }
1522 
1523   const MCRegisterInfo *getMRI() const {
1524     // We need this const_cast because for some reason getContext() is not const
1525     // in MCAsmParser.
1526     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1527   }
1528 
1529   const MCInstrInfo *getMII() const {
1530     return &MII;
1531   }
1532 
1533   const FeatureBitset &getFeatureBits() const {
1534     return getSTI().getFeatureBits();
1535   }
1536 
1537   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1538   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1539   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1540 
1541   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1542   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1543   bool isForcedDPP() const { return ForcedDPP; }
1544   bool isForcedSDWA() const { return ForcedSDWA; }
1545   ArrayRef<unsigned> getMatchedVariants() const;
1546   StringRef getMatchedVariantName() const;
1547 
1548   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1549   bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1550                      bool RestoreOnFailure);
1551   bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1552   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1553                                SMLoc &EndLoc) override;
1554   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1555   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1556                                       unsigned Kind) override;
1557   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1558                                OperandVector &Operands, MCStreamer &Out,
1559                                uint64_t &ErrorInfo,
1560                                bool MatchingInlineAsm) override;
1561   bool ParseDirective(AsmToken DirectiveID) override;
1562   ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1563                            OperandMode Mode = OperandMode_Default);
1564   StringRef parseMnemonicSuffix(StringRef Name);
1565   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1566                         SMLoc NameLoc, OperandVector &Operands) override;
1567   //bool ProcessInstruction(MCInst &Inst);
1568 
1569   ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1570 
1571   ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1572 
1573   ParseStatus
1574   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1575                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1576                      std::function<bool(int64_t &)> ConvertResult = nullptr);
1577 
1578   ParseStatus parseOperandArrayWithPrefix(
1579       const char *Prefix, OperandVector &Operands,
1580       AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1581       bool (*ConvertResult)(int64_t &) = nullptr);
1582 
1583   ParseStatus
1584   parseNamedBit(StringRef Name, OperandVector &Operands,
1585                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1586   unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1587   ParseStatus parseCPol(OperandVector &Operands);
1588   ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1589   ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1590   ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1591                                     SMLoc &StringLoc);
1592 
1593   bool isModifier();
1594   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1595   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1596   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1597   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1598   bool parseSP3NegModifier();
1599   ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1600                        bool HasLit = false);
1601   ParseStatus parseReg(OperandVector &Operands);
1602   ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1603                             bool HasLit = false);
1604   ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1605                                            bool AllowImm = true);
1606   ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1607                                             bool AllowImm = true);
1608   ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1609   ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1610   ParseStatus parseVReg32OrOff(OperandVector &Operands);
1611   ParseStatus parseDfmtNfmt(int64_t &Format);
1612   ParseStatus parseUfmt(int64_t &Format);
1613   ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1614                                        int64_t &Format);
1615   ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1616                                          int64_t &Format);
1617   ParseStatus parseFORMAT(OperandVector &Operands);
1618   ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1619   ParseStatus parseNumericFormat(int64_t &Format);
1620   ParseStatus parseFlatOffset(OperandVector &Operands);
1621   ParseStatus parseR128A16(OperandVector &Operands);
1622   ParseStatus parseBLGP(OperandVector &Operands);
1623   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1624   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1625 
1626   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1627 
1628   bool parseCnt(int64_t &IntVal);
1629   ParseStatus parseSWaitCnt(OperandVector &Operands);
1630 
1631   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1632   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1633   ParseStatus parseDepCtr(OperandVector &Operands);
1634 
1635   bool parseDelay(int64_t &Delay);
1636   ParseStatus parseSDelayALU(OperandVector &Operands);
1637 
1638   ParseStatus parseHwreg(OperandVector &Operands);
1639 
1640 private:
1641   struct OperandInfoTy {
1642     SMLoc Loc;
1643     int64_t Id;
1644     bool IsSymbolic = false;
1645     bool IsDefined = false;
1646 
1647     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1648   };
1649 
1650   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1651   bool validateSendMsg(const OperandInfoTy &Msg,
1652                        const OperandInfoTy &Op,
1653                        const OperandInfoTy &Stream);
1654 
1655   bool parseHwregBody(OperandInfoTy &HwReg,
1656                       OperandInfoTy &Offset,
1657                       OperandInfoTy &Width);
1658   bool validateHwreg(const OperandInfoTy &HwReg,
1659                      const OperandInfoTy &Offset,
1660                      const OperandInfoTy &Width);
1661 
1662   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1663   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1664   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1665 
1666   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1667                       const OperandVector &Operands) const;
1668   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1669   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1670   SMLoc getLitLoc(const OperandVector &Operands,
1671                   bool SearchMandatoryLiterals = false) const;
1672   SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1673   SMLoc getConstLoc(const OperandVector &Operands) const;
1674   SMLoc getInstLoc(const OperandVector &Operands) const;
1675 
1676   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1677   bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1678   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1679   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1680   bool validateSOPLiteral(const MCInst &Inst) const;
1681   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1682   bool validateVOPDRegBankConstraints(const MCInst &Inst,
1683                                       const OperandVector &Operands);
1684   bool validateIntClampSupported(const MCInst &Inst);
1685   bool validateMIMGAtomicDMask(const MCInst &Inst);
1686   bool validateMIMGGatherDMask(const MCInst &Inst);
1687   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1688   bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1689   bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1690   bool validateMIMGD16(const MCInst &Inst);
1691   bool validateMIMGMSAA(const MCInst &Inst);
1692   bool validateOpSel(const MCInst &Inst);
1693   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1694   bool validateVccOperand(unsigned Reg) const;
1695   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1696   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1697   bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1698   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1699   bool validateAGPRLdSt(const MCInst &Inst) const;
1700   bool validateVGPRAlign(const MCInst &Inst) const;
1701   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1702   bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1703   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1704   bool validateDivScale(const MCInst &Inst);
1705   bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1706   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1707                              const SMLoc &IDLoc);
1708   bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1709                               const unsigned CPol);
1710   bool validateExeczVcczOperands(const OperandVector &Operands);
1711   bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1712   std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1713   unsigned getConstantBusLimit(unsigned Opcode) const;
1714   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1715   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1716   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1717 
1718   bool isSupportedMnemo(StringRef Mnemo,
1719                         const FeatureBitset &FBS);
1720   bool isSupportedMnemo(StringRef Mnemo,
1721                         const FeatureBitset &FBS,
1722                         ArrayRef<unsigned> Variants);
1723   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1724 
1725   bool isId(const StringRef Id) const;
1726   bool isId(const AsmToken &Token, const StringRef Id) const;
1727   bool isToken(const AsmToken::TokenKind Kind) const;
1728   StringRef getId() const;
1729   bool trySkipId(const StringRef Id);
1730   bool trySkipId(const StringRef Pref, const StringRef Id);
1731   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1732   bool trySkipToken(const AsmToken::TokenKind Kind);
1733   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1734   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1735   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1736 
1737   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1738   AsmToken::TokenKind getTokenKind() const;
1739   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1740   bool parseExpr(OperandVector &Operands);
1741   StringRef getTokenStr() const;
1742   AsmToken peekToken(bool ShouldSkipSpace = true);
1743   AsmToken getToken() const;
1744   SMLoc getLoc() const;
1745   void lex();
1746 
1747 public:
1748   void onBeginOfFile() override;
1749 
1750   ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1751 
1752   ParseStatus parseExpTgt(OperandVector &Operands);
1753   ParseStatus parseSendMsg(OperandVector &Operands);
1754   ParseStatus parseInterpSlot(OperandVector &Operands);
1755   ParseStatus parseInterpAttr(OperandVector &Operands);
1756   ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1757   ParseStatus parseBoolReg(OperandVector &Operands);
1758 
1759   bool parseSwizzleOperand(int64_t &Op,
1760                            const unsigned MinVal,
1761                            const unsigned MaxVal,
1762                            const StringRef ErrMsg,
1763                            SMLoc &Loc);
1764   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1765                             const unsigned MinVal,
1766                             const unsigned MaxVal,
1767                             const StringRef ErrMsg);
1768   ParseStatus parseSwizzle(OperandVector &Operands);
1769   bool parseSwizzleOffset(int64_t &Imm);
1770   bool parseSwizzleMacro(int64_t &Imm);
1771   bool parseSwizzleQuadPerm(int64_t &Imm);
1772   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1773   bool parseSwizzleBroadcast(int64_t &Imm);
1774   bool parseSwizzleSwap(int64_t &Imm);
1775   bool parseSwizzleReverse(int64_t &Imm);
1776 
1777   ParseStatus parseGPRIdxMode(OperandVector &Operands);
1778   int64_t parseGPRIdxMacro();
1779 
1780   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1781   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1782 
1783   ParseStatus parseOModSI(OperandVector &Operands);
1784 
1785   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1786                OptionalImmIndexMap &OptionalIdx);
1787   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1788   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1789   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1790   void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1791   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1792                     OptionalImmIndexMap &OptionalIdx);
1793   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1794                 OptionalImmIndexMap &OptionalIdx);
1795 
1796   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1797   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1798 
1799   bool parseDimId(unsigned &Encoding);
1800   ParseStatus parseDim(OperandVector &Operands);
1801   bool convertDppBoundCtrl(int64_t &BoundCtrl);
1802   ParseStatus parseDPP8(OperandVector &Operands);
1803   ParseStatus parseDPPCtrl(OperandVector &Operands);
1804   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1805   int64_t parseDPPCtrlSel(StringRef Ctrl);
1806   int64_t parseDPPCtrlPerm();
1807   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1808   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1809     cvtDPP(Inst, Operands, true);
1810   }
1811   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1812                   bool IsDPP8 = false);
1813   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1814     cvtVOP3DPP(Inst, Operands, true);
1815   }
1816 
1817   ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1818                            AMDGPUOperand::ImmTy Type);
1819   ParseStatus parseSDWADstUnused(OperandVector &Operands);
1820   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1821   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1822   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1823   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1824   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1825   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1826                uint64_t BasicInstType,
1827                bool SkipDstVcc = false,
1828                bool SkipSrcVcc = false);
1829 
1830   ParseStatus parseEndpgm(OperandVector &Operands);
1831 
1832   ParseStatus parseVOPD(OperandVector &Operands);
1833 };
1834 
1835 } // end anonymous namespace
1836 
1837 // May be called with an integer type of equivalent bitwidth.
1838 static const fltSemantics *getFltSemantics(unsigned Size) {
1839   switch (Size) {
1840   case 4:
1841     return &APFloat::IEEEsingle();
1842   case 8:
1843     return &APFloat::IEEEdouble();
1844   case 2:
1845     return &APFloat::IEEEhalf();
1846   default:
1847     llvm_unreachable("unsupported fp type");
1848   }
1849 }
1850 
1851 static const fltSemantics *getFltSemantics(MVT VT) {
1852   return getFltSemantics(VT.getSizeInBits() / 8);
1853 }
1854 
1855 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1856   switch (OperandType) {
1857   case AMDGPU::OPERAND_REG_IMM_INT32:
1858   case AMDGPU::OPERAND_REG_IMM_FP32:
1859   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1860   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1861   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1862   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1863   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1864   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1865   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1866   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1867   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1868   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1869   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1870   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1871   case AMDGPU::OPERAND_KIMM32:
1872   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1873     return &APFloat::IEEEsingle();
1874   case AMDGPU::OPERAND_REG_IMM_INT64:
1875   case AMDGPU::OPERAND_REG_IMM_FP64:
1876   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1877   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1878   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1879     return &APFloat::IEEEdouble();
1880   case AMDGPU::OPERAND_REG_IMM_INT16:
1881   case AMDGPU::OPERAND_REG_IMM_FP16:
1882   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1883   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1884   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1885   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1886   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1887   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1888   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1889   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1890   case AMDGPU::OPERAND_KIMM16:
1891     return &APFloat::IEEEhalf();
1892   default:
1893     llvm_unreachable("unsupported fp type");
1894   }
1895 }
1896 
1897 //===----------------------------------------------------------------------===//
1898 // Operand
1899 //===----------------------------------------------------------------------===//
1900 
1901 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1902   bool Lost;
1903 
1904   // Convert the literal to the floating-point semantics of VT.
1905   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1906                                                APFloat::rmNearestTiesToEven,
1907                                                &Lost);
1908   // We allow precision loss but not overflow or underflow
1909   if (Status != APFloat::opOK &&
1910       Lost &&
1911       ((Status & APFloat::opOverflow)  != 0 ||
1912        (Status & APFloat::opUnderflow) != 0)) {
1913     return false;
1914   }
1915 
1916   return true;
1917 }
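// For example (illustrative), with VT == MVT::f16 a literal such as 0.1
// converts inexactly; precision loss alone is tolerated, so the function
// returns true, whereas 1.0e10 overflows f16 and is rejected.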
1918 
1919 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1920   return isUIntN(Size, Val) || isIntN(Size, Val);
1921 }
1922 
1923 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1924   if (VT.getScalarType() == MVT::i16) {
1925     // FP immediate values are broken.
1926     return isInlinableIntLiteral(Val);
1927   }
1928 
1929   // f16/v2f16 operands work correctly for all values.
1930   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1931 }
1932 
1933 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1934 
1935   // This is a hack to enable named inline values like
1936   // shared_base with both 32-bit and 64-bit operands.
1937   // Note that these values are defined as
1938   // 32-bit operands only.
1939   if (isInlineValue()) {
1940     return true;
1941   }
1942 
1943   if (!isImmTy(ImmTyNone)) {
1944     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1945     return false;
1946   }
1947   // TODO: We should avoid using host floating point here. It would be better
1948   // to check the float bit values, which is what a few other places do.
1949   // We've had bot failures before due to weird NaN support on MIPS hosts.
1950 
1951   APInt Literal(64, Imm.Val);
1952 
1953   if (Imm.IsFPImm) { // We got fp literal token
1954     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1955       return AMDGPU::isInlinableLiteral64(Imm.Val,
1956                                           AsmParser->hasInv2PiInlineImm());
1957     }
1958 
1959     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1960     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1961       return false;
1962 
1963     if (type.getScalarSizeInBits() == 16) {
1964       return isInlineableLiteralOp16(
1965         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1966         type, AsmParser->hasInv2PiInlineImm());
1967     }
1968 
1969     // Check if single precision literal is inlinable
1970     return AMDGPU::isInlinableLiteral32(
1971       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1972       AsmParser->hasInv2PiInlineImm());
1973   }
1974 
1975   // We got int literal token.
1976   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1977     return AMDGPU::isInlinableLiteral64(Imm.Val,
1978                                         AsmParser->hasInv2PiInlineImm());
1979   }
1980 
1981   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1982     return false;
1983   }
1984 
1985   if (type.getScalarSizeInBits() == 16) {
1986     return isInlineableLiteralOp16(
1987       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1988       type, AsmParser->hasInv2PiInlineImm());
1989   }
1990 
1991   return AMDGPU::isInlinableLiteral32(
1992     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1993     AsmParser->hasInv2PiInlineImm());
1994 }
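// Rough illustration of isInlinableImm() above: for a 32-bit operand, an
// integer token such as 64 is reported as inlinable, while 65 is not, since
// 65 is neither in the inline integer range nor one of the special FP bit
// patterns accepted by AMDGPU::isInlinableLiteral32().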
1995 
1996 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1997   // Check that this immediate can be added as literal
1998   if (!isImmTy(ImmTyNone)) {
1999     return false;
2000   }
2001 
2002   if (!Imm.IsFPImm) {
2003     // We got int literal token.
2004 
2005     if (type == MVT::f64 && hasFPModifiers()) {
2006       // FP modifiers cannot be applied to int literals while preserving the same
2007       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
2008       // ambiguity, disable these cases.
2009       return false;
2010     }
2011 
2012     unsigned Size = type.getSizeInBits();
2013     if (Size == 64)
2014       Size = 32;
2015 
2016     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2017     // types.
2018     return isSafeTruncation(Imm.Val, Size);
2019   }
2020 
2021   // We got fp literal token
2022   if (type == MVT::f64) { // Expected 64-bit fp operand
2023     // We would set the low 32 bits of the literal to zero, but we accept such literals
2024     return true;
2025   }
2026 
2027   if (type == MVT::i64) { // Expected 64-bit int operand
2028     // We don't allow fp literals in 64-bit integer instructions. It is
2029     // unclear how we should encode them.
2030     return false;
2031   }
2032 
2033   // We allow fp literals with f16x2 operands assuming that the specified
2034   // literal goes into the lower half and the upper half is zero. We also
2035   // require that the literal may be losslessly converted to f16.
2036   //
2037   // For i16x2 operands, we assume that the specified literal is encoded as a
2038   // single-precision float. This is pretty odd, but it matches SP3 and what
2039   // happens in hardware.
2040   MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
2041                      : (type == MVT::v2i16) ? MVT::f32
2042                      : (type == MVT::v2f32) ? MVT::f32
2043                                             : type;
2044 
2045   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2046   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2047 }
2048 
2049 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2050   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2051 }
2052 
2053 bool AMDGPUOperand::isVRegWithInputMods() const {
2054   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2055          // GFX90A allows DPP on 64-bit operands.
2056          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2057           AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2058 }
2059 
2060 bool AMDGPUOperand::isT16VRegWithInputMods() const {
2061   return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
2062 }
2063 
2064 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2065   if (AsmParser->isVI())
2066     return isVReg32();
2067   else if (AsmParser->isGFX9Plus())
2068     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2069   else
2070     return false;
2071 }
2072 
2073 bool AMDGPUOperand::isSDWAFP16Operand() const {
2074   return isSDWAOperand(MVT::f16);
2075 }
2076 
2077 bool AMDGPUOperand::isSDWAFP32Operand() const {
2078   return isSDWAOperand(MVT::f32);
2079 }
2080 
2081 bool AMDGPUOperand::isSDWAInt16Operand() const {
2082   return isSDWAOperand(MVT::i16);
2083 }
2084 
2085 bool AMDGPUOperand::isSDWAInt32Operand() const {
2086   return isSDWAOperand(MVT::i32);
2087 }
2088 
2089 bool AMDGPUOperand::isBoolReg() const {
2090   auto FB = AsmParser->getFeatureBits();
2091   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2092                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2093 }
2094 
2095 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2096 {
2097   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2098   assert(Size == 2 || Size == 4 || Size == 8);
2099 
2100   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2101 
2102   if (Imm.Mods.Abs) {
2103     Val &= ~FpSignMask;
2104   }
2105   if (Imm.Mods.Neg) {
2106     Val ^= FpSignMask;
2107   }
2108 
2109   return Val;
2110 }
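// For example (illustrative): with Size == 4, FpSignMask is 0x80000000, so an
// 'abs' modifier maps 0xBF800000 (-1.0f) to 0x3F800000 (1.0f) and a 'neg'
// modifier flips the sign bit back.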
2111 
2112 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2113   if (isExpr()) {
2114     Inst.addOperand(MCOperand::createExpr(Expr));
2115     return;
2116   }
2117 
2118   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2119                              Inst.getNumOperands())) {
2120     addLiteralImmOperand(Inst, Imm.Val,
2121                          ApplyModifiers &
2122                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2123   } else {
2124     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2125     Inst.addOperand(MCOperand::createImm(Imm.Val));
2126     setImmKindNone();
2127   }
2128 }
2129 
2130 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2131   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2132   auto OpNum = Inst.getNumOperands();
2133   // Check that this operand accepts literals
2134   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2135 
2136   if (ApplyModifiers) {
2137     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2138     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2139     Val = applyInputFPModifiers(Val, Size);
2140   }
2141 
2142   APInt Literal(64, Val);
2143   uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2144 
2145   if (Imm.IsFPImm) { // We got fp literal token
2146     switch (OpTy) {
2147     case AMDGPU::OPERAND_REG_IMM_INT64:
2148     case AMDGPU::OPERAND_REG_IMM_FP64:
2149     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2150     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2151     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2152       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2153                                        AsmParser->hasInv2PiInlineImm())) {
2154         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2155         setImmKindConst();
2156         return;
2157       }
2158 
2159       // Non-inlineable
2160       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2161         // For fp operands we check whether the low 32 bits are zero
2162         if (Literal.getLoBits(32) != 0) {
2163           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2164           "Can't encode literal as exact 64-bit floating-point operand. "
2165           "Low 32-bits will be set to zero");
2166           Val &= 0xffffffff00000000u;
2167         }
2168 
2169         Inst.addOperand(MCOperand::createImm(Val));
2170         setImmKindLiteral();
2171         return;
2172       }
2173 
2174       // We don't allow fp literals in 64-bit integer instructions. It is
2175       // unclear how we should encode them. This case should be checked earlier
2176       // in predicate methods (isLiteralImm())
2177       llvm_unreachable("fp literal in 64-bit integer instruction.");
2178 
2179     case AMDGPU::OPERAND_REG_IMM_INT32:
2180     case AMDGPU::OPERAND_REG_IMM_FP32:
2181     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2182     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2183     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2184     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2185     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2186     case AMDGPU::OPERAND_REG_IMM_INT16:
2187     case AMDGPU::OPERAND_REG_IMM_FP16:
2188     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2189     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2190     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2191     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2192     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2193     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2194     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2195     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2196     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2197     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2198     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2199     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2200     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2201     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2202     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2203     case AMDGPU::OPERAND_KIMM32:
2204     case AMDGPU::OPERAND_KIMM16:
2205     case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
2206       bool lost;
2207       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2208       // Convert the literal to the operand's floating-point semantics
2209       FPLiteral.convert(*getOpFltSemantics(OpTy),
2210                         APFloat::rmNearestTiesToEven, &lost);
2211       // We allow precision loss but not overflow or underflow. This should be
2212       // checked earlier in isLiteralImm()
2213 
2214       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2215       Inst.addOperand(MCOperand::createImm(ImmVal));
2216       if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2217         setImmKindMandatoryLiteral();
2218       } else {
2219         setImmKindLiteral();
2220       }
2221       return;
2222     }
2223     default:
2224       llvm_unreachable("invalid operand size");
2225     }
2226 
2227     return;
2228   }
2229 
2230   // We got int literal token.
2231   // Only sign extend inline immediates.
2232   switch (OpTy) {
2233   case AMDGPU::OPERAND_REG_IMM_INT32:
2234   case AMDGPU::OPERAND_REG_IMM_FP32:
2235   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2236   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2237   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2238   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2239   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2240   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2241   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2242   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2243   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2244   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2245   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2246   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
2247     if (isSafeTruncation(Val, 32) &&
2248         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2249                                      AsmParser->hasInv2PiInlineImm())) {
2250       Inst.addOperand(MCOperand::createImm(Val));
2251       setImmKindConst();
2252       return;
2253     }
2254 
2255     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2256     setImmKindLiteral();
2257     return;
2258 
2259   case AMDGPU::OPERAND_REG_IMM_INT64:
2260   case AMDGPU::OPERAND_REG_IMM_FP64:
2261   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2262   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2263   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2264     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2265       Inst.addOperand(MCOperand::createImm(Val));
2266       setImmKindConst();
2267       return;
2268     }
2269 
2270     Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2271                                                     : Lo_32(Val);
2272 
2273     Inst.addOperand(MCOperand::createImm(Val));
2274     setImmKindLiteral();
2275     return;
2276 
2277   case AMDGPU::OPERAND_REG_IMM_INT16:
2278   case AMDGPU::OPERAND_REG_IMM_FP16:
2279   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2280   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2281   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2282   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2283   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2284     if (isSafeTruncation(Val, 16) &&
2285         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2286                                      AsmParser->hasInv2PiInlineImm())) {
2287       Inst.addOperand(MCOperand::createImm(Val));
2288       setImmKindConst();
2289       return;
2290     }
2291 
2292     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2293     setImmKindLiteral();
2294     return;
2295 
2296   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2297   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2298   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2299   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2300     assert(isSafeTruncation(Val, 16));
2301     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2302                                         AsmParser->hasInv2PiInlineImm()));
2303 
2304     Inst.addOperand(MCOperand::createImm(Val));
2305     return;
2306   }
2307   case AMDGPU::OPERAND_KIMM32:
2308     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2309     setImmKindMandatoryLiteral();
2310     return;
2311   case AMDGPU::OPERAND_KIMM16:
2312     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2313     setImmKindMandatoryLiteral();
2314     return;
2315   default:
2316     llvm_unreachable("invalid operand size");
2317   }
2318 }
2319 
2320 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2321   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2322 }
2323 
2324 bool AMDGPUOperand::isInlineValue() const {
2325   return isRegKind() && ::isInlineValue(getReg());
2326 }
2327 
2328 //===----------------------------------------------------------------------===//
2329 // AsmParser
2330 //===----------------------------------------------------------------------===//
2331 
2332 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2333   if (Is == IS_VGPR) {
2334     switch (RegWidth) {
2335       default: return -1;
2336       case 32:
2337         return AMDGPU::VGPR_32RegClassID;
2338       case 64:
2339         return AMDGPU::VReg_64RegClassID;
2340       case 96:
2341         return AMDGPU::VReg_96RegClassID;
2342       case 128:
2343         return AMDGPU::VReg_128RegClassID;
2344       case 160:
2345         return AMDGPU::VReg_160RegClassID;
2346       case 192:
2347         return AMDGPU::VReg_192RegClassID;
2348       case 224:
2349         return AMDGPU::VReg_224RegClassID;
2350       case 256:
2351         return AMDGPU::VReg_256RegClassID;
2352       case 288:
2353         return AMDGPU::VReg_288RegClassID;
2354       case 320:
2355         return AMDGPU::VReg_320RegClassID;
2356       case 352:
2357         return AMDGPU::VReg_352RegClassID;
2358       case 384:
2359         return AMDGPU::VReg_384RegClassID;
2360       case 512:
2361         return AMDGPU::VReg_512RegClassID;
2362       case 1024:
2363         return AMDGPU::VReg_1024RegClassID;
2364     }
2365   } else if (Is == IS_TTMP) {
2366     switch (RegWidth) {
2367       default: return -1;
2368       case 32:
2369         return AMDGPU::TTMP_32RegClassID;
2370       case 64:
2371         return AMDGPU::TTMP_64RegClassID;
2372       case 128:
2373         return AMDGPU::TTMP_128RegClassID;
2374       case 256:
2375         return AMDGPU::TTMP_256RegClassID;
2376       case 512:
2377         return AMDGPU::TTMP_512RegClassID;
2378     }
2379   } else if (Is == IS_SGPR) {
2380     switch (RegWidth) {
2381       default: return -1;
2382       case 32:
2383         return AMDGPU::SGPR_32RegClassID;
2384       case 64:
2385         return AMDGPU::SGPR_64RegClassID;
2386       case 96:
2387         return AMDGPU::SGPR_96RegClassID;
2388       case 128:
2389         return AMDGPU::SGPR_128RegClassID;
2390       case 160:
2391         return AMDGPU::SGPR_160RegClassID;
2392       case 192:
2393         return AMDGPU::SGPR_192RegClassID;
2394       case 224:
2395         return AMDGPU::SGPR_224RegClassID;
2396       case 256:
2397         return AMDGPU::SGPR_256RegClassID;
2398       case 288:
2399         return AMDGPU::SGPR_288RegClassID;
2400       case 320:
2401         return AMDGPU::SGPR_320RegClassID;
2402       case 352:
2403         return AMDGPU::SGPR_352RegClassID;
2404       case 384:
2405         return AMDGPU::SGPR_384RegClassID;
2406       case 512:
2407         return AMDGPU::SGPR_512RegClassID;
2408     }
2409   } else if (Is == IS_AGPR) {
2410     switch (RegWidth) {
2411       default: return -1;
2412       case 32:
2413         return AMDGPU::AGPR_32RegClassID;
2414       case 64:
2415         return AMDGPU::AReg_64RegClassID;
2416       case 96:
2417         return AMDGPU::AReg_96RegClassID;
2418       case 128:
2419         return AMDGPU::AReg_128RegClassID;
2420       case 160:
2421         return AMDGPU::AReg_160RegClassID;
2422       case 192:
2423         return AMDGPU::AReg_192RegClassID;
2424       case 224:
2425         return AMDGPU::AReg_224RegClassID;
2426       case 256:
2427         return AMDGPU::AReg_256RegClassID;
2428       case 288:
2429         return AMDGPU::AReg_288RegClassID;
2430       case 320:
2431         return AMDGPU::AReg_320RegClassID;
2432       case 352:
2433         return AMDGPU::AReg_352RegClassID;
2434       case 384:
2435         return AMDGPU::AReg_384RegClassID;
2436       case 512:
2437         return AMDGPU::AReg_512RegClassID;
2438       case 1024:
2439         return AMDGPU::AReg_1024RegClassID;
2440     }
2441   }
2442   return -1;
2443 }
2444 
2445 static unsigned getSpecialRegForName(StringRef RegName) {
2446   return StringSwitch<unsigned>(RegName)
2447     .Case("exec", AMDGPU::EXEC)
2448     .Case("vcc", AMDGPU::VCC)
2449     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2450     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2451     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2452     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2453     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2454     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2455     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2456     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2457     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2458     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2459     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2460     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2461     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2462     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2463     .Case("m0", AMDGPU::M0)
2464     .Case("vccz", AMDGPU::SRC_VCCZ)
2465     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2466     .Case("execz", AMDGPU::SRC_EXECZ)
2467     .Case("src_execz", AMDGPU::SRC_EXECZ)
2468     .Case("scc", AMDGPU::SRC_SCC)
2469     .Case("src_scc", AMDGPU::SRC_SCC)
2470     .Case("tba", AMDGPU::TBA)
2471     .Case("tma", AMDGPU::TMA)
2472     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2473     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2474     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2475     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2476     .Case("vcc_lo", AMDGPU::VCC_LO)
2477     .Case("vcc_hi", AMDGPU::VCC_HI)
2478     .Case("exec_lo", AMDGPU::EXEC_LO)
2479     .Case("exec_hi", AMDGPU::EXEC_HI)
2480     .Case("tma_lo", AMDGPU::TMA_LO)
2481     .Case("tma_hi", AMDGPU::TMA_HI)
2482     .Case("tba_lo", AMDGPU::TBA_LO)
2483     .Case("tba_hi", AMDGPU::TBA_HI)
2484     .Case("pc", AMDGPU::PC_REG)
2485     .Case("null", AMDGPU::SGPR_NULL)
2486     .Default(AMDGPU::NoRegister);
2487 }
2488 
2489 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2490                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2491   auto R = parseRegister();
2492   if (!R) return true;
2493   assert(R->isReg());
2494   RegNo = R->getReg();
2495   StartLoc = R->getStartLoc();
2496   EndLoc = R->getEndLoc();
2497   return false;
2498 }
2499 
2500 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2501                                     SMLoc &EndLoc) {
2502   return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2503 }
2504 
2505 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2506                                               SMLoc &EndLoc) {
2507   bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2508   bool PendingErrors = getParser().hasPendingError();
2509   getParser().clearPendingErrors();
2510   if (PendingErrors)
2511     return ParseStatus::Failure;
2512   if (Result)
2513     return ParseStatus::NoMatch;
2514   return ParseStatus::Success;
2515 }
2516 
2517 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2518                                             RegisterKind RegKind, unsigned Reg1,
2519                                             SMLoc Loc) {
2520   switch (RegKind) {
2521   case IS_SPECIAL:
2522     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2523       Reg = AMDGPU::EXEC;
2524       RegWidth = 64;
2525       return true;
2526     }
2527     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2528       Reg = AMDGPU::FLAT_SCR;
2529       RegWidth = 64;
2530       return true;
2531     }
2532     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2533       Reg = AMDGPU::XNACK_MASK;
2534       RegWidth = 64;
2535       return true;
2536     }
2537     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2538       Reg = AMDGPU::VCC;
2539       RegWidth = 64;
2540       return true;
2541     }
2542     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2543       Reg = AMDGPU::TBA;
2544       RegWidth = 64;
2545       return true;
2546     }
2547     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2548       Reg = AMDGPU::TMA;
2549       RegWidth = 64;
2550       return true;
2551     }
2552     Error(Loc, "register does not fit in the list");
2553     return false;
2554   case IS_VGPR:
2555   case IS_SGPR:
2556   case IS_AGPR:
2557   case IS_TTMP:
2558     if (Reg1 != Reg + RegWidth / 32) {
2559       Error(Loc, "registers in a list must have consecutive indices");
2560       return false;
2561     }
2562     RegWidth += 32;
2563     return true;
2564   default:
2565     llvm_unreachable("unexpected register kind");
2566   }
2567 }
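// Rough sketch of how AddNextRegisterToList() is used (illustrative): while
// parsing a list such as [s0,s1,s2,s3], each call checks that the next
// register is exactly one 32-bit register past the end of the range
// accumulated so far and then grows RegWidth by 32, so the example above ends
// with RegWidth == 128.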
2568 
2569 struct RegInfo {
2570   StringLiteral Name;
2571   RegisterKind Kind;
2572 };
2573 
2574 static constexpr RegInfo RegularRegisters[] = {
2575   {{"v"},    IS_VGPR},
2576   {{"s"},    IS_SGPR},
2577   {{"ttmp"}, IS_TTMP},
2578   {{"acc"},  IS_AGPR},
2579   {{"a"},    IS_AGPR},
2580 };
2581 
2582 static bool isRegularReg(RegisterKind Kind) {
2583   return Kind == IS_VGPR ||
2584          Kind == IS_SGPR ||
2585          Kind == IS_TTMP ||
2586          Kind == IS_AGPR;
2587 }
2588 
2589 static const RegInfo* getRegularRegInfo(StringRef Str) {
2590   for (const RegInfo &Reg : RegularRegisters)
2591     if (Str.starts_with(Reg.Name))
2592       return &Reg;
2593   return nullptr;
2594 }
2595 
2596 static bool getRegNum(StringRef Str, unsigned& Num) {
2597   return !Str.getAsInteger(10, Num);
2598 }
2599 
2600 bool
2601 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2602                             const AsmToken &NextToken) const {
2603 
2604   // A list of consecutive registers: [s0,s1,s2,s3]
2605   if (Token.is(AsmToken::LBrac))
2606     return true;
2607 
2608   if (!Token.is(AsmToken::Identifier))
2609     return false;
2610 
2611   // A single register like s0 or a range of registers like s[0:1]
2612 
2613   StringRef Str = Token.getString();
2614   const RegInfo *Reg = getRegularRegInfo(Str);
2615   if (Reg) {
2616     StringRef RegName = Reg->Name;
2617     StringRef RegSuffix = Str.substr(RegName.size());
2618     if (!RegSuffix.empty()) {
2619       unsigned Num;
2620       // A single register with an index: rXX
2621       if (getRegNum(RegSuffix, Num))
2622         return true;
2623     } else {
2624       // A range of registers: r[XX:YY].
2625       if (NextToken.is(AsmToken::LBrac))
2626         return true;
2627     }
2628   }
2629 
2630   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2631 }
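// For example (illustrative): "v0", "s[0:1]" (an identifier followed by '['),
// "[v0,v1]" (a bracketed list) and special names such as "vcc" are all
// recognized by isRegister(), while an unrelated identifier like "offset" is
// not.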
2632 
2633 bool
2634 AMDGPUAsmParser::isRegister()
2635 {
2636   return isRegister(getToken(), peekToken());
2637 }
2638 
2639 unsigned
2640 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2641                                unsigned RegNum,
2642                                unsigned RegWidth,
2643                                SMLoc Loc) {
2644 
2645   assert(isRegularReg(RegKind));
2646 
2647   unsigned AlignSize = 1;
2648   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2649     // SGPR and TTMP registers must be aligned.
2650     // Max required alignment is 4 dwords.
2651     AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2652   }
2653 
2654   if (RegNum % AlignSize != 0) {
2655     Error(Loc, "invalid register alignment");
2656     return AMDGPU::NoRegister;
2657   }
2658 
2659   unsigned RegIdx = RegNum / AlignSize;
2660   int RCID = getRegClass(RegKind, RegWidth);
2661   if (RCID == -1) {
2662     Error(Loc, "invalid or unsupported register size");
2663     return AMDGPU::NoRegister;
2664   }
2665 
2666   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2667   const MCRegisterClass RC = TRI->getRegClass(RCID);
2668   if (RegIdx >= RC.getNumRegs()) {
2669     Error(Loc, "register index is out of range");
2670     return AMDGPU::NoRegister;
2671   }
2672 
2673   return RC.getRegister(RegIdx);
2674 }
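// For example (illustrative): an SGPR pair such as s[2:3] (RegWidth == 64)
// requires an even RegNum (AlignSize == 2) and maps to index 1 of the SGPR_64
// register class, whereas s[1:2] is rejected with "invalid register
// alignment". VGPRs and AGPRs are not subject to this alignment check
// (AlignSize stays 1).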
2675 
2676 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2677   int64_t RegLo, RegHi;
2678   if (!skipToken(AsmToken::LBrac, "missing register index"))
2679     return false;
2680 
2681   SMLoc FirstIdxLoc = getLoc();
2682   SMLoc SecondIdxLoc;
2683 
2684   if (!parseExpr(RegLo))
2685     return false;
2686 
2687   if (trySkipToken(AsmToken::Colon)) {
2688     SecondIdxLoc = getLoc();
2689     if (!parseExpr(RegHi))
2690       return false;
2691   } else {
2692     RegHi = RegLo;
2693   }
2694 
2695   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2696     return false;
2697 
2698   if (!isUInt<32>(RegLo)) {
2699     Error(FirstIdxLoc, "invalid register index");
2700     return false;
2701   }
2702 
2703   if (!isUInt<32>(RegHi)) {
2704     Error(SecondIdxLoc, "invalid register index");
2705     return false;
2706   }
2707 
2708   if (RegLo > RegHi) {
2709     Error(FirstIdxLoc, "first register index should not exceed second index");
2710     return false;
2711   }
2712 
2713   Num = static_cast<unsigned>(RegLo);
2714   RegWidth = 32 * ((RegHi - RegLo) + 1);
2715   return true;
2716 }
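// For example (illustrative): with the remaining input "[4:7]" this yields
// Num == 4 and RegWidth == 128 (four 32-bit registers), while "[4]" yields
// Num == 4 and RegWidth == 32.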
2717 
2718 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2719                                           unsigned &RegNum, unsigned &RegWidth,
2720                                           SmallVectorImpl<AsmToken> &Tokens) {
2721   assert(isToken(AsmToken::Identifier));
2722   unsigned Reg = getSpecialRegForName(getTokenStr());
2723   if (Reg) {
2724     RegNum = 0;
2725     RegWidth = 32;
2726     RegKind = IS_SPECIAL;
2727     Tokens.push_back(getToken());
2728     lex(); // skip register name
2729   }
2730   return Reg;
2731 }
2732 
2733 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2734                                           unsigned &RegNum, unsigned &RegWidth,
2735                                           SmallVectorImpl<AsmToken> &Tokens) {
2736   assert(isToken(AsmToken::Identifier));
2737   StringRef RegName = getTokenStr();
2738   auto Loc = getLoc();
2739 
2740   const RegInfo *RI = getRegularRegInfo(RegName);
2741   if (!RI) {
2742     Error(Loc, "invalid register name");
2743     return AMDGPU::NoRegister;
2744   }
2745 
2746   Tokens.push_back(getToken());
2747   lex(); // skip register name
2748 
2749   RegKind = RI->Kind;
2750   StringRef RegSuffix = RegName.substr(RI->Name.size());
2751   if (!RegSuffix.empty()) {
2752     // Single 32-bit register: vXX.
2753     if (!getRegNum(RegSuffix, RegNum)) {
2754       Error(Loc, "invalid register index");
2755       return AMDGPU::NoRegister;
2756     }
2757     RegWidth = 32;
2758   } else {
2759     // Range of registers: v[XX:YY]. ":YY" is optional.
2760     if (!ParseRegRange(RegNum, RegWidth))
2761       return AMDGPU::NoRegister;
2762   }
2763 
2764   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2765 }
2766 
2767 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2768                                        unsigned &RegWidth,
2769                                        SmallVectorImpl<AsmToken> &Tokens) {
2770   unsigned Reg = AMDGPU::NoRegister;
2771   auto ListLoc = getLoc();
2772 
2773   if (!skipToken(AsmToken::LBrac,
2774                  "expected a register or a list of registers")) {
2775     return AMDGPU::NoRegister;
2776   }
2777 
2778   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2779 
2780   auto Loc = getLoc();
2781   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2782     return AMDGPU::NoRegister;
2783   if (RegWidth != 32) {
2784     Error(Loc, "expected a single 32-bit register");
2785     return AMDGPU::NoRegister;
2786   }
2787 
2788   for (; trySkipToken(AsmToken::Comma); ) {
2789     RegisterKind NextRegKind;
2790     unsigned NextReg, NextRegNum, NextRegWidth;
2791     Loc = getLoc();
2792 
2793     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2794                              NextRegNum, NextRegWidth,
2795                              Tokens)) {
2796       return AMDGPU::NoRegister;
2797     }
2798     if (NextRegWidth != 32) {
2799       Error(Loc, "expected a single 32-bit register");
2800       return AMDGPU::NoRegister;
2801     }
2802     if (NextRegKind != RegKind) {
2803       Error(Loc, "registers in a list must be of the same kind");
2804       return AMDGPU::NoRegister;
2805     }
2806     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2807       return AMDGPU::NoRegister;
2808   }
2809 
2810   if (!skipToken(AsmToken::RBrac,
2811                  "expected a comma or a closing square bracket")) {
2812     return AMDGPU::NoRegister;
2813   }
2814 
2815   if (isRegularReg(RegKind))
2816     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2817 
2818   return Reg;
2819 }
2820 
2821 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2822                                           unsigned &RegNum, unsigned &RegWidth,
2823                                           SmallVectorImpl<AsmToken> &Tokens) {
2824   auto Loc = getLoc();
2825   Reg = AMDGPU::NoRegister;
2826 
2827   if (isToken(AsmToken::Identifier)) {
2828     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2829     if (Reg == AMDGPU::NoRegister)
2830       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2831   } else {
2832     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2833   }
2834 
2835   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2836   if (Reg == AMDGPU::NoRegister) {
2837     assert(Parser.hasPendingError());
2838     return false;
2839   }
2840 
2841   if (!subtargetHasRegister(*TRI, Reg)) {
2842     if (Reg == AMDGPU::SGPR_NULL) {
2843       Error(Loc, "'null' operand is not supported on this GPU");
2844     } else {
2845       Error(Loc, "register not available on this GPU");
2846     }
2847     return false;
2848   }
2849 
2850   return true;
2851 }
2852 
2853 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2854                                           unsigned &RegNum, unsigned &RegWidth,
2855                                           bool RestoreOnFailure /*=false*/) {
2856   Reg = AMDGPU::NoRegister;
2857 
2858   SmallVector<AsmToken, 1> Tokens;
2859   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2860     if (RestoreOnFailure) {
2861       while (!Tokens.empty()) {
2862         getLexer().UnLex(Tokens.pop_back_val());
2863       }
2864     }
2865     return true;
2866   }
2867   return false;
2868 }
2869 
2870 std::optional<StringRef>
2871 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2872   switch (RegKind) {
2873   case IS_VGPR:
2874     return StringRef(".amdgcn.next_free_vgpr");
2875   case IS_SGPR:
2876     return StringRef(".amdgcn.next_free_sgpr");
2877   default:
2878     return std::nullopt;
2879   }
2880 }
2881 
2882 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2883   auto SymbolName = getGprCountSymbolName(RegKind);
2884   assert(SymbolName && "initializing invalid register kind");
2885   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2886   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2887 }
2888 
2889 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2890                                             unsigned DwordRegIndex,
2891                                             unsigned RegWidth) {
2892   // Symbols are only defined for GCN targets
2893   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2894     return true;
2895 
2896   auto SymbolName = getGprCountSymbolName(RegKind);
2897   if (!SymbolName)
2898     return true;
2899   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2900 
2901   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2902   int64_t OldCount;
2903 
2904   if (!Sym->isVariable())
2905     return !Error(getLoc(),
2906                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2907   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2908     return !Error(
2909         getLoc(),
2910         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2911 
2912   if (OldCount <= NewMax)
2913     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2914 
2915   return true;
2916 }
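// For example (illustrative): a use of v[10:13] (DwordRegIndex == 10,
// RegWidth == 128) gives NewMax == 13, so .amdgcn.next_free_vgpr is raised to
// 14 unless it already holds a larger value.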
2917 
2918 std::unique_ptr<AMDGPUOperand>
2919 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2920   const auto &Tok = getToken();
2921   SMLoc StartLoc = Tok.getLoc();
2922   SMLoc EndLoc = Tok.getEndLoc();
2923   RegisterKind RegKind;
2924   unsigned Reg, RegNum, RegWidth;
2925 
2926   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2927     return nullptr;
2928   }
2929   if (isHsaAbi(getSTI())) {
2930     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2931       return nullptr;
2932   } else
2933     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2934   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2935 }
2936 
2937 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
2938                                       bool HasSP3AbsModifier, bool HasLit) {
2939   // TODO: add syntactic sugar for 1/(2*PI)
2940 
2941   if (isRegister())
2942     return ParseStatus::NoMatch;
2943   assert(!isModifier());
2944 
2945   if (!HasLit) {
2946     HasLit = trySkipId("lit");
2947     if (HasLit) {
2948       if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
2949         return ParseStatus::Failure;
2950       ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
2951       if (S.isSuccess() &&
2952           !skipToken(AsmToken::RParen, "expected closing parentheses"))
2953         return ParseStatus::Failure;
2954       return S;
2955     }
2956   }
2957 
2958   const auto& Tok = getToken();
2959   const auto& NextTok = peekToken();
2960   bool IsReal = Tok.is(AsmToken::Real);
2961   SMLoc S = getLoc();
2962   bool Negate = false;
2963 
2964   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2965     lex();
2966     IsReal = true;
2967     Negate = true;
2968   }
2969 
2970   AMDGPUOperand::Modifiers Mods;
2971   Mods.Lit = HasLit;
2972 
2973   if (IsReal) {
2974     // Floating-point expressions are not supported;
2975     // only floating-point literals with an optional
2976     // sign are allowed.
2977 
2978     StringRef Num = getTokenStr();
2979     lex();
2980 
2981     APFloat RealVal(APFloat::IEEEdouble());
2982     auto roundMode = APFloat::rmNearestTiesToEven;
2983     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
2984       return ParseStatus::Failure;
2985     if (Negate)
2986       RealVal.changeSign();
2987 
2988     Operands.push_back(
2989       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2990                                AMDGPUOperand::ImmTyNone, true));
2991     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2992     Op.setModifiers(Mods);
2993 
2994     return ParseStatus::Success;
2995 
2996   } else {
2997     int64_t IntVal;
2998     const MCExpr *Expr;
2999     SMLoc S = getLoc();
3000 
3001     if (HasSP3AbsModifier) {
3002       // This is a workaround for handling expressions
3003       // as arguments of SP3 'abs' modifier, for example:
3004       //     |1.0|
3005       //     |-1|
3006       //     |1+x|
3007       // This syntax is not compatible with syntax of standard
3008       // MC expressions (due to the trailing '|').
3009       SMLoc EndLoc;
3010       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3011         return ParseStatus::Failure;
3012     } else {
3013       if (Parser.parseExpression(Expr))
3014         return ParseStatus::Failure;
3015     }
3016 
3017     if (Expr->evaluateAsAbsolute(IntVal)) {
3018       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3019       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3020       Op.setModifiers(Mods);
3021     } else {
3022       if (HasLit)
3023         return ParseStatus::NoMatch;
3024       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3025     }
3026 
3027     return ParseStatus::Success;
3028   }
3029 
3030   return ParseStatus::NoMatch;
3031 }
3032 
3033 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3034   if (!isRegister())
3035     return ParseStatus::NoMatch;
3036 
3037   if (auto R = parseRegister()) {
3038     assert(R->isReg());
3039     Operands.push_back(std::move(R));
3040     return ParseStatus::Success;
3041   }
3042   return ParseStatus::Failure;
3043 }
3044 
3045 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3046                                            bool HasSP3AbsMod, bool HasLit) {
3047   ParseStatus Res = parseReg(Operands);
3048   if (!Res.isNoMatch())
3049     return Res;
3050   if (isModifier())
3051     return ParseStatus::NoMatch;
3052   return parseImm(Operands, HasSP3AbsMod, HasLit);
3053 }
3054 
3055 bool
3056 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3057   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3058     const auto &str = Token.getString();
3059     return str == "abs" || str == "neg" || str == "sext";
3060   }
3061   return false;
3062 }
3063 
3064 bool
3065 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3066   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3067 }
3068 
3069 bool
3070 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3071   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3072 }
3073 
3074 bool
3075 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3076   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3077 }
3078 
3079 // Check if this is an operand modifier or an opcode modifier
3080 // which may look like an expression but is not. We should
3081 // avoid parsing these modifiers as expressions. Currently
3082 // recognized sequences are:
3083 //   |...|
3084 //   abs(...)
3085 //   neg(...)
3086 //   sext(...)
3087 //   -reg
3088 //   -|...|
3089 //   -abs(...)
3090 //   name:...
3091 //
3092 bool
3093 AMDGPUAsmParser::isModifier() {
3094 
3095   AsmToken Tok = getToken();
3096   AsmToken NextToken[2];
3097   peekTokens(NextToken);
3098 
3099   return isOperandModifier(Tok, NextToken[0]) ||
3100          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3101          isOpcodeModifierWithVal(Tok, NextToken[0]);
3102 }
3103 
3104 // Check if the current token is an SP3 'neg' modifier.
3105 // Currently this modifier is allowed in the following context:
3106 //
3107 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3108 // 2. Before an 'abs' modifier: -abs(...)
3109 // 3. Before an SP3 'abs' modifier: -|...|
3110 //
3111 // In all other cases "-" is handled as part
3112 // of an expression that follows the sign.
3113 //
3114 // Note: When "-" is followed by an integer literal N,
3115 // this is interpreted as integer negation rather
3116 // than a floating-point NEG modifier applied to N.
3117 // Besides being counter-intuitive, such use of the floating-point
3118 // NEG modifier would have resulted in a different meaning
3119 // of integer literals used with VOP1/2/C and VOP3,
3120 // for example:
3121 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3122 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3123 // Negative fp literals with a preceding "-" are
3124 // handled likewise for uniformity.
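// For example (illustrative):
//    v_exp_f32_e64 v5, -1.0 // parsed as the value -1.0,
//                           // not as 1.0 with a NEG modifier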
3125 //
3126 bool
3127 AMDGPUAsmParser::parseSP3NegModifier() {
3128 
3129   AsmToken NextToken[2];
3130   peekTokens(NextToken);
3131 
3132   if (isToken(AsmToken::Minus) &&
3133       (isRegister(NextToken[0], NextToken[1]) ||
3134        NextToken[0].is(AsmToken::Pipe) ||
3135        isId(NextToken[0], "abs"))) {
3136     lex();
3137     return true;
3138   }
3139 
3140   return false;
3141 }
3142 
3143 ParseStatus
3144 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3145                                               bool AllowImm) {
3146   bool Neg, SP3Neg;
3147   bool Abs, SP3Abs;
3148   bool Lit;
3149   SMLoc Loc;
3150 
3151   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3152   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3153     return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3154 
3155   SP3Neg = parseSP3NegModifier();
3156 
3157   Loc = getLoc();
3158   Neg = trySkipId("neg");
3159   if (Neg && SP3Neg)
3160     return Error(Loc, "expected register or immediate");
3161   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3162     return ParseStatus::Failure;
3163 
3164   Abs = trySkipId("abs");
3165   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3166     return ParseStatus::Failure;
3167 
3168   Lit = trySkipId("lit");
3169   if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3170     return ParseStatus::Failure;
3171 
3172   Loc = getLoc();
3173   SP3Abs = trySkipToken(AsmToken::Pipe);
3174   if (Abs && SP3Abs)
3175     return Error(Loc, "expected register or immediate");
3176 
3177   ParseStatus Res;
3178   if (AllowImm) {
3179     Res = parseRegOrImm(Operands, SP3Abs, Lit);
3180   } else {
3181     Res = parseReg(Operands);
3182   }
3183   if (!Res.isSuccess())
3184     return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3185 
3186   if (Lit && !Operands.back()->isImm())
3187     Error(Loc, "expected immediate with lit modifier");
3188 
3189   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3190     return ParseStatus::Failure;
3191   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3192     return ParseStatus::Failure;
3193   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3194     return ParseStatus::Failure;
3195   if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3196     return ParseStatus::Failure;
3197 
3198   AMDGPUOperand::Modifiers Mods;
3199   Mods.Abs = Abs || SP3Abs;
3200   Mods.Neg = Neg || SP3Neg;
3201   Mods.Lit = Lit;
3202 
3203   if (Mods.hasFPModifiers() || Lit) {
3204     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3205     if (Op.isExpr())
3206       return Error(Op.getStartLoc(), "expected an absolute expression");
3207     Op.setModifiers(Mods);
3208   }
3209   return ParseStatus::Success;
3210 }
3211 
3212 ParseStatus
3213 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3214                                                bool AllowImm) {
3215   bool Sext = trySkipId("sext");
3216   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3217     return ParseStatus::Failure;
3218 
3219   ParseStatus Res;
3220   if (AllowImm) {
3221     Res = parseRegOrImm(Operands);
3222   } else {
3223     Res = parseReg(Operands);
3224   }
3225   if (!Res.isSuccess())
3226     return Sext ? ParseStatus::Failure : Res;
3227 
3228   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3229     return ParseStatus::Failure;
3230 
3231   AMDGPUOperand::Modifiers Mods;
3232   Mods.Sext = Sext;
3233 
3234   if (Mods.hasIntModifiers()) {
3235     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3236     if (Op.isExpr())
3237       return Error(Op.getStartLoc(), "expected an absolute expression");
3238     Op.setModifiers(Mods);
3239   }
3240 
3241   return ParseStatus::Success;
3242 }
3243 
3244 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3245   return parseRegOrImmWithFPInputMods(Operands, false);
3246 }
3247 
3248 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3249   return parseRegOrImmWithIntInputMods(Operands, false);
3250 }
3251 
3252 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3253   auto Loc = getLoc();
3254   if (trySkipId("off")) {
3255     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3256                                                 AMDGPUOperand::ImmTyOff, false));
3257     return ParseStatus::Success;
3258   }
3259 
3260   if (!isRegister())
3261     return ParseStatus::NoMatch;
3262 
3263   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3264   if (Reg) {
3265     Operands.push_back(std::move(Reg));
3266     return ParseStatus::Success;
3267   }
3268 
3269   return ParseStatus::Failure;
3270 }
3271 
3272 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3273   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3274 
3275   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3276       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3277       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3278       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3279     return Match_InvalidOperand;
3280 
3281   if ((TSFlags & SIInstrFlags::VOP3) &&
3282       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3283       getForcedEncodingSize() != 64)
3284     return Match_PreferE32;
3285 
3286   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3287       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3288     // v_mac_f32/16 allow only dst_sel == DWORD;
3289     auto OpNum =
3290         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3291     const auto &Op = Inst.getOperand(OpNum);
3292     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3293       return Match_InvalidOperand;
3294     }
3295   }
3296 
3297   return Match_Success;
3298 }
3299 
3300 static ArrayRef<unsigned> getAllVariants() {
3301   static const unsigned Variants[] = {
3302     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3303     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3304     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3305   };
3306 
3307   return ArrayRef(Variants);
3308 }
3309 
3310 // What asm variants we should check
3311 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3312   if (isForcedDPP() && isForcedVOP3()) {
3313     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3314     return ArrayRef(Variants);
3315   }
3316   if (getForcedEncodingSize() == 32) {
3317     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3318     return ArrayRef(Variants);
3319   }
3320 
3321   if (isForcedVOP3()) {
3322     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3323     return ArrayRef(Variants);
3324   }
3325 
3326   if (isForcedSDWA()) {
3327     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3328                                         AMDGPUAsmVariants::SDWA9};
3329     return ArrayRef(Variants);
3330   }
3331 
3332   if (isForcedDPP()) {
3333     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3334     return ArrayRef(Variants);
3335   }
3336 
3337   return getAllVariants();
3338 }
3339 
3340 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3341   if (isForcedDPP() && isForcedVOP3())
3342     return "e64_dpp";
3343 
3344   if (getForcedEncodingSize() == 32)
3345     return "e32";
3346 
3347   if (isForcedVOP3())
3348     return "e64";
3349 
3350   if (isForcedSDWA())
3351     return "sdwa";
3352 
3353   if (isForcedDPP())
3354     return "dpp";
3355 
3356   return "";
3357 }
3358 
3359 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3360   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3361   for (MCPhysReg Reg : Desc.implicit_uses()) {
3362     switch (Reg) {
3363     case AMDGPU::FLAT_SCR:
3364     case AMDGPU::VCC:
3365     case AMDGPU::VCC_LO:
3366     case AMDGPU::VCC_HI:
3367     case AMDGPU::M0:
3368       return Reg;
3369     default:
3370       break;
3371     }
3372   }
3373   return AMDGPU::NoRegister;
3374 }
3375 
3376 // NB: This code is correct only when used to check constant
3377 // bus limitations because GFX7 supports no f16 inline constants.
3378 // Note that there are no cases when a GFX7 opcode violates
3379 // constant bus limitations due to the use of an f16 constant.
3380 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3381                                        unsigned OpIdx) const {
3382   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3383 
3384   if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3385       AMDGPU::isKImmOperand(Desc, OpIdx)) {
3386     return false;
3387   }
3388 
3389   const MCOperand &MO = Inst.getOperand(OpIdx);
3390 
3391   int64_t Val = MO.getImm();
3392   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3393 
3394   switch (OpSize) { // expected operand size
3395   case 8:
3396     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3397   case 4:
3398     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3399   case 2: {
3400     const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3401     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3402         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3403         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3404       return AMDGPU::isInlinableIntLiteral(Val);
3405 
3406     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3407         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3408         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3409       return AMDGPU::isInlinableLiteralV2I16(Val);
3410 
3411     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3412         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3413         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3414       return AMDGPU::isInlinableLiteralV2F16(Val);
3415 
3416     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3417   }
3418   default:
3419     llvm_unreachable("invalid operand size");
3420   }
3421 }
3422 
3423 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3424   if (!isGFX10Plus())
3425     return 1;
3426 
3427   switch (Opcode) {
3428   // 64-bit shift instructions can use only one scalar value input
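  // For example (illustrative), "v_lshlrev_b64 v[0:1], s0, s[2:3]" would need
  // two scalar inputs and is rejected by the constant bus check.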
3429   case AMDGPU::V_LSHLREV_B64_e64:
3430   case AMDGPU::V_LSHLREV_B64_gfx10:
3431   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3432   case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3433   case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3434   case AMDGPU::V_LSHRREV_B64_e64:
3435   case AMDGPU::V_LSHRREV_B64_gfx10:
3436   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3437   case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3438   case AMDGPU::V_ASHRREV_I64_e64:
3439   case AMDGPU::V_ASHRREV_I64_gfx10:
3440   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3441   case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3442   case AMDGPU::V_LSHL_B64_e64:
3443   case AMDGPU::V_LSHR_B64_e64:
3444   case AMDGPU::V_ASHR_I64_e64:
3445     return 1;
3446   default:
3447     return 2;
3448   }
3449 }
3450 
3451 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3452 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3453 
3454 // Get regular operand indices in the same order as specified
3455 // in the instruction (but append mandatory literals to the end).
3456 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3457                                            bool AddMandatoryLiterals = false) {
3458 
3459   int16_t ImmIdx =
3460       AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3461 
3462   if (isVOPD(Opcode)) {
3463     int16_t ImmDeferredIdx =
3464         AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3465                              : -1;
3466 
3467     return {getNamedOperandIdx(Opcode, OpName::src0X),
3468             getNamedOperandIdx(Opcode, OpName::vsrc1X),
3469             getNamedOperandIdx(Opcode, OpName::src0Y),
3470             getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3471             ImmDeferredIdx,
3472             ImmIdx};
3473   }
3474 
3475   return {getNamedOperandIdx(Opcode, OpName::src0),
3476           getNamedOperandIdx(Opcode, OpName::src1),
3477           getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3478 }
3479 
3480 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3481   const MCOperand &MO = Inst.getOperand(OpIdx);
3482   if (MO.isImm()) {
3483     return !isInlineConstant(Inst, OpIdx);
3484   } else if (MO.isReg()) {
3485     auto Reg = MO.getReg();
3486     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3487     auto PReg = mc2PseudoReg(Reg);
3488     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3489   } else {
3490     return true;
3491   }
3492 }
3493 
3494 bool AMDGPUAsmParser::validateConstantBusLimitations(
3495     const MCInst &Inst, const OperandVector &Operands) {
3496   const unsigned Opcode = Inst.getOpcode();
3497   const MCInstrDesc &Desc = MII.get(Opcode);
3498   unsigned LastSGPR = AMDGPU::NoRegister;
3499   unsigned ConstantBusUseCount = 0;
3500   unsigned NumLiterals = 0;
3501   unsigned LiteralSize;
3502 
3503   if (!(Desc.TSFlags &
3504         (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3505          SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3506       !isVOPD(Opcode))
3507     return true;
3508 
3509   // Check special imm operands (used by madmk, etc)
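  // For example (illustrative), "v_madmk_f32 v0, v1, 0x42800000, v2" carries a
  // mandatory 32-bit literal which is counted here as a literal use.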
3510   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3511     ++NumLiterals;
3512     LiteralSize = 4;
3513   }
3514 
3515   SmallDenseSet<unsigned> SGPRsUsed;
3516   unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3517   if (SGPRUsed != AMDGPU::NoRegister) {
3518     SGPRsUsed.insert(SGPRUsed);
3519     ++ConstantBusUseCount;
3520   }
3521 
3522   OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3523 
3524   for (int OpIdx : OpIndices) {
3525     if (OpIdx == -1)
3526       continue;
3527 
3528     const MCOperand &MO = Inst.getOperand(OpIdx);
3529     if (usesConstantBus(Inst, OpIdx)) {
3530       if (MO.isReg()) {
3531         LastSGPR = mc2PseudoReg(MO.getReg());
3532         // Pairs of registers with a partial intersection like these
3533         //   s0, s[0:1]
3534         //   flat_scratch_lo, flat_scratch
3535         //   flat_scratch_lo, flat_scratch_hi
3536         // are theoretically valid but they are disabled anyway.
3537         // Note that this code mimics SIInstrInfo::verifyInstruction
3538         if (SGPRsUsed.insert(LastSGPR).second) {
3539           ++ConstantBusUseCount;
3540         }
3541       } else { // Expression or a literal
3542 
3543         if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3544           continue; // special operand like VINTERP attr_chan
3545 
3546         // An instruction may use only one literal.
3547         // This has been validated in the previous step.
3548         // See validateVOPLiteral.
3549         // This literal may be used as more than one operand.
3550         // If all these operands are of the same size,
3551         // this literal counts as one scalar value.
3552         // Otherwise it counts as 2 scalar values.
3553         // See "GFX10 Shader Programming", section 3.6.2.3.
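        // Illustrative example (assumed syntax and literal legality):
        // "v_pk_add_f16 v0, 0x3c003c00, 0x3c003c00" reuses one literal for
        // two same-sized sources, so it counts as a single scalar value here.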
3554 
3555         unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3556         if (Size < 4)
3557           Size = 4;
3558 
3559         if (NumLiterals == 0) {
3560           NumLiterals = 1;
3561           LiteralSize = Size;
3562         } else if (LiteralSize != Size) {
3563           NumLiterals = 2;
3564         }
3565       }
3566     }
3567   }
3568   ConstantBusUseCount += NumLiterals;
3569 
3570   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3571     return true;
3572 
3573   SMLoc LitLoc = getLitLoc(Operands);
3574   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3575   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3576   Error(Loc, "invalid operand (violates constant bus restrictions)");
3577   return false;
3578 }
3579 
3580 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3581     const MCInst &Inst, const OperandVector &Operands) {
3582 
3583   const unsigned Opcode = Inst.getOpcode();
3584   if (!isVOPD(Opcode))
3585     return true;
3586 
3587   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3588 
3589   auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3590     const MCOperand &Opr = Inst.getOperand(OperandIdx);
3591     return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3592                ? Opr.getReg()
3593                : MCRegister::NoRegister;
3594   };
3595 
3596   // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source-cache.
3597   bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3598 
3599   const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3600   auto InvalidCompOprIdx =
3601       InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3602   if (!InvalidCompOprIdx)
3603     return true;
3604 
3605   auto CompOprIdx = *InvalidCompOprIdx;
3606   auto ParsedIdx =
3607       std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3608                InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3609   assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3610 
3611   auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3612   if (CompOprIdx == VOPD::Component::DST) {
3613     Error(Loc, "one dst register must be even and the other odd");
3614   } else {
3615     auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3616     Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3617                    " operands must use different VGPR banks");
3618   }
3619 
3620   return false;
3621 }
3622 
3623 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3624 
3625   const unsigned Opc = Inst.getOpcode();
3626   const MCInstrDesc &Desc = MII.get(Opc);
3627 
3628   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3629     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3630     assert(ClampIdx != -1);
3631     return Inst.getOperand(ClampIdx).getImm() == 0;
3632   }
3633 
3634   return true;
3635 }
3636 
3637 constexpr uint64_t MIMGFlags =
3638     SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3639 
3640 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3641                                            const SMLoc &IDLoc) {
3642 
3643   const unsigned Opc = Inst.getOpcode();
3644   const MCInstrDesc &Desc = MII.get(Opc);
3645 
3646   if ((Desc.TSFlags & MIMGFlags) == 0)
3647     return true;
3648 
3649   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3650   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3651   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3652 
3653   assert(VDataIdx != -1);
3654 
3655   if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3656     return true;
3657 
3658   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3659   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3660   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3661   if (DMask == 0)
3662     DMask = 1;
3663 
3664   bool IsPackedD16 = false;
3665   unsigned DataSize =
3666       (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3667   if (hasPackedD16()) {
3668     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3669     IsPackedD16 = D16Idx >= 0;
3670     if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3671       DataSize = (DataSize + 1) / 2;
3672   }
3673 
3674   if ((VDataSize / 4) == DataSize + TFESize)
3675     return true;
3676 
3677   StringRef Modifiers;
3678   if (isGFX90A())
3679     Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3680   else
3681     Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3682 
3683   Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3684   return false;
3685 }
3686 
3687 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3688                                            const SMLoc &IDLoc) {
3689   const unsigned Opc = Inst.getOpcode();
3690   const MCInstrDesc &Desc = MII.get(Opc);
3691 
3692   if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3693     return true;
3694 
3695   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3696 
3697   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3698       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3699   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3700   int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
3701                                                      : AMDGPU::OpName::rsrc;
3702   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3703   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3704   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3705 
3706   assert(VAddr0Idx != -1);
3707   assert(SrsrcIdx != -1);
3708   assert(SrsrcIdx > VAddr0Idx);
3709 
3710   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3711   if (BaseOpcode->BVH) {
3712     if (IsA16 == BaseOpcode->A16)
3713       return true;
3714     Error(IDLoc, "image address size does not match a16");
3715     return false;
3716   }
3717 
3718   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3719   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3720   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3721   unsigned ActualAddrSize =
3722       IsNSA ? SrsrcIdx - VAddr0Idx
3723             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3724 
3725   unsigned ExpectedAddrSize =
3726       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3727 
3728   if (IsNSA) {
3729     if (hasPartialNSAEncoding() &&
3730         ExpectedAddrSize >
3731             getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3732       int VAddrLastIdx = SrsrcIdx - 1;
3733       unsigned VAddrLastSize =
3734           AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3735 
3736       ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3737     }
3738   } else {
3739     if (ExpectedAddrSize > 12)
3740       ExpectedAddrSize = 16;
3741 
3742     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3743     // This provides backward compatibility for assembly created
3744     // before 160b/192b/224b types were directly supported.
3745     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3746       return true;
3747   }
3748 
3749   if (ActualAddrSize == ExpectedAddrSize)
3750     return true;
3751 
3752   Error(IDLoc, "image address size does not match dim and a16");
3753   return false;
3754 }
3755 
3756 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3757 
3758   const unsigned Opc = Inst.getOpcode();
3759   const MCInstrDesc &Desc = MII.get(Opc);
3760 
3761   if ((Desc.TSFlags & MIMGFlags) == 0)
3762     return true;
3763   if (!Desc.mayLoad() || !Desc.mayStore())
3764     return true; // Not atomic
3765 
3766   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3767   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3768 
3769   // This is an incomplete check because image_atomic_cmpswap
3770   // may only use 0x3 and 0xf while other atomic operations
3771   // may use 0x1 and 0x3. However, these limitations are
3772   // verified when we check that dmask matches dst size.
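  // For example (illustrative), image_atomic_cmpswap uses dmask:0x3 (32-bit)
  // or dmask:0xf (64-bit), while other image atomics use dmask:0x1 or 0x3.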
3773   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3774 }
3775 
3776 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3777 
3778   const unsigned Opc = Inst.getOpcode();
3779   const MCInstrDesc &Desc = MII.get(Opc);
3780 
3781   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3782     return true;
3783 
3784   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3785   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3786 
3787   // GATHER4 instructions use dmask in a different fashion compared to
3788   // other MIMG instructions. The only useful DMASK values are
3789   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3790   // (red,red,red,red) etc.) The ISA document doesn't mention
3791   // this.
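  // For example (illustrative), "image_gather4 v[0:3], ... dmask:0x2" gathers
  // the green component of four texels into the destination.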
3792   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3793 }
3794 
3795 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3796   const unsigned Opc = Inst.getOpcode();
3797   const MCInstrDesc &Desc = MII.get(Opc);
3798 
3799   if ((Desc.TSFlags & MIMGFlags) == 0)
3800     return true;
3801 
3802   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3803   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3804       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3805 
3806   if (!BaseOpcode->MSAA)
3807     return true;
3808 
3809   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3810   assert(DimIdx != -1);
3811 
3812   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3813   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3814 
3815   return DimInfo->MSAA;
3816 }
3817 
3818 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3819 {
3820   switch (Opcode) {
3821   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3822   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3823   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3824     return true;
3825   default:
3826     return false;
3827   }
3828 }
3829 
3830 // movrels* opcodes should only allow VGPRs as src0.
3831 // This is specified in the .td description for vop1/vop3,
3832 // but sdwa is handled differently. See isSDWAOperand.
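// For example (illustrative), "v_movrels_b32_sdwa v0, s0" would be rejected
// below with "source operand must be a VGPR".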
3833 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3834                                       const OperandVector &Operands) {
3835 
3836   const unsigned Opc = Inst.getOpcode();
3837   const MCInstrDesc &Desc = MII.get(Opc);
3838 
3839   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3840     return true;
3841 
3842   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3843   assert(Src0Idx != -1);
3844 
3845   SMLoc ErrLoc;
3846   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3847   if (Src0.isReg()) {
3848     auto Reg = mc2PseudoReg(Src0.getReg());
3849     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3850     if (!isSGPR(Reg, TRI))
3851       return true;
3852     ErrLoc = getRegLoc(Reg, Operands);
3853   } else {
3854     ErrLoc = getConstLoc(Operands);
3855   }
3856 
3857   Error(ErrLoc, "source operand must be a VGPR");
3858   return false;
3859 }
3860 
3861 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3862                                           const OperandVector &Operands) {
3863 
3864   const unsigned Opc = Inst.getOpcode();
3865 
3866   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3867     return true;
3868 
3869   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3870   assert(Src0Idx != -1);
3871 
3872   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3873   if (!Src0.isReg())
3874     return true;
3875 
3876   auto Reg = mc2PseudoReg(Src0.getReg());
3877   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3878   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3879     Error(getRegLoc(Reg, Operands),
3880           "source operand must be either a VGPR or an inline constant");
3881     return false;
3882   }
3883 
3884   return true;
3885 }
3886 
3887 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3888                                       const OperandVector &Operands) {
3889   unsigned Opcode = Inst.getOpcode();
3890   const MCInstrDesc &Desc = MII.get(Opcode);
3891 
3892   if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3893       !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3894     return true;
3895 
3896   const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3897   if (Src2Idx == -1)
3898     return true;
3899 
3900   if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3901     Error(getConstLoc(Operands),
3902           "inline constants are not allowed for this operand");
3903     return false;
3904   }
3905 
3906   return true;
3907 }
3908 
3909 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3910                                    const OperandVector &Operands) {
3911   const unsigned Opc = Inst.getOpcode();
3912   const MCInstrDesc &Desc = MII.get(Opc);
3913 
3914   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3915     return true;
3916 
3917   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3918   if (Src2Idx == -1)
3919     return true;
3920 
3921   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3922   if (!Src2.isReg())
3923     return true;
3924 
3925   MCRegister Src2Reg = Src2.getReg();
3926   MCRegister DstReg = Inst.getOperand(0).getReg();
3927   if (Src2Reg == DstReg)
3928     return true;
3929 
3930   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3931   if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3932     return true;
3933 
3934   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3935     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3936           "source 2 operand must not partially overlap with dst");
3937     return false;
3938   }
3939 
3940   return true;
3941 }
3942 
3943 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3944   switch (Inst.getOpcode()) {
3945   default:
3946     return true;
3947   case V_DIV_SCALE_F32_gfx6_gfx7:
3948   case V_DIV_SCALE_F32_vi:
3949   case V_DIV_SCALE_F32_gfx10:
3950   case V_DIV_SCALE_F64_gfx6_gfx7:
3951   case V_DIV_SCALE_F64_vi:
3952   case V_DIV_SCALE_F64_gfx10:
3953     break;
3954   }
3955 
3956   // TODO: Check that src0 = src1 or src2.
3957 
3958   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3959                     AMDGPU::OpName::src1_modifiers,
3960                     AMDGPU::OpName::src2_modifiers}) {
3961     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3962             .getImm() &
3963         SISrcMods::ABS) {
3964       return false;
3965     }
3966   }
3967 
3968   return true;
3969 }
3970 
3971 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3972 
3973   const unsigned Opc = Inst.getOpcode();
3974   const MCInstrDesc &Desc = MII.get(Opc);
3975 
3976   if ((Desc.TSFlags & MIMGFlags) == 0)
3977     return true;
3978 
3979   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3980   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3981     if (isCI() || isSI())
3982       return false;
3983   }
3984 
3985   return true;
3986 }
3987 
3988 static bool IsRevOpcode(const unsigned Opcode)
3989 {
3990   switch (Opcode) {
3991   case AMDGPU::V_SUBREV_F32_e32:
3992   case AMDGPU::V_SUBREV_F32_e64:
3993   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3994   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3995   case AMDGPU::V_SUBREV_F32_e32_vi:
3996   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3997   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3998   case AMDGPU::V_SUBREV_F32_e64_vi:
3999 
4000   case AMDGPU::V_SUBREV_CO_U32_e32:
4001   case AMDGPU::V_SUBREV_CO_U32_e64:
4002   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4003   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4004 
4005   case AMDGPU::V_SUBBREV_U32_e32:
4006   case AMDGPU::V_SUBBREV_U32_e64:
4007   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4008   case AMDGPU::V_SUBBREV_U32_e32_vi:
4009   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4010   case AMDGPU::V_SUBBREV_U32_e64_vi:
4011 
4012   case AMDGPU::V_SUBREV_U32_e32:
4013   case AMDGPU::V_SUBREV_U32_e64:
4014   case AMDGPU::V_SUBREV_U32_e32_gfx9:
4015   case AMDGPU::V_SUBREV_U32_e32_vi:
4016   case AMDGPU::V_SUBREV_U32_e64_gfx9:
4017   case AMDGPU::V_SUBREV_U32_e64_vi:
4018 
4019   case AMDGPU::V_SUBREV_F16_e32:
4020   case AMDGPU::V_SUBREV_F16_e64:
4021   case AMDGPU::V_SUBREV_F16_e32_gfx10:
4022   case AMDGPU::V_SUBREV_F16_e32_vi:
4023   case AMDGPU::V_SUBREV_F16_e64_gfx10:
4024   case AMDGPU::V_SUBREV_F16_e64_vi:
4025 
4026   case AMDGPU::V_SUBREV_U16_e32:
4027   case AMDGPU::V_SUBREV_U16_e64:
4028   case AMDGPU::V_SUBREV_U16_e32_vi:
4029   case AMDGPU::V_SUBREV_U16_e64_vi:
4030 
4031   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4032   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4033   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4034 
4035   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4036   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4037 
4038   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4039   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4040 
4041   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4042   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4043 
4044   case AMDGPU::V_LSHRREV_B32_e32:
4045   case AMDGPU::V_LSHRREV_B32_e64:
4046   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4047   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4048   case AMDGPU::V_LSHRREV_B32_e32_vi:
4049   case AMDGPU::V_LSHRREV_B32_e64_vi:
4050   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4051   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4052 
4053   case AMDGPU::V_ASHRREV_I32_e32:
4054   case AMDGPU::V_ASHRREV_I32_e64:
4055   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4056   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4057   case AMDGPU::V_ASHRREV_I32_e32_vi:
4058   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4059   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4060   case AMDGPU::V_ASHRREV_I32_e64_vi:
4061 
4062   case AMDGPU::V_LSHLREV_B32_e32:
4063   case AMDGPU::V_LSHLREV_B32_e64:
4064   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4065   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4066   case AMDGPU::V_LSHLREV_B32_e32_vi:
4067   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4068   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4069   case AMDGPU::V_LSHLREV_B32_e64_vi:
4070 
4071   case AMDGPU::V_LSHLREV_B16_e32:
4072   case AMDGPU::V_LSHLREV_B16_e64:
4073   case AMDGPU::V_LSHLREV_B16_e32_vi:
4074   case AMDGPU::V_LSHLREV_B16_e64_vi:
4075   case AMDGPU::V_LSHLREV_B16_gfx10:
4076 
4077   case AMDGPU::V_LSHRREV_B16_e32:
4078   case AMDGPU::V_LSHRREV_B16_e64:
4079   case AMDGPU::V_LSHRREV_B16_e32_vi:
4080   case AMDGPU::V_LSHRREV_B16_e64_vi:
4081   case AMDGPU::V_LSHRREV_B16_gfx10:
4082 
4083   case AMDGPU::V_ASHRREV_I16_e32:
4084   case AMDGPU::V_ASHRREV_I16_e64:
4085   case AMDGPU::V_ASHRREV_I16_e32_vi:
4086   case AMDGPU::V_ASHRREV_I16_e64_vi:
4087   case AMDGPU::V_ASHRREV_I16_gfx10:
4088 
4089   case AMDGPU::V_LSHLREV_B64_e64:
4090   case AMDGPU::V_LSHLREV_B64_gfx10:
4091   case AMDGPU::V_LSHLREV_B64_vi:
4092 
4093   case AMDGPU::V_LSHRREV_B64_e64:
4094   case AMDGPU::V_LSHRREV_B64_gfx10:
4095   case AMDGPU::V_LSHRREV_B64_vi:
4096 
4097   case AMDGPU::V_ASHRREV_I64_e64:
4098   case AMDGPU::V_ASHRREV_I64_gfx10:
4099   case AMDGPU::V_ASHRREV_I64_vi:
4100 
4101   case AMDGPU::V_PK_LSHLREV_B16:
4102   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4103   case AMDGPU::V_PK_LSHLREV_B16_vi:
4104 
4105   case AMDGPU::V_PK_LSHRREV_B16:
4106   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4107   case AMDGPU::V_PK_LSHRREV_B16_vi:
4108   case AMDGPU::V_PK_ASHRREV_I16:
4109   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4110   case AMDGPU::V_PK_ASHRREV_I16_vi:
4111     return true;
4112   default:
4113     return false;
4114   }
4115 }
4116 
4117 std::optional<StringRef>
4118 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4119 
4120   using namespace SIInstrFlags;
4121   const unsigned Opcode = Inst.getOpcode();
4122   const MCInstrDesc &Desc = MII.get(Opcode);
4123 
4124   // The lds_direct register is defined so that it can be used
4125   // with 9-bit operands only. Ignore encodings that do not accept such operands.
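  // For example (illustrative), "v_mov_b32 v1, lds_direct" uses lds_direct as
  // src0 and may be accepted, while using it as src1 is rejected below.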
4126   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4127   if ((Desc.TSFlags & Enc) == 0)
4128     return std::nullopt;
4129 
4130   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4131     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4132     if (SrcIdx == -1)
4133       break;
4134     const auto &Src = Inst.getOperand(SrcIdx);
4135     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4136 
4137       if (isGFX90A() || isGFX11Plus())
4138         return StringRef("lds_direct is not supported on this GPU");
4139 
4140       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4141         return StringRef("lds_direct cannot be used with this instruction");
4142 
4143       if (SrcName != OpName::src0)
4144         return StringRef("lds_direct may be used as src0 only");
4145     }
4146   }
4147 
4148   return std::nullopt;
4149 }
4150 
4151 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4152   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4153     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4154     if (Op.isFlatOffset())
4155       return Op.getStartLoc();
4156   }
4157   return getLoc();
4158 }
4159 
4160 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4161                                      const OperandVector &Operands) {
4162   auto Opcode = Inst.getOpcode();
4163   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4164   if (OpNum == -1)
4165     return true;
4166 
4167   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4168   if ((TSFlags & SIInstrFlags::FLAT))
4169     return validateFlatOffset(Inst, Operands);
4170 
4171   if ((TSFlags & SIInstrFlags::SMRD))
4172     return validateSMEMOffset(Inst, Operands);
4173 
4174   const auto &Op = Inst.getOperand(OpNum);
4175   if (isGFX12Plus() &&
4176       (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4177     const unsigned OffsetSize = 24;
4178     if (!isIntN(OffsetSize, Op.getImm())) {
4179       Error(getFlatOffsetLoc(Operands),
4180             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4181       return false;
4182     }
4183   } else {
4184     const unsigned OffsetSize = 16;
4185     if (!isUIntN(OffsetSize, Op.getImm())) {
4186       Error(getFlatOffsetLoc(Operands),
4187             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4188       return false;
4189     }
4190   }
4191   return true;
4192 }
4193 
4194 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4195                                          const OperandVector &Operands) {
4196   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4197   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4198     return true;
4199 
4200   auto Opcode = Inst.getOpcode();
4201   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4202   assert(OpNum != -1);
4203 
4204   const auto &Op = Inst.getOperand(OpNum);
4205   if (!hasFlatOffsets() && Op.getImm() != 0) {
4206     Error(getFlatOffsetLoc(Operands),
4207           "flat offset modifier is not supported on this GPU");
4208     return false;
4209   }
4210 
4211   // For pre-GFX12 FLAT instructions the offset must be positive;
4212   // MSB is ignored and forced to zero.
4213   unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4214   bool AllowNegative =
4215       (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4216       isGFX12Plus();
4217   if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4218     Error(getFlatOffsetLoc(Operands),
4219           Twine("expected a ") +
4220               (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4221                              : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4222     return false;
4223   }
4224 
4225   return true;
4226 }
4227 
4228 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4229   // Start with the second operand because the SMEM offset cannot be dst or src0.
4230   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4231     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4232     if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4233       return Op.getStartLoc();
4234   }
4235   return getLoc();
4236 }
4237 
4238 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4239                                          const OperandVector &Operands) {
4240   if (isCI() || isSI())
4241     return true;
4242 
4243   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4244   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4245     return true;
4246 
4247   auto Opcode = Inst.getOpcode();
4248   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4249   if (OpNum == -1)
4250     return true;
4251 
4252   const auto &Op = Inst.getOperand(OpNum);
4253   if (!Op.isImm())
4254     return true;
4255 
4256   uint64_t Offset = Op.getImm();
4257   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4258   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4259       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4260     return true;
4261 
4262   Error(getSMEMOffsetLoc(Operands),
4263         isGFX12Plus()          ? "expected a 24-bit signed offset"
4264         : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4265                                : "expected a 21-bit signed offset");
4266 
4267   return false;
4268 }
4269 
4270 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4271   unsigned Opcode = Inst.getOpcode();
4272   const MCInstrDesc &Desc = MII.get(Opcode);
4273   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4274     return true;
4275 
4276   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4277   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4278 
4279   const int OpIndices[] = { Src0Idx, Src1Idx };
4280 
4281   unsigned NumExprs = 0;
4282   unsigned NumLiterals = 0;
4283   uint32_t LiteralValue;
4284 
4285   for (int OpIdx : OpIndices) {
4286     if (OpIdx == -1) break;
4287 
4288     const MCOperand &MO = Inst.getOperand(OpIdx);
4289     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
4290     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4291       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4292         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4293         if (NumLiterals == 0 || LiteralValue != Value) {
4294           LiteralValue = Value;
4295           ++NumLiterals;
4296         }
4297       } else if (MO.isExpr()) {
4298         ++NumExprs;
4299       }
4300     }
4301   }
4302 
4303   return NumLiterals + NumExprs <= 1;
4304 }
4305 
4306 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4307   const unsigned Opc = Inst.getOpcode();
4308   if (isPermlane16(Opc)) {
4309     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4310     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4311 
4312     if (OpSel & ~3)
4313       return false;
4314   }
4315 
4316   uint64_t TSFlags = MII.get(Opc).TSFlags;
4317 
4318   if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4319     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4320     if (OpSelIdx != -1) {
4321       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4322         return false;
4323     }
4324     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4325     if (OpSelHiIdx != -1) {
4326       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4327         return false;
4328     }
4329   }
4330 
4331   // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4332   if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4333       (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4334     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4335     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4336     if (OpSel & 3)
4337       return false;
4338   }
4339 
4340   return true;
4341 }
4342 
4343 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4344                                   const OperandVector &Operands) {
4345   const unsigned Opc = Inst.getOpcode();
4346   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4347   if (DppCtrlIdx >= 0) {
4348     unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4349 
4350     if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4351         AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4352       // DP ALU DPP is supported for row_newbcast only on GFX9*
4353       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4354       Error(S, "DP ALU dpp only supports row_newbcast");
4355       return false;
4356     }
4357   }
4358 
4359   int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4360   bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4361 
4362   if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4363     int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4364     if (Src1Idx >= 0) {
4365       const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4366       const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4367       if (Src1.isImm() ||
4368           (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
4369         AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
4370         Error(Op.getStartLoc(), "invalid operand for instruction");
4371         return false;
4372       }
4373     }
4374   }
4375 
4376   return true;
4377 }
4378 
4379 // Check if the VCC register matches the wavefront size
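// For example (illustrative), a wave32 target expects vcc_lo where a wave64
// target would use vcc for the same operand.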
4380 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4381   auto FB = getFeatureBits();
4382   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4383     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4384 }
4385 
4386 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+
4387 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4388                                          const OperandVector &Operands) {
4389   unsigned Opcode = Inst.getOpcode();
4390   const MCInstrDesc &Desc = MII.get(Opcode);
4391   bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4392   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4393       !HasMandatoryLiteral && !isVOPD(Opcode))
4394     return true;
4395 
4396   OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4397 
4398   unsigned NumExprs = 0;
4399   unsigned NumLiterals = 0;
4400   uint32_t LiteralValue;
4401 
4402   for (int OpIdx : OpIndices) {
4403     if (OpIdx == -1)
4404       continue;
4405 
4406     const MCOperand &MO = Inst.getOperand(OpIdx);
4407     if (!MO.isImm() && !MO.isExpr())
4408       continue;
4409     if (!isSISrcOperand(Desc, OpIdx))
4410       continue;
4411 
4412     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4413       uint64_t Value = static_cast<uint64_t>(MO.getImm());
4414       bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4415                     AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4416       bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4417 
4418       if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4419         Error(getLitLoc(Operands), "invalid operand for instruction");
4420         return false;
4421       }
4422 
4423       if (IsFP64 && IsValid32Op)
4424         Value = Hi_32(Value);
4425 
4426       if (NumLiterals == 0 || LiteralValue != Value) {
4427         LiteralValue = Value;
4428         ++NumLiterals;
4429       }
4430     } else if (MO.isExpr()) {
4431       ++NumExprs;
4432     }
4433   }
4434   NumLiterals += NumExprs;
4435 
4436   if (!NumLiterals)
4437     return true;
4438 
4439   if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4440     Error(getLitLoc(Operands), "literal operands are not supported");
4441     return false;
4442   }
4443 
4444   if (NumLiterals > 1) {
4445     Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4446     return false;
4447   }
4448 
4449   return true;
4450 }
4451 
4452 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4453 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4454                          const MCRegisterInfo *MRI) {
4455   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4456   if (OpIdx < 0)
4457     return -1;
4458 
4459   const MCOperand &Op = Inst.getOperand(OpIdx);
4460   if (!Op.isReg())
4461     return -1;
4462 
4463   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4464   auto Reg = Sub ? Sub : Op.getReg();
4465   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4466   return AGPR32.contains(Reg) ? 1 : 0;
4467 }
4468 
4469 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4470   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4471   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4472                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4473                   SIInstrFlags::DS)) == 0)
4474     return true;
4475 
4476   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4477                                                       : AMDGPU::OpName::vdata;
4478 
4479   const MCRegisterInfo *MRI = getMRI();
4480   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4481   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4482 
4483   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4484     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4485     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4486       return false;
4487   }
4488 
4489   auto FB = getFeatureBits();
4490   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4491     if (DataAreg < 0 || DstAreg < 0)
4492       return true;
4493     return DstAreg == DataAreg;
4494   }
4495 
4496   return DstAreg < 1 && DataAreg < 1;
4497 }
4498 
4499 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4500   auto FB = getFeatureBits();
4501   if (!FB[AMDGPU::FeatureGFX90AInsts])
4502     return true;
4503 
4504   const MCRegisterInfo *MRI = getMRI();
4505   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4506   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4507   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4508     const MCOperand &Op = Inst.getOperand(I);
4509     if (!Op.isReg())
4510       continue;
4511 
4512     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4513     if (!Sub)
4514       continue;
4515 
4516     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4517       return false;
4518     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4519       return false;
4520   }
4521 
4522   return true;
4523 }
4524 
4525 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4526   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4527     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4528     if (Op.isBLGP())
4529       return Op.getStartLoc();
4530   }
4531   return SMLoc();
4532 }
4533 
4534 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4535                                    const OperandVector &Operands) {
4536   unsigned Opc = Inst.getOpcode();
4537   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4538   if (BlgpIdx == -1)
4539     return true;
4540   SMLoc BLGPLoc = getBLGPLoc(Operands);
4541   if (!BLGPLoc.isValid())
4542     return true;
4543   bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4544   auto FB = getFeatureBits();
4545   bool UsesNeg = false;
4546   if (FB[AMDGPU::FeatureGFX940Insts]) {
4547     switch (Opc) {
4548     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4549     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4550     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4551     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4552       UsesNeg = true;
4553     }
4554   }
4555 
4556   if (IsNeg == UsesNeg)
4557     return true;
4558 
4559   Error(BLGPLoc,
4560         UsesNeg ? "invalid modifier: blgp is not supported"
4561                 : "invalid modifier: neg is not supported");
4562 
4563   return false;
4564 }
4565 
4566 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4567                                       const OperandVector &Operands) {
4568   if (!isGFX11Plus())
4569     return true;
4570 
4571   unsigned Opc = Inst.getOpcode();
4572   if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4573       Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4574       Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4575       Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4576     return true;
4577 
4578   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4579   assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4580   auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4581   if (Reg == AMDGPU::SGPR_NULL)
4582     return true;
4583 
4584   SMLoc RegLoc = getRegLoc(Reg, Operands);
4585   Error(RegLoc, "src0 must be null");
4586   return false;
4587 }
4588 
4589 bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4590                                  const OperandVector &Operands) {
4591   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4592   if ((TSFlags & SIInstrFlags::DS) == 0)
4593     return true;
4594   if (TSFlags & SIInstrFlags::GWS)
4595     return validateGWS(Inst, Operands);
4596   // Only validate GDS for non-GWS instructions.
4597   if (hasGDS())
4598     return true;
4599   int GDSIdx =
4600       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4601   if (GDSIdx < 0)
4602     return true;
4603   unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4604   if (GDS) {
4605     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4606     Error(S, "gds modifier is not supported on this GPU");
4607     return false;
4608   }
4609   return true;
4610 }
4611 
4612 // gfx90a has an undocumented limitation:
4613 // DS_GWS opcodes must use even aligned registers.
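// For example (illustrative), "ds_gws_init v1 gds" is rejected below with
// "vgpr must be even aligned", while "ds_gws_init v2 gds" is accepted.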
4614 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4615                                   const OperandVector &Operands) {
4616   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4617     return true;
4618 
4619   int Opc = Inst.getOpcode();
4620   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4621       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4622     return true;
4623 
4624   const MCRegisterInfo *MRI = getMRI();
4625   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4626   int Data0Pos =
4627       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4628   assert(Data0Pos != -1);
4629   auto Reg = Inst.getOperand(Data0Pos).getReg();
4630   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4631   if (RegIdx & 1) {
4632     SMLoc RegLoc = getRegLoc(Reg, Operands);
4633     Error(RegLoc, "vgpr must be even aligned");
4634     return false;
4635   }
4636 
4637   return true;
4638 }
4639 
4640 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4641                                             const OperandVector &Operands,
4642                                             const SMLoc &IDLoc) {
4643   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4644                                            AMDGPU::OpName::cpol);
4645   if (CPolPos == -1)
4646     return true;
4647 
4648   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4649 
4650   if (isGFX12Plus())
4651     return validateTHAndScopeBits(Inst, Operands, CPol);
4652 
4653   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4654   if (TSFlags & SIInstrFlags::SMRD) {
4655     if (CPol && (isSI() || isCI())) {
4656       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4657       Error(S, "cache policy is not supported for SMRD instructions");
4658       return false;
4659     }
4660     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4661       Error(IDLoc, "invalid cache policy for SMEM instruction");
4662       return false;
4663     }
4664   }
4665 
4666   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4667     const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4668                                       SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4669                                       SIInstrFlags::FLAT;
4670     if (!(TSFlags & AllowSCCModifier)) {
4671       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4672       StringRef CStr(S.getPointer());
4673       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4674       Error(S,
4675             "scc modifier is not supported for this instruction on this GPU");
4676       return false;
4677     }
4678   }
4679 
4680   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4681     return true;
4682 
4683   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4684     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4685       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4686                               : "instruction must use glc");
4687       return false;
4688     }
4689   } else {
4690     if (CPol & CPol::GLC) {
4691       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4692       StringRef CStr(S.getPointer());
4693       S = SMLoc::getFromPointer(
4694           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4695       Error(S, isGFX940() ? "instruction must not use sc0"
4696                           : "instruction must not use glc");
4697       return false;
4698     }
4699   }
4700 
4701   return true;
4702 }
4703 
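// On GFX12+ the cpol operand encodes a temporal hint (th:...) and a scope
// (scope:...). Check that the th value matches the instruction kind
// (load/store/atomic), that returning FLAT/MUBUF atomics use
// th:TH_ATOMIC_RETURN, and that TH_BYPASS is only paired with the system scope.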
4704 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4705                                              const OperandVector &Operands,
4706                                              const unsigned CPol) {
4707   const unsigned TH = CPol & AMDGPU::CPol::TH;
4708   const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4709 
4710   const unsigned Opcode = Inst.getOpcode();
4711   const MCInstrDesc &TID = MII.get(Opcode);
4712 
4713   auto PrintError = [&](StringRef Msg) {
4714     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4715     Error(S, Msg);
4716     return false;
4717   };
4718 
4719   if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4720       (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
4721       (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
4722     return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4723 
4724   if (TH == 0)
4725     return true;
4726 
4727   if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4728       ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4729        (TH == AMDGPU::CPol::TH_NT_HT)))
4730     return PrintError("invalid th value for SMEM instruction");
4731 
4732   if (TH == AMDGPU::CPol::TH_BYPASS) {
4733     if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
4734          CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
4735         (Scope == AMDGPU::CPol::SCOPE_SYS &&
4736          !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
4737       return PrintError("scope and th combination is not valid");
4738   }
4739 
4740   bool IsStore = TID.mayStore();
4741   bool IsAtomic =
4742       TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
4743 
4744   if (IsAtomic) {
4745     if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
4746       return PrintError("invalid th value for atomic instructions");
4747   } else if (IsStore) {
4748     if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
4749       return PrintError("invalid th value for store instructions");
4750   } else {
4751     if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
4752       return PrintError("invalid th value for load instructions");
4753   }
4754 
4755   return true;
4756 }
4757 
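// execz and vccz are no longer usable as source operands on GFX11+.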
4758 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4759   if (!isGFX11Plus())
4760     return true;
4761   for (auto &Operand : Operands) {
4762     if (!Operand->isReg())
4763       continue;
4764     unsigned Reg = Operand->getReg();
4765     if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4766       Error(getRegLoc(Reg, Operands),
4767             "execz and vccz are not supported on this GPU");
4768       return false;
4769     }
4770   }
4771   return true;
4772 }
4773 
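// An explicit tfe modifier has no meaning on MUBUF/MTBUF stores, so reject it.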
4774 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4775                                   const OperandVector &Operands) {
4776   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4777   if (Desc.mayStore() &&
4778       (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4779     SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4780     if (Loc != getInstLoc(Operands)) {
4781       Error(Loc, "TFE modifier has no meaning for store instructions");
4782       return false;
4783     }
4784   }
4785 
4786   return true;
4787 }
4788 
4789 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4790                                           const SMLoc &IDLoc,
4791                                           const OperandVector &Operands) {
4792   if (auto ErrMsg = validateLdsDirect(Inst)) {
4793     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4794     return false;
4795   }
4796   if (!validateSOPLiteral(Inst)) {
4797     Error(getLitLoc(Operands),
4798       "only one unique literal operand is allowed");
4799     return false;
4800   }
4801   if (!validateVOPLiteral(Inst, Operands)) {
4802     return false;
4803   }
4804   if (!validateConstantBusLimitations(Inst, Operands)) {
4805     return false;
4806   }
4807   if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4808     return false;
4809   }
4810   if (!validateIntClampSupported(Inst)) {
4811     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4812       "integer clamping is not supported on this GPU");
4813     return false;
4814   }
4815   if (!validateOpSel(Inst)) {
4816     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4817       "invalid op_sel operand");
4818     return false;
4819   }
4820   if (!validateDPP(Inst, Operands)) {
4821     return false;
4822   }
4823   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4824   if (!validateMIMGD16(Inst)) {
4825     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4826       "d16 modifier is not supported on this GPU");
4827     return false;
4828   }
4829   if (!validateMIMGMSAA(Inst)) {
4830     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4831           "invalid dim; must be MSAA type");
4832     return false;
4833   }
4834   if (!validateMIMGDataSize(Inst, IDLoc)) {
4835     return false;
4836   }
4837   if (!validateMIMGAddrSize(Inst, IDLoc))
4838     return false;
4839   if (!validateMIMGAtomicDMask(Inst)) {
4840     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4841       "invalid atomic image dmask");
4842     return false;
4843   }
4844   if (!validateMIMGGatherDMask(Inst)) {
4845     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4846       "invalid image_gather dmask: only one bit must be set");
4847     return false;
4848   }
4849   if (!validateMovrels(Inst, Operands)) {
4850     return false;
4851   }
4852   if (!validateOffset(Inst, Operands)) {
4853     return false;
4854   }
4855   if (!validateMAIAccWrite(Inst, Operands)) {
4856     return false;
4857   }
4858   if (!validateMAISrc2(Inst, Operands)) {
4859     return false;
4860   }
4861   if (!validateMFMA(Inst, Operands)) {
4862     return false;
4863   }
4864   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4865     return false;
4866   }
4867 
4868   if (!validateAGPRLdSt(Inst)) {
4869     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4870     ? "invalid register class: data and dst should be all VGPR or AGPR"
4871     : "invalid register class: agpr loads and stores not supported on this GPU"
4872     );
4873     return false;
4874   }
4875   if (!validateVGPRAlign(Inst)) {
4876     Error(IDLoc,
4877       "invalid register class: vgpr tuples must be 64 bit aligned");
4878     return false;
4879   }
4880   if (!validateDS(Inst, Operands)) {
4881     return false;
4882   }
4883 
4884   if (!validateBLGP(Inst, Operands)) {
4885     return false;
4886   }
4887 
4888   if (!validateDivScale(Inst)) {
4889     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4890     return false;
4891   }
4892   if (!validateWaitCnt(Inst, Operands)) {
4893     return false;
4894   }
4895   if (!validateExeczVcczOperands(Operands)) {
4896     return false;
4897   }
4898   if (!validateTFE(Inst, Operands)) {
4899     return false;
4900   }
4901 
4902   return true;
4903 }
4904 
4905 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4906                                             const FeatureBitset &FBS,
4907                                             unsigned VariantID = 0);
4908 
4909 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4910                                 const FeatureBitset &AvailableFeatures,
4911                                 unsigned VariantID);
4912 
4913 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4914                                        const FeatureBitset &FBS) {
4915   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4916 }
4917 
4918 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4919                                        const FeatureBitset &FBS,
4920                                        ArrayRef<unsigned> Variants) {
4921   for (auto Variant : Variants) {
4922     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4923       return true;
4924   }
4925 
4926   return false;
4927 }
4928 
4929 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4930                                                   const SMLoc &IDLoc) {
4931   FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4932 
4933   // Check if requested instruction variant is supported.
4934   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4935     return false;
4936 
4937   // This instruction is not supported.
4938   // Clear any other pending errors because they are no longer relevant.
4939   getParser().clearPendingErrors();
4940 
4941   // Requested instruction variant is not supported.
4942   // Check if any other variants are supported.
4943   StringRef VariantName = getMatchedVariantName();
4944   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4945     return Error(IDLoc,
4946                  Twine(VariantName,
4947                        " variant of this instruction is not supported"));
4948   }
4949 
4950   // Check if this instruction may be used with a different wavesize.
4951   if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4952       !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4953 
4954     FeatureBitset FeaturesWS32 = getFeatureBits();
4955     FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4956         .flip(AMDGPU::FeatureWavefrontSize32);
4957     FeatureBitset AvailableFeaturesWS32 =
4958         ComputeAvailableFeatures(FeaturesWS32);
4959 
4960     if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4961       return Error(IDLoc, "instruction requires wavesize=32");
4962   }
4963 
4964   // Finally check if this instruction is supported on any other GPU.
4965   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4966     return Error(IDLoc, "instruction not supported on this GPU");
4967   }
4968 
4969   // Instruction not supported on any GPU. Probably a typo.
4970   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4971   return Error(IDLoc, "invalid instruction" + Suggestion);
4972 }
4973 
4974 static bool isInvalidVOPDY(const OperandVector &Operands,
4975                            uint64_t InvalidOprIdx) {
4976   assert(InvalidOprIdx < Operands.size());
4977   const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4978   if (Op.isToken() && InvalidOprIdx > 1) {
4979     const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4980     return PrevOp.isToken() && PrevOp.getToken() == "::";
4981   }
4982   return false;
4983 }
4984 
4985 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4986                                               OperandVector &Operands,
4987                                               MCStreamer &Out,
4988                                               uint64_t &ErrorInfo,
4989                                               bool MatchingInlineAsm) {
4990   MCInst Inst;
4991   unsigned Result = Match_Success;
4992   for (auto Variant : getMatchedVariants()) {
4993     uint64_t EI;
4994     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4995                                   Variant);
4996     // We order match statuses from least to most specific and keep the most
4997     // specific status as the result:
4998     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4999     if ((R == Match_Success) ||
5000         (R == Match_PreferE32) ||
5001         (R == Match_MissingFeature && Result != Match_PreferE32) ||
5002         (R == Match_InvalidOperand && Result != Match_MissingFeature
5003                                    && Result != Match_PreferE32) ||
5004         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
5005                                    && Result != Match_MissingFeature
5006                                    && Result != Match_PreferE32)) {
5007       Result = R;
5008       ErrorInfo = EI;
5009     }
5010     if (R == Match_Success)
5011       break;
5012   }
5013 
5014   if (Result == Match_Success) {
5015     if (!validateInstruction(Inst, IDLoc, Operands)) {
5016       return true;
5017     }
5018     Inst.setLoc(IDLoc);
5019     Out.emitInstruction(Inst, getSTI());
5020     return false;
5021   }
5022 
5023   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5024   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5025     return true;
5026   }
5027 
5028   switch (Result) {
5029   default: break;
5030   case Match_MissingFeature:
5031     // It has been verified that the specified instruction
5032     // mnemonic is valid. A match was found but it requires
5033     // features which are not supported on this GPU.
5034     return Error(IDLoc, "operands are not valid for this GPU or mode");
5035 
5036   case Match_InvalidOperand: {
5037     SMLoc ErrorLoc = IDLoc;
5038     if (ErrorInfo != ~0ULL) {
5039       if (ErrorInfo >= Operands.size()) {
5040         return Error(IDLoc, "too few operands for instruction");
5041       }
5042       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5043       if (ErrorLoc == SMLoc())
5044         ErrorLoc = IDLoc;
5045 
5046       if (isInvalidVOPDY(Operands, ErrorInfo))
5047         return Error(ErrorLoc, "invalid VOPDY instruction");
5048     }
5049     return Error(ErrorLoc, "invalid operand for instruction");
5050   }
5051 
5052   case Match_PreferE32:
5053     return Error(IDLoc, "internal error: instruction without _e64 suffix "
5054                         "should be encoded as e32");
5055   case Match_MnemonicFail:
5056     llvm_unreachable("Invalid instructions should have been handled already");
5057   }
5058   llvm_unreachable("Implement any new match types added!");
5059 }
5060 
5061 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5062   int64_t Tmp = -1;
5063   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5064     return true;
5065   }
5066   if (getParser().parseAbsoluteExpression(Tmp)) {
5067     return true;
5068   }
5069   Ret = static_cast<uint32_t>(Tmp);
5070   return false;
5071 }
5072 
5073 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
5074                                                uint32_t &Minor) {
5075   if (ParseAsAbsoluteExpression(Major))
5076     return TokError("invalid major version");
5077 
5078   if (!trySkipToken(AsmToken::Comma))
5079     return TokError("minor version number required, comma expected");
5080 
5081   if (ParseAsAbsoluteExpression(Minor))
5082     return TokError("invalid minor version");
5083 
5084   return false;
5085 }
5086 
5087 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5088   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5089     return TokError("directive only supported for amdgcn architecture");
5090 
5091   std::string TargetIDDirective;
5092   SMLoc TargetStart = getTok().getLoc();
5093   if (getParser().parseEscapedString(TargetIDDirective))
5094     return true;
5095 
5096   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5097   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5098     return getParser().Error(TargetRange.Start,
5099         (Twine(".amdgcn_target directive's target id ") +
5100          Twine(TargetIDDirective) +
5101          Twine(" does not match the specified target id ") +
5102          Twine(getTargetStreamer().getTargetID()->toString())).str());
5103 
5104   return false;
5105 }
5106 
5107 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5108   return Error(Range.Start, "value out of range", Range);
5109 }
5110 
5111 bool AMDGPUAsmParser::calculateGPRBlocks(
5112     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
5113     bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5114     unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
5115     SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
5116   // TODO(scott.linder): These calculations are duplicated from
5117   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5118   IsaVersion Version = getIsaVersion(getSTI().getCPU());
5119 
5120   unsigned NumVGPRs = NextFreeVGPR;
5121   unsigned NumSGPRs = NextFreeSGPR;
5122 
5123   if (Version.Major >= 10)
5124     NumSGPRs = 0;
5125   else {
5126     unsigned MaxAddressableNumSGPRs =
5127         IsaInfo::getAddressableNumSGPRs(&getSTI());
5128 
5129     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
5130         NumSGPRs > MaxAddressableNumSGPRs)
5131       return OutOfRangeError(SGPRRange);
5132 
5133     NumSGPRs +=
5134         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5135 
5136     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5137         NumSGPRs > MaxAddressableNumSGPRs)
5138       return OutOfRangeError(SGPRRange);
5139 
5140     if (Features.test(FeatureSGPRInitBug))
5141       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
5142   }
5143 
5144   VGPRBlocks =
5145       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
5146   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5147 
5148   return false;
5149 }
5150 
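// Parses a `.amdhsa_kernel <name>` ... `.end_amdhsa_kernel` block and emits the
// resulting kernel descriptor. A minimal sketch (kernel name and register
// counts are illustrative):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// Only .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory
// (gfx90a additionally requires .amdhsa_accum_offset); other fields keep the
// defaults from getDefaultAmdhsaKernelDescriptor().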
5151 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5152   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5153     return TokError("directive only supported for amdgcn architecture");
5154 
5155   if (!isHsaAbi(getSTI()))
5156     return TokError("directive only supported for amdhsa OS");
5157 
5158   StringRef KernelName;
5159   if (getParser().parseIdentifier(KernelName))
5160     return true;
5161 
5162   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
5163 
5164   StringSet<> Seen;
5165 
5166   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5167 
5168   SMRange VGPRRange;
5169   uint64_t NextFreeVGPR = 0;
5170   uint64_t AccumOffset = 0;
5171   uint64_t SharedVGPRCount = 0;
5172   uint64_t PreloadLength = 0;
5173   uint64_t PreloadOffset = 0;
5174   SMRange SGPRRange;
5175   uint64_t NextFreeSGPR = 0;
5176 
5177   // Count the number of user SGPRs implied from the enabled feature bits.
5178   unsigned ImpliedUserSGPRCount = 0;
5179 
5180   // Track if the asm explicitly contains the directive for the user SGPR
5181   // count.
5182   std::optional<unsigned> ExplicitUserSGPRCount;
5183   bool ReserveVCC = true;
5184   bool ReserveFlatScr = true;
5185   std::optional<bool> EnableWavefrontSize32;
5186 
5187   while (true) {
5188     while (trySkipToken(AsmToken::EndOfStatement));
5189 
5190     StringRef ID;
5191     SMRange IDRange = getTok().getLocRange();
5192     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5193       return true;
5194 
5195     if (ID == ".end_amdhsa_kernel")
5196       break;
5197 
5198     if (!Seen.insert(ID).second)
5199       return TokError(".amdhsa_ directives cannot be repeated");
5200 
5201     SMLoc ValStart = getLoc();
5202     int64_t IVal;
5203     if (getParser().parseAbsoluteExpression(IVal))
5204       return true;
5205     SMLoc ValEnd = getLoc();
5206     SMRange ValRange = SMRange(ValStart, ValEnd);
5207 
5208     if (IVal < 0)
5209       return OutOfRangeError(ValRange);
5210 
5211     uint64_t Val = IVal;
5212 
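// Range-check VALUE against the width of the descriptor field, then set
// FIELD's ENTRY bits in the kernel descriptor being built.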
5213 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
5214   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
5215     return OutOfRangeError(RANGE);                                             \
5216   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
5217 
5218     if (ID == ".amdhsa_group_segment_fixed_size") {
5219       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5220         return OutOfRangeError(ValRange);
5221       KD.group_segment_fixed_size = Val;
5222     } else if (ID == ".amdhsa_private_segment_fixed_size") {
5223       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5224         return OutOfRangeError(ValRange);
5225       KD.private_segment_fixed_size = Val;
5226     } else if (ID == ".amdhsa_kernarg_size") {
5227       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5228         return OutOfRangeError(ValRange);
5229       KD.kernarg_size = Val;
5230     } else if (ID == ".amdhsa_user_sgpr_count") {
5231       ExplicitUserSGPRCount = Val;
5232     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5233       if (hasArchitectedFlatScratch())
5234         return Error(IDRange.Start,
5235                      "directive is not supported with architected flat scratch",
5236                      IDRange);
5237       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5238                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5239                        Val, ValRange);
5240       if (Val)
5241         ImpliedUserSGPRCount += 4;
5242     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5243       if (!hasKernargPreload())
5244         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5245 
5246       if (Val > getMaxNumUserSGPRs())
5247         return OutOfRangeError(ValRange);
5248       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
5249                        ValRange);
5250       if (Val) {
5251         ImpliedUserSGPRCount += Val;
5252         PreloadLength = Val;
5253       }
5254     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5255       if (!hasKernargPreload())
5256         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5257 
5258       if (Val >= 1024)
5259         return OutOfRangeError(ValRange);
5260       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
5261                        ValRange);
5262       if (Val)
5263         PreloadOffset = Val;
5264     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5265       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5266                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5267                        ValRange);
5268       if (Val)
5269         ImpliedUserSGPRCount += 2;
5270     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5271       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5272                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5273                        ValRange);
5274       if (Val)
5275         ImpliedUserSGPRCount += 2;
5276     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5277       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5278                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5279                        Val, ValRange);
5280       if (Val)
5281         ImpliedUserSGPRCount += 2;
5282     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5283       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5284                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5285                        ValRange);
5286       if (Val)
5287         ImpliedUserSGPRCount += 2;
5288     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5289       if (hasArchitectedFlatScratch())
5290         return Error(IDRange.Start,
5291                      "directive is not supported with architected flat scratch",
5292                      IDRange);
5293       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5294                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5295                        ValRange);
5296       if (Val)
5297         ImpliedUserSGPRCount += 2;
5298     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5299       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5300                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5301                        Val, ValRange);
5302       if (Val)
5303         ImpliedUserSGPRCount += 1;
5304     } else if (ID == ".amdhsa_wavefront_size32") {
5305       if (IVersion.Major < 10)
5306         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5307       EnableWavefrontSize32 = Val;
5308       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5309                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5310                        Val, ValRange);
5311     } else if (ID == ".amdhsa_uses_dynamic_stack") {
5312       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5313                        KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5314     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5315       if (hasArchitectedFlatScratch())
5316         return Error(IDRange.Start,
5317                      "directive is not supported with architected flat scratch",
5318                      IDRange);
5319       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5320                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5321     } else if (ID == ".amdhsa_enable_private_segment") {
5322       if (!hasArchitectedFlatScratch())
5323         return Error(
5324             IDRange.Start,
5325             "directive is not supported without architected flat scratch",
5326             IDRange);
5327       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5328                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5329     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5330       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5331                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5332                        ValRange);
5333     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5334       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5335                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5336                        ValRange);
5337     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5338       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5339                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5340                        ValRange);
5341     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5342       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5343                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5344                        ValRange);
5345     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5346       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5347                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5348                        ValRange);
5349     } else if (ID == ".amdhsa_next_free_vgpr") {
5350       VGPRRange = ValRange;
5351       NextFreeVGPR = Val;
5352     } else if (ID == ".amdhsa_next_free_sgpr") {
5353       SGPRRange = ValRange;
5354       NextFreeSGPR = Val;
5355     } else if (ID == ".amdhsa_accum_offset") {
5356       if (!isGFX90A())
5357         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5358       AccumOffset = Val;
5359     } else if (ID == ".amdhsa_reserve_vcc") {
5360       if (!isUInt<1>(Val))
5361         return OutOfRangeError(ValRange);
5362       ReserveVCC = Val;
5363     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5364       if (IVersion.Major < 7)
5365         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5366       if (hasArchitectedFlatScratch())
5367         return Error(IDRange.Start,
5368                      "directive is not supported with architected flat scratch",
5369                      IDRange);
5370       if (!isUInt<1>(Val))
5371         return OutOfRangeError(ValRange);
5372       ReserveFlatScr = Val;
5373     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5374       if (IVersion.Major < 8)
5375         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5376       if (!isUInt<1>(Val))
5377         return OutOfRangeError(ValRange);
5378       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5379         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5380                                  IDRange);
5381     } else if (ID == ".amdhsa_float_round_mode_32") {
5382       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5383                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5384     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5385       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5386                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5387     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5388       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5389                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5390     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5391       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5392                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5393                        ValRange);
5394     } else if (ID == ".amdhsa_dx10_clamp") {
5395       if (IVersion.Major >= 12)
5396         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5397       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5398                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
5399                        ValRange);
5400     } else if (ID == ".amdhsa_ieee_mode") {
5401       if (IVersion.Major >= 12)
5402         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5403       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5404                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
5405                        ValRange);
5406     } else if (ID == ".amdhsa_fp16_overflow") {
5407       if (IVersion.Major < 9)
5408         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5409       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
5410                        ValRange);
5411     } else if (ID == ".amdhsa_tg_split") {
5412       if (!isGFX90A())
5413         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5414       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5415                        ValRange);
5416     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5417       if (IVersion.Major < 10)
5418         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5419       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
5420                        ValRange);
5421     } else if (ID == ".amdhsa_memory_ordered") {
5422       if (IVersion.Major < 10)
5423         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5424       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
5425                        ValRange);
5426     } else if (ID == ".amdhsa_forward_progress") {
5427       if (IVersion.Major < 10)
5428         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5429       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
5430                        ValRange);
5431     } else if (ID == ".amdhsa_shared_vgpr_count") {
5432       if (IVersion.Major < 10 || IVersion.Major >= 12)
5433         return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5434                      IDRange);
5435       SharedVGPRCount = Val;
5436       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5437                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val,
5438                        ValRange);
5439     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5440       PARSE_BITS_ENTRY(
5441           KD.compute_pgm_rsrc2,
5442           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5443           ValRange);
5444     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5445       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5446                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5447                        Val, ValRange);
5448     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5449       PARSE_BITS_ENTRY(
5450           KD.compute_pgm_rsrc2,
5451           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5452           ValRange);
5453     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5454       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5455                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5456                        Val, ValRange);
5457     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5458       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5459                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5460                        Val, ValRange);
5461     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5462       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5463                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5464                        Val, ValRange);
5465     } else if (ID == ".amdhsa_exception_int_div_zero") {
5466       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5467                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5468                        Val, ValRange);
5469     } else if (ID == ".amdhsa_round_robin_scheduling") {
5470       if (IVersion.Major < 12)
5471         return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5472       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5473                        COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
5474                        ValRange);
5475     } else {
5476       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5477     }
5478 
5479 #undef PARSE_BITS_ENTRY
5480   }
5481 
5482   if (!Seen.contains(".amdhsa_next_free_vgpr"))
5483     return TokError(".amdhsa_next_free_vgpr directive is required");
5484 
5485   if (!Seen.contains(".amdhsa_next_free_sgpr"))
5486     return TokError(".amdhsa_next_free_sgpr directive is required");
5487 
5488   unsigned VGPRBlocks;
5489   unsigned SGPRBlocks;
5490   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5491                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5492                          EnableWavefrontSize32, NextFreeVGPR,
5493                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5494                          SGPRBlocks))
5495     return true;
5496 
5497   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5498           VGPRBlocks))
5499     return OutOfRangeError(VGPRRange);
5500   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5501                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5502 
5503   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5504           SGPRBlocks))
5505     return OutOfRangeError(SGPRRange);
5506   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5507                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5508                   SGPRBlocks);
5509 
5510   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5511     return TokError("amdhsa_user_sgpr_count smaller than implied by "
5512                     "enabled user SGPRs");
5513 
5514   unsigned UserSGPRCount =
5515       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5516 
5517   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5518     return TokError("too many user SGPRs enabled");
5519   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5520                   UserSGPRCount);
5521 
5522   if (PreloadLength && KD.kernarg_size &&
5523       (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
5524     return TokError("Kernarg preload length + offset is larger than the "
5525                     "kernarg segment size");
5526 
5527   if (isGFX90A()) {
5528     if (!Seen.contains(".amdhsa_accum_offset"))
5529       return TokError(".amdhsa_accum_offset directive is required");
5530     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5531       return TokError("accum_offset should be in range [4..256] in "
5532                       "increments of 4");
5533     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5534       return TokError("accum_offset exceeds total VGPR allocation");
5535     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5536                     (AccumOffset / 4 - 1));
5537   }
5538 
5539   if (IVersion.Major >= 10 && IVersion.Major < 12) {
5540     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5541     if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5542       return TokError("shared_vgpr_count directive not valid on "
5543                       "wavefront size 32");
5544     }
5545     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5546       return TokError("shared_vgpr_count*2 + "
5547                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5548                       "exceed 63");
5549     }
5550   }
5551 
5552   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5553       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5554       ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion());
5555   return false;
5556 }
5557 
5558 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5559   uint32_t Major;
5560   uint32_t Minor;
5561 
5562   if (ParseDirectiveMajorMinor(Major, Minor))
5563     return true;
5564 
5565   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5566   return false;
5567 }
5568 
5569 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5570   uint32_t Major;
5571   uint32_t Minor;
5572   uint32_t Stepping;
5573   StringRef VendorName;
5574   StringRef ArchName;
5575 
5576   // If this directive has no arguments, then use the ISA version for the
5577   // targeted GPU.
5578   if (isToken(AsmToken::EndOfStatement)) {
5579     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5580     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5581                                                         ISA.Stepping,
5582                                                         "AMD", "AMDGPU");
5583     return false;
5584   }
5585 
5586   if (ParseDirectiveMajorMinor(Major, Minor))
5587     return true;
5588 
5589   if (!trySkipToken(AsmToken::Comma))
5590     return TokError("stepping version number required, comma expected");
5591 
5592   if (ParseAsAbsoluteExpression(Stepping))
5593     return TokError("invalid stepping version");
5594 
5595   if (!trySkipToken(AsmToken::Comma))
5596     return TokError("vendor name required, comma expected");
5597 
5598   if (!parseString(VendorName, "invalid vendor name"))
5599     return true;
5600 
5601   if (!trySkipToken(AsmToken::Comma))
5602     return TokError("arch name required, comma expected");
5603 
5604   if (!parseString(ArchName, "invalid arch name"))
5605     return true;
5606 
5607   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5608                                                       VendorName, ArchName);
5609   return false;
5610 }
5611 
5612 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5613                                                amd_kernel_code_t &Header) {
5614   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5615   // assembly for backwards compatibility.
5616   if (ID == "max_scratch_backing_memory_byte_size") {
5617     Parser.eatToEndOfStatement();
5618     return false;
5619   }
5620 
5621   SmallString<40> ErrStr;
5622   raw_svector_ostream Err(ErrStr);
5623   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5624     return TokError(Err.str());
5625   }
5626   Lex();
5627 
5628   if (ID == "enable_dx10_clamp") {
5629     if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
5630         isGFX12Plus())
5631       return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
5632   }
5633 
5634   if (ID == "enable_ieee_mode") {
5635     if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
5636         isGFX12Plus())
5637       return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
5638   }
5639 
5640   if (ID == "enable_wavefront_size32") {
5641     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5642       if (!isGFX10Plus())
5643         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5644       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5645         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5646     } else {
5647       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5648         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5649     }
5650   }
5651 
5652   if (ID == "wavefront_size") {
5653     if (Header.wavefront_size == 5) {
5654       if (!isGFX10Plus())
5655         return TokError("wavefront_size=5 is only allowed on GFX10+");
5656       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5657         return TokError("wavefront_size=5 requires +WavefrontSize32");
5658     } else if (Header.wavefront_size == 6) {
5659       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5660         return TokError("wavefront_size=6 requires +WavefrontSize64");
5661     }
5662   }
5663 
5664   if (ID == "enable_wgp_mode") {
5665     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5666         !isGFX10Plus())
5667       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5668   }
5669 
5670   if (ID == "enable_mem_ordered") {
5671     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5672         !isGFX10Plus())
5673       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5674   }
5675 
5676   if (ID == "enable_fwd_progress") {
5677     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5678         !isGFX10Plus())
5679       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5680   }
5681 
5682   return false;
5683 }
5684 
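// Parses the legacy `.amd_kernel_code_t` ... `.end_amd_kernel_code_t` block,
// applying each field assignment to a default-initialized amd_kernel_code_t
// header before emitting it.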
5685 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5686   amd_kernel_code_t Header;
5687   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5688 
5689   while (true) {
5690     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5691     // will set the current token to EndOfStatement.
5692     while (trySkipToken(AsmToken::EndOfStatement));
5693 
5694     StringRef ID;
5695     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5696       return true;
5697 
5698     if (ID == ".end_amd_kernel_code_t")
5699       break;
5700 
5701     if (ParseAMDKernelCodeTValue(ID, Header))
5702       return true;
5703   }
5704 
5705   getTargetStreamer().EmitAMDKernelCodeT(Header);
5706 
5707   return false;
5708 }
5709 
5710 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5711   StringRef KernelName;
5712   if (!parseId(KernelName, "expected symbol name"))
5713     return true;
5714 
5715   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5716                                            ELF::STT_AMDGPU_HSA_KERNEL);
5717 
5718   KernelScope.initialize(getContext());
5719   return false;
5720 }
5721 
5722 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5723   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5724     return Error(getLoc(),
5725                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5726                  "architectures");
5727   }
5728 
5729   auto TargetIDDirective = getLexer().getTok().getStringContents();
5730   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5731     return Error(getParser().getTok().getLoc(), "target id must match options");
5732 
5733   getTargetStreamer().EmitISAVersion();
5734   Lex();
5735 
5736   return false;
5737 }
5738 
5739 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5740   assert(isHsaAbi(getSTI()));
5741 
5742   std::string HSAMetadataString;
5743   if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
5744                           HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
5745     return true;
5746 
5747   if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5748     return Error(getLoc(), "invalid HSA metadata");
5749 
5750   return false;
5751 }
5752 
5753 /// Common code to parse out a block of text (typically YAML) between start and
5754 /// end directives.
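/// Used for the HSA metadata and MsgPack PAL metadata blocks; everything up to
/// the end directive is collected verbatim.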
5755 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5756                                           const char *AssemblerDirectiveEnd,
5757                                           std::string &CollectString) {
5758 
5759   raw_string_ostream CollectStream(CollectString);
5760 
5761   getLexer().setSkipSpace(false);
5762 
5763   bool FoundEnd = false;
5764   while (!isToken(AsmToken::Eof)) {
5765     while (isToken(AsmToken::Space)) {
5766       CollectStream << getTokenStr();
5767       Lex();
5768     }
5769 
5770     if (trySkipId(AssemblerDirectiveEnd)) {
5771       FoundEnd = true;
5772       break;
5773     }
5774 
5775     CollectStream << Parser.parseStringToEndOfStatement()
5776                   << getContext().getAsmInfo()->getSeparatorString();
5777 
5778     Parser.eatToEndOfStatement();
5779   }
5780 
5781   getLexer().setSkipSpace(true);
5782 
5783   if (isToken(AsmToken::Eof) && !FoundEnd) {
5784     return TokError(Twine("expected directive ") +
5785                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5786   }
5787 
5788   CollectStream.flush();
5789   return false;
5790 }
5791 
5792 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5793 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5794   std::string String;
5795   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5796                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5797     return true;
5798 
5799   auto PALMetadata = getTargetStreamer().getPALMetadata();
5800   if (!PALMetadata->setFromString(String))
5801     return Error(getLoc(), "invalid PAL metadata");
5802   return false;
5803 }
5804 
5805 /// Parse the assembler directive for old linear-format PAL metadata.
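/// The legacy form is a comma-separated list of alternating key/value pairs,
/// each of which sets one PAL metadata register.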
5806 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5807   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5808     return Error(getLoc(),
5809                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5810                  "not available on non-amdpal OSes")).str());
5811   }
5812 
5813   auto PALMetadata = getTargetStreamer().getPALMetadata();
5814   PALMetadata->setLegacy();
5815   for (;;) {
5816     uint32_t Key, Value;
5817     if (ParseAsAbsoluteExpression(Key)) {
5818       return TokError(Twine("invalid value in ") +
5819                       Twine(PALMD::AssemblerDirective));
5820     }
5821     if (!trySkipToken(AsmToken::Comma)) {
5822       return TokError(Twine("expected an even number of values in ") +
5823                       Twine(PALMD::AssemblerDirective));
5824     }
5825     if (ParseAsAbsoluteExpression(Value)) {
5826       return TokError(Twine("invalid value in ") +
5827                       Twine(PALMD::AssemblerDirective));
5828     }
5829     PALMetadata->setRegister(Key, Value);
5830     if (!trySkipToken(AsmToken::Comma))
5831       break;
5832   }
5833   return false;
5834 }
5835 
5836 /// ParseDirectiveAMDGPULDS
5837 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
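///  e.g. `.amdgpu_lds lds_buf, 4096, 16` (symbol name and values illustrative);
///  the size must fit in LDS and the alignment must be a power of two.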
5838 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5839   if (getParser().checkForValidSection())
5840     return true;
5841 
5842   StringRef Name;
5843   SMLoc NameLoc = getLoc();
5844   if (getParser().parseIdentifier(Name))
5845     return TokError("expected identifier in directive");
5846 
5847   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5848   if (getParser().parseComma())
5849     return true;
5850 
5851   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5852 
5853   int64_t Size;
5854   SMLoc SizeLoc = getLoc();
5855   if (getParser().parseAbsoluteExpression(Size))
5856     return true;
5857   if (Size < 0)
5858     return Error(SizeLoc, "size must be non-negative");
5859   if (Size > LocalMemorySize)
5860     return Error(SizeLoc, "size is too large");
5861 
5862   int64_t Alignment = 4;
5863   if (trySkipToken(AsmToken::Comma)) {
5864     SMLoc AlignLoc = getLoc();
5865     if (getParser().parseAbsoluteExpression(Alignment))
5866       return true;
5867     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5868       return Error(AlignLoc, "alignment must be a power of two");
5869 
5870     // Alignment larger than the size of LDS is possible in theory, as long
5871     // as the linker manages to place the symbol at address 0, but we do want
5872     // to make sure the alignment fits nicely into a 32-bit integer.
5873     if (Alignment >= 1u << 31)
5874       return Error(AlignLoc, "alignment is too large");
5875   }
5876 
5877   if (parseEOL())
5878     return true;
5879 
5880   Symbol->redefineIfPossible();
5881   if (!Symbol->isUndefined())
5882     return Error(NameLoc, "invalid symbol redefinition");
5883 
5884   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5885   return false;
5886 }
5887 
5888 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5889   StringRef IDVal = DirectiveID.getString();
5890 
5891   if (isHsaAbi(getSTI())) {
5892     if (IDVal == ".amdhsa_kernel")
5893       return ParseDirectiveAMDHSAKernel();
5894 
5895     // TODO: Restructure/combine with PAL metadata directive.
5896     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5897       return ParseDirectiveHSAMetadata();
5898   } else {
5899     if (IDVal == ".hsa_code_object_version")
5900       return ParseDirectiveHSACodeObjectVersion();
5901 
5902     if (IDVal == ".hsa_code_object_isa")
5903       return ParseDirectiveHSACodeObjectISA();
5904 
5905     if (IDVal == ".amd_kernel_code_t")
5906       return ParseDirectiveAMDKernelCodeT();
5907 
5908     if (IDVal == ".amdgpu_hsa_kernel")
5909       return ParseDirectiveAMDGPUHsaKernel();
5910 
5911     if (IDVal == ".amd_amdgpu_isa")
5912       return ParseDirectiveISAVersion();
5913 
5914     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
5915       return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
5916                               Twine(" directive is "
5917                                     "not available on non-amdhsa OSes"))
5918                                  .str());
5919     }
5920   }
5921 
5922   if (IDVal == ".amdgcn_target")
5923     return ParseDirectiveAMDGCNTarget();
5924 
5925   if (IDVal == ".amdgpu_lds")
5926     return ParseDirectiveAMDGPULDS();
5927 
5928   if (IDVal == PALMD::AssemblerDirectiveBegin)
5929     return ParseDirectivePALMetadataBegin();
5930 
5931   if (IDVal == PALMD::AssemblerDirective)
5932     return ParseDirectivePALMetadata();
5933 
5934   return true;
5935 }
5936 
5937 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5938                                            unsigned RegNo) {
5939 
5940   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5941     return isGFX9Plus();
5942 
5943   // GFX10+ has 2 more SGPRs 104 and 105.
5944   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5945     return hasSGPR104_SGPR105();
5946 
5947   switch (RegNo) {
5948   case AMDGPU::SRC_SHARED_BASE_LO:
5949   case AMDGPU::SRC_SHARED_BASE:
5950   case AMDGPU::SRC_SHARED_LIMIT_LO:
5951   case AMDGPU::SRC_SHARED_LIMIT:
5952   case AMDGPU::SRC_PRIVATE_BASE_LO:
5953   case AMDGPU::SRC_PRIVATE_BASE:
5954   case AMDGPU::SRC_PRIVATE_LIMIT_LO:
5955   case AMDGPU::SRC_PRIVATE_LIMIT:
5956     return isGFX9Plus();
5957   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5958     return isGFX9Plus() && !isGFX11Plus();
5959   case AMDGPU::TBA:
5960   case AMDGPU::TBA_LO:
5961   case AMDGPU::TBA_HI:
5962   case AMDGPU::TMA:
5963   case AMDGPU::TMA_LO:
5964   case AMDGPU::TMA_HI:
5965     return !isGFX9Plus();
5966   case AMDGPU::XNACK_MASK:
5967   case AMDGPU::XNACK_MASK_LO:
5968   case AMDGPU::XNACK_MASK_HI:
5969     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5970   case AMDGPU::SGPR_NULL:
5971     return isGFX10Plus();
5972   default:
5973     break;
5974   }
5975 
5976   if (isCI())
5977     return true;
5978 
5979   if (isSI() || isGFX10Plus()) {
5980     // No flat_scr on SI.
5981     // On GFX10Plus flat scratch is not a valid register operand and can only be
5982     // accessed with s_setreg/s_getreg.
5983     switch (RegNo) {
5984     case AMDGPU::FLAT_SCR:
5985     case AMDGPU::FLAT_SCR_LO:
5986     case AMDGPU::FLAT_SCR_HI:
5987       return false;
5988     default:
5989       return true;
5990     }
5991   }
5992 
5993   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5994   // SI/CI have.
5995   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5996     return hasSGPR102_SGPR103();
5997 
5998   return true;
5999 }
6000 
6001 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6002                                           StringRef Mnemonic,
6003                                           OperandMode Mode) {
6004   ParseStatus Res = parseVOPD(Operands);
6005   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6006     return Res;
6007 
6008   // Try to parse with a custom parser
6009   Res = MatchOperandParserImpl(Operands, Mnemonic);
6010 
6011   // If we successfully parsed the operand or if there was an error parsing,
6012   // we are done.
6013   //
6014   // If we are parsing after we reach EndOfStatement then this means we
6015   // are appending default values to the Operands list.  This is only done
6016   // by custom parser, so we shouldn't continue on to the generic parsing.
6017   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6018     return Res;
6019 
6020   SMLoc RBraceLoc;
6021   SMLoc LBraceLoc = getLoc();
6022   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6023     unsigned Prefix = Operands.size();
6024 
6025     for (;;) {
6026       auto Loc = getLoc();
6027       Res = parseReg(Operands);
6028       if (Res.isNoMatch())
6029         Error(Loc, "expected a register");
6030       if (!Res.isSuccess())
6031         return ParseStatus::Failure;
6032 
6033       RBraceLoc = getLoc();
6034       if (trySkipToken(AsmToken::RBrac))
6035         break;
6036 
6037       if (!skipToken(AsmToken::Comma,
6038                      "expected a comma or a closing square bracket"))
6039         return ParseStatus::Failure;
6040     }
6041 
6042     if (Operands.size() - Prefix > 1) {
6043       Operands.insert(Operands.begin() + Prefix,
6044                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6045       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6046     }
6047 
6048     return ParseStatus::Success;
6049   }
6050 
6051   return parseRegOrImm(Operands);
6052 }
6053 
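// Strip any forced-encoding suffix (_e64_dpp, _e64, _e32, _dpp, _sdwa) from the
// mnemonic and record the requested encoding, e.g. "v_add_f32_e64" is parsed
// as "v_add_f32" with a forced 64-bit encoding.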
6054 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6055   // Clear any forced encodings from the previous instruction.
6056   setForcedEncodingSize(0);
6057   setForcedDPP(false);
6058   setForcedSDWA(false);
6059 
6060   if (Name.ends_with("_e64_dpp")) {
6061     setForcedDPP(true);
6062     setForcedEncodingSize(64);
6063     return Name.substr(0, Name.size() - 8);
6064   } else if (Name.ends_with("_e64")) {
6065     setForcedEncodingSize(64);
6066     return Name.substr(0, Name.size() - 4);
6067   } else if (Name.ends_with("_e32")) {
6068     setForcedEncodingSize(32);
6069     return Name.substr(0, Name.size() - 4);
6070   } else if (Name.ends_with("_dpp")) {
6071     setForcedDPP(true);
6072     return Name.substr(0, Name.size() - 4);
6073   } else if (Name.ends_with("_sdwa")) {
6074     setForcedSDWA(true);
6075     return Name.substr(0, Name.size() - 5);
6076   }
6077   return Name;
6078 }
6079 
6080 static void applyMnemonicAliases(StringRef &Mnemonic,
6081                                  const FeatureBitset &Features,
6082                                  unsigned VariantID);
6083 
6084 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6085                                        StringRef Name,
6086                                        SMLoc NameLoc, OperandVector &Operands) {
6087   // Add the instruction mnemonic
6088   Name = parseMnemonicSuffix(Name);
6089 
6090   // If the target architecture uses MnemonicAlias, call it here to parse
6091   // operands correctly.
6092   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6093 
6094   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6095 
6096   bool IsMIMG = Name.starts_with("image_");
6097 
6098   while (!trySkipToken(AsmToken::EndOfStatement)) {
6099     OperandMode Mode = OperandMode_Default;
6100     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6101       Mode = OperandMode_NSA;
6102     ParseStatus Res = parseOperand(Operands, Name, Mode);
6103 
6104     if (!Res.isSuccess()) {
6105       checkUnsupportedInstruction(Name, NameLoc);
6106       if (!Parser.hasPendingError()) {
6107         // FIXME: use real operand location rather than the current location.
6108         StringRef Msg = Res.isFailure() ? "failed parsing operand."
6109                                         : "not a valid operand.";
6110         Error(getLoc(), Msg);
6111       }
6112       while (!trySkipToken(AsmToken::EndOfStatement)) {
6113         lex();
6114       }
6115       return true;
6116     }
6117 
6118     // Eat the comma or space if there is one.
6119     trySkipToken(AsmToken::Comma);
6120   }
6121 
6122   return false;
6123 }
6124 
6125 //===----------------------------------------------------------------------===//
6126 // Utility functions
6127 //===----------------------------------------------------------------------===//
6128 
6129 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6130                                           OperandVector &Operands) {
6131   SMLoc S = getLoc();
6132   if (!trySkipId(Name))
6133     return ParseStatus::NoMatch;
6134 
6135   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6136   return ParseStatus::Success;
6137 }
6138 
6139 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6140                                                 int64_t &IntVal) {
6141 
6142   if (!trySkipId(Prefix, AsmToken::Colon))
6143     return ParseStatus::NoMatch;
6144 
6145   return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6146 }
6147 
6148 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6149     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6150     std::function<bool(int64_t &)> ConvertResult) {
6151   SMLoc S = getLoc();
6152   int64_t Value = 0;
6153 
6154   ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6155   if (!Res.isSuccess())
6156     return Res;
6157 
6158   if (ConvertResult && !ConvertResult(Value)) {
6159     Error(S, "invalid " + StringRef(Prefix) + " value.");
6160   }
6161 
6162   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6163   return ParseStatus::Success;
6164 }
6165 
6166 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6167     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6168     bool (*ConvertResult)(int64_t &)) {
6169   SMLoc S = getLoc();
6170   if (!trySkipId(Prefix, AsmToken::Colon))
6171     return ParseStatus::NoMatch;
6172 
6173   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6174     return ParseStatus::Failure;
6175 
6176   unsigned Val = 0;
6177   const unsigned MaxSize = 4;
6178 
6179   // FIXME: How to verify the number of elements matches the number of src
6180   // operands?
6181   for (int I = 0; ; ++I) {
6182     int64_t Op;
6183     SMLoc Loc = getLoc();
6184     if (!parseExpr(Op))
6185       return ParseStatus::Failure;
6186 
6187     if (Op != 0 && Op != 1)
6188       return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6189 
6190     Val |= (Op << I);
6191 
6192     if (trySkipToken(AsmToken::RBrac))
6193       break;
6194 
6195     if (I + 1 == MaxSize)
6196       return Error(getLoc(), "expected a closing square bracket");
6197 
6198     if (!skipToken(AsmToken::Comma, "expected a comma"))
6199       return ParseStatus::Failure;
6200   }
6201 
6202   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6203   return ParseStatus::Success;
6204 }
6205 
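// Parse a named bit modifier: the bare name sets the bit and the "no"-prefixed
// form clears it. Support for r128/a16 is validated for the current GPU.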
6206 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6207                                            OperandVector &Operands,
6208                                            AMDGPUOperand::ImmTy ImmTy) {
6209   int64_t Bit;
6210   SMLoc S = getLoc();
6211 
6212   if (trySkipId(Name)) {
6213     Bit = 1;
6214   } else if (trySkipId("no", Name)) {
6215     Bit = 0;
6216   } else {
6217     return ParseStatus::NoMatch;
6218   }
6219 
6220   if (Name == "r128" && !hasMIMG_R128())
6221     return Error(S, "r128 modifier is not supported on this GPU");
6222   if (Name == "a16" && !hasA16())
6223     return Error(S, "a16 modifier is not supported on this GPU");
6224 
6225   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6226     ImmTy = AMDGPUOperand::ImmTyR128A16;
6227 
6228   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6229   return ParseStatus::Success;
6230 }
6231 
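// Map a cache-policy token to its CPol bit. A leading "no" marks the modifier
// as disabling; GFX940 non-scalar instructions use the nt/sc0/sc1 names
// instead of dlc/glc/scc/slc.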
6232 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6233                                       bool &Disabling) const {
6234   Disabling = Id.consume_front("no");
6235 
6236   if (isGFX940() && !Mnemo.starts_with("s_")) {
6237     return StringSwitch<unsigned>(Id)
6238         .Case("nt", AMDGPU::CPol::NT)
6239         .Case("sc0", AMDGPU::CPol::SC0)
6240         .Case("sc1", AMDGPU::CPol::SC1)
6241         .Default(0);
6242   }
6243 
6244   return StringSwitch<unsigned>(Id)
6245       .Case("dlc", AMDGPU::CPol::DLC)
6246       .Case("glc", AMDGPU::CPol::GLC)
6247       .Case("scc", AMDGPU::CPol::SCC)
6248       .Case("slc", AMDGPU::CPol::SLC)
6249       .Default(0);
6250 }
6251 
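// Parse cache-policy modifiers. On GFX12+ these are the th:... and scope:...
// fields, combined into a single CPol immediate; on earlier targets they are
// individual cache-policy flags such as glc, slc, dlc, and scc, each
// optionally prefixed with "no".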
6252 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6253   if (isGFX12Plus()) {
6254     SMLoc StringLoc = getLoc();
6255 
6256     int64_t CPolVal = 0;
6257     ParseStatus ResTH = ParseStatus::NoMatch;
6258     ParseStatus ResScope = ParseStatus::NoMatch;
6259 
6260     for (;;) {
6261       if (ResTH.isNoMatch()) {
6262         int64_t TH;
6263         ResTH = parseTH(Operands, TH);
6264         if (ResTH.isFailure())
6265           return ResTH;
6266         if (ResTH.isSuccess()) {
6267           CPolVal |= TH;
6268           continue;
6269         }
6270       }
6271 
6272       if (ResScope.isNoMatch()) {
6273         int64_t Scope;
6274         ResScope = parseScope(Operands, Scope);
6275         if (ResScope.isFailure())
6276           return ResScope;
6277         if (ResScope.isSuccess()) {
6278           CPolVal |= Scope;
6279           continue;
6280         }
6281       }
6282 
6283       break;
6284     }
6285 
6286     if (ResTH.isNoMatch() && ResScope.isNoMatch())
6287       return ParseStatus::NoMatch;
6288 
6289     Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6290                                                 AMDGPUOperand::ImmTyCPol));
6291     return ParseStatus::Success;
6292   }
6293 
6294   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6295   SMLoc OpLoc = getLoc();
6296   unsigned Enabled = 0, Seen = 0;
6297   for (;;) {
6298     SMLoc S = getLoc();
6299     bool Disabling;
6300     unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6301     if (!CPol)
6302       break;
6303 
6304     lex();
6305 
6306     if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6307       return Error(S, "dlc modifier is not supported on this GPU");
6308 
6309     if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6310       return Error(S, "scc modifier is not supported on this GPU");
6311 
6312     if (Seen & CPol)
6313       return Error(S, "duplicate cache policy modifier");
6314 
6315     if (!Disabling)
6316       Enabled |= CPol;
6317 
6318     Seen |= CPol;
6319   }
6320 
6321   if (!Seen)
6322     return ParseStatus::NoMatch;
6323 
6324   Operands.push_back(
6325       AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6326   return ParseStatus::Success;
6327 }
6328 
6329 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6330                                         int64_t &Scope) {
6331   Scope = AMDGPU::CPol::SCOPE_CU; // default
6332 
6333   StringRef Value;
6334   SMLoc StringLoc;
6335   ParseStatus Res;
6336 
6337   Res = parseStringWithPrefix("scope", Value, StringLoc);
6338   if (!Res.isSuccess())
6339     return Res;
6340 
6341   Scope = StringSwitch<int64_t>(Value)
6342               .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6343               .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6344               .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6345               .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6346               .Default(0xffffffff);
6347 
6348   if (Scope == 0xffffffff)
6349     return Error(StringLoc, "invalid scope value");
6350 
6351   return ParseStatus::Success;
6352 }
6353 
6354 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6355   TH = AMDGPU::CPol::TH_RT; // default
6356 
6357   StringRef Value;
6358   SMLoc StringLoc;
6359   ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6360   if (!Res.isSuccess())
6361     return Res;
6362 
6363   if (Value == "TH_DEFAULT")
6364     TH = AMDGPU::CPol::TH_RT;
6365   else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6366            Value == "TH_LOAD_NT_WB") {
6367     return Error(StringLoc, "invalid th value");
6368   } else if (Value.starts_with("TH_ATOMIC_")) {
6369     Value = Value.drop_front(10);
6370     TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6371   } else if (Value.starts_with("TH_LOAD_")) {
6372     Value = Value.drop_front(8);
6373     TH = AMDGPU::CPol::TH_TYPE_LOAD;
6374   } else if (Value.starts_with("TH_STORE_")) {
6375     Value = Value.drop_front(9);
6376     TH = AMDGPU::CPol::TH_TYPE_STORE;
6377   } else {
6378     return Error(StringLoc, "invalid th value");
6379   }
6380 
6381   if (Value == "BYPASS")
6382     TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6383 
6384   if (TH != 0) {
6385     if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
6386       TH |= StringSwitch<int64_t>(Value)
6387                 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6388                 .Case("RT", AMDGPU::CPol::TH_RT)
6389                 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6390                 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
6391                 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6392                                        AMDGPU::CPol::TH_ATOMIC_RETURN)
6393                 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
6394                 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6395                                         AMDGPU::CPol::TH_ATOMIC_NT)
6396                 .Default(0xffffffff);
6397     else
6398       TH |= StringSwitch<int64_t>(Value)
6399                 .Case("RT", AMDGPU::CPol::TH_RT)
6400                 .Case("NT", AMDGPU::CPol::TH_NT)
6401                 .Case("HT", AMDGPU::CPol::TH_HT)
6402                 .Case("LU", AMDGPU::CPol::TH_LU)
6403                 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6404                 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6405                 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6406                 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6407                 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6408                 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6409                 .Default(0xffffffff);
6410   }
6411 
6412   if (TH == 0xffffffff)
6413     return Error(StringLoc, "invalid th value");
6414 
6415   return ParseStatus::Success;
6416 }
6417 
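// Append an optional immediate to the MCInst: use the parsed operand recorded
// in OptionalIdx if present, otherwise emit the given default value.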
6418 static void addOptionalImmOperand(
6419   MCInst& Inst, const OperandVector& Operands,
6420   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6421   AMDGPUOperand::ImmTy ImmT,
6422   int64_t Default = 0) {
6423   auto i = OptionalIdx.find(ImmT);
6424   if (i != OptionalIdx.end()) {
6425     unsigned Idx = i->second;
6426     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6427   } else {
6428     Inst.addOperand(MCOperand::createImm(Default));
6429   }
6430 }
6431 
6432 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6433                                                    StringRef &Value,
6434                                                    SMLoc &StringLoc) {
6435   if (!trySkipId(Prefix, AsmToken::Colon))
6436     return ParseStatus::NoMatch;
6437 
6438   StringLoc = getLoc();
6439   return parseId(Value, "expected an identifier") ? ParseStatus::Success
6440                                                   : ParseStatus::Failure;
6441 }
6442 
6443 //===----------------------------------------------------------------------===//
6444 // MTBUF format
6445 //===----------------------------------------------------------------------===//
6446 
6447 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6448                                   int64_t MaxVal,
6449                                   int64_t &Fmt) {
6450   int64_t Val;
6451   SMLoc Loc = getLoc();
6452 
6453   auto Res = parseIntWithPrefix(Pref, Val);
6454   if (Res.isFailure())
6455     return false;
6456   if (Res.isNoMatch())
6457     return true;
6458 
6459   if (Val < 0 || Val > MaxVal) {
6460     Error(Loc, Twine("out of range ", StringRef(Pref)));
6461     return false;
6462   }
6463 
6464   Fmt = Val;
6465   return true;
6466 }
6467 
6468 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6469 // values to live in a joint format operand in the MCInst encoding.
6470 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6471   using namespace llvm::AMDGPU::MTBUFFormat;
6472 
6473   int64_t Dfmt = DFMT_UNDEF;
6474   int64_t Nfmt = NFMT_UNDEF;
6475 
6476   // dfmt and nfmt can appear in either order, and each is optional.
6477   for (int I = 0; I < 2; ++I) {
6478     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6479       return ParseStatus::Failure;
6480 
6481     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6482       return ParseStatus::Failure;
6483 
6484     // Skip optional comma between dfmt/nfmt
6485     // but guard against 2 commas following each other.
6486     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6487         !peekToken().is(AsmToken::Comma)) {
6488       trySkipToken(AsmToken::Comma);
6489     }
6490   }
6491 
6492   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6493     return ParseStatus::NoMatch;
6494 
6495   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6496   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6497 
6498   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6499   return ParseStatus::Success;
6500 }
6501 
6502 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6503   using namespace llvm::AMDGPU::MTBUFFormat;
6504 
6505   int64_t Fmt = UFMT_UNDEF;
6506 
6507   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6508     return ParseStatus::Failure;
6509 
6510   if (Fmt == UFMT_UNDEF)
6511     return ParseStatus::NoMatch;
6512 
6513   Format = Fmt;
6514   return ParseStatus::Success;
6515 }
6516 
6517 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6518                                     int64_t &Nfmt,
6519                                     StringRef FormatStr,
6520                                     SMLoc Loc) {
6521   using namespace llvm::AMDGPU::MTBUFFormat;
6522   int64_t Format;
6523 
6524   Format = getDfmt(FormatStr);
6525   if (Format != DFMT_UNDEF) {
6526     Dfmt = Format;
6527     return true;
6528   }
6529 
6530   Format = getNfmt(FormatStr, getSTI());
6531   if (Format != NFMT_UNDEF) {
6532     Nfmt = Format;
6533     return true;
6534   }
6535 
6536   Error(Loc, "unsupported format");
6537   return false;
6538 }
6539 
6540 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6541                                                       SMLoc FormatLoc,
6542                                                       int64_t &Format) {
6543   using namespace llvm::AMDGPU::MTBUFFormat;
6544 
6545   int64_t Dfmt = DFMT_UNDEF;
6546   int64_t Nfmt = NFMT_UNDEF;
6547   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6548     return ParseStatus::Failure;
6549 
6550   if (trySkipToken(AsmToken::Comma)) {
6551     StringRef Str;
6552     SMLoc Loc = getLoc();
6553     if (!parseId(Str, "expected a format string") ||
6554         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6555       return ParseStatus::Failure;
6556     if (Dfmt == DFMT_UNDEF)
6557       return Error(Loc, "duplicate numeric format");
6558     if (Nfmt == NFMT_UNDEF)
6559       return Error(Loc, "duplicate data format");
6560   }
6561 
6562   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6563   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6564 
6565   if (isGFX10Plus()) {
6566     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6567     if (Ufmt == UFMT_UNDEF)
6568       return Error(FormatLoc, "unsupported format");
6569     Format = Ufmt;
6570   } else {
6571     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6572   }
6573 
6574   return ParseStatus::Success;
6575 }
6576 
6577 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6578                                                         SMLoc Loc,
6579                                                         int64_t &Format) {
6580   using namespace llvm::AMDGPU::MTBUFFormat;
6581 
6582   auto Id = getUnifiedFormat(FormatStr, getSTI());
6583   if (Id == UFMT_UNDEF)
6584     return ParseStatus::NoMatch;
6585 
6586   if (!isGFX10Plus())
6587     return Error(Loc, "unified format is not supported on this GPU");
6588 
6589   Format = Id;
6590   return ParseStatus::Success;
6591 }
6592 
6593 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6594   using namespace llvm::AMDGPU::MTBUFFormat;
6595   SMLoc Loc = getLoc();
6596 
6597   if (!parseExpr(Format))
6598     return ParseStatus::Failure;
6599   if (!isValidFormatEncoding(Format, getSTI()))
6600     return Error(Loc, "out of range format");
6601 
6602   return ParseStatus::Success;
6603 }
6604 
6605 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6606   using namespace llvm::AMDGPU::MTBUFFormat;
6607 
6608   if (!trySkipId("format", AsmToken::Colon))
6609     return ParseStatus::NoMatch;
6610 
6611   if (trySkipToken(AsmToken::LBrac)) {
6612     StringRef FormatStr;
6613     SMLoc Loc = getLoc();
6614     if (!parseId(FormatStr, "expected a format string"))
6615       return ParseStatus::Failure;
6616 
6617     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6618     if (Res.isNoMatch())
6619       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6620     if (!Res.isSuccess())
6621       return Res;
6622 
6623     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6624       return ParseStatus::Failure;
6625 
6626     return ParseStatus::Success;
6627   }
6628 
6629   return parseNumericFormat(Format);
6630 }
6631 
6632 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6633   using namespace llvm::AMDGPU::MTBUFFormat;
6634 
6635   int64_t Format = getDefaultFormatEncoding(getSTI());
6636   ParseStatus Res;
6637   SMLoc Loc = getLoc();
6638 
6639   // Parse legacy format syntax.
6640   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6641   if (Res.isFailure())
6642     return Res;
6643 
6644   bool FormatFound = Res.isSuccess();
6645 
6646   Operands.push_back(
6647     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6648 
6649   if (FormatFound)
6650     trySkipToken(AsmToken::Comma);
6651 
6652   if (isToken(AsmToken::EndOfStatement)) {
6653     // We are expecting an soffset operand,
6654     // but let the matcher handle the error.
6655     return ParseStatus::Success;
6656   }
6657 
6658   // Parse soffset.
6659   Res = parseRegOrImm(Operands);
6660   if (!Res.isSuccess())
6661     return Res;
6662 
6663   trySkipToken(AsmToken::Comma);
6664 
6665   if (!FormatFound) {
6666     Res = parseSymbolicOrNumericFormat(Format);
6667     if (Res.isFailure())
6668       return Res;
6669     if (Res.isSuccess()) {
6670       auto Size = Operands.size();
6671       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6672       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6673       Op.setImm(Format);
6674     }
6675     return ParseStatus::Success;
6676   }
6677 
6678   if (isId("format") && peekToken().is(AsmToken::Colon))
6679     return Error(getLoc(), "duplicate format");
6680   return ParseStatus::Success;
6681 }
6682 
6683 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6684   ParseStatus Res =
6685       parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6686   if (Res.isNoMatch()) {
6687     Res = parseIntWithPrefix("inst_offset", Operands,
6688                              AMDGPUOperand::ImmTyInstOffset);
6689   }
6690   return Res;
6691 }
6692 
6693 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6694   ParseStatus Res =
6695       parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6696   if (Res.isNoMatch())
6697     Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6698   return Res;
6699 }
6700 
6701 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6702   ParseStatus Res =
6703       parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6704   if (Res.isNoMatch()) {
6705     Res =
6706         parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
6707   }
6708   return Res;
6709 }
6710 
6711 //===----------------------------------------------------------------------===//
6712 // Exp
6713 //===----------------------------------------------------------------------===//
6714 
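// Build an export MCInst from the parsed operands: registers and "off" fill
// the four source slots, the compr modifier repacks them into the compressed
// layout, and the enable mask is derived from which sources are present.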
6715 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6716   OptionalImmIndexMap OptionalIdx;
6717 
6718   unsigned OperandIdx[4];
6719   unsigned EnMask = 0;
6720   int SrcIdx = 0;
6721 
6722   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6723     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6724 
6725     // Add the register arguments
6726     if (Op.isReg()) {
6727       assert(SrcIdx < 4);
6728       OperandIdx[SrcIdx] = Inst.size();
6729       Op.addRegOperands(Inst, 1);
6730       ++SrcIdx;
6731       continue;
6732     }
6733 
6734     if (Op.isOff()) {
6735       assert(SrcIdx < 4);
6736       OperandIdx[SrcIdx] = Inst.size();
6737       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6738       ++SrcIdx;
6739       continue;
6740     }
6741 
6742     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6743       Op.addImmOperands(Inst, 1);
6744       continue;
6745     }
6746 
6747     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6748       continue;
6749 
6750     // Handle optional arguments
6751     OptionalIdx[Op.getImmTy()] = i;
6752   }
6753 
6754   assert(SrcIdx == 4);
6755 
6756   bool Compr = false;
6757   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6758     Compr = true;
6759     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6760     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6761     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6762   }
6763 
6764   for (auto i = 0; i < SrcIdx; ++i) {
6765     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6766       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6767     }
6768   }
6769 
6770   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6771   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6772 
6773   Inst.addOperand(MCOperand::createImm(EnMask));
6774 }
6775 
6776 //===----------------------------------------------------------------------===//
6777 // s_waitcnt
6778 //===----------------------------------------------------------------------===//
6779 
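// Encode one waitcnt counter field. If the requested value does not survive a
// round trip through the encoding, saturate it when Saturate is set,
// otherwise report failure.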
6780 static bool
6781 encodeCnt(
6782   const AMDGPU::IsaVersion ISA,
6783   int64_t &IntVal,
6784   int64_t CntVal,
6785   bool Saturate,
6786   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6787   unsigned (*decode)(const IsaVersion &Version, unsigned))
6788 {
6789   bool Failed = false;
6790 
6791   IntVal = encode(ISA, IntVal, CntVal);
6792   if (CntVal != decode(ISA, IntVal)) {
6793     if (Saturate) {
6794       IntVal = encode(ISA, IntVal, -1);
6795     } else {
6796       Failed = true;
6797     }
6798   }
6799   return Failed;
6800 }
6801 
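// Parse one counter term of an s_waitcnt operand, e.g. vmcnt(0), and fold its
// encoding into the combined bitmask; the *_sat forms saturate out-of-range
// values instead of reporting an error.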
6802 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6803 
6804   SMLoc CntLoc = getLoc();
6805   StringRef CntName = getTokenStr();
6806 
6807   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6808       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6809     return false;
6810 
6811   int64_t CntVal;
6812   SMLoc ValLoc = getLoc();
6813   if (!parseExpr(CntVal))
6814     return false;
6815 
6816   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6817 
6818   bool Failed = true;
6819   bool Sat = CntName.ends_with("_sat");
6820 
6821   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6822     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6823   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6824     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6825   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6826     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6827   } else {
6828     Error(CntLoc, "invalid counter name " + CntName);
6829     return false;
6830   }
6831 
6832   if (Failed) {
6833     Error(ValLoc, "too large value for " + CntName);
6834     return false;
6835   }
6836 
6837   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6838     return false;
6839 
6840   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6841     if (isToken(AsmToken::EndOfStatement)) {
6842       Error(getLoc(), "expected a counter name");
6843       return false;
6844     }
6845   }
6846 
6847   return true;
6848 }
6849 
6850 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
6851   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6852   int64_t Waitcnt = getWaitcntBitMask(ISA);
6853   SMLoc S = getLoc();
6854 
6855   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6856     while (!isToken(AsmToken::EndOfStatement)) {
6857       if (!parseCnt(Waitcnt))
6858         return ParseStatus::Failure;
6859     }
6860   } else {
6861     if (!parseExpr(Waitcnt))
6862       return ParseStatus::Failure;
6863   }
6864 
6865   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6866   return ParseStatus::Success;
6867 }
6868 
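// Parse one field(VALUE) term of an s_delay_alu operand, e.g.
// instid0(VALU_DEP_1), and merge it into the packed delay encoding.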
6869 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6870   SMLoc FieldLoc = getLoc();
6871   StringRef FieldName = getTokenStr();
6872   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6873       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6874     return false;
6875 
6876   SMLoc ValueLoc = getLoc();
6877   StringRef ValueName = getTokenStr();
6878   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6879       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6880     return false;
6881 
6882   unsigned Shift;
6883   if (FieldName == "instid0") {
6884     Shift = 0;
6885   } else if (FieldName == "instskip") {
6886     Shift = 4;
6887   } else if (FieldName == "instid1") {
6888     Shift = 7;
6889   } else {
6890     Error(FieldLoc, "invalid field name " + FieldName);
6891     return false;
6892   }
6893 
6894   int Value;
6895   if (Shift == 4) {
6896     // Parse values for instskip.
6897     Value = StringSwitch<int>(ValueName)
6898                 .Case("SAME", 0)
6899                 .Case("NEXT", 1)
6900                 .Case("SKIP_1", 2)
6901                 .Case("SKIP_2", 3)
6902                 .Case("SKIP_3", 4)
6903                 .Case("SKIP_4", 5)
6904                 .Default(-1);
6905   } else {
6906     // Parse values for instid0 and instid1.
6907     Value = StringSwitch<int>(ValueName)
6908                 .Case("NO_DEP", 0)
6909                 .Case("VALU_DEP_1", 1)
6910                 .Case("VALU_DEP_2", 2)
6911                 .Case("VALU_DEP_3", 3)
6912                 .Case("VALU_DEP_4", 4)
6913                 .Case("TRANS32_DEP_1", 5)
6914                 .Case("TRANS32_DEP_2", 6)
6915                 .Case("TRANS32_DEP_3", 7)
6916                 .Case("FMA_ACCUM_CYCLE_1", 8)
6917                 .Case("SALU_CYCLE_1", 9)
6918                 .Case("SALU_CYCLE_2", 10)
6919                 .Case("SALU_CYCLE_3", 11)
6920                 .Default(-1);
6921   }
6922   if (Value < 0) {
6923     Error(ValueLoc, "invalid value name " + ValueName);
6924     return false;
6925   }
6926 
6927   Delay |= Value << Shift;
6928   return true;
6929 }
6930 
6931 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
6932   int64_t Delay = 0;
6933   SMLoc S = getLoc();
6934 
6935   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6936     do {
6937       if (!parseDelay(Delay))
6938         return ParseStatus::Failure;
6939     } while (trySkipToken(AsmToken::Pipe));
6940   } else {
6941     if (!parseExpr(Delay))
6942       return ParseStatus::Failure;
6943   }
6944 
6945   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6946   return ParseStatus::Success;
6947 }
6948 
6949 bool
6950 AMDGPUOperand::isSWaitCnt() const {
6951   return isImm();
6952 }
6953 
6954 bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
6955 
6956 //===----------------------------------------------------------------------===//
6957 // DepCtr
6958 //===----------------------------------------------------------------------===//
6959 
6960 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6961                                   StringRef DepCtrName) {
6962   switch (ErrorId) {
6963   case OPR_ID_UNKNOWN:
6964     Error(Loc, Twine("invalid counter name ", DepCtrName));
6965     return;
6966   case OPR_ID_UNSUPPORTED:
6967     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6968     return;
6969   case OPR_ID_DUPLICATE:
6970     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6971     return;
6972   case OPR_VAL_INVALID:
6973     Error(Loc, Twine("invalid value for ", DepCtrName));
6974     return;
6975   default:
6976     assert(false);
6977   }
6978 }
6979 
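// Parse a single depctr field of the form name(value) and merge its encoding
// into DepCtr, tracking which fields have already been seen in UsedOprMask.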
6980 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6981 
6982   using namespace llvm::AMDGPU::DepCtr;
6983 
6984   SMLoc DepCtrLoc = getLoc();
6985   StringRef DepCtrName = getTokenStr();
6986 
6987   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6988       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6989     return false;
6990 
6991   int64_t ExprVal;
6992   if (!parseExpr(ExprVal))
6993     return false;
6994 
6995   unsigned PrevOprMask = UsedOprMask;
6996   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6997 
6998   if (CntVal < 0) {
6999     depCtrError(DepCtrLoc, CntVal, DepCtrName);
7000     return false;
7001   }
7002 
7003   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7004     return false;
7005 
7006   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7007     if (isToken(AsmToken::EndOfStatement)) {
7008       Error(getLoc(), "expected a counter name");
7009       return false;
7010     }
7011   }
7012 
7013   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7014   DepCtr = (DepCtr & ~CntValMask) | CntVal;
7015   return true;
7016 }
7017 
7018 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7019   using namespace llvm::AMDGPU::DepCtr;
7020 
7021   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7022   SMLoc Loc = getLoc();
7023 
7024   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7025     unsigned UsedOprMask = 0;
7026     while (!isToken(AsmToken::EndOfStatement)) {
7027       if (!parseDepCtr(DepCtr, UsedOprMask))
7028         return ParseStatus::Failure;
7029     }
7030   } else {
7031     if (!parseExpr(DepCtr))
7032       return ParseStatus::Failure;
7033   }
7034 
7035   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7036   return ParseStatus::Success;
7037 }
7038 
7039 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7040 
7041 //===----------------------------------------------------------------------===//
7042 // hwreg
7043 //===----------------------------------------------------------------------===//
7044 
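// Parse the body of a hwreg(...) operand: a register name or numeric code,
// optionally followed by a bit offset and a bitfield width.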
7045 bool
7046 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
7047                                 OperandInfoTy &Offset,
7048                                 OperandInfoTy &Width) {
7049   using namespace llvm::AMDGPU::Hwreg;
7050 
7051   // The register may be specified by name or using a numeric code
7052   HwReg.Loc = getLoc();
7053   if (isToken(AsmToken::Identifier) &&
7054       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7055     HwReg.IsSymbolic = true;
7056     lex(); // skip register name
7057   } else if (!parseExpr(HwReg.Id, "a register name")) {
7058     return false;
7059   }
7060 
7061   if (trySkipToken(AsmToken::RParen))
7062     return true;
7063 
7064   // Parse optional parameters.
7065   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7066     return false;
7067 
7068   Offset.Loc = getLoc();
7069   if (!parseExpr(Offset.Id))
7070     return false;
7071 
7072   if (!skipToken(AsmToken::Comma, "expected a comma"))
7073     return false;
7074 
7075   Width.Loc = getLoc();
7076   return parseExpr(Width.Id) &&
7077          skipToken(AsmToken::RParen, "expected a closing parenthesis");
7078 }
7079 
7080 bool
7081 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
7082                                const OperandInfoTy &Offset,
7083                                const OperandInfoTy &Width) {
7084 
7085   using namespace llvm::AMDGPU::Hwreg;
7086 
7087   if (HwReg.IsSymbolic) {
7088     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
7089       Error(HwReg.Loc,
7090             "specified hardware register is not supported on this GPU");
7091       return false;
7092     }
7093   } else {
7094     if (!isValidHwreg(HwReg.Id)) {
7095       Error(HwReg.Loc,
7096             "invalid code of hardware register: only 6-bit values are legal");
7097       return false;
7098     }
7099   }
7100   if (!isValidHwregOffset(Offset.Id)) {
7101     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
7102     return false;
7103   }
7104   if (!isValidHwregWidth(Width.Id)) {
7105     Error(Width.Loc,
7106           "invalid bitfield width: only values from 1 to 32 are legal");
7107     return false;
7108   }
7109   return true;
7110 }
7111 
7112 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7113   using namespace llvm::AMDGPU::Hwreg;
7114 
7115   int64_t ImmVal = 0;
7116   SMLoc Loc = getLoc();
7117 
7118   if (trySkipId("hwreg", AsmToken::LParen)) {
7119     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
7120     OperandInfoTy Offset(OFFSET_DEFAULT_);
7121     OperandInfoTy Width(WIDTH_DEFAULT_);
7122     if (parseHwregBody(HwReg, Offset, Width) &&
7123         validateHwreg(HwReg, Offset, Width)) {
7124       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
7125     } else {
7126       return ParseStatus::Failure;
7127     }
7128   } else if (parseExpr(ImmVal, "a hwreg macro")) {
7129     if (ImmVal < 0 || !isUInt<16>(ImmVal))
7130       return Error(Loc, "invalid immediate: only 16-bit values are legal");
7131   } else {
7132     return ParseStatus::Failure;
7133   }
7134 
7135   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7136   return ParseStatus::Success;
7137 }
7138 
7139 bool AMDGPUOperand::isHwreg() const {
7140   return isImmTy(ImmTyHwreg);
7141 }
7142 
7143 //===----------------------------------------------------------------------===//
7144 // sendmsg
7145 //===----------------------------------------------------------------------===//
7146 
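// Parse the body of a sendmsg(...) operand: a message name or id, optionally
// followed by an operation and a stream id.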
7147 bool
7148 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7149                                   OperandInfoTy &Op,
7150                                   OperandInfoTy &Stream) {
7151   using namespace llvm::AMDGPU::SendMsg;
7152 
7153   Msg.Loc = getLoc();
7154   if (isToken(AsmToken::Identifier) &&
7155       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7156     Msg.IsSymbolic = true;
7157     lex(); // skip message name
7158   } else if (!parseExpr(Msg.Id, "a message name")) {
7159     return false;
7160   }
7161 
7162   if (trySkipToken(AsmToken::Comma)) {
7163     Op.IsDefined = true;
7164     Op.Loc = getLoc();
7165     if (isToken(AsmToken::Identifier) &&
7166         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
7167       lex(); // skip operation name
7168     } else if (!parseExpr(Op.Id, "an operation name")) {
7169       return false;
7170     }
7171 
7172     if (trySkipToken(AsmToken::Comma)) {
7173       Stream.IsDefined = true;
7174       Stream.Loc = getLoc();
7175       if (!parseExpr(Stream.Id))
7176         return false;
7177     }
7178   }
7179 
7180   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7181 }
7182 
7183 bool
7184 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7185                                  const OperandInfoTy &Op,
7186                                  const OperandInfoTy &Stream) {
7187   using namespace llvm::AMDGPU::SendMsg;
7188 
7189   // Validation strictness depends on whether message is specified
7190   // in a symbolic or in a numeric form. In the latter case
7191   // only encoding possibility is checked.
7192   bool Strict = Msg.IsSymbolic;
7193 
7194   if (Strict) {
7195     if (Msg.Id == OPR_ID_UNSUPPORTED) {
7196       Error(Msg.Loc, "specified message id is not supported on this GPU");
7197       return false;
7198     }
7199   } else {
7200     if (!isValidMsgId(Msg.Id, getSTI())) {
7201       Error(Msg.Loc, "invalid message id");
7202       return false;
7203     }
7204   }
7205   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
7206     if (Op.IsDefined) {
7207       Error(Op.Loc, "message does not support operations");
7208     } else {
7209       Error(Msg.Loc, "missing message operation");
7210     }
7211     return false;
7212   }
7213   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
7214     Error(Op.Loc, "invalid operation id");
7215     return false;
7216   }
7217   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
7218       Stream.IsDefined) {
7219     Error(Stream.Loc, "message operation does not support streams");
7220     return false;
7221   }
7222   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
7223     Error(Stream.Loc, "invalid message stream id");
7224     return false;
7225   }
7226   return true;
7227 }
7228 
7229 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7230   using namespace llvm::AMDGPU::SendMsg;
7231 
7232   int64_t ImmVal = 0;
7233   SMLoc Loc = getLoc();
7234 
7235   if (trySkipId("sendmsg", AsmToken::LParen)) {
7236     OperandInfoTy Msg(OPR_ID_UNKNOWN);
7237     OperandInfoTy Op(OP_NONE_);
7238     OperandInfoTy Stream(STREAM_ID_NONE_);
7239     if (parseSendMsgBody(Msg, Op, Stream) &&
7240         validateSendMsg(Msg, Op, Stream)) {
7241       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
7242     } else {
7243       return ParseStatus::Failure;
7244     }
7245   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7246     if (ImmVal < 0 || !isUInt<16>(ImmVal))
7247       return Error(Loc, "invalid immediate: only 16-bit values are legal");
7248   } else {
7249     return ParseStatus::Failure;
7250   }
7251 
7252   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7253   return ParseStatus::Success;
7254 }
7255 
7256 bool AMDGPUOperand::isSendMsg() const {
7257   return isImmTy(ImmTySendMsg);
7258 }
7259 
7260 //===----------------------------------------------------------------------===//
7261 // v_interp
7262 //===----------------------------------------------------------------------===//
7263 
7264 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7265   StringRef Str;
7266   SMLoc S = getLoc();
7267 
7268   if (!parseId(Str))
7269     return ParseStatus::NoMatch;
7270 
7271   int Slot = StringSwitch<int>(Str)
7272     .Case("p10", 0)
7273     .Case("p20", 1)
7274     .Case("p0", 2)
7275     .Default(-1);
7276 
7277   if (Slot == -1)
7278     return Error(S, "invalid interpolation slot");
7279 
7280   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7281                                               AMDGPUOperand::ImmTyInterpSlot));
7282   return ParseStatus::Success;
7283 }
7284 
7285 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7286   StringRef Str;
7287   SMLoc S = getLoc();
7288 
7289   if (!parseId(Str))
7290     return ParseStatus::NoMatch;
7291 
7292   if (!Str.starts_with("attr"))
7293     return Error(S, "invalid interpolation attribute");
7294 
7295   StringRef Chan = Str.take_back(2);
7296   int AttrChan = StringSwitch<int>(Chan)
7297     .Case(".x", 0)
7298     .Case(".y", 1)
7299     .Case(".z", 2)
7300     .Case(".w", 3)
7301     .Default(-1);
7302   if (AttrChan == -1)
7303     return Error(S, "invalid or missing interpolation attribute channel");
7304 
7305   Str = Str.drop_back(2).drop_front(4);
7306 
7307   uint8_t Attr;
7308   if (Str.getAsInteger(10, Attr))
7309     return Error(S, "invalid or missing interpolation attribute number");
7310 
7311   if (Attr > 32)
7312     return Error(S, "out of bounds interpolation attribute number");
7313 
7314   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7315 
7316   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7317                                               AMDGPUOperand::ImmTyInterpAttr));
7318   Operands.push_back(AMDGPUOperand::CreateImm(
7319       this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7320   return ParseStatus::Success;
7321 }
7322 
7323 //===----------------------------------------------------------------------===//
7324 // exp
7325 //===----------------------------------------------------------------------===//
7326 
7327 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7328   using namespace llvm::AMDGPU::Exp;
7329 
7330   StringRef Str;
7331   SMLoc S = getLoc();
7332 
7333   if (!parseId(Str))
7334     return ParseStatus::NoMatch;
7335 
7336   unsigned Id = getTgtId(Str);
7337   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7338     return Error(S, (Id == ET_INVALID)
7339                         ? "invalid exp target"
7340                         : "exp target is not supported on this GPU");
7341 
7342   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7343                                               AMDGPUOperand::ImmTyExpTgt));
7344   return ParseStatus::Success;
7345 }
7346 
7347 //===----------------------------------------------------------------------===//
7348 // parser helpers
7349 //===----------------------------------------------------------------------===//
7350 
7351 bool
7352 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7353   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7354 }
7355 
7356 bool
7357 AMDGPUAsmParser::isId(const StringRef Id) const {
7358   return isId(getToken(), Id);
7359 }
7360 
7361 bool
7362 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7363   return getTokenKind() == Kind;
7364 }
7365 
7366 StringRef AMDGPUAsmParser::getId() const {
7367   return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7368 }
7369 
7370 bool
7371 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7372   if (isId(Id)) {
7373     lex();
7374     return true;
7375   }
7376   return false;
7377 }
7378 
7379 bool
7380 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7381   if (isToken(AsmToken::Identifier)) {
7382     StringRef Tok = getTokenStr();
7383     if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7384       lex();
7385       return true;
7386     }
7387   }
7388   return false;
7389 }
7390 
7391 bool
7392 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7393   if (isId(Id) && peekToken().is(Kind)) {
7394     lex();
7395     lex();
7396     return true;
7397   }
7398   return false;
7399 }
7400 
7401 bool
7402 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7403   if (isToken(Kind)) {
7404     lex();
7405     return true;
7406   }
7407   return false;
7408 }
7409 
7410 bool
7411 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7412                            const StringRef ErrMsg) {
7413   if (!trySkipToken(Kind)) {
7414     Error(getLoc(), ErrMsg);
7415     return false;
7416   }
7417   return true;
7418 }
7419 
7420 bool
7421 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7422   SMLoc S = getLoc();
7423 
7424   const MCExpr *Expr;
7425   if (Parser.parseExpression(Expr))
7426     return false;
7427 
7428   if (Expr->evaluateAsAbsolute(Imm))
7429     return true;
7430 
7431   if (Expected.empty()) {
7432     Error(S, "expected absolute expression");
7433   } else {
7434     Error(S, Twine("expected ", Expected) +
7435              Twine(" or an absolute expression"));
7436   }
7437   return false;
7438 }
7439 
7440 bool
7441 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7442   SMLoc S = getLoc();
7443 
7444   const MCExpr *Expr;
7445   if (Parser.parseExpression(Expr))
7446     return false;
7447 
7448   int64_t IntVal;
7449   if (Expr->evaluateAsAbsolute(IntVal)) {
7450     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7451   } else {
7452     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7453   }
7454   return true;
7455 }
7456 
7457 bool
7458 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7459   if (isToken(AsmToken::String)) {
7460     Val = getToken().getStringContents();
7461     lex();
7462     return true;
7463   } else {
7464     Error(getLoc(), ErrMsg);
7465     return false;
7466   }
7467 }
7468 
7469 bool
7470 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7471   if (isToken(AsmToken::Identifier)) {
7472     Val = getTokenStr();
7473     lex();
7474     return true;
7475   } else {
7476     if (!ErrMsg.empty())
7477       Error(getLoc(), ErrMsg);
7478     return false;
7479   }
7480 }
7481 
7482 AsmToken
7483 AMDGPUAsmParser::getToken() const {
7484   return Parser.getTok();
7485 }
7486 
7487 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7488   return isToken(AsmToken::EndOfStatement)
7489              ? getToken()
7490              : getLexer().peekTok(ShouldSkipSpace);
7491 }
7492 
7493 void
7494 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7495   auto TokCount = getLexer().peekTokens(Tokens);
7496 
7497   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7498     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7499 }
7500 
7501 AsmToken::TokenKind
7502 AMDGPUAsmParser::getTokenKind() const {
7503   return getLexer().getKind();
7504 }
7505 
7506 SMLoc
7507 AMDGPUAsmParser::getLoc() const {
7508   return getToken().getLoc();
7509 }
7510 
7511 StringRef
7512 AMDGPUAsmParser::getTokenStr() const {
7513   return getToken().getString();
7514 }
7515 
7516 void
7517 AMDGPUAsmParser::lex() {
7518   Parser.Lex();
7519 }
7520 
7521 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7522   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7523 }
7524 
7525 SMLoc
7526 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7527                                const OperandVector &Operands) const {
7528   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7529     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7530     if (Test(Op))
7531       return Op.getStartLoc();
7532   }
7533   return getInstLoc(Operands);
7534 }
7535 
7536 SMLoc
7537 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7538                            const OperandVector &Operands) const {
7539   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7540   return getOperandLoc(Test, Operands);
7541 }
7542 
7543 SMLoc
7544 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7545                            const OperandVector &Operands) const {
7546   auto Test = [=](const AMDGPUOperand& Op) {
7547     return Op.isRegKind() && Op.getReg() == Reg;
7548   };
7549   return getOperandLoc(Test, Operands);
7550 }
7551 
7552 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7553                                  bool SearchMandatoryLiterals) const {
7554   auto Test = [](const AMDGPUOperand& Op) {
7555     return Op.IsImmKindLiteral() || Op.isExpr();
7556   };
7557   SMLoc Loc = getOperandLoc(Test, Operands);
7558   if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7559     Loc = getMandatoryLitLoc(Operands);
7560   return Loc;
7561 }
7562 
7563 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7564   auto Test = [](const AMDGPUOperand &Op) {
7565     return Op.IsImmKindMandatoryLiteral();
7566   };
7567   return getOperandLoc(Test, Operands);
7568 }
7569 
7570 SMLoc
7571 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7572   auto Test = [](const AMDGPUOperand& Op) {
7573     return Op.isImmKindConst();
7574   };
7575   return getOperandLoc(Test, Operands);
7576 }
7577 
7578 //===----------------------------------------------------------------------===//
7579 // swizzle
7580 //===----------------------------------------------------------------------===//
7581 
7582 LLVM_READNONE
7583 static unsigned
7584 encodeBitmaskPerm(const unsigned AndMask,
7585                   const unsigned OrMask,
7586                   const unsigned XorMask) {
7587   using namespace llvm::AMDGPU::Swizzle;
7588 
7589   return BITMASK_PERM_ENC |
7590          (AndMask << BITMASK_AND_SHIFT) |
7591          (OrMask  << BITMASK_OR_SHIFT)  |
7592          (XorMask << BITMASK_XOR_SHIFT);
7593 }
7594 
7595 bool
7596 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7597                                      const unsigned MinVal,
7598                                      const unsigned MaxVal,
7599                                      const StringRef ErrMsg,
7600                                      SMLoc &Loc) {
7601   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7602     return false;
7603   }
7604   Loc = getLoc();
7605   if (!parseExpr(Op)) {
7606     return false;
7607   }
7608   if (Op < MinVal || Op > MaxVal) {
7609     Error(Loc, ErrMsg);
7610     return false;
7611   }
7612 
7613   return true;
7614 }
7615 
7616 bool
7617 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7618                                       const unsigned MinVal,
7619                                       const unsigned MaxVal,
7620                                       const StringRef ErrMsg) {
7621   SMLoc Loc;
7622   for (unsigned i = 0; i < OpNum; ++i) {
7623     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7624       return false;
7625   }
7626 
7627   return true;
7628 }
7629 
7630 bool
7631 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7632   using namespace llvm::AMDGPU::Swizzle;
7633 
7634   int64_t Lane[LANE_NUM];
7635   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7636                            "expected a 2-bit lane id")) {
7637     Imm = QUAD_PERM_ENC;
7638     for (unsigned I = 0; I < LANE_NUM; ++I) {
7639       Imm |= Lane[I] << (LANE_SHIFT * I);
7640     }
7641     return true;
7642   }
7643   return false;
7644 }
7645 
7646 bool
7647 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7648   using namespace llvm::AMDGPU::Swizzle;
7649 
7650   SMLoc Loc;
7651   int64_t GroupSize;
7652   int64_t LaneIdx;
7653 
7654   if (!parseSwizzleOperand(GroupSize,
7655                            2, 32,
7656                            "group size must be in the interval [2,32]",
7657                            Loc)) {
7658     return false;
7659   }
7660   if (!isPowerOf2_64(GroupSize)) {
7661     Error(Loc, "group size must be a power of two");
7662     return false;
7663   }
7664   if (parseSwizzleOperand(LaneIdx,
7665                           0, GroupSize - 1,
7666                           "lane id must be in the interval [0,group size - 1]",
7667                           Loc)) {
7668     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7669     return true;
7670   }
7671   return false;
7672 }
7673 
7674 bool
7675 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7676   using namespace llvm::AMDGPU::Swizzle;
7677 
7678   SMLoc Loc;
7679   int64_t GroupSize;
7680 
7681   if (!parseSwizzleOperand(GroupSize,
7682                            2, 32,
7683                            "group size must be in the interval [2,32]",
7684                            Loc)) {
7685     return false;
7686   }
7687   if (!isPowerOf2_64(GroupSize)) {
7688     Error(Loc, "group size must be a power of two");
7689     return false;
7690   }
7691 
7692   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7693   return true;
7694 }
7695 
7696 bool
7697 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7698   using namespace llvm::AMDGPU::Swizzle;
7699 
7700   SMLoc Loc;
7701   int64_t GroupSize;
7702 
7703   if (!parseSwizzleOperand(GroupSize,
7704                            1, 16,
7705                            "group size must be in the interval [1,16]",
7706                            Loc)) {
7707     return false;
7708   }
7709   if (!isPowerOf2_64(GroupSize)) {
7710     Error(Loc, "group size must be a power of two");
7711     return false;
7712   }
7713 
7714   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7715   return true;
7716 }
7717 
7718 bool
7719 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7720   using namespace llvm::AMDGPU::Swizzle;
7721 
7722   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7723     return false;
7724   }
7725 
7726   StringRef Ctl;
7727   SMLoc StrLoc = getLoc();
7728   if (!parseString(Ctl)) {
7729     return false;
7730   }
7731   if (Ctl.size() != BITMASK_WIDTH) {
7732     Error(StrLoc, "expected a 5-character mask");
7733     return false;
7734   }
7735 
7736   unsigned AndMask = 0;
7737   unsigned OrMask = 0;
7738   unsigned XorMask = 0;
7739 
7740   for (size_t i = 0; i < Ctl.size(); ++i) {
7741     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7742     switch(Ctl[i]) {
7743     default:
7744       Error(StrLoc, "invalid mask");
7745       return false;
7746     case '0':
7747       break;
7748     case '1':
7749       OrMask |= Mask;
7750       break;
7751     case 'p':
7752       AndMask |= Mask;
7753       break;
7754     case 'i':
7755       AndMask |= Mask;
7756       XorMask |= Mask;
7757       break;
7758     }
7759   }
7760 
7761   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7762   return true;
7763 }
7764 
7765 bool
7766 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7767 
7768   SMLoc OffsetLoc = getLoc();
7769 
7770   if (!parseExpr(Imm, "a swizzle macro")) {
7771     return false;
7772   }
7773   if (!isUInt<16>(Imm)) {
7774     Error(OffsetLoc, "expected a 16-bit offset");
7775     return false;
7776   }
7777   return true;
7778 }
7779 
7780 bool
7781 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7782   using namespace llvm::AMDGPU::Swizzle;
7783 
7784   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7785 
7786     SMLoc ModeLoc = getLoc();
7787     bool Ok = false;
7788 
7789     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7790       Ok = parseSwizzleQuadPerm(Imm);
7791     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7792       Ok = parseSwizzleBitmaskPerm(Imm);
7793     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7794       Ok = parseSwizzleBroadcast(Imm);
7795     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7796       Ok = parseSwizzleSwap(Imm);
7797     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7798       Ok = parseSwizzleReverse(Imm);
7799     } else {
7800       Error(ModeLoc, "expected a swizzle mode");
7801     }
7802 
7803     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7804   }
7805 
7806   return false;
7807 }
7808 
7809 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
7810   SMLoc S = getLoc();
7811   int64_t Imm = 0;
7812 
7813   if (trySkipId("offset")) {
7814 
7815     bool Ok = false;
7816     if (skipToken(AsmToken::Colon, "expected a colon")) {
7817       if (trySkipId("swizzle")) {
7818         Ok = parseSwizzleMacro(Imm);
7819       } else {
7820         Ok = parseSwizzleOffset(Imm);
7821       }
7822     }
7823 
7824     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7825 
7826     return Ok ? ParseStatus::Success : ParseStatus::Failure;
7827   }
7828   return ParseStatus::NoMatch;
7829 }
7830 
7831 bool
7832 AMDGPUOperand::isSwizzle() const {
7833   return isImmTy(ImmTySwizzle);
7834 }
7835 
7836 //===----------------------------------------------------------------------===//
7837 // VGPR Index Mode
7838 //===----------------------------------------------------------------------===//
7839 
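     // Parses the body of a gpr_idx(...) macro: either an empty list, meaning
     // OFF, or a comma-separated list of distinct VGPR index modes terminated
     // by a closing parenthesis.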
7840 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7841 
7842   using namespace llvm::AMDGPU::VGPRIndexMode;
7843 
7844   if (trySkipToken(AsmToken::RParen)) {
7845     return OFF;
7846   }
7847 
7848   int64_t Imm = 0;
7849 
7850   while (true) {
7851     unsigned Mode = 0;
7852     SMLoc S = getLoc();
7853 
7854     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7855       if (trySkipId(IdSymbolic[ModeId])) {
7856         Mode = 1 << ModeId;
7857         break;
7858       }
7859     }
7860 
7861     if (Mode == 0) {
7862       Error(S, (Imm == 0)?
7863                "expected a VGPR index mode or a closing parenthesis" :
7864                "expected a VGPR index mode");
7865       return UNDEF;
7866     }
7867 
7868     if (Imm & Mode) {
7869       Error(S, "duplicate VGPR index mode");
7870       return UNDEF;
7871     }
7872     Imm |= Mode;
7873 
7874     if (trySkipToken(AsmToken::RParen))
7875       break;
7876     if (!skipToken(AsmToken::Comma,
7877                    "expected a comma or a closing parenthesis"))
7878       return UNDEF;
7879   }
7880 
7881   return Imm;
7882 }
7883 
7884 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7885 
7886   using namespace llvm::AMDGPU::VGPRIndexMode;
7887 
7888   int64_t Imm = 0;
7889   SMLoc S = getLoc();
7890 
7891   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7892     Imm = parseGPRIdxMacro();
7893     if (Imm == UNDEF)
7894       return ParseStatus::Failure;
7895   } else {
7896     if (getParser().parseAbsoluteExpression(Imm))
7897       return ParseStatus::Failure;
7898     if (Imm < 0 || !isUInt<4>(Imm))
7899       return Error(S, "invalid immediate: only 4-bit values are legal");
7900   }
7901 
7902   Operands.push_back(
7903       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7904   return ParseStatus::Success;
7905 }
7906 
7907 bool AMDGPUOperand::isGPRIdxMode() const {
7908   return isImmTy(ImmTyGprIdxMode);
7909 }
7910 
7911 //===----------------------------------------------------------------------===//
7912 // sopp branch targets
7913 //===----------------------------------------------------------------------===//
7914 
7915 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
7916 
7917   // Make sure we are not parsing something
7918   // that looks like a label or an expression but is not.
7919   // This will improve error messages.
7920   if (isRegister() || isModifier())
7921     return ParseStatus::NoMatch;
7922 
7923   if (!parseExpr(Operands))
7924     return ParseStatus::Failure;
7925 
7926   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7927   assert(Opr.isImm() || Opr.isExpr());
7928   SMLoc Loc = Opr.getStartLoc();
7929 
7930   // Currently we do not support arbitrary expressions as branch targets.
7931   // Only labels and absolute expressions are accepted.
7932   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7933     Error(Loc, "expected an absolute expression or a label");
7934   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7935     Error(Loc, "expected a 16-bit signed jump offset");
7936   }
7937 
7938   return ParseStatus::Success;
7939 }
7940 
7941 //===----------------------------------------------------------------------===//
7942 // Boolean holding registers
7943 //===----------------------------------------------------------------------===//
7944 
7945 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7946   return parseReg(Operands);
7947 }
7948 
7949 //===----------------------------------------------------------------------===//
7950 // mubuf
7951 //===----------------------------------------------------------------------===//
7952 
7953 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7954                                    const OperandVector &Operands,
7955                                    bool IsAtomic) {
7956   OptionalImmIndexMap OptionalIdx;
7957   unsigned FirstOperandIdx = 1;
7958   bool IsAtomicReturn = false;
7959 
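       // For atomics, the GLC bit of the cache policy selects the "return"
       // form; without it, switch to the no-return opcode when one exists.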
7960   if (IsAtomic) {
7961     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7962       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7963       if (!Op.isCPol())
7964         continue;
7965       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7966       break;
7967     }
7968 
7969     if (!IsAtomicReturn) {
7970       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7971       if (NewOpc != -1)
7972         Inst.setOpcode(NewOpc);
7973     }
7974 
7975     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7976                       SIInstrFlags::IsAtomicRet;
7977   }
7978 
7979   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7980     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7981 
7982     // Add the register arguments
7983     if (Op.isReg()) {
7984       Op.addRegOperands(Inst, 1);
7985       // Insert a tied src for atomic return dst.
7986       // This cannot be postponed as subsequent calls to
7987       // addImmOperands rely on correct number of MC operands.
7988       if (IsAtomicReturn && i == FirstOperandIdx)
7989         Op.addRegOperands(Inst, 1);
7990       continue;
7991     }
7992 
7993     // Handle the case where soffset is an immediate
7994     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7995       Op.addImmOperands(Inst, 1);
7996       continue;
7997     }
7998 
7999     // Handle tokens like 'offen' which are sometimes hard-coded into the
8000     // asm string.  There are no MCInst operands for these.
8001     if (Op.isToken()) {
8002       continue;
8003     }
8004     assert(Op.isImm());
8005 
8006     // Handle optional arguments
8007     OptionalIdx[Op.getImmTy()] = i;
8008   }
8009 
8010   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8011   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8012 }
8013 
8014 //===----------------------------------------------------------------------===//
8015 // smrd
8016 //===----------------------------------------------------------------------===//
8017 
8018 bool AMDGPUOperand::isSMRDOffset8() const {
8019   return isImmLiteral() && isUInt<8>(getImm());
8020 }
8021 
8022 bool AMDGPUOperand::isSMEMOffset() const {
8023   // Offset range is checked later by validator.
8024   return isImmLiteral();
8025 }
8026 
8027 bool AMDGPUOperand::isSMRDLiteralOffset() const {
8028   // 32-bit literals are only supported on CI and we only want to use them
8029   // when the offset does not fit in 8 bits.
8030   return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8031 }
8032 
8033 //===----------------------------------------------------------------------===//
8034 // vop3
8035 //===----------------------------------------------------------------------===//
8036 
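     // Convert a parsed 'mul:N' / 'div:N' value to the hardware omod encoding:
     // mul:1/2/4 map to 0/1/2 (a right shift), div:1 maps to 0 and div:2 to 3.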
8037 static bool ConvertOmodMul(int64_t &Mul) {
8038   if (Mul != 1 && Mul != 2 && Mul != 4)
8039     return false;
8040 
8041   Mul >>= 1;
8042   return true;
8043 }
8044 
8045 static bool ConvertOmodDiv(int64_t &Div) {
8046   if (Div == 1) {
8047     Div = 0;
8048     return true;
8049   }
8050 
8051   if (Div == 2) {
8052     Div = 3;
8053     return true;
8054   }
8055 
8056   return false;
8057 }
8058 
8059 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8060 // This is intentional and ensures compatibility with sp3.
8061 // See bug 35397 for details.
8062 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8063   if (BoundCtrl == 0 || BoundCtrl == 1) {
8064     if (!isGFX11Plus())
8065       BoundCtrl = 1;
8066     return true;
8067   }
8068   return false;
8069 }
8070 
8071 void AMDGPUAsmParser::onBeginOfFile() {
8072   if (!getParser().getStreamer().getTargetStreamer() ||
8073       getSTI().getTargetTriple().getArch() == Triple::r600)
8074     return;
8075 
8076   if (!getTargetStreamer().getTargetID())
8077     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(),
8078         // TODO: Should try to check code object version from directive???
8079         AMDGPU::getAmdhsaCodeObjectVersion());
8080 
8081   if (isHsaAbi(getSTI()))
8082     getTargetStreamer().EmitDirectiveAMDGCNTarget();
8083 }
8084 
8085 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8086   StringRef Name = getTokenStr();
8087   if (Name == "mul") {
8088     return parseIntWithPrefix("mul", Operands,
8089                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8090   }
8091 
8092   if (Name == "div") {
8093     return parseIntWithPrefix("div", Operands,
8094                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8095   }
8096 
8097   return ParseStatus::NoMatch;
8098 }
8099 
8100 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8101 // the number of src operands present, then copies that bit into src0_modifiers.
8102 void cvtVOP3DstOpSelOnly(MCInst &Inst) {
8103   int Opc = Inst.getOpcode();
8104   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8105   if (OpSelIdx == -1)
8106     return;
8107 
8108   int SrcNum;
8109   const int Ops[] = { AMDGPU::OpName::src0,
8110                       AMDGPU::OpName::src1,
8111                       AMDGPU::OpName::src2 };
8112   for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8113        ++SrcNum)
8114     ;
8115   assert(SrcNum > 0);
8116 
8117   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8118 
8119   if ((OpSel & (1 << SrcNum)) != 0) {
8120     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8121     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8122     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8123   }
8124 }
8125 
8126 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8127                                    const OperandVector &Operands) {
8128   cvtVOP3P(Inst, Operands);
8129   cvtVOP3DstOpSelOnly(Inst);
8130 }
8131 
8132 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8133                                    OptionalImmIndexMap &OptionalIdx) {
8134   cvtVOP3P(Inst, Operands, OptionalIdx);
8135   cvtVOP3DstOpSelOnly(Inst);
8136 }
8137 
8138 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8139   return
8140       // 1. This operand holds the input modifiers
8141       Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8142       // 2. This is not the last operand
8143       && Desc.NumOperands > (OpNum + 1)
8144       // 3. The next operand is a register class
8145       && Desc.operands()[OpNum + 1].RegClass != -1
8146       // 4. The next register is not tied to any other operand
8147       && Desc.getOperandConstraint(OpNum + 1,
8148                                    MCOI::OperandConstraint::TIED_TO) == -1;
8149 }
8150 
8151 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8152 {
8153   OptionalImmIndexMap OptionalIdx;
8154   unsigned Opc = Inst.getOpcode();
8155 
8156   unsigned I = 1;
8157   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8158   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8159     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8160   }
8161 
8162   for (unsigned E = Operands.size(); I != E; ++I) {
8163     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8164     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8165       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8166     } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8167                Op.isInterpAttrChan()) {
8168       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8169     } else if (Op.isImmModifier()) {
8170       OptionalIdx[Op.getImmTy()] = I;
8171     } else {
8172       llvm_unreachable("unhandled operand type");
8173     }
8174   }
8175 
8176   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8177     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8178                           AMDGPUOperand::ImmTyHigh);
8179 
8180   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8181     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8182                           AMDGPUOperand::ImmTyClampSI);
8183 
8184   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8185     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8186                           AMDGPUOperand::ImmTyOModSI);
8187 }
8188 
8189 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8190 {
8191   OptionalImmIndexMap OptionalIdx;
8192   unsigned Opc = Inst.getOpcode();
8193 
8194   unsigned I = 1;
8195   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8196   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8197     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8198   }
8199 
8200   for (unsigned E = Operands.size(); I != E; ++I) {
8201     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8202     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8203       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8204     } else if (Op.isImmModifier()) {
8205       OptionalIdx[Op.getImmTy()] = I;
8206     } else {
8207       llvm_unreachable("unhandled operand type");
8208     }
8209   }
8210 
8211   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8212 
8213   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8214   if (OpSelIdx != -1)
8215     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8216 
8217   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8218 
8219   if (OpSelIdx == -1)
8220     return;
8221 
8222   const int Ops[] = { AMDGPU::OpName::src0,
8223                       AMDGPU::OpName::src1,
8224                       AMDGPU::OpName::src2 };
8225   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8226                          AMDGPU::OpName::src1_modifiers,
8227                          AMDGPU::OpName::src2_modifiers };
8228 
8229   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8230 
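       // Copy each op_sel bit into the corresponding source modifier; bit 3
       // (the destination bit) is recorded as DST_OP_SEL on src0_modifiers.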
8231   for (int J = 0; J < 3; ++J) {
8232     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8233     if (OpIdx == -1)
8234       break;
8235 
8236     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8237     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8238 
8239     if ((OpSel & (1 << J)) != 0)
8240       ModVal |= SISrcMods::OP_SEL_0;
8241     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8242         (OpSel & (1 << 3)) != 0)
8243       ModVal |= SISrcMods::DST_OP_SEL;
8244 
8245     Inst.getOperand(ModIdx).setImm(ModVal);
8246   }
8247 }
8248 
8249 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8250                               OptionalImmIndexMap &OptionalIdx) {
8251   unsigned Opc = Inst.getOpcode();
8252 
8253   unsigned I = 1;
8254   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8255   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8256     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8257   }
8258 
8259   for (unsigned E = Operands.size(); I != E; ++I) {
8260     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8261     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8262       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8263     } else if (Op.isImmModifier()) {
8264       OptionalIdx[Op.getImmTy()] = I;
8265     } else if (Op.isRegOrImm()) {
8266       Op.addRegOrImmOperands(Inst, 1);
8267     } else {
8268       llvm_unreachable("unhandled operand type");
8269     }
8270   }
8271 
8272   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8273     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8274                           AMDGPUOperand::ImmTyClampSI);
8275 
8276   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8277     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8278                           AMDGPUOperand::ImmTyOModSI);
8279 
8280   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8281   // these have a src2 register operand that is tied to the dst operand.
8282   // The assembler does not allow modifiers for this operand, so src2_modifiers
8283   // must be 0.
8284   if (isMAC(Opc)) {
8285     auto it = Inst.begin();
8286     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8287     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8288     ++it;
8289     // Copy the operand to ensure it's not invalidated when Inst grows.
8290     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8291   }
8292 }
8293 
8294 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8295   OptionalImmIndexMap OptionalIdx;
8296   cvtVOP3(Inst, Operands, OptionalIdx);
8297 }
8298 
8299 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8300                                OptionalImmIndexMap &OptIdx) {
8301   const int Opc = Inst.getOpcode();
8302   const MCInstrDesc &Desc = MII.get(Opc);
8303 
8304   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8305 
8306   if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8307       Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
8308     Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8309     Inst.addOperand(Inst.getOperand(0));
8310   }
8311 
8312   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) {
8313     assert(!IsPacked);
8314     Inst.addOperand(Inst.getOperand(0));
8315   }
8316 
8317   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
8318   // instruction, and then figure out where to actually put the modifiers.
8319 
8320   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8321   if (OpSelIdx != -1) {
8322     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8323   }
8324 
8325   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8326   if (OpSelHiIdx != -1) {
8327     int DefaultVal = IsPacked ? -1 : 0;
8328     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8329                           DefaultVal);
8330   }
8331 
8332   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8333   if (NegLoIdx != -1) {
8334     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8335     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8336   }
8337 
8338   const int Ops[] = { AMDGPU::OpName::src0,
8339                       AMDGPU::OpName::src1,
8340                       AMDGPU::OpName::src2 };
8341   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8342                          AMDGPU::OpName::src1_modifiers,
8343                          AMDGPU::OpName::src2_modifiers };
8344 
8345   unsigned OpSel = 0;
8346   unsigned OpSelHi = 0;
8347   unsigned NegLo = 0;
8348   unsigned NegHi = 0;
8349 
8350   if (OpSelIdx != -1)
8351     OpSel = Inst.getOperand(OpSelIdx).getImm();
8352 
8353   if (OpSelHiIdx != -1)
8354     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8355 
8356   if (NegLoIdx != -1) {
8357     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8358     NegLo = Inst.getOperand(NegLoIdx).getImm();
8359     NegHi = Inst.getOperand(NegHiIdx).getImm();
8360   }
8361 
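       // Fold the parsed op_sel/op_sel_hi/neg_lo/neg_hi bits into the
       // per-source modifier operands.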
8362   for (int J = 0; J < 3; ++J) {
8363     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8364     if (OpIdx == -1)
8365       break;
8366 
8367     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8368 
8369     if (ModIdx == -1)
8370       continue;
8371 
8372     uint32_t ModVal = 0;
8373 
8374     if ((OpSel & (1 << J)) != 0)
8375       ModVal |= SISrcMods::OP_SEL_0;
8376 
8377     if ((OpSelHi & (1 << J)) != 0)
8378       ModVal |= SISrcMods::OP_SEL_1;
8379 
8380     if ((NegLo & (1 << J)) != 0)
8381       ModVal |= SISrcMods::NEG;
8382 
8383     if ((NegHi & (1 << J)) != 0)
8384       ModVal |= SISrcMods::NEG_HI;
8385 
8386     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8387   }
8388 }
8389 
8390 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8391   OptionalImmIndexMap OptIdx;
8392   cvtVOP3(Inst, Operands, OptIdx);
8393   cvtVOP3P(Inst, Operands, OptIdx);
8394 }
8395 
8396 //===----------------------------------------------------------------------===//
8397 // VOPD
8398 //===----------------------------------------------------------------------===//
8399 
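     // Parse the '::' separator of a dual-issue VOPD instruction and the
     // mnemonic of its second (OpY) component.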
8400 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8401   if (!hasVOPD(getSTI()))
8402     return ParseStatus::NoMatch;
8403 
8404   if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8405     SMLoc S = getLoc();
8406     lex();
8407     lex();
8408     Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8409     SMLoc OpYLoc = getLoc();
8410     StringRef OpYName;
8411     if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8412       Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8413       return ParseStatus::Success;
8414     }
8415     return Error(OpYLoc, "expected a VOPDY instruction after ::");
8416   }
8417   return ParseStatus::NoMatch;
8418 }
8419 
8420 // Create VOPD MCInst operands using parsed assembler operands.
8421 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8422   auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8423     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8424     if (Op.isReg()) {
8425       Op.addRegOperands(Inst, 1);
8426       return;
8427     }
8428     if (Op.isImm()) {
8429       Op.addImmOperands(Inst, 1);
8430       return;
8431     }
8432     llvm_unreachable("Unhandled operand type in cvtVOPD");
8433   };
8434 
8435   const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8436 
8437   // MCInst operands are ordered as follows:
8438   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8439 
8440   for (auto CompIdx : VOPD::COMPONENTS) {
8441     addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8442   }
8443 
8444   for (auto CompIdx : VOPD::COMPONENTS) {
8445     const auto &CInfo = InstInfo[CompIdx];
8446     auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8447     for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8448       addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8449     if (CInfo.hasSrc2Acc())
8450       addOp(CInfo.getIndexOfDstInParsedOperands());
8451   }
8452 }
8453 
8454 //===----------------------------------------------------------------------===//
8455 // dpp
8456 //===----------------------------------------------------------------------===//
8457 
8458 bool AMDGPUOperand::isDPP8() const {
8459   return isImmTy(ImmTyDPP8);
8460 }
8461 
8462 bool AMDGPUOperand::isDPPCtrl() const {
8463   using namespace AMDGPU::DPP;
8464 
8465   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8466   if (result) {
8467     int64_t Imm = getImm();
8468     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8469            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8470            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8471            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8472            (Imm == DppCtrl::WAVE_SHL1) ||
8473            (Imm == DppCtrl::WAVE_ROL1) ||
8474            (Imm == DppCtrl::WAVE_SHR1) ||
8475            (Imm == DppCtrl::WAVE_ROR1) ||
8476            (Imm == DppCtrl::ROW_MIRROR) ||
8477            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8478            (Imm == DppCtrl::BCAST15) ||
8479            (Imm == DppCtrl::BCAST31) ||
8480            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8481            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8482   }
8483   return false;
8484 }
8485 
8486 //===----------------------------------------------------------------------===//
8487 // mAI
8488 //===----------------------------------------------------------------------===//
8489 
8490 bool AMDGPUOperand::isBLGP() const {
8491   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8492 }
8493 
8494 bool AMDGPUOperand::isCBSZ() const {
8495   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8496 }
8497 
8498 bool AMDGPUOperand::isABID() const {
8499   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8500 }
8501 
8502 bool AMDGPUOperand::isS16Imm() const {
8503   return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8504 }
8505 
8506 bool AMDGPUOperand::isU16Imm() const {
8507   return isImmLiteral() && isUInt<16>(getImm());
8508 }
8509 
8510 //===----------------------------------------------------------------------===//
8511 // dim
8512 //===----------------------------------------------------------------------===//
8513 
8514 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8515   // We want to allow "dim:1D" etc.,
8516   // but the initial 1 is tokenized as an integer.
8517   std::string Token;
8518   if (isToken(AsmToken::Integer)) {
8519     SMLoc Loc = getToken().getEndLoc();
8520     Token = std::string(getTokenStr());
8521     lex();
8522     if (getLoc() != Loc)
8523       return false;
8524   }
8525 
8526   StringRef Suffix;
8527   if (!parseId(Suffix))
8528     return false;
8529   Token += Suffix;
8530 
8531   StringRef DimId = Token;
8532   if (DimId.starts_with("SQ_RSRC_IMG_"))
8533     DimId = DimId.drop_front(12);
8534 
8535   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8536   if (!DimInfo)
8537     return false;
8538 
8539   Encoding = DimInfo->Encoding;
8540   return true;
8541 }
8542 
8543 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8544   if (!isGFX10Plus())
8545     return ParseStatus::NoMatch;
8546 
8547   SMLoc S = getLoc();
8548 
8549   if (!trySkipId("dim", AsmToken::Colon))
8550     return ParseStatus::NoMatch;
8551 
8552   unsigned Encoding;
8553   SMLoc Loc = getLoc();
8554   if (!parseDimId(Encoding))
8555     return Error(Loc, "invalid dim value");
8556 
8557   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8558                                               AMDGPUOperand::ImmTyDim));
8559   return ParseStatus::Success;
8560 }
8561 
8562 //===----------------------------------------------------------------------===//
8563 // dpp
8564 //===----------------------------------------------------------------------===//
8565 
8566 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8567   SMLoc S = getLoc();
8568 
8569   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8570     return ParseStatus::NoMatch;
8571 
8572   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8573 
8574   int64_t Sels[8];
8575 
8576   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8577     return ParseStatus::Failure;
8578 
8579   for (size_t i = 0; i < 8; ++i) {
8580     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8581       return ParseStatus::Failure;
8582 
8583     SMLoc Loc = getLoc();
8584     if (getParser().parseAbsoluteExpression(Sels[i]))
8585       return ParseStatus::Failure;
8586     if (0 > Sels[i] || 7 < Sels[i])
8587       return Error(Loc, "expected a 3-bit value");
8588   }
8589 
8590   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8591     return ParseStatus::Failure;
8592 
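       // Pack the eight 3-bit lane selectors into a single immediate,
       // selector 0 in the low bits.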
8593   unsigned DPP8 = 0;
8594   for (size_t i = 0; i < 8; ++i)
8595     DPP8 |= (Sels[i] << (i * 3));
8596 
8597   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8598   return ParseStatus::Success;
8599 }
8600 
8601 bool
8602 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8603                                     const OperandVector &Operands) {
8604   if (Ctrl == "row_newbcast")
8605     return isGFX90A();
8606 
8607   if (Ctrl == "row_share" ||
8608       Ctrl == "row_xmask")
8609     return isGFX10Plus();
8610 
8611   if (Ctrl == "wave_shl" ||
8612       Ctrl == "wave_shr" ||
8613       Ctrl == "wave_rol" ||
8614       Ctrl == "wave_ror" ||
8615       Ctrl == "row_bcast")
8616     return isVI() || isGFX9();
8617 
8618   return Ctrl == "row_mirror" ||
8619          Ctrl == "row_half_mirror" ||
8620          Ctrl == "quad_perm" ||
8621          Ctrl == "row_shl" ||
8622          Ctrl == "row_shr" ||
8623          Ctrl == "row_ror";
8624 }
8625 
8626 int64_t
8627 AMDGPUAsmParser::parseDPPCtrlPerm() {
8628   // quad_perm:[%d,%d,%d,%d]
8629 
8630   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8631     return -1;
8632 
8633   int64_t Val = 0;
8634   for (int i = 0; i < 4; ++i) {
8635     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8636       return -1;
8637 
8638     int64_t Temp;
8639     SMLoc Loc = getLoc();
8640     if (getParser().parseAbsoluteExpression(Temp))
8641       return -1;
8642     if (Temp < 0 || Temp > 3) {
8643       Error(Loc, "expected a 2-bit value");
8644       return -1;
8645     }
8646 
8647     Val += (Temp << i * 2);
8648   }
8649 
8650   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8651     return -1;
8652 
8653   return Val;
8654 }
8655 
8656 int64_t
8657 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8658   using namespace AMDGPU::DPP;
8659 
8660   // sel:%d
8661 
8662   int64_t Val;
8663   SMLoc Loc = getLoc();
8664 
8665   if (getParser().parseAbsoluteExpression(Val))
8666     return -1;
8667 
8668   struct DppCtrlCheck {
8669     int64_t Ctrl;
8670     int Lo;
8671     int Hi;
8672   };
8673 
8674   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8675     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8676     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8677     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8678     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8679     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8680     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8681     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8682     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8683     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8684     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8685     .Default({-1, 0, 0});
8686 
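       // row_bcast accepts only 15 or 31; the other controls are range-checked
       // and folded into the base encoding (the wave_* controls accept only
       // the value 1).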
8687   bool Valid;
8688   if (Check.Ctrl == -1) {
8689     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8690     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8691   } else {
8692     Valid = Check.Lo <= Val && Val <= Check.Hi;
8693     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8694   }
8695 
8696   if (!Valid) {
8697     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8698     return -1;
8699   }
8700 
8701   return Val;
8702 }
8703 
8704 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8705   using namespace AMDGPU::DPP;
8706 
8707   if (!isToken(AsmToken::Identifier) ||
8708       !isSupportedDPPCtrl(getTokenStr(), Operands))
8709     return ParseStatus::NoMatch;
8710 
8711   SMLoc S = getLoc();
8712   int64_t Val = -1;
8713   StringRef Ctrl;
8714 
8715   parseId(Ctrl);
8716 
8717   if (Ctrl == "row_mirror") {
8718     Val = DppCtrl::ROW_MIRROR;
8719   } else if (Ctrl == "row_half_mirror") {
8720     Val = DppCtrl::ROW_HALF_MIRROR;
8721   } else {
8722     if (skipToken(AsmToken::Colon, "expected a colon")) {
8723       if (Ctrl == "quad_perm") {
8724         Val = parseDPPCtrlPerm();
8725       } else {
8726         Val = parseDPPCtrlSel(Ctrl);
8727       }
8728     }
8729   }
8730 
8731   if (Val == -1)
8732     return ParseStatus::Failure;
8733 
8734   Operands.push_back(
8735     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8736   return ParseStatus::Success;
8737 }
8738 
8739 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
8740                                  bool IsDPP8) {
8741   OptionalImmIndexMap OptionalIdx;
8742   unsigned Opc = Inst.getOpcode();
8743   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8744 
8745   // MAC instructions are special because they have an 'old'
8746   // operand which is not tied to dst (but is assumed to be).
8747   // They also have a dummy, unused src2_modifiers operand.
8748   int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
8749   int Src2ModIdx =
8750       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
8751   bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
8752                Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
8753 
8754   unsigned I = 1;
8755   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8756     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8757   }
8758 
8759   int Fi = 0;
8760   for (unsigned E = Operands.size(); I != E; ++I) {
8761 
8762     if (IsMAC) {
8763       int NumOperands = Inst.getNumOperands();
8764       if (OldIdx == NumOperands) {
8765         // Handle old operand
8766         constexpr int DST_IDX = 0;
8767         Inst.addOperand(Inst.getOperand(DST_IDX));
8768       } else if (Src2ModIdx == NumOperands) {
8769         // Add unused dummy src2_modifiers
8770         Inst.addOperand(MCOperand::createImm(0));
8771       }
8772     }
8773 
8774     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8775                                             MCOI::TIED_TO);
8776     if (TiedTo != -1) {
8777       assert((unsigned)TiedTo < Inst.getNumOperands());
8778       // handle tied old or src2 for MAC instructions
8779       Inst.addOperand(Inst.getOperand(TiedTo));
8780     }
8781     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8782     // Add the register arguments
8783     if (IsDPP8 && Op.isDppFI()) {
8784       Fi = Op.getImm();
8785     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8786       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8787     } else if (Op.isReg()) {
8788       Op.addRegOperands(Inst, 1);
8789     } else if (Op.isImm() &&
8790                Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
8791       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8792       Op.addImmOperands(Inst, 1);
8793     } else if (Op.isImm()) {
8794       OptionalIdx[Op.getImmTy()] = I;
8795     } else {
8796       llvm_unreachable("unhandled operand type");
8797     }
8798   }
8799   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8800     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8801 
8802   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8803     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8804 
8805   if (Desc.TSFlags & SIInstrFlags::VOP3P)
8806     cvtVOP3P(Inst, Operands, OptionalIdx);
8807   else if (Desc.TSFlags & SIInstrFlags::VOP3)
8808     cvtVOP3OpSel(Inst, Operands, OptionalIdx);
8809   else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
8810     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8811   }
8812 
8813   if (IsDPP8) {
8814     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8815     using namespace llvm::AMDGPU::DPP;
8816     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8817   } else {
8818     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8819     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8820     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8821     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8822 
8823     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
8824       addOptionalImmOperand(Inst, Operands, OptionalIdx,
8825                             AMDGPUOperand::ImmTyDppFI);
8826   }
8827 }
8828 
8829 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8830   OptionalImmIndexMap OptionalIdx;
8831 
8832   unsigned I = 1;
8833   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8834   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8835     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8836   }
8837 
8838   int Fi = 0;
8839   for (unsigned E = Operands.size(); I != E; ++I) {
8840     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8841                                             MCOI::TIED_TO);
8842     if (TiedTo != -1) {
8843       assert((unsigned)TiedTo < Inst.getNumOperands());
8844       // handle tied old or src2 for MAC instructions
8845       Inst.addOperand(Inst.getOperand(TiedTo));
8846     }
8847     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8848     // Add the register arguments
8849     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8850       // VOP2b (v_add_u32, v_sub_u32 ...) DPP uses a "vcc" token.
8851       // Skip it.
8852       continue;
8853     }
8854 
8855     if (IsDPP8) {
8856       if (Op.isDPP8()) {
8857         Op.addImmOperands(Inst, 1);
8858       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8859         Op.addRegWithFPInputModsOperands(Inst, 2);
8860       } else if (Op.isDppFI()) {
8861         Fi = Op.getImm();
8862       } else if (Op.isReg()) {
8863         Op.addRegOperands(Inst, 1);
8864       } else {
8865         llvm_unreachable("Invalid operand type");
8866       }
8867     } else {
8868       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8869         Op.addRegWithFPInputModsOperands(Inst, 2);
8870       } else if (Op.isReg()) {
8871         Op.addRegOperands(Inst, 1);
8872       } else if (Op.isDPPCtrl()) {
8873         Op.addImmOperands(Inst, 1);
8874       } else if (Op.isImm()) {
8875         // Handle optional arguments
8876         OptionalIdx[Op.getImmTy()] = I;
8877       } else {
8878         llvm_unreachable("Invalid operand type");
8879       }
8880     }
8881   }
8882 
8883   if (IsDPP8) {
8884     using namespace llvm::AMDGPU::DPP;
8885     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8886   } else {
8887     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8888     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8889     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8890     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
8891       addOptionalImmOperand(Inst, Operands, OptionalIdx,
8892                             AMDGPUOperand::ImmTyDppFI);
8893     }
8894   }
8895 }
8896 
8897 //===----------------------------------------------------------------------===//
8898 // sdwa
8899 //===----------------------------------------------------------------------===//
8900 
8901 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
8902                                           StringRef Prefix,
8903                                           AMDGPUOperand::ImmTy Type) {
8904   using namespace llvm::AMDGPU::SDWA;
8905 
8906   SMLoc S = getLoc();
8907   StringRef Value;
8908 
8909   SMLoc StringLoc;
8910   ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
8911   if (!Res.isSuccess())
8912     return Res;
8913 
8914   int64_t Int;
8915   Int = StringSwitch<int64_t>(Value)
8916         .Case("BYTE_0", SdwaSel::BYTE_0)
8917         .Case("BYTE_1", SdwaSel::BYTE_1)
8918         .Case("BYTE_2", SdwaSel::BYTE_2)
8919         .Case("BYTE_3", SdwaSel::BYTE_3)
8920         .Case("WORD_0", SdwaSel::WORD_0)
8921         .Case("WORD_1", SdwaSel::WORD_1)
8922         .Case("DWORD", SdwaSel::DWORD)
8923         .Default(0xffffffff);
8924 
8925   if (Int == 0xffffffff)
8926     return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8927 
8928   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8929   return ParseStatus::Success;
8930 }
8931 
8932 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8933   using namespace llvm::AMDGPU::SDWA;
8934 
8935   SMLoc S = getLoc();
8936   StringRef Value;
8937 
8938   SMLoc StringLoc;
8939   ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8940   if (!Res.isSuccess())
8941     return Res;
8942 
8943   int64_t Int;
8944   Int = StringSwitch<int64_t>(Value)
8945         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8946         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8947         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8948         .Default(0xffffffff);
8949 
8950   if (Int == 0xffffffff)
8951     return Error(StringLoc, "invalid dst_unused value");
8952 
8953   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
8954   return ParseStatus::Success;
8955 }
8956 
8957 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8958   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8959 }
8960 
8961 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8962   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8963 }
8964 
8965 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8966   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8967 }
8968 
8969 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8970   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8971 }
8972 
8973 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8974   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8975 }
8976 
8977 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8978                               uint64_t BasicInstType,
8979                               bool SkipDstVcc,
8980                               bool SkipSrcVcc) {
8981   using namespace llvm::AMDGPU::SDWA;
8982 
8983   OptionalImmIndexMap OptionalIdx;
8984   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8985   bool SkippedVcc = false;
8986 
8987   unsigned I = 1;
8988   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8989   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8990     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8991   }
8992 
8993   for (unsigned E = Operands.size(); I != E; ++I) {
8994     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8995     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8996         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8997       // VOP2b (v_add_u32, v_sub_u32 ...) SDWA uses a "vcc" token as dst.
8998       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8999       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9000       // Skip VCC only if we didn't skip it on previous iteration.
9001       // Note that src0 and src1 occupy 2 slots each because of modifiers.
9002       if (BasicInstType == SIInstrFlags::VOP2 &&
9003           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9004            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9005         SkippedVcc = true;
9006         continue;
9007       } else if (BasicInstType == SIInstrFlags::VOPC &&
9008                  Inst.getNumOperands() == 0) {
9009         SkippedVcc = true;
9010         continue;
9011       }
9012     }
9013     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9014       Op.addRegOrImmWithInputModsOperands(Inst, 2);
9015     } else if (Op.isImm()) {
9016       // Handle optional arguments
9017       OptionalIdx[Op.getImmTy()] = I;
9018     } else {
9019       llvm_unreachable("Invalid operand type");
9020     }
9021     SkippedVcc = false;
9022   }
9023 
9024   const unsigned Opc = Inst.getOpcode();
9025   if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9026       Opc != AMDGPU::V_NOP_sdwa_vi) {
9027     // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
9028     switch (BasicInstType) {
9029     case SIInstrFlags::VOP1:
9030       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9031         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9032                               AMDGPUOperand::ImmTyClampSI, 0);
9033 
9034       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9035         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9036                               AMDGPUOperand::ImmTyOModSI, 0);
9037 
9038       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9039         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9040                               AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9041 
9042       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9043         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9044                               AMDGPUOperand::ImmTySDWADstUnused,
9045                               DstUnused::UNUSED_PRESERVE);
9046 
9047       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9048       break;
9049 
9050     case SIInstrFlags::VOP2:
9051       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9052 
9053       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9054         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9055 
9056       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9057       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9058       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9059       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9060       break;
9061 
9062     case SIInstrFlags::VOPC:
9063       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9064         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9065       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9066       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9067       break;
9068 
9069     default:
9070       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9071     }
9072   }
9073 
9074   // special case v_mac_{f16, f32}:
9075   // it has src2 register operand that is tied to dst operand
9076   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9077       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
9078     auto it = Inst.begin();
9079     std::advance(
9080       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9081     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9082   }
9083 }
9084 
9085 /// Force static initialization.
9086 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9087   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9088   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9089 }
9090 
9091 #define GET_REGISTER_MATCHER
9092 #define GET_MATCHER_IMPLEMENTATION
9093 #define GET_MNEMONIC_SPELL_CHECKER
9094 #define GET_MNEMONIC_CHECKER
9095 #include "AMDGPUGenAsmMatcher.inc"
9096 
9097 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9098                                                 unsigned MCK) {
9099   switch (MCK) {
9100   case MCK_addr64:
9101     return parseTokenOp("addr64", Operands);
9102   case MCK_done:
9103     return parseTokenOp("done", Operands);
9104   case MCK_idxen:
9105     return parseTokenOp("idxen", Operands);
9106   case MCK_lds:
9107     return parseTokenOp("lds", Operands);
9108   case MCK_offen:
9109     return parseTokenOp("offen", Operands);
9110   case MCK_off:
9111     return parseTokenOp("off", Operands);
9112   case MCK_row_95_en:
9113     return parseTokenOp("row_en", Operands);
9114   case MCK_gds:
9115     return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9116   case MCK_tfe:
9117     return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9118   }
9119   return tryCustomParseOperand(Operands, MCK);
9120 }
9121 
9122 // This function should be defined after auto-generated include so that we have
9123 // MatchClassKind enum defined
9124 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9125                                                      unsigned Kind) {
9126   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9127   // But MatchInstructionImpl() expects a token and fails to validate the
9128   // operand. This method checks if we were given an immediate operand but are
9129   // expected to produce the corresponding token.
9130   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9131   switch (Kind) {
9132   case MCK_addr64:
9133     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9134   case MCK_gds:
9135     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9136   case MCK_lds:
9137     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9138   case MCK_idxen:
9139     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9140   case MCK_offen:
9141     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9142   case MCK_tfe:
9143     return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9144   case MCK_SSrcB32:
9145     // When operands have expression values, they will return true for isToken,
9146     // because it is not possible to distinguish between a token and an
9147     // expression at parse time. MatchInstructionImpl() will always try to
9148     // match an operand as a token when isToken returns true; if the name of
9149     // the expression is not a valid token, the match fails, so we need to
9150     // handle it here.
9151     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9152   case MCK_SSrcF32:
9153     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9154   case MCK_SOPPBrTarget:
9155     return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9156   case MCK_VReg32OrOff:
9157     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9158   case MCK_InterpSlot:
9159     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9160   case MCK_InterpAttr:
9161     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9162   case MCK_InterpAttrChan:
9163     return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9164   case MCK_SReg_64:
9165   case MCK_SReg_64_XEXEC:
9166     // Null is defined as a 32-bit register but
9167     // it should also be enabled with 64-bit operands.
9168     // The following code enables it for SReg_64 operands
9169     // used as source and destination. Remaining source
9170     // operands are handled in isInlinableImm.
9171     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9172   default:
9173     return Match_InvalidOperand;
9174   }
9175 }
9176 
9177 //===----------------------------------------------------------------------===//
9178 // endpgm
9179 //===----------------------------------------------------------------------===//
9180 
9181 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9182   SMLoc S = getLoc();
9183   int64_t Imm = 0;
9184 
9185   if (!parseExpr(Imm)) {
9186     // The operand is optional; if not present, default to 0.
9187     Imm = 0;
9188   }
9189 
9190   if (!isUInt<16>(Imm))
9191     return Error(S, "expected a 16-bit value");
9192 
9193   Operands.push_back(
9194       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9195   return ParseStatus::Success;
9196 }
9197 
9198 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9199 
9200 //===----------------------------------------------------------------------===//
9201 // LDSDIR
9202 //===----------------------------------------------------------------------===//
9203 
9204 bool AMDGPUOperand::isWaitVDST() const {
9205   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9206 }
9207 
9208 bool AMDGPUOperand::isWaitVAVDst() const {
9209   return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
9210 }
9211 
9212 bool AMDGPUOperand::isWaitVMVSrc() const {
9213   return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
9214 }
9215 
9216 //===----------------------------------------------------------------------===//
9217 // VINTERP
9218 //===----------------------------------------------------------------------===//
9219 
9220 bool AMDGPUOperand::isWaitEXP() const {
9221   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9222 }
9223 
9224 //===----------------------------------------------------------------------===//
9225 // Split Barrier
9226 //===----------------------------------------------------------------------===//
9227 
9228 bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
9229