1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/CodeGen/MachineValueType.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCExpr.h"
28 #include "llvm/MC/MCInst.h"
29 #include "llvm/MC/MCInstrDesc.h"
30 #include "llvm/MC/MCParser/MCAsmLexer.h"
31 #include "llvm/MC/MCParser/MCAsmParser.h"
32 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
33 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/AMDGPUMetadata.h"
37 #include "llvm/Support/AMDHSAKernelDescriptor.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/TargetParser/TargetParser.h"
41 #include <optional>
42 
43 using namespace llvm;
44 using namespace llvm::AMDGPU;
45 using namespace llvm::amdhsa;
46 
47 namespace {
48 
49 class AMDGPUAsmParser;
50 
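// Kinds of register operands the parser can produce: VGPRs (v0, v[4:7]), SGPRs
// (s0, s[0:3]), AGPRs (a0), trap-handler temporaries (ttmp0), and special
// registers such as vcc, exec and m0 (IS_SPECIAL).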
51 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
52 
53 //===----------------------------------------------------------------------===//
54 // Operand
55 //===----------------------------------------------------------------------===//
56 
57 class AMDGPUOperand : public MCParsedAsmOperand {
58   enum KindTy {
59     Token,
60     Immediate,
61     Register,
62     Expression
63   } Kind;
64 
65   SMLoc StartLoc, EndLoc;
66   const AMDGPUAsmParser *AsmParser;
67 
68 public:
69   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70       : Kind(Kind_), AsmParser(AsmParser_) {}
71 
72   using Ptr = std::unique_ptr<AMDGPUOperand>;
73 
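  // Source-operand modifiers as written in assembly: absolute value (|v0| or
  // abs(v0)), negation (-v0), integer sign-extension (sext(v0)), and lit(),
  // which forces the value to be encoded as a literal constant.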
74   struct Modifiers {
75     bool Abs = false;
76     bool Neg = false;
77     bool Sext = false;
78     bool Lit = false;
79 
80     bool hasFPModifiers() const { return Abs || Neg; }
81     bool hasIntModifiers() const { return Sext; }
82     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
83 
84     int64_t getFPModifiersOperand() const {
85       int64_t Operand = 0;
86       Operand |= Abs ? SISrcMods::ABS : 0u;
87       Operand |= Neg ? SISrcMods::NEG : 0u;
88       return Operand;
89     }
90 
91     int64_t getIntModifiersOperand() const {
92       int64_t Operand = 0;
93       Operand |= Sext ? SISrcMods::SEXT : 0u;
94       return Operand;
95     }
96 
97     int64_t getModifiersOperand() const {
98       assert(!(hasFPModifiers() && hasIntModifiers())
99            && "fp and int modifiers should not be used simultaneously");
100       if (hasFPModifiers()) {
101         return getFPModifiersOperand();
102       } else if (hasIntModifiers()) {
103         return getIntModifiersOperand();
104       } else {
105         return 0;
106       }
107     }
108 
109     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
110   };
111 
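  // Kinds of non-register operands accepted by the parser. ImmTyNone marks a
  // plain immediate; the other values identify named instruction modifiers
  // such as offset:..., gds, clamp, row_mask:... or op_sel:[...].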
112   enum ImmTy {
113     ImmTyNone,
114     ImmTyGDS,
115     ImmTyLDS,
116     ImmTyOffen,
117     ImmTyIdxen,
118     ImmTyAddr64,
119     ImmTyOffset,
120     ImmTyInstOffset,
121     ImmTyOffset0,
122     ImmTyOffset1,
123     ImmTySMEMOffsetMod,
124     ImmTyCPol,
125     ImmTyTFE,
126     ImmTyD16,
127     ImmTyClampSI,
128     ImmTyOModSI,
129     ImmTySDWADstSel,
130     ImmTySDWASrc0Sel,
131     ImmTySDWASrc1Sel,
132     ImmTySDWADstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyInterpAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTyDPP8,
155     ImmTyDppCtrl,
156     ImmTyDppRowMask,
157     ImmTyDppBankMask,
158     ImmTyDppBoundCtrl,
159     ImmTyDppFI,
160     ImmTySwizzle,
161     ImmTyGprIdxMode,
162     ImmTyHigh,
163     ImmTyBLGP,
164     ImmTyCBSZ,
165     ImmTyABID,
166     ImmTyEndpgm,
167     ImmTyWaitVDST,
168     ImmTyWaitEXP,
169     ImmTyWaitVAVDst,
170     ImmTyWaitVMVSrc,
171   };
172 
173   // Immediate operand kind.
174   // It helps to identify the location of an offending operand after an error.
175   // Note that regular literals and mandatory literals (KImm) must be handled
176   // differently. When looking for an offending operand, we should usually
177   // ignore mandatory literals because they are part of the instruction and
178   // cannot be changed. Report location of mandatory operands only for VOPD,
179   // when both OpX and OpY have a KImm and there are no other literals.
180   enum ImmKindTy {
181     ImmKindTyNone,
182     ImmKindTyLiteral,
183     ImmKindTyMandatoryLiteral,
184     ImmKindTyConst,
185   };
186 
187 private:
188   struct TokOp {
189     const char *Data;
190     unsigned Length;
191   };
192 
193   struct ImmOp {
194     int64_t Val;
195     ImmTy Type;
196     bool IsFPImm;
197     mutable ImmKindTy Kind;
198     Modifiers Mods;
199   };
200 
201   struct RegOp {
202     unsigned RegNo;
203     Modifiers Mods;
204   };
205 
206   union {
207     TokOp Tok;
208     ImmOp Imm;
209     RegOp Reg;
210     const MCExpr *Expr;
211   };
212 
213 public:
214   bool isToken() const override { return Kind == Token; }
215 
216   bool isSymbolRefExpr() const {
217     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
218   }
219 
220   bool isImm() const override {
221     return Kind == Immediate;
222   }
223 
224   void setImmKindNone() const {
225     assert(isImm());
226     Imm.Kind = ImmKindTyNone;
227   }
228 
229   void setImmKindLiteral() const {
230     assert(isImm());
231     Imm.Kind = ImmKindTyLiteral;
232   }
233 
234   void setImmKindMandatoryLiteral() const {
235     assert(isImm());
236     Imm.Kind = ImmKindTyMandatoryLiteral;
237   }
238 
239   void setImmKindConst() const {
240     assert(isImm());
241     Imm.Kind = ImmKindTyConst;
242   }
243 
244   bool IsImmKindLiteral() const {
245     return isImm() && Imm.Kind == ImmKindTyLiteral;
246   }
247 
248   bool IsImmKindMandatoryLiteral() const {
249     return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
250   }
251 
252   bool isImmKindConst() const {
253     return isImm() && Imm.Kind == ImmKindTyConst;
254   }
255 
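  // isInlinableImm: the value fits an inline constant of the given type (small
  // integers and a fixed set of floating-point values); isLiteralImm: the
  // value must be encoded as a literal instead.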
256   bool isInlinableImm(MVT type) const;
257   bool isLiteralImm(MVT type) const;
258 
259   bool isRegKind() const {
260     return Kind == Register;
261   }
262 
263   bool isReg() const override {
264     return isRegKind() && !hasModifiers();
265   }
266 
267   bool isRegOrInline(unsigned RCID, MVT type) const {
268     return isRegClass(RCID) || isInlinableImm(type);
269   }
270 
271   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
272     return isRegOrInline(RCID, type) || isLiteralImm(type);
273   }
274 
275   bool isRegOrImmWithInt16InputMods() const {
276     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
277   }
278 
279   bool isRegOrImmWithIntT16InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
281   }
282 
283   bool isRegOrImmWithInt32InputMods() const {
284     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
285   }
286 
287   bool isRegOrInlineImmWithInt16InputMods() const {
288     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
289   }
290 
291   bool isRegOrInlineImmWithInt32InputMods() const {
292     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
293   }
294 
295   bool isRegOrImmWithInt64InputMods() const {
296     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
297   }
298 
299   bool isRegOrImmWithFP16InputMods() const {
300     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
301   }
302 
303   bool isRegOrImmWithFPT16InputMods() const {
304     return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
305   }
306 
307   bool isRegOrImmWithFP32InputMods() const {
308     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
309   }
310 
311   bool isRegOrImmWithFP64InputMods() const {
312     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
313   }
314 
315   bool isRegOrInlineImmWithFP16InputMods() const {
316     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
317   }
318 
319   bool isRegOrInlineImmWithFP32InputMods() const {
320     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
321   }
322 
323 
324   bool isVReg() const {
325     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
326            isRegClass(AMDGPU::VReg_64RegClassID) ||
327            isRegClass(AMDGPU::VReg_96RegClassID) ||
328            isRegClass(AMDGPU::VReg_128RegClassID) ||
329            isRegClass(AMDGPU::VReg_160RegClassID) ||
330            isRegClass(AMDGPU::VReg_192RegClassID) ||
331            isRegClass(AMDGPU::VReg_256RegClassID) ||
332            isRegClass(AMDGPU::VReg_512RegClassID) ||
333            isRegClass(AMDGPU::VReg_1024RegClassID);
334   }
335 
336   bool isVReg32() const {
337     return isRegClass(AMDGPU::VGPR_32RegClassID);
338   }
339 
340   bool isVReg32OrOff() const {
341     return isOff() || isVReg32();
342   }
343 
344   bool isNull() const {
345     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
346   }
347 
348   bool isVRegWithInputMods() const;
349   bool isT16VRegWithInputMods() const;
350 
351   bool isSDWAOperand(MVT type) const;
352   bool isSDWAFP16Operand() const;
353   bool isSDWAFP32Operand() const;
354   bool isSDWAInt16Operand() const;
355   bool isSDWAInt32Operand() const;
356 
357   bool isImmTy(ImmTy ImmT) const {
358     return isImm() && Imm.Type == ImmT;
359   }
360 
361   template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
362 
363   bool isImmLiteral() const { return isImmTy(ImmTyNone); }
364 
365   bool isImmModifier() const {
366     return isImm() && Imm.Type != ImmTyNone;
367   }
368 
369   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
370   bool isDMask() const { return isImmTy(ImmTyDMask); }
371   bool isDim() const { return isImmTy(ImmTyDim); }
372   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
373   bool isOff() const { return isImmTy(ImmTyOff); }
374   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
375   bool isOffen() const { return isImmTy(ImmTyOffen); }
376   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
377   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
378   bool isOffset() const { return isImmTy(ImmTyOffset); }
379   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
380   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
381   bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
382   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
383   bool isGDS() const { return isImmTy(ImmTyGDS); }
384   bool isLDS() const { return isImmTy(ImmTyLDS); }
385   bool isCPol() const { return isImmTy(ImmTyCPol); }
386   bool isTFE() const { return isImmTy(ImmTyTFE); }
387   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
388   bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
389   bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
390   bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
391   bool isDppFI() const { return isImmTy(ImmTyDppFI); }
392   bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
393   bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
394   bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
395   bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
396   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
397   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
398   bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
399   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
400   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
401   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
402   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
403 
404   bool isRegOrImm() const {
405     return isReg() || isImm();
406   }
407 
408   bool isRegClass(unsigned RCID) const;
409 
410   bool isInlineValue() const;
411 
412   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
413     return isRegOrInline(RCID, type) && !hasModifiers();
414   }
415 
416   bool isSCSrcB16() const {
417     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
418   }
419 
420   bool isSCSrcV2B16() const {
421     return isSCSrcB16();
422   }
423 
424   bool isSCSrcB32() const {
425     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
426   }
427 
428   bool isSCSrcB64() const {
429     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
430   }
431 
432   bool isBoolReg() const;
433 
434   bool isSCSrcF16() const {
435     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
436   }
437 
438   bool isSCSrcV2F16() const {
439     return isSCSrcF16();
440   }
441 
442   bool isSCSrcF32() const {
443     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
444   }
445 
446   bool isSCSrcF64() const {
447     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
448   }
449 
450   bool isSSrcB32() const {
451     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
452   }
453 
454   bool isSSrcB16() const {
455     return isSCSrcB16() || isLiteralImm(MVT::i16);
456   }
457 
458   bool isSSrcV2B16() const {
459     llvm_unreachable("cannot happen");
460     return isSSrcB16();
461   }
462 
463   bool isSSrcB64() const {
464     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
465     // See isVSrc64().
466     return isSCSrcB64() || isLiteralImm(MVT::i64);
467   }
468 
469   bool isSSrcF32() const {
470     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
471   }
472 
473   bool isSSrcF64() const {
474     return isSCSrcB64() || isLiteralImm(MVT::f64);
475   }
476 
477   bool isSSrcF16() const {
478     return isSCSrcB16() || isLiteralImm(MVT::f16);
479   }
480 
481   bool isSSrcV2F16() const {
482     llvm_unreachable("cannot happen");
483     return isSSrcF16();
484   }
485 
486   bool isSSrcV2FP32() const {
487     llvm_unreachable("cannot happen");
488     return isSSrcF32();
489   }
490 
491   bool isSCSrcV2FP32() const {
492     llvm_unreachable("cannot happen");
493     return isSCSrcF32();
494   }
495 
496   bool isSSrcV2INT32() const {
497     llvm_unreachable("cannot happen");
498     return isSSrcB32();
499   }
500 
501   bool isSCSrcV2INT32() const {
502     llvm_unreachable("cannot happen");
503     return isSCSrcB32();
504   }
505 
506   bool isSSrcOrLdsB32() const {
507     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
508            isLiteralImm(MVT::i32) || isExpr();
509   }
510 
511   bool isVCSrcB32() const {
512     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
513   }
514 
515   bool isVCSrcB64() const {
516     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
517   }
518 
519   bool isVCSrcTB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
521   }
522 
523   bool isVCSrcTB16_Lo128() const {
524     return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
525   }
526 
527   bool isVCSrcFake16B16_Lo128() const {
528     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
529   }
530 
531   bool isVCSrcB16() const {
532     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
533   }
534 
535   bool isVCSrcV2B16() const {
536     return isVCSrcB16();
537   }
538 
539   bool isVCSrcF32() const {
540     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
541   }
542 
543   bool isVCSrcF64() const {
544     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
545   }
546 
547   bool isVCSrcTF16() const {
548     return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
549   }
550 
551   bool isVCSrcTF16_Lo128() const {
552     return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
553   }
554 
555   bool isVCSrcFake16F16_Lo128() const {
556     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
557   }
558 
559   bool isVCSrcF16() const {
560     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
561   }
562 
563   bool isVCSrcV2F16() const {
564     return isVCSrcF16();
565   }
566 
567   bool isVSrcB32() const {
568     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
569   }
570 
571   bool isVSrcB64() const {
572     return isVCSrcF64() || isLiteralImm(MVT::i64);
573   }
574 
575   bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
576 
577   bool isVSrcTB16_Lo128() const {
578     return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
579   }
580 
581   bool isVSrcFake16B16_Lo128() const {
582     return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
583   }
584 
585   bool isVSrcB16() const {
586     return isVCSrcB16() || isLiteralImm(MVT::i16);
587   }
588 
589   bool isVSrcV2B16() const {
590     return isVSrcB16() || isLiteralImm(MVT::v2i16);
591   }
592 
593   bool isVCSrcV2FP32() const {
594     return isVCSrcF64();
595   }
596 
597   bool isVSrcV2FP32() const {
598     return isVSrcF64() || isLiteralImm(MVT::v2f32);
599   }
600 
601   bool isVCSrcV2INT32() const {
602     return isVCSrcB64();
603   }
604 
605   bool isVSrcV2INT32() const {
606     return isVSrcB64() || isLiteralImm(MVT::v2i32);
607   }
608 
609   bool isVSrcF32() const {
610     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
611   }
612 
613   bool isVSrcF64() const {
614     return isVCSrcF64() || isLiteralImm(MVT::f64);
615   }
616 
617   bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
618 
619   bool isVSrcTF16_Lo128() const {
620     return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
621   }
622 
623   bool isVSrcFake16F16_Lo128() const {
624     return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
625   }
626 
627   bool isVSrcF16() const {
628     return isVCSrcF16() || isLiteralImm(MVT::f16);
629   }
630 
631   bool isVSrcV2F16() const {
632     return isVSrcF16() || isLiteralImm(MVT::v2f16);
633   }
634 
635   bool isVISrcB32() const {
636     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
637   }
638 
639   bool isVISrcB16() const {
640     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
641   }
642 
643   bool isVISrcV2B16() const {
644     return isVISrcB16();
645   }
646 
647   bool isVISrcF32() const {
648     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
649   }
650 
651   bool isVISrcF16() const {
652     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
653   }
654 
655   bool isVISrcV2F16() const {
656     return isVISrcF16() || isVISrcB32();
657   }
658 
659   bool isVISrc_64B64() const {
660     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
661   }
662 
663   bool isVISrc_64F64() const {
664     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
665   }
666 
667   bool isVISrc_64V2FP32() const {
668     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
669   }
670 
671   bool isVISrc_64V2INT32() const {
672     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
673   }
674 
675   bool isVISrc_256B64() const {
676     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
677   }
678 
679   bool isVISrc_256F64() const {
680     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
681   }
682 
683   bool isVISrc_128B16() const {
684     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
685   }
686 
687   bool isVISrc_128V2B16() const {
688     return isVISrc_128B16();
689   }
690 
691   bool isVISrc_128B32() const {
692     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
693   }
694 
695   bool isVISrc_128F32() const {
696     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
697   }
698 
699   bool isVISrc_256V2FP32() const {
700     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
701   }
702 
703   bool isVISrc_256V2INT32() const {
704     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
705   }
706 
707   bool isVISrc_512B32() const {
708     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
709   }
710 
711   bool isVISrc_512B16() const {
712     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
713   }
714 
715   bool isVISrc_512V2B16() const {
716     return isVISrc_512B16();
717   }
718 
719   bool isVISrc_512F32() const {
720     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
721   }
722 
723   bool isVISrc_512F16() const {
724     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
725   }
726 
727   bool isVISrc_512V2F16() const {
728     return isVISrc_512F16() || isVISrc_512B32();
729   }
730 
731   bool isVISrc_1024B32() const {
732     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
733   }
734 
735   bool isVISrc_1024B16() const {
736     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
737   }
738 
739   bool isVISrc_1024V2B16() const {
740     return isVISrc_1024B16();
741   }
742 
743   bool isVISrc_1024F32() const {
744     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
745   }
746 
747   bool isVISrc_1024F16() const {
748     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
749   }
750 
751   bool isVISrc_1024V2F16() const {
752     return isVISrc_1024F16() || isVISrc_1024B32();
753   }
754 
755   bool isAISrcB32() const {
756     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
757   }
758 
759   bool isAISrcB16() const {
760     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
761   }
762 
763   bool isAISrcV2B16() const {
764     return isAISrcB16();
765   }
766 
767   bool isAISrcF32() const {
768     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
769   }
770 
771   bool isAISrcF16() const {
772     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
773   }
774 
775   bool isAISrcV2F16() const {
776     return isAISrcF16() || isAISrcB32();
777   }
778 
779   bool isAISrc_64B64() const {
780     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
781   }
782 
783   bool isAISrc_64F64() const {
784     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
785   }
786 
787   bool isAISrc_128B32() const {
788     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
789   }
790 
791   bool isAISrc_128B16() const {
792     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
793   }
794 
795   bool isAISrc_128V2B16() const {
796     return isAISrc_128B16();
797   }
798 
799   bool isAISrc_128F32() const {
800     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
801   }
802 
803   bool isAISrc_128F16() const {
804     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
805   }
806 
807   bool isAISrc_128V2F16() const {
808     return isAISrc_128F16() || isAISrc_128B32();
809   }
810 
811   bool isVISrc_128F16() const {
812     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
813   }
814 
815   bool isVISrc_128V2F16() const {
816     return isVISrc_128F16() || isVISrc_128B32();
817   }
818 
819   bool isAISrc_256B64() const {
820     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
821   }
822 
823   bool isAISrc_256F64() const {
824     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
825   }
826 
827   bool isAISrc_512B32() const {
828     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
829   }
830 
831   bool isAISrc_512B16() const {
832     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
833   }
834 
835   bool isAISrc_512V2B16() const {
836     return isAISrc_512B16();
837   }
838 
839   bool isAISrc_512F32() const {
840     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
841   }
842 
843   bool isAISrc_512F16() const {
844     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
845   }
846 
847   bool isAISrc_512V2F16() const {
848     return isAISrc_512F16() || isAISrc_512B32();
849   }
850 
851   bool isAISrc_1024B32() const {
852     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
853   }
854 
855   bool isAISrc_1024B16() const {
856     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
857   }
858 
859   bool isAISrc_1024V2B16() const {
860     return isAISrc_1024B16();
861   }
862 
863   bool isAISrc_1024F32() const {
864     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
865   }
866 
867   bool isAISrc_1024F16() const {
868     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
869   }
870 
871   bool isAISrc_1024V2F16() const {
872     return isAISrc_1024F16() || isAISrc_1024B32();
873   }
874 
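  // KImm operands are literals that form a mandatory part of the instruction
  // encoding (e.g. the constant in v_madmk_f32), as opposed to ordinary
  // literal source operands.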
875   bool isKImmFP32() const {
876     return isLiteralImm(MVT::f32);
877   }
878 
879   bool isKImmFP16() const {
880     return isLiteralImm(MVT::f16);
881   }
882 
883   bool isMem() const override {
884     return false;
885   }
886 
887   bool isExpr() const {
888     return Kind == Expression;
889   }
890 
891   bool isSOPPBrTarget() const { return isExpr() || isImm(); }
892 
893   bool isSWaitCnt() const;
894   bool isDepCtr() const;
895   bool isSDelayALU() const;
896   bool isHwreg() const;
897   bool isSendMsg() const;
898   bool isSplitBarrier() const;
899   bool isSwizzle() const;
900   bool isSMRDOffset8() const;
901   bool isSMEMOffset() const;
902   bool isSMRDLiteralOffset() const;
903   bool isDPP8() const;
904   bool isDPPCtrl() const;
905   bool isBLGP() const;
906   bool isCBSZ() const;
907   bool isABID() const;
908   bool isGPRIdxMode() const;
909   bool isS16Imm() const;
910   bool isU16Imm() const;
911   bool isEndpgm() const;
912   bool isWaitVDST() const;
913   bool isWaitEXP() const;
914   bool isWaitVAVDst() const;
915   bool isWaitVMVSrc() const;
916 
917   auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
918     return std::bind(P, *this);
919   }
920 
921   StringRef getToken() const {
922     assert(isToken());
923     return StringRef(Tok.Data, Tok.Length);
924   }
925 
926   int64_t getImm() const {
927     assert(isImm());
928     return Imm.Val;
929   }
930 
931   void setImm(int64_t Val) {
932     assert(isImm());
933     Imm.Val = Val;
934   }
935 
936   ImmTy getImmTy() const {
937     assert(isImm());
938     return Imm.Type;
939   }
940 
941   unsigned getReg() const override {
942     assert(isRegKind());
943     return Reg.RegNo;
944   }
945 
946   SMLoc getStartLoc() const override {
947     return StartLoc;
948   }
949 
950   SMLoc getEndLoc() const override {
951     return EndLoc;
952   }
953 
954   SMRange getLocRange() const {
955     return SMRange(StartLoc, EndLoc);
956   }
957 
958   Modifiers getModifiers() const {
959     assert(isRegKind() || isImmTy(ImmTyNone));
960     return isRegKind() ? Reg.Mods : Imm.Mods;
961   }
962 
963   void setModifiers(Modifiers Mods) {
964     assert(isRegKind() || isImmTy(ImmTyNone));
965     if (isRegKind())
966       Reg.Mods = Mods;
967     else
968       Imm.Mods = Mods;
969   }
970 
971   bool hasModifiers() const {
972     return getModifiers().hasModifiers();
973   }
974 
975   bool hasFPModifiers() const {
976     return getModifiers().hasFPModifiers();
977   }
978 
979   bool hasIntModifiers() const {
980     return getModifiers().hasIntModifiers();
981   }
982 
983   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
984 
985   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
986 
987   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
988 
989   void addRegOperands(MCInst &Inst, unsigned N) const;
990 
991   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
992     if (isRegKind())
993       addRegOperands(Inst, N);
994     else
995       addImmOperands(Inst, N);
996   }
997 
998   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
999     Modifiers Mods = getModifiers();
1000     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1001     if (isRegKind()) {
1002       addRegOperands(Inst, N);
1003     } else {
1004       addImmOperands(Inst, N, false);
1005     }
1006   }
1007 
1008   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1009     assert(!hasIntModifiers());
1010     addRegOrImmWithInputModsOperands(Inst, N);
1011   }
1012 
1013   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1014     assert(!hasFPModifiers());
1015     addRegOrImmWithInputModsOperands(Inst, N);
1016   }
1017 
1018   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1019     Modifiers Mods = getModifiers();
1020     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1021     assert(isRegKind());
1022     addRegOperands(Inst, N);
1023   }
1024 
1025   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1026     assert(!hasIntModifiers());
1027     addRegWithInputModsOperands(Inst, N);
1028   }
1029 
1030   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1031     assert(!hasFPModifiers());
1032     addRegWithInputModsOperands(Inst, N);
1033   }
1034 
1035   static void printImmTy(raw_ostream& OS, ImmTy Type) {
1036     // clang-format off
1037     switch (Type) {
1038     case ImmTyNone: OS << "None"; break;
1039     case ImmTyGDS: OS << "GDS"; break;
1040     case ImmTyLDS: OS << "LDS"; break;
1041     case ImmTyOffen: OS << "Offen"; break;
1042     case ImmTyIdxen: OS << "Idxen"; break;
1043     case ImmTyAddr64: OS << "Addr64"; break;
1044     case ImmTyOffset: OS << "Offset"; break;
1045     case ImmTyInstOffset: OS << "InstOffset"; break;
1046     case ImmTyOffset0: OS << "Offset0"; break;
1047     case ImmTyOffset1: OS << "Offset1"; break;
1048     case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1049     case ImmTyCPol: OS << "CPol"; break;
1050     case ImmTyTFE: OS << "TFE"; break;
1051     case ImmTyD16: OS << "D16"; break;
1052     case ImmTyFORMAT: OS << "FORMAT"; break;
1053     case ImmTyClampSI: OS << "ClampSI"; break;
1054     case ImmTyOModSI: OS << "OModSI"; break;
1055     case ImmTyDPP8: OS << "DPP8"; break;
1056     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1057     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1058     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1059     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1060     case ImmTyDppFI: OS << "DppFI"; break;
1061     case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1062     case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1063     case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1064     case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1065     case ImmTyDMask: OS << "DMask"; break;
1066     case ImmTyDim: OS << "Dim"; break;
1067     case ImmTyUNorm: OS << "UNorm"; break;
1068     case ImmTyDA: OS << "DA"; break;
1069     case ImmTyR128A16: OS << "R128A16"; break;
1070     case ImmTyA16: OS << "A16"; break;
1071     case ImmTyLWE: OS << "LWE"; break;
1072     case ImmTyOff: OS << "Off"; break;
1073     case ImmTyExpTgt: OS << "ExpTgt"; break;
1074     case ImmTyExpCompr: OS << "ExpCompr"; break;
1075     case ImmTyExpVM: OS << "ExpVM"; break;
1076     case ImmTyHwreg: OS << "Hwreg"; break;
1077     case ImmTySendMsg: OS << "SendMsg"; break;
1078     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1079     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1080     case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1081     case ImmTyOpSel: OS << "OpSel"; break;
1082     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1083     case ImmTyNegLo: OS << "NegLo"; break;
1084     case ImmTyNegHi: OS << "NegHi"; break;
1085     case ImmTySwizzle: OS << "Swizzle"; break;
1086     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1087     case ImmTyHigh: OS << "High"; break;
1088     case ImmTyBLGP: OS << "BLGP"; break;
1089     case ImmTyCBSZ: OS << "CBSZ"; break;
1090     case ImmTyABID: OS << "ABID"; break;
1091     case ImmTyEndpgm: OS << "Endpgm"; break;
1092     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1093     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1094     case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1095     case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1096     }
1097     // clang-format on
1098   }
1099 
1100   void print(raw_ostream &OS) const override {
1101     switch (Kind) {
1102     case Register:
1103       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1104       break;
1105     case Immediate:
1106       OS << '<' << getImm();
1107       if (getImmTy() != ImmTyNone) {
1108         OS << " type: "; printImmTy(OS, getImmTy());
1109       }
1110       OS << " mods: " << Imm.Mods << '>';
1111       break;
1112     case Token:
1113       OS << '\'' << getToken() << '\'';
1114       break;
1115     case Expression:
1116       OS << "<expr " << *Expr << '>';
1117       break;
1118     }
1119   }
1120 
1121   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1122                                       int64_t Val, SMLoc Loc,
1123                                       ImmTy Type = ImmTyNone,
1124                                       bool IsFPImm = false) {
1125     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1126     Op->Imm.Val = Val;
1127     Op->Imm.IsFPImm = IsFPImm;
1128     Op->Imm.Kind = ImmKindTyNone;
1129     Op->Imm.Type = Type;
1130     Op->Imm.Mods = Modifiers();
1131     Op->StartLoc = Loc;
1132     Op->EndLoc = Loc;
1133     return Op;
1134   }
1135 
1136   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1137                                         StringRef Str, SMLoc Loc,
1138                                         bool HasExplicitEncodingSize = true) {
1139     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1140     Res->Tok.Data = Str.data();
1141     Res->Tok.Length = Str.size();
1142     Res->StartLoc = Loc;
1143     Res->EndLoc = Loc;
1144     return Res;
1145   }
1146 
1147   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1148                                       unsigned RegNo, SMLoc S,
1149                                       SMLoc E) {
1150     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1151     Op->Reg.RegNo = RegNo;
1152     Op->Reg.Mods = Modifiers();
1153     Op->StartLoc = S;
1154     Op->EndLoc = E;
1155     return Op;
1156   }
1157 
1158   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1159                                        const class MCExpr *Expr, SMLoc S) {
1160     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1161     Op->Expr = Expr;
1162     Op->StartLoc = S;
1163     Op->EndLoc = S;
1164     return Op;
1165   }
1166 };
1167 
1168 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1169   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1170   return OS;
1171 }
1172 
1173 //===----------------------------------------------------------------------===//
1174 // AsmParser
1175 //===----------------------------------------------------------------------===//
1176 
1177 // Holds info related to the current kernel, e.g. count of SGPRs used.
1178 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
1179 // .amdgpu_hsa_kernel directive or at EOF.
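// The running maxima are published via the .kernel.sgpr_count,
// .kernel.vgpr_count and .kernel.agpr_count context symbols.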
1180 class KernelScopeInfo {
1181   int SgprIndexUnusedMin = -1;
1182   int VgprIndexUnusedMin = -1;
1183   int AgprIndexUnusedMin = -1;
1184   MCContext *Ctx = nullptr;
1185   MCSubtargetInfo const *MSTI = nullptr;
1186 
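  // Record that SGPR index i is referenced; keeps .kernel.sgpr_count equal to
  // the highest referenced index plus one.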
1187   void usesSgprAt(int i) {
1188     if (i >= SgprIndexUnusedMin) {
1189       SgprIndexUnusedMin = ++i;
1190       if (Ctx) {
1191         MCSymbol* const Sym =
1192           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1193         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1194       }
1195     }
1196   }
1197 
1198   void usesVgprAt(int i) {
1199     if (i >= VgprIndexUnusedMin) {
1200       VgprIndexUnusedMin = ++i;
1201       if (Ctx) {
1202         MCSymbol* const Sym =
1203           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1204         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1205                                          VgprIndexUnusedMin);
1206         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1207       }
1208     }
1209   }
1210 
1211   void usesAgprAt(int i) {
1212     // If the subtarget has no MAI instructions, anything that uses AGPRs will be
    // rejected later in AMDGPUAsmParser::MatchAndEmitInstruction, so do not track
    // AGPR usage here.
1213     if (!hasMAIInsts(*MSTI))
1214       return;
1215 
1216     if (i >= AgprIndexUnusedMin) {
1217       AgprIndexUnusedMin = ++i;
1218       if (Ctx) {
1219         MCSymbol* const Sym =
1220           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1221         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1222 
1223         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1224         MCSymbol* const vSym =
1225           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1226         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1227                                          VgprIndexUnusedMin);
1228         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1229       }
1230     }
1231   }
1232 
1233 public:
1234   KernelScopeInfo() = default;
1235 
1236   void initialize(MCContext &Context) {
1237     Ctx = &Context;
1238     MSTI = Ctx->getSubtargetInfo();
1239 
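    // Reset the counters and (re)create the *_count symbols with a value of 0.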
1240     usesSgprAt(SgprIndexUnusedMin = -1);
1241     usesVgprAt(VgprIndexUnusedMin = -1);
1242     if (hasMAIInsts(*MSTI)) {
1243       usesAgprAt(AgprIndexUnusedMin = -1);
1244     }
1245   }
1246 
1247   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1248                     unsigned RegWidth) {
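    // RegWidth is in bits; mark the last 32-bit register in the half-open range
    // [DwordRegIndex, DwordRegIndex + ceil(RegWidth / 32)) as used.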
1249     switch (RegKind) {
1250     case IS_SGPR:
1251       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1252       break;
1253     case IS_AGPR:
1254       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1255       break;
1256     case IS_VGPR:
1257       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1258       break;
1259     default:
1260       break;
1261     }
1262   }
1263 };
1264 
1265 class AMDGPUAsmParser : public MCTargetAsmParser {
1266   MCAsmParser &Parser;
1267 
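  // Encoding forced by a mnemonic suffix such as _e32, _e64, _dpp or _sdwa;
  // an encoding size of 0 means no preference.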
1268   unsigned ForcedEncodingSize = 0;
1269   bool ForcedDPP = false;
1270   bool ForcedSDWA = false;
1271   KernelScopeInfo KernelScope;
1272 
1273   /// @name Auto-generated Match Functions
1274   /// {
1275 
1276 #define GET_ASSEMBLER_HEADER
1277 #include "AMDGPUGenAsmMatcher.inc"
1278 
1279   /// }
1280 
1281 private:
1282   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1283   bool OutOfRangeError(SMRange Range);
1284   /// Calculate the VGPR/SGPR blocks required for the given target, reserved
1285   /// registers, and user-specified NextFreeXGPR values.
1286   ///
1287   /// \param Features [in] Target features, used for bug corrections.
1288   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1289   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1290   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1291   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1292   /// descriptor field, if valid.
1293   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1294   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1295   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1296   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1297   /// \param VGPRBlocks [out] Result VGPR block count.
1298   /// \param SGPRBlocks [out] Result SGPR block count.
1299   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1300                           bool FlatScrUsed, bool XNACKUsed,
1301                           std::optional<bool> EnableWavefrontSize32,
1302                           unsigned NextFreeVGPR, SMRange VGPRRange,
1303                           unsigned NextFreeSGPR, SMRange SGPRRange,
1304                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1305   bool ParseDirectiveAMDGCNTarget();
1306   bool ParseDirectiveAMDHSAKernel();
1307   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1308   bool ParseDirectiveHSACodeObjectVersion();
1309   bool ParseDirectiveHSACodeObjectISA();
1310   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1311   bool ParseDirectiveAMDKernelCodeT();
1312   // TODO: Possibly make subtargetHasRegister const.
1313   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1314   bool ParseDirectiveAMDGPUHsaKernel();
1315 
1316   bool ParseDirectiveISAVersion();
1317   bool ParseDirectiveHSAMetadata();
1318   bool ParseDirectivePALMetadataBegin();
1319   bool ParseDirectivePALMetadata();
1320   bool ParseDirectiveAMDGPULDS();
1321 
1322   /// Common code to parse out a block of text (typically YAML) between start and
1323   /// end directives.
1324   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1325                            const char *AssemblerDirectiveEnd,
1326                            std::string &CollectString);
1327 
1328   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1329                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1330   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1331                            unsigned &RegNum, unsigned &RegWidth,
1332                            bool RestoreOnFailure = false);
1333   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1334                            unsigned &RegNum, unsigned &RegWidth,
1335                            SmallVectorImpl<AsmToken> &Tokens);
1336   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1337                            unsigned &RegWidth,
1338                            SmallVectorImpl<AsmToken> &Tokens);
1339   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1340                            unsigned &RegWidth,
1341                            SmallVectorImpl<AsmToken> &Tokens);
1342   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1343                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1344   bool ParseRegRange(unsigned& Num, unsigned& Width);
1345   unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1346                          unsigned RegWidth, SMLoc Loc);
1347 
1348   bool isRegister();
1349   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1350   std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1351   void initializeGprCountSymbol(RegisterKind RegKind);
1352   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1353                              unsigned RegWidth);
1354   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1355                     bool IsAtomic);
1356 
1357 public:
1358   enum AMDGPUMatchResultTy {
1359     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1360   };
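  // NSA (non-sequential address) mode allows MIMG address operands to be
  // supplied as individual VGPRs rather than a single contiguous range.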
1361   enum OperandMode {
1362     OperandMode_Default,
1363     OperandMode_NSA,
1364   };
1365 
1366   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1367 
1368   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1369                const MCInstrInfo &MII,
1370                const MCTargetOptions &Options)
1371       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1372     MCAsmParserExtension::Initialize(Parser);
1373 
1374     if (getFeatureBits().none()) {
1375       // Set default features.
1376       copySTI().ToggleFeature("southern-islands");
1377     }
1378 
1379     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1380 
1381     {
1382       // TODO: make these pre-defined variables read-only.
1383       // Currently there is no suitable machinery in core llvm-mc for this.
1384       // MCSymbol::isRedefinable is intended for another purpose, and
1385       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1386       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1387       MCContext &Ctx = getContext();
1388       if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1389         MCSymbol *Sym =
1390             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1391         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1392         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1393         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1394         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1395         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1396       } else {
1397         MCSymbol *Sym =
1398             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1399         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1400         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1401         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1402         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1403         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1404       }
1405       if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1406         initializeGprCountSymbol(IS_VGPR);
1407         initializeGprCountSymbol(IS_SGPR);
1408       } else
1409         KernelScope.initialize(getContext());
1410     }
1411   }
1412 
1413   bool hasMIMG_R128() const {
1414     return AMDGPU::hasMIMG_R128(getSTI());
1415   }
1416 
1417   bool hasPackedD16() const {
1418     return AMDGPU::hasPackedD16(getSTI());
1419   }
1420 
1421   bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1422 
1423   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1424 
1425   bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1426 
1427   bool isSI() const {
1428     return AMDGPU::isSI(getSTI());
1429   }
1430 
1431   bool isCI() const {
1432     return AMDGPU::isCI(getSTI());
1433   }
1434 
1435   bool isVI() const {
1436     return AMDGPU::isVI(getSTI());
1437   }
1438 
1439   bool isGFX9() const {
1440     return AMDGPU::isGFX9(getSTI());
1441   }
1442 
1443   // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1444   bool isGFX90A() const {
1445     return AMDGPU::isGFX90A(getSTI());
1446   }
1447 
1448   bool isGFX940() const {
1449     return AMDGPU::isGFX940(getSTI());
1450   }
1451 
1452   bool isGFX9Plus() const {
1453     return AMDGPU::isGFX9Plus(getSTI());
1454   }
1455 
1456   bool isGFX10() const {
1457     return AMDGPU::isGFX10(getSTI());
1458   }
1459 
1460   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1461 
1462   bool isGFX11() const {
1463     return AMDGPU::isGFX11(getSTI());
1464   }
1465 
1466   bool isGFX11Plus() const {
1467     return AMDGPU::isGFX11Plus(getSTI());
1468   }
1469 
1470   bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1471 
1472   bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1473 
1474   bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1475 
1476   bool isGFX10_BEncoding() const {
1477     return AMDGPU::isGFX10_BEncoding(getSTI());
1478   }
1479 
1480   bool hasInv2PiInlineImm() const {
1481     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1482   }
1483 
1484   bool hasFlatOffsets() const {
1485     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1486   }
1487 
1488   bool hasArchitectedFlatScratch() const {
1489     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1490   }
1491 
1492   bool hasSGPR102_SGPR103() const {
1493     return !isVI() && !isGFX9();
1494   }
1495 
1496   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1497 
1498   bool hasIntClamp() const {
1499     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1500   }
1501 
1502   bool hasPartialNSAEncoding() const {
1503     return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1504   }
1505 
1506   unsigned getNSAMaxSize(bool HasSampler = false) const {
1507     return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1508   }
1509 
1510   unsigned getMaxNumUserSGPRs() const {
1511     return AMDGPU::getMaxNumUserSGPRs(getSTI());
1512   }
1513 
1514   bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1515 
1516   AMDGPUTargetStreamer &getTargetStreamer() {
1517     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1518     return static_cast<AMDGPUTargetStreamer &>(TS);
1519   }
1520 
1521   const MCRegisterInfo *getMRI() const {
1522     // MCAsmParser::getContext() is not const, so a const_cast is needed to look
1523     // up the register info from this const method.
1524     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1525   }
1526 
1527   const MCInstrInfo *getMII() const {
1528     return &MII;
1529   }
1530 
1531   const FeatureBitset &getFeatureBits() const {
1532     return getSTI().getFeatureBits();
1533   }
1534 
1535   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1536   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1537   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1538 
1539   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1540   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1541   bool isForcedDPP() const { return ForcedDPP; }
1542   bool isForcedSDWA() const { return ForcedSDWA; }
1543   ArrayRef<unsigned> getMatchedVariants() const;
1544   StringRef getMatchedVariantName() const;
1545 
1546   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1547   bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1548                      bool RestoreOnFailure);
1549   bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1550   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1551                                SMLoc &EndLoc) override;
1552   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1553   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1554                                       unsigned Kind) override;
1555   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1556                                OperandVector &Operands, MCStreamer &Out,
1557                                uint64_t &ErrorInfo,
1558                                bool MatchingInlineAsm) override;
1559   bool ParseDirective(AsmToken DirectiveID) override;
1560   ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1561                            OperandMode Mode = OperandMode_Default);
1562   StringRef parseMnemonicSuffix(StringRef Name);
1563   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1564                         SMLoc NameLoc, OperandVector &Operands) override;
1565   //bool ProcessInstruction(MCInst &Inst);
1566 
1567   ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1568 
1569   ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1570 
1571   ParseStatus
1572   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1573                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1574                      std::function<bool(int64_t &)> ConvertResult = nullptr);
1575 
1576   ParseStatus parseOperandArrayWithPrefix(
1577       const char *Prefix, OperandVector &Operands,
1578       AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1579       bool (*ConvertResult)(int64_t &) = nullptr);
1580 
1581   ParseStatus
1582   parseNamedBit(StringRef Name, OperandVector &Operands,
1583                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1584   unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1585   ParseStatus parseCPol(OperandVector &Operands);
1586   ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1587   ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1588   ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1589                                     SMLoc &StringLoc);
1590 
1591   bool isModifier();
1592   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1593   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1594   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1595   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1596   bool parseSP3NegModifier();
1597   ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1598                        bool HasLit = false);
1599   ParseStatus parseReg(OperandVector &Operands);
1600   ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1601                             bool HasLit = false);
1602   ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1603                                            bool AllowImm = true);
1604   ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1605                                             bool AllowImm = true);
1606   ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1607   ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1608   ParseStatus parseVReg32OrOff(OperandVector &Operands);
1609   ParseStatus parseDfmtNfmt(int64_t &Format);
1610   ParseStatus parseUfmt(int64_t &Format);
1611   ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1612                                        int64_t &Format);
1613   ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1614                                          int64_t &Format);
1615   ParseStatus parseFORMAT(OperandVector &Operands);
1616   ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1617   ParseStatus parseNumericFormat(int64_t &Format);
1618   ParseStatus parseFlatOffset(OperandVector &Operands);
1619   ParseStatus parseR128A16(OperandVector &Operands);
1620   ParseStatus parseBLGP(OperandVector &Operands);
1621   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1622   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1623 
1624   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1625 
1626   bool parseCnt(int64_t &IntVal);
1627   ParseStatus parseSWaitCnt(OperandVector &Operands);
1628 
1629   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1630   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1631   ParseStatus parseDepCtr(OperandVector &Operands);
1632 
1633   bool parseDelay(int64_t &Delay);
1634   ParseStatus parseSDelayALU(OperandVector &Operands);
1635 
1636   ParseStatus parseHwreg(OperandVector &Operands);
1637 
1638 private:
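  // Describes one field of a structured operand such as hwreg(...) or
  // sendmsg(...): the parsed id, where it appeared, and whether it was written
  // symbolically.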
1639   struct OperandInfoTy {
1640     SMLoc Loc;
1641     int64_t Id;
1642     bool IsSymbolic = false;
1643     bool IsDefined = false;
1644 
1645     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1646   };
1647 
1648   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1649   bool validateSendMsg(const OperandInfoTy &Msg,
1650                        const OperandInfoTy &Op,
1651                        const OperandInfoTy &Stream);
1652 
1653   bool parseHwregBody(OperandInfoTy &HwReg,
1654                       OperandInfoTy &Offset,
1655                       OperandInfoTy &Width);
1656   bool validateHwreg(const OperandInfoTy &HwReg,
1657                      const OperandInfoTy &Offset,
1658                      const OperandInfoTy &Width);
1659 
1660   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1661   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1662   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1663 
1664   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1665                       const OperandVector &Operands) const;
1666   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1667   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1668   SMLoc getLitLoc(const OperandVector &Operands,
1669                   bool SearchMandatoryLiterals = false) const;
1670   SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1671   SMLoc getConstLoc(const OperandVector &Operands) const;
1672   SMLoc getInstLoc(const OperandVector &Operands) const;
1673 
1674   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1675   bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1676   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1677   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1678   bool validateSOPLiteral(const MCInst &Inst) const;
1679   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1680   bool validateVOPDRegBankConstraints(const MCInst &Inst,
1681                                       const OperandVector &Operands);
1682   bool validateIntClampSupported(const MCInst &Inst);
1683   bool validateMIMGAtomicDMask(const MCInst &Inst);
1684   bool validateMIMGGatherDMask(const MCInst &Inst);
1685   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1686   bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1687   bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1688   bool validateMIMGD16(const MCInst &Inst);
1689   bool validateMIMGMSAA(const MCInst &Inst);
1690   bool validateOpSel(const MCInst &Inst);
1691   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1692   bool validateVccOperand(unsigned Reg) const;
1693   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1694   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1695   bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1696   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1697   bool validateAGPRLdSt(const MCInst &Inst) const;
1698   bool validateVGPRAlign(const MCInst &Inst) const;
1699   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1700   bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1701   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1702   bool validateDivScale(const MCInst &Inst);
1703   bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1704   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1705                              const SMLoc &IDLoc);
1706   bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1707                               const unsigned CPol);
1708   bool validateExeczVcczOperands(const OperandVector &Operands);
1709   bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1710   std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1711   unsigned getConstantBusLimit(unsigned Opcode) const;
1712   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1713   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1714   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1715 
1716   bool isSupportedMnemo(StringRef Mnemo,
1717                         const FeatureBitset &FBS);
1718   bool isSupportedMnemo(StringRef Mnemo,
1719                         const FeatureBitset &FBS,
1720                         ArrayRef<unsigned> Variants);
1721   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1722 
1723   bool isId(const StringRef Id) const;
1724   bool isId(const AsmToken &Token, const StringRef Id) const;
1725   bool isToken(const AsmToken::TokenKind Kind) const;
1726   StringRef getId() const;
1727   bool trySkipId(const StringRef Id);
1728   bool trySkipId(const StringRef Pref, const StringRef Id);
1729   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1730   bool trySkipToken(const AsmToken::TokenKind Kind);
1731   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1732   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1733   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1734 
1735   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1736   AsmToken::TokenKind getTokenKind() const;
1737   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1738   bool parseExpr(OperandVector &Operands);
1739   StringRef getTokenStr() const;
1740   AsmToken peekToken(bool ShouldSkipSpace = true);
1741   AsmToken getToken() const;
1742   SMLoc getLoc() const;
1743   void lex();
1744 
1745 public:
1746   void onBeginOfFile() override;
1747 
1748   ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1749 
1750   ParseStatus parseExpTgt(OperandVector &Operands);
1751   ParseStatus parseSendMsg(OperandVector &Operands);
1752   ParseStatus parseInterpSlot(OperandVector &Operands);
1753   ParseStatus parseInterpAttr(OperandVector &Operands);
1754   ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1755   ParseStatus parseBoolReg(OperandVector &Operands);
1756 
1757   bool parseSwizzleOperand(int64_t &Op,
1758                            const unsigned MinVal,
1759                            const unsigned MaxVal,
1760                            const StringRef ErrMsg,
1761                            SMLoc &Loc);
1762   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1763                             const unsigned MinVal,
1764                             const unsigned MaxVal,
1765                             const StringRef ErrMsg);
1766   ParseStatus parseSwizzle(OperandVector &Operands);
1767   bool parseSwizzleOffset(int64_t &Imm);
1768   bool parseSwizzleMacro(int64_t &Imm);
1769   bool parseSwizzleQuadPerm(int64_t &Imm);
1770   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1771   bool parseSwizzleBroadcast(int64_t &Imm);
1772   bool parseSwizzleSwap(int64_t &Imm);
1773   bool parseSwizzleReverse(int64_t &Imm);
1774 
1775   ParseStatus parseGPRIdxMode(OperandVector &Operands);
1776   int64_t parseGPRIdxMacro();
1777 
1778   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1779   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1780 
1781   ParseStatus parseOModSI(OperandVector &Operands);
1782 
1783   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1784                OptionalImmIndexMap &OptionalIdx);
1785   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1786   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1787   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1788   void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1789   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1790                     OptionalImmIndexMap &OptionalIdx);
1791   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1792                 OptionalImmIndexMap &OptionalIdx);
1793 
1794   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1795   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1796 
1797   bool parseDimId(unsigned &Encoding);
1798   ParseStatus parseDim(OperandVector &Operands);
1799   bool convertDppBoundCtrl(int64_t &BoundCtrl);
1800   ParseStatus parseDPP8(OperandVector &Operands);
1801   ParseStatus parseDPPCtrl(OperandVector &Operands);
1802   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1803   int64_t parseDPPCtrlSel(StringRef Ctrl);
1804   int64_t parseDPPCtrlPerm();
1805   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1806   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1807     cvtDPP(Inst, Operands, true);
1808   }
1809   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1810                   bool IsDPP8 = false);
1811   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1812     cvtVOP3DPP(Inst, Operands, true);
1813   }
1814 
1815   ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1816                            AMDGPUOperand::ImmTy Type);
1817   ParseStatus parseSDWADstUnused(OperandVector &Operands);
1818   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1819   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1820   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1821   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1822   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1823   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1824                uint64_t BasicInstType,
1825                bool SkipDstVcc = false,
1826                bool SkipSrcVcc = false);
1827 
1828   ParseStatus parseEndpgm(OperandVector &Operands);
1829 
1830   ParseStatus parseVOPD(OperandVector &Operands);
1831 };
1832 
1833 } // end anonymous namespace
1834 
1835 // May be called with an integer type of equivalent bitwidth.
1836 static const fltSemantics *getFltSemantics(unsigned Size) {
1837   switch (Size) {
1838   case 4:
1839     return &APFloat::IEEEsingle();
1840   case 8:
1841     return &APFloat::IEEEdouble();
1842   case 2:
1843     return &APFloat::IEEEhalf();
1844   default:
1845     llvm_unreachable("unsupported fp type");
1846   }
1847 }
1848 
1849 static const fltSemantics *getFltSemantics(MVT VT) {
1850   return getFltSemantics(VT.getSizeInBits() / 8);
1851 }
1852 
1853 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1854   switch (OperandType) {
1855   case AMDGPU::OPERAND_REG_IMM_INT32:
1856   case AMDGPU::OPERAND_REG_IMM_FP32:
1857   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1858   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1859   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1860   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1861   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1862   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1863   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1864   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1865   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1866   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1867   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1868   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1869   case AMDGPU::OPERAND_KIMM32:
1870   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1871     return &APFloat::IEEEsingle();
1872   case AMDGPU::OPERAND_REG_IMM_INT64:
1873   case AMDGPU::OPERAND_REG_IMM_FP64:
1874   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1875   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1876   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1877     return &APFloat::IEEEdouble();
1878   case AMDGPU::OPERAND_REG_IMM_INT16:
1879   case AMDGPU::OPERAND_REG_IMM_FP16:
1880   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1881   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1882   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1883   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1884   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1885   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1886   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1887   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1888   case AMDGPU::OPERAND_KIMM16:
1889     return &APFloat::IEEEhalf();
1890   default:
1891     llvm_unreachable("unsupported fp type");
1892   }
1893 }
1894 
1895 //===----------------------------------------------------------------------===//
1896 // Operand
1897 //===----------------------------------------------------------------------===//
1898 
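     // Check whether FPLiteral can be converted to the floating-point type
     // described by VT. Precision loss is tolerated; overflow and underflow
     // are not.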
1899 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1900   bool Lost;
1901 
1902   // Convert the literal to the floating-point semantics of VT.
1903   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1904                                                APFloat::rmNearestTiesToEven,
1905                                                &Lost);
1906   // We allow precision loss but not overflow or underflow.
1907   if (Status != APFloat::opOK &&
1908       Lost &&
1909       ((Status & APFloat::opOverflow)  != 0 ||
1910        (Status & APFloat::opUnderflow) != 0)) {
1911     return false;
1912   }
1913 
1914   return true;
1915 }
1916 
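     // Check whether Val fits into Size bits as either an unsigned or a
     // signed integer, i.e. whether it can be truncated to Size bits without
     // losing information.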
1917 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1918   return isUIntN(Size, Val) || isIntN(Size, Val);
1919 }
1920 
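     // Check whether a 16-bit literal is inlinable. Integer (i16/v2i16)
     // operands only accept inlinable integer literals; f16/v2f16 operands
     // accept any inlinable 16-bit literal.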
1921 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1922   if (VT.getScalarType() == MVT::i16) {
1923     // FP immediate values are broken.
1924     return isInlinableIntLiteral(Val);
1925   }
1926 
1927   // f16/v2f16 operands work correctly for all values.
1928   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1929 }
1930 
1931 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1932 
1933   // This is a hack to enable named inline values like
1934   // shared_base with both 32-bit and 64-bit operands.
1935   // Note that these values are defined as
1936   // 32-bit operands only.
1937   if (isInlineValue()) {
1938     return true;
1939   }
1940 
1941   if (!isImmTy(ImmTyNone)) {
1942     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1943     return false;
1944   }
1945   // TODO: We should avoid using host float here. It would be better to
1946   // check the float bit values which is what a few other places do.
1947   // We've had bot failures before due to weird NaN support on mips hosts.
1948 
1949   APInt Literal(64, Imm.Val);
1950 
1951   if (Imm.IsFPImm) { // We got fp literal token
1952     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1953       return AMDGPU::isInlinableLiteral64(Imm.Val,
1954                                           AsmParser->hasInv2PiInlineImm());
1955     }
1956 
1957     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1958     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1959       return false;
1960 
1961     if (type.getScalarSizeInBits() == 16) {
1962       return isInlineableLiteralOp16(
1963         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1964         type, AsmParser->hasInv2PiInlineImm());
1965     }
1966 
1967     // Check if single precision literal is inlinable
1968     return AMDGPU::isInlinableLiteral32(
1969       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1970       AsmParser->hasInv2PiInlineImm());
1971   }
1972 
1973   // We got int literal token.
1974   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1975     return AMDGPU::isInlinableLiteral64(Imm.Val,
1976                                         AsmParser->hasInv2PiInlineImm());
1977   }
1978 
1979   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1980     return false;
1981   }
1982 
1983   if (type.getScalarSizeInBits() == 16) {
1984     return isInlineableLiteralOp16(
1985       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1986       type, AsmParser->hasInv2PiInlineImm());
1987   }
1988 
1989   return AMDGPU::isInlinableLiteral32(
1990     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1991     AsmParser->hasInv2PiInlineImm());
1992 }
1993 
1994 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1995   // Check that this immediate can be added as literal
1996   if (!isImmTy(ImmTyNone)) {
1997     return false;
1998   }
1999 
2000   if (!Imm.IsFPImm) {
2001     // We got int literal token.
2002 
2003     if (type == MVT::f64 && hasFPModifiers()) {
2004       // FP modifiers cannot be applied to int literals while preserving the
2005       // same semantics for VOP1/2/C and VOP3, because of integer truncation.
2006       // To avoid ambiguity, disable these cases.
2007       return false;
2008     }
2009 
2010     unsigned Size = type.getSizeInBits();
2011     if (Size == 64)
2012       Size = 32;
2013 
2014     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2015     // types.
2016     return isSafeTruncation(Imm.Val, Size);
2017   }
2018 
2019   // We got fp literal token
2020   if (type == MVT::f64) { // Expected 64-bit fp operand
2021     // The low 32 bits of the literal would be zeroed, but we accept such literals.
2022     return true;
2023   }
2024 
2025   if (type == MVT::i64) { // Expected 64-bit int operand
2026     // We don't allow fp literals in 64-bit integer instructions. It is
2027     // unclear how we should encode them.
2028     return false;
2029   }
2030 
2031   // We allow fp literals with f16x2 operands assuming that the specified
2032   // literal goes into the lower half and the upper half is zero. We also
2033   // require that the literal may be losslessly converted to f16.
2034   //
2035   // For i16x2 operands, we assume that the specified literal is encoded as a
2036   // single-precision float. This is pretty odd, but it matches SP3 and what
2037   // happens in hardware.
2038   MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
2039                      : (type == MVT::v2i16) ? MVT::f32
2040                      : (type == MVT::v2f32) ? MVT::f32
2041                                             : type;
2042 
2043   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2044   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2045 }
2046 
2047 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2048   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2049 }
2050 
2051 bool AMDGPUOperand::isVRegWithInputMods() const {
2052   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2053          // GFX90A allows DPP on 64-bit operands.
2054          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2055           AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2056 }
2057 
2058 bool AMDGPUOperand::isT16VRegWithInputMods() const {
2059   return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
2060 }
2061 
2062 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2063   if (AsmParser->isVI())
2064     return isVReg32();
2065   else if (AsmParser->isGFX9Plus())
2066     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2067   else
2068     return false;
2069 }
2070 
2071 bool AMDGPUOperand::isSDWAFP16Operand() const {
2072   return isSDWAOperand(MVT::f16);
2073 }
2074 
2075 bool AMDGPUOperand::isSDWAFP32Operand() const {
2076   return isSDWAOperand(MVT::f32);
2077 }
2078 
2079 bool AMDGPUOperand::isSDWAInt16Operand() const {
2080   return isSDWAOperand(MVT::i16);
2081 }
2082 
2083 bool AMDGPUOperand::isSDWAInt32Operand() const {
2084   return isSDWAOperand(MVT::i32);
2085 }
2086 
2087 bool AMDGPUOperand::isBoolReg() const {
2088   auto FB = AsmParser->getFeatureBits();
2089   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2090                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2091 }
2092 
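     // Apply 'abs'/'neg' input modifiers directly to the bit pattern of an
     // FP literal of the given byte Size: 'abs' clears the sign bit and
     // 'neg' flips it.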
2093 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2094 {
2095   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2096   assert(Size == 2 || Size == 4 || Size == 8);
2097 
2098   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2099 
2100   if (Imm.Mods.Abs) {
2101     Val &= ~FpSignMask;
2102   }
2103   if (Imm.Mods.Neg) {
2104     Val ^= FpSignMask;
2105   }
2106 
2107   return Val;
2108 }
2109 
2110 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2111   if (isExpr()) {
2112     Inst.addOperand(MCOperand::createExpr(Expr));
2113     return;
2114   }
2115 
2116   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2117                              Inst.getNumOperands())) {
2118     addLiteralImmOperand(Inst, Imm.Val,
2119                          ApplyModifiers &
2120                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2121   } else {
2122     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2123     Inst.addOperand(MCOperand::createImm(Imm.Val));
2124     setImmKindNone();
2125   }
2126 }
2127 
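     // Encode a literal immediate into Inst. Inlinable values are emitted
     // unchanged; other values are truncated or repositioned to match the
     // literal encoding expected by the operand type.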
2128 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2129   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2130   auto OpNum = Inst.getNumOperands();
2131   // Check that this operand accepts literals
2132   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2133 
2134   if (ApplyModifiers) {
2135     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2136     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2137     Val = applyInputFPModifiers(Val, Size);
2138   }
2139 
2140   APInt Literal(64, Val);
2141   uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2142 
2143   if (Imm.IsFPImm) { // We got fp literal token
2144     switch (OpTy) {
2145     case AMDGPU::OPERAND_REG_IMM_INT64:
2146     case AMDGPU::OPERAND_REG_IMM_FP64:
2147     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2148     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2149     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2150       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2151                                        AsmParser->hasInv2PiInlineImm())) {
2152         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2153         setImmKindConst();
2154         return;
2155       }
2156 
2157       // Non-inlineable
2158       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2159         // For fp operands we check if low 32 bits are zeros
2160         if (Literal.getLoBits(32) != 0) {
2161           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2162           "Can't encode literal as exact 64-bit floating-point operand. "
2163           "Low 32-bits will be set to zero");
2164           Val &= 0xffffffff00000000u;
2165         }
2166 
2167         Inst.addOperand(MCOperand::createImm(Val));
2168         setImmKindLiteral();
2169         return;
2170       }
2171 
2172       // We don't allow fp literals in 64-bit integer instructions. It is
2173       // unclear how we should encode them. This case should be checked earlier
2174       // in predicate methods (isLiteralImm())
2175       llvm_unreachable("fp literal in 64-bit integer instruction.");
2176 
2177     case AMDGPU::OPERAND_REG_IMM_INT32:
2178     case AMDGPU::OPERAND_REG_IMM_FP32:
2179     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2180     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2181     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2182     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2183     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2184     case AMDGPU::OPERAND_REG_IMM_INT16:
2185     case AMDGPU::OPERAND_REG_IMM_FP16:
2186     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2187     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2188     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2189     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2190     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2191     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2192     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2193     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2194     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2195     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2196     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2197     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2198     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2199     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2200     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2201     case AMDGPU::OPERAND_KIMM32:
2202     case AMDGPU::OPERAND_KIMM16:
2203     case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
2204       bool lost;
2205       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2206       // Convert the literal to the operand's floating-point semantics.
2207       FPLiteral.convert(*getOpFltSemantics(OpTy),
2208                         APFloat::rmNearestTiesToEven, &lost);
2209       // We allow precision loss but not overflow or underflow. This should be
2210       // checked earlier in isLiteralImm()
2211 
2212       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2213       Inst.addOperand(MCOperand::createImm(ImmVal));
2214       if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2215         setImmKindMandatoryLiteral();
2216       } else {
2217         setImmKindLiteral();
2218       }
2219       return;
2220     }
2221     default:
2222       llvm_unreachable("invalid operand size");
2223     }
2224 
2225     return;
2226   }
2227 
2228   // We got int literal token.
2229   // Only sign extend inline immediates.
2230   switch (OpTy) {
2231   case AMDGPU::OPERAND_REG_IMM_INT32:
2232   case AMDGPU::OPERAND_REG_IMM_FP32:
2233   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2234   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2235   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2236   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2237   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2238   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2239   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2240   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2241   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2242   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2243   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2244   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
2245     if (isSafeTruncation(Val, 32) &&
2246         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2247                                      AsmParser->hasInv2PiInlineImm())) {
2248       Inst.addOperand(MCOperand::createImm(Val));
2249       setImmKindConst();
2250       return;
2251     }
2252 
2253     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2254     setImmKindLiteral();
2255     return;
2256 
2257   case AMDGPU::OPERAND_REG_IMM_INT64:
2258   case AMDGPU::OPERAND_REG_IMM_FP64:
2259   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2260   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2261   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2262     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2263       Inst.addOperand(MCOperand::createImm(Val));
2264       setImmKindConst();
2265       return;
2266     }
2267 
2268     Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2269                                                     : Lo_32(Val);
2270 
2271     Inst.addOperand(MCOperand::createImm(Val));
2272     setImmKindLiteral();
2273     return;
2274 
2275   case AMDGPU::OPERAND_REG_IMM_INT16:
2276   case AMDGPU::OPERAND_REG_IMM_FP16:
2277   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2278   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2279   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2280   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2281   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2282     if (isSafeTruncation(Val, 16) &&
2283         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2284                                      AsmParser->hasInv2PiInlineImm())) {
2285       Inst.addOperand(MCOperand::createImm(Val));
2286       setImmKindConst();
2287       return;
2288     }
2289 
2290     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2291     setImmKindLiteral();
2292     return;
2293 
2294   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2295   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2296   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2297   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2298     assert(isSafeTruncation(Val, 16));
2299     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2300                                         AsmParser->hasInv2PiInlineImm()));
2301 
2302     Inst.addOperand(MCOperand::createImm(Val));
2303     return;
2304   }
2305   case AMDGPU::OPERAND_KIMM32:
2306     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2307     setImmKindMandatoryLiteral();
2308     return;
2309   case AMDGPU::OPERAND_KIMM16:
2310     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2311     setImmKindMandatoryLiteral();
2312     return;
2313   default:
2314     llvm_unreachable("invalid operand size");
2315   }
2316 }
2317 
2318 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2319   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2320 }
2321 
2322 bool AMDGPUOperand::isInlineValue() const {
2323   return isRegKind() && ::isInlineValue(getReg());
2324 }
2325 
2326 //===----------------------------------------------------------------------===//
2327 // AsmParser
2328 //===----------------------------------------------------------------------===//
2329 
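     // Map a register kind and a width in bits to the corresponding register
     // class ID, or return -1 if no such class exists.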
2330 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2331   if (Is == IS_VGPR) {
2332     switch (RegWidth) {
2333       default: return -1;
2334       case 32:
2335         return AMDGPU::VGPR_32RegClassID;
2336       case 64:
2337         return AMDGPU::VReg_64RegClassID;
2338       case 96:
2339         return AMDGPU::VReg_96RegClassID;
2340       case 128:
2341         return AMDGPU::VReg_128RegClassID;
2342       case 160:
2343         return AMDGPU::VReg_160RegClassID;
2344       case 192:
2345         return AMDGPU::VReg_192RegClassID;
2346       case 224:
2347         return AMDGPU::VReg_224RegClassID;
2348       case 256:
2349         return AMDGPU::VReg_256RegClassID;
2350       case 288:
2351         return AMDGPU::VReg_288RegClassID;
2352       case 320:
2353         return AMDGPU::VReg_320RegClassID;
2354       case 352:
2355         return AMDGPU::VReg_352RegClassID;
2356       case 384:
2357         return AMDGPU::VReg_384RegClassID;
2358       case 512:
2359         return AMDGPU::VReg_512RegClassID;
2360       case 1024:
2361         return AMDGPU::VReg_1024RegClassID;
2362     }
2363   } else if (Is == IS_TTMP) {
2364     switch (RegWidth) {
2365       default: return -1;
2366       case 32:
2367         return AMDGPU::TTMP_32RegClassID;
2368       case 64:
2369         return AMDGPU::TTMP_64RegClassID;
2370       case 128:
2371         return AMDGPU::TTMP_128RegClassID;
2372       case 256:
2373         return AMDGPU::TTMP_256RegClassID;
2374       case 512:
2375         return AMDGPU::TTMP_512RegClassID;
2376     }
2377   } else if (Is == IS_SGPR) {
2378     switch (RegWidth) {
2379       default: return -1;
2380       case 32:
2381         return AMDGPU::SGPR_32RegClassID;
2382       case 64:
2383         return AMDGPU::SGPR_64RegClassID;
2384       case 96:
2385         return AMDGPU::SGPR_96RegClassID;
2386       case 128:
2387         return AMDGPU::SGPR_128RegClassID;
2388       case 160:
2389         return AMDGPU::SGPR_160RegClassID;
2390       case 192:
2391         return AMDGPU::SGPR_192RegClassID;
2392       case 224:
2393         return AMDGPU::SGPR_224RegClassID;
2394       case 256:
2395         return AMDGPU::SGPR_256RegClassID;
2396       case 288:
2397         return AMDGPU::SGPR_288RegClassID;
2398       case 320:
2399         return AMDGPU::SGPR_320RegClassID;
2400       case 352:
2401         return AMDGPU::SGPR_352RegClassID;
2402       case 384:
2403         return AMDGPU::SGPR_384RegClassID;
2404       case 512:
2405         return AMDGPU::SGPR_512RegClassID;
2406     }
2407   } else if (Is == IS_AGPR) {
2408     switch (RegWidth) {
2409       default: return -1;
2410       case 32:
2411         return AMDGPU::AGPR_32RegClassID;
2412       case 64:
2413         return AMDGPU::AReg_64RegClassID;
2414       case 96:
2415         return AMDGPU::AReg_96RegClassID;
2416       case 128:
2417         return AMDGPU::AReg_128RegClassID;
2418       case 160:
2419         return AMDGPU::AReg_160RegClassID;
2420       case 192:
2421         return AMDGPU::AReg_192RegClassID;
2422       case 224:
2423         return AMDGPU::AReg_224RegClassID;
2424       case 256:
2425         return AMDGPU::AReg_256RegClassID;
2426       case 288:
2427         return AMDGPU::AReg_288RegClassID;
2428       case 320:
2429         return AMDGPU::AReg_320RegClassID;
2430       case 352:
2431         return AMDGPU::AReg_352RegClassID;
2432       case 384:
2433         return AMDGPU::AReg_384RegClassID;
2434       case 512:
2435         return AMDGPU::AReg_512RegClassID;
2436       case 1024:
2437         return AMDGPU::AReg_1024RegClassID;
2438     }
2439   }
2440   return -1;
2441 }
2442 
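     // Map a special register name (including accepted aliases such as
     // "src_shared_base") to its register, or return NoRegister if the name
     // is not recognized.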
2443 static unsigned getSpecialRegForName(StringRef RegName) {
2444   return StringSwitch<unsigned>(RegName)
2445     .Case("exec", AMDGPU::EXEC)
2446     .Case("vcc", AMDGPU::VCC)
2447     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2448     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2449     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2450     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2451     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2452     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2453     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2454     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2455     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2456     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2457     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2458     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2459     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2460     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2461     .Case("m0", AMDGPU::M0)
2462     .Case("vccz", AMDGPU::SRC_VCCZ)
2463     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2464     .Case("execz", AMDGPU::SRC_EXECZ)
2465     .Case("src_execz", AMDGPU::SRC_EXECZ)
2466     .Case("scc", AMDGPU::SRC_SCC)
2467     .Case("src_scc", AMDGPU::SRC_SCC)
2468     .Case("tba", AMDGPU::TBA)
2469     .Case("tma", AMDGPU::TMA)
2470     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2471     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2472     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2473     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2474     .Case("vcc_lo", AMDGPU::VCC_LO)
2475     .Case("vcc_hi", AMDGPU::VCC_HI)
2476     .Case("exec_lo", AMDGPU::EXEC_LO)
2477     .Case("exec_hi", AMDGPU::EXEC_HI)
2478     .Case("tma_lo", AMDGPU::TMA_LO)
2479     .Case("tma_hi", AMDGPU::TMA_HI)
2480     .Case("tba_lo", AMDGPU::TBA_LO)
2481     .Case("tba_hi", AMDGPU::TBA_HI)
2482     .Case("pc", AMDGPU::PC_REG)
2483     .Case("null", AMDGPU::SGPR_NULL)
2484     .Default(AMDGPU::NoRegister);
2485 }
2486 
2487 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2488                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2489   auto R = parseRegister();
2490   if (!R) return true;
2491   assert(R->isReg());
2492   RegNo = R->getReg();
2493   StartLoc = R->getStartLoc();
2494   EndLoc = R->getEndLoc();
2495   return false;
2496 }
2497 
2498 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2499                                     SMLoc &EndLoc) {
2500   return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2501 }
2502 
2503 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2504                                               SMLoc &EndLoc) {
2505   bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2506   bool PendingErrors = getParser().hasPendingError();
2507   getParser().clearPendingErrors();
2508   if (PendingErrors)
2509     return ParseStatus::Failure;
2510   if (Result)
2511     return ParseStatus::NoMatch;
2512   return ParseStatus::Success;
2513 }
2514 
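     // Extend the register (list) being parsed with Reg1. Special registers
     // may only be combined as their known LO/HI pairs (e.g. exec_lo and
     // exec_hi form exec); regular registers must have consecutive indices,
     // each one adding 32 bits to RegWidth.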
2515 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2516                                             RegisterKind RegKind, unsigned Reg1,
2517                                             SMLoc Loc) {
2518   switch (RegKind) {
2519   case IS_SPECIAL:
2520     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2521       Reg = AMDGPU::EXEC;
2522       RegWidth = 64;
2523       return true;
2524     }
2525     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2526       Reg = AMDGPU::FLAT_SCR;
2527       RegWidth = 64;
2528       return true;
2529     }
2530     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2531       Reg = AMDGPU::XNACK_MASK;
2532       RegWidth = 64;
2533       return true;
2534     }
2535     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2536       Reg = AMDGPU::VCC;
2537       RegWidth = 64;
2538       return true;
2539     }
2540     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2541       Reg = AMDGPU::TBA;
2542       RegWidth = 64;
2543       return true;
2544     }
2545     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2546       Reg = AMDGPU::TMA;
2547       RegWidth = 64;
2548       return true;
2549     }
2550     Error(Loc, "register does not fit in the list");
2551     return false;
2552   case IS_VGPR:
2553   case IS_SGPR:
2554   case IS_AGPR:
2555   case IS_TTMP:
2556     if (Reg1 != Reg + RegWidth / 32) {
2557       Error(Loc, "registers in a list must have consecutive indices");
2558       return false;
2559     }
2560     RegWidth += 32;
2561     return true;
2562   default:
2563     llvm_unreachable("unexpected register kind");
2564   }
2565 }
2566 
2567 struct RegInfo {
2568   StringLiteral Name;
2569   RegisterKind Kind;
2570 };
2571 
2572 static constexpr RegInfo RegularRegisters[] = {
2573   {{"v"},    IS_VGPR},
2574   {{"s"},    IS_SGPR},
2575   {{"ttmp"}, IS_TTMP},
2576   {{"acc"},  IS_AGPR},
2577   {{"a"},    IS_AGPR},
2578 };
2579 
2580 static bool isRegularReg(RegisterKind Kind) {
2581   return Kind == IS_VGPR ||
2582          Kind == IS_SGPR ||
2583          Kind == IS_TTMP ||
2584          Kind == IS_AGPR;
2585 }
2586 
2587 static const RegInfo* getRegularRegInfo(StringRef Str) {
2588   for (const RegInfo &Reg : RegularRegisters)
2589     if (Str.starts_with(Reg.Name))
2590       return &Reg;
2591   return nullptr;
2592 }
2593 
2594 static bool getRegNum(StringRef Str, unsigned& Num) {
2595   return !Str.getAsInteger(10, Num);
2596 }
2597 
2598 bool
2599 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2600                             const AsmToken &NextToken) const {
2601 
2602   // A list of consecutive registers: [s0,s1,s2,s3]
2603   if (Token.is(AsmToken::LBrac))
2604     return true;
2605 
2606   if (!Token.is(AsmToken::Identifier))
2607     return false;
2608 
2609   // A single register like s0 or a range of registers like s[0:1]
2610 
2611   StringRef Str = Token.getString();
2612   const RegInfo *Reg = getRegularRegInfo(Str);
2613   if (Reg) {
2614     StringRef RegName = Reg->Name;
2615     StringRef RegSuffix = Str.substr(RegName.size());
2616     if (!RegSuffix.empty()) {
2617       RegSuffix.consume_back(".l");
2618       RegSuffix.consume_back(".h");
2619       unsigned Num;
2620       // A single register with an index: rXX
2621       if (getRegNum(RegSuffix, Num))
2622         return true;
2623     } else {
2624       // A range of registers: r[XX:YY].
2625       if (NextToken.is(AsmToken::LBrac))
2626         return true;
2627     }
2628   }
2629 
2630   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2631 }
2632 
2633 bool
2634 AMDGPUAsmParser::isRegister()
2635 {
2636   return isRegister(getToken(), peekToken());
2637 }
2638 
2639 unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2640                                         unsigned SubReg, unsigned RegWidth,
2641                                         SMLoc Loc) {
2642   assert(isRegularReg(RegKind));
2643 
2644   unsigned AlignSize = 1;
2645   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2646     // SGPR and TTMP registers must be aligned.
2647     // Max required alignment is 4 dwords.
2648     AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2649   }
2650 
2651   if (RegNum % AlignSize != 0) {
2652     Error(Loc, "invalid register alignment");
2653     return AMDGPU::NoRegister;
2654   }
2655 
2656   unsigned RegIdx = RegNum / AlignSize;
2657   int RCID = getRegClass(RegKind, RegWidth);
2658   if (RCID == -1) {
2659     Error(Loc, "invalid or unsupported register size");
2660     return AMDGPU::NoRegister;
2661   }
2662 
2663   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2664   const MCRegisterClass RC = TRI->getRegClass(RCID);
2665   if (RegIdx >= RC.getNumRegs()) {
2666     Error(Loc, "register index is out of range");
2667     return AMDGPU::NoRegister;
2668   }
2669 
2670   unsigned Reg = RC.getRegister(RegIdx);
2671 
2672   if (SubReg) {
2673     Reg = TRI->getSubReg(Reg, SubReg);
2674 
2675     // Currently all regular registers have their .l and .h subregisters, so
2676     // we should never need to generate an error here.
2677     assert(Reg && "Invalid subregister!");
2678   }
2679 
2680   return Reg;
2681 }
2682 
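     // Parse a bracketed register index range such as "[0:3]"; a single
     // index like "[5]" is also accepted. On success, Num holds the first
     // index and RegWidth the total width in bits.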
2683 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2684   int64_t RegLo, RegHi;
2685   if (!skipToken(AsmToken::LBrac, "missing register index"))
2686     return false;
2687 
2688   SMLoc FirstIdxLoc = getLoc();
2689   SMLoc SecondIdxLoc;
2690 
2691   if (!parseExpr(RegLo))
2692     return false;
2693 
2694   if (trySkipToken(AsmToken::Colon)) {
2695     SecondIdxLoc = getLoc();
2696     if (!parseExpr(RegHi))
2697       return false;
2698   } else {
2699     RegHi = RegLo;
2700   }
2701 
2702   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2703     return false;
2704 
2705   if (!isUInt<32>(RegLo)) {
2706     Error(FirstIdxLoc, "invalid register index");
2707     return false;
2708   }
2709 
2710   if (!isUInt<32>(RegHi)) {
2711     Error(SecondIdxLoc, "invalid register index");
2712     return false;
2713   }
2714 
2715   if (RegLo > RegHi) {
2716     Error(FirstIdxLoc, "first register index should not exceed second index");
2717     return false;
2718   }
2719 
2720   Num = static_cast<unsigned>(RegLo);
2721   RegWidth = 32 * ((RegHi - RegLo) + 1);
2722   return true;
2723 }
2724 
2725 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2726                                           unsigned &RegNum, unsigned &RegWidth,
2727                                           SmallVectorImpl<AsmToken> &Tokens) {
2728   assert(isToken(AsmToken::Identifier));
2729   unsigned Reg = getSpecialRegForName(getTokenStr());
2730   if (Reg) {
2731     RegNum = 0;
2732     RegWidth = 32;
2733     RegKind = IS_SPECIAL;
2734     Tokens.push_back(getToken());
2735     lex(); // skip register name
2736   }
2737   return Reg;
2738 }
2739 
2740 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2741                                           unsigned &RegNum, unsigned &RegWidth,
2742                                           SmallVectorImpl<AsmToken> &Tokens) {
2743   assert(isToken(AsmToken::Identifier));
2744   StringRef RegName = getTokenStr();
2745   auto Loc = getLoc();
2746 
2747   const RegInfo *RI = getRegularRegInfo(RegName);
2748   if (!RI) {
2749     Error(Loc, "invalid register name");
2750     return AMDGPU::NoRegister;
2751   }
2752 
2753   Tokens.push_back(getToken());
2754   lex(); // skip register name
2755 
2756   RegKind = RI->Kind;
2757   StringRef RegSuffix = RegName.substr(RI->Name.size());
2758   unsigned SubReg = NoSubRegister;
2759   if (!RegSuffix.empty()) {
2760     // We don't know the opcode until we are done parsing, so we don't know if
2761     // registers should be 16- or 32-bit. It is therefore mandatory to append
2762     // .l or .h to correctly specify 16-bit registers. We also can't determine
2763     // the class (VGPR_16_Lo128 or VGPR_16), so always parse them as VGPR_16.
2764     if (RegSuffix.consume_back(".l"))
2765       SubReg = AMDGPU::lo16;
2766     else if (RegSuffix.consume_back(".h"))
2767       SubReg = AMDGPU::hi16;
2768 
2769     // Single 32-bit register: vXX.
2770     if (!getRegNum(RegSuffix, RegNum)) {
2771       Error(Loc, "invalid register index");
2772       return AMDGPU::NoRegister;
2773     }
2774     RegWidth = 32;
2775   } else {
2776     // Range of registers: v[XX:YY]. ":YY" is optional.
2777     if (!ParseRegRange(RegNum, RegWidth))
2778       return AMDGPU::NoRegister;
2779   }
2780 
2781   return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2782 }
2783 
2784 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2785                                        unsigned &RegWidth,
2786                                        SmallVectorImpl<AsmToken> &Tokens) {
2787   unsigned Reg = AMDGPU::NoRegister;
2788   auto ListLoc = getLoc();
2789 
2790   if (!skipToken(AsmToken::LBrac,
2791                  "expected a register or a list of registers")) {
2792     return AMDGPU::NoRegister;
2793   }
2794 
2795   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2796 
2797   auto Loc = getLoc();
2798   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2799     return AMDGPU::NoRegister;
2800   if (RegWidth != 32) {
2801     Error(Loc, "expected a single 32-bit register");
2802     return AMDGPU::NoRegister;
2803   }
2804 
2805   for (; trySkipToken(AsmToken::Comma); ) {
2806     RegisterKind NextRegKind;
2807     unsigned NextReg, NextRegNum, NextRegWidth;
2808     Loc = getLoc();
2809 
2810     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2811                              NextRegNum, NextRegWidth,
2812                              Tokens)) {
2813       return AMDGPU::NoRegister;
2814     }
2815     if (NextRegWidth != 32) {
2816       Error(Loc, "expected a single 32-bit register");
2817       return AMDGPU::NoRegister;
2818     }
2819     if (NextRegKind != RegKind) {
2820       Error(Loc, "registers in a list must be of the same kind");
2821       return AMDGPU::NoRegister;
2822     }
2823     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2824       return AMDGPU::NoRegister;
2825   }
2826 
2827   if (!skipToken(AsmToken::RBrac,
2828                  "expected a comma or a closing square bracket")) {
2829     return AMDGPU::NoRegister;
2830   }
2831 
2832   if (isRegularReg(RegKind))
2833     Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
2834 
2835   return Reg;
2836 }
2837 
2838 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2839                                           unsigned &RegNum, unsigned &RegWidth,
2840                                           SmallVectorImpl<AsmToken> &Tokens) {
2841   auto Loc = getLoc();
2842   Reg = AMDGPU::NoRegister;
2843 
2844   if (isToken(AsmToken::Identifier)) {
2845     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2846     if (Reg == AMDGPU::NoRegister)
2847       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2848   } else {
2849     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2850   }
2851 
2852   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2853   if (Reg == AMDGPU::NoRegister) {
2854     assert(Parser.hasPendingError());
2855     return false;
2856   }
2857 
2858   if (!subtargetHasRegister(*TRI, Reg)) {
2859     if (Reg == AMDGPU::SGPR_NULL) {
2860       Error(Loc, "'null' operand is not supported on this GPU");
2861     } else {
2862       Error(Loc, "register not available on this GPU");
2863     }
2864     return false;
2865   }
2866 
2867   return true;
2868 }
2869 
2870 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2871                                           unsigned &RegNum, unsigned &RegWidth,
2872                                           bool RestoreOnFailure /*=false*/) {
2873   Reg = AMDGPU::NoRegister;
2874 
2875   SmallVector<AsmToken, 1> Tokens;
2876   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2877     if (RestoreOnFailure) {
2878       while (!Tokens.empty()) {
2879         getLexer().UnLex(Tokens.pop_back_val());
2880       }
2881     }
2882     return true;
2883   }
2884   return false;
2885 }
2886 
2887 std::optional<StringRef>
2888 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2889   switch (RegKind) {
2890   case IS_VGPR:
2891     return StringRef(".amdgcn.next_free_vgpr");
2892   case IS_SGPR:
2893     return StringRef(".amdgcn.next_free_sgpr");
2894   default:
2895     return std::nullopt;
2896   }
2897 }
2898 
2899 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2900   auto SymbolName = getGprCountSymbolName(RegKind);
2901   assert(SymbolName && "initializing invalid register kind");
2902   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2903   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2904 }
2905 
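     // Record a register use by raising .amdgcn.next_free_{v,s}gpr to one
     // past the highest dword index touched by this register, if that
     // exceeds the symbol's current value.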
2906 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2907                                             unsigned DwordRegIndex,
2908                                             unsigned RegWidth) {
2909   // Symbols are only defined for GCN targets
2910   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2911     return true;
2912 
2913   auto SymbolName = getGprCountSymbolName(RegKind);
2914   if (!SymbolName)
2915     return true;
2916   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2917 
2918   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2919   int64_t OldCount;
2920 
2921   if (!Sym->isVariable())
2922     return !Error(getLoc(),
2923                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2924   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2925     return !Error(
2926         getLoc(),
2927         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2928 
2929   if (OldCount <= NewMax)
2930     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2931 
2932   return true;
2933 }
2934 
2935 std::unique_ptr<AMDGPUOperand>
2936 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2937   const auto &Tok = getToken();
2938   SMLoc StartLoc = Tok.getLoc();
2939   SMLoc EndLoc = Tok.getEndLoc();
2940   RegisterKind RegKind;
2941   unsigned Reg, RegNum, RegWidth;
2942 
2943   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2944     return nullptr;
2945   }
2946   if (isHsaAbi(getSTI())) {
2947     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2948       return nullptr;
2949   } else
2950     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2951   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2952 }
2953 
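     // Parse an immediate operand: an optional lit(...) wrapper around a
     // literal, a floating-point literal with an optional leading '-', or an
     // integer/MC expression.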
2954 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
2955                                       bool HasSP3AbsModifier, bool HasLit) {
2956   // TODO: add syntactic sugar for 1/(2*PI)
2957 
2958   if (isRegister())
2959     return ParseStatus::NoMatch;
2960   assert(!isModifier());
2961 
2962   if (!HasLit) {
2963     HasLit = trySkipId("lit");
2964     if (HasLit) {
2965       if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
2966         return ParseStatus::Failure;
2967       ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
2968       if (S.isSuccess() &&
2969           !skipToken(AsmToken::RParen, "expected closing parentheses"))
2970         return ParseStatus::Failure;
2971       return S;
2972     }
2973   }
2974 
2975   const auto& Tok = getToken();
2976   const auto& NextTok = peekToken();
2977   bool IsReal = Tok.is(AsmToken::Real);
2978   SMLoc S = getLoc();
2979   bool Negate = false;
2980 
2981   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2982     lex();
2983     IsReal = true;
2984     Negate = true;
2985   }
2986 
2987   AMDGPUOperand::Modifiers Mods;
2988   Mods.Lit = HasLit;
2989 
2990   if (IsReal) {
2991     // Floating-point expressions are not supported.
2992     // Only floating-point literals with an
2993     // optional sign are allowed here.
2994 
2995     StringRef Num = getTokenStr();
2996     lex();
2997 
2998     APFloat RealVal(APFloat::IEEEdouble());
2999     auto roundMode = APFloat::rmNearestTiesToEven;
3000     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3001       return ParseStatus::Failure;
3002     if (Negate)
3003       RealVal.changeSign();
3004 
3005     Operands.push_back(
3006       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3007                                AMDGPUOperand::ImmTyNone, true));
3008     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3009     Op.setModifiers(Mods);
3010 
3011     return ParseStatus::Success;
3012 
3013   } else {
3014     int64_t IntVal;
3015     const MCExpr *Expr;
3016     SMLoc S = getLoc();
3017 
3018     if (HasSP3AbsModifier) {
3019       // This is a workaround for handling expressions
3020       // as arguments of the SP3 'abs' modifier, for example:
3021       //     |1.0|
3022       //     |-1|
3023       //     |1+x|
3024       // This syntax is not compatible with the syntax of standard
3025       // MC expressions (due to the trailing '|').
3026       SMLoc EndLoc;
3027       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3028         return ParseStatus::Failure;
3029     } else {
3030       if (Parser.parseExpression(Expr))
3031         return ParseStatus::Failure;
3032     }
3033 
3034     if (Expr->evaluateAsAbsolute(IntVal)) {
3035       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3036       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3037       Op.setModifiers(Mods);
3038     } else {
3039       if (HasLit)
3040         return ParseStatus::NoMatch;
3041       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3042     }
3043 
3044     return ParseStatus::Success;
3045   }
3046 
3047   return ParseStatus::NoMatch;
3048 }
3049 
3050 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3051   if (!isRegister())
3052     return ParseStatus::NoMatch;
3053 
3054   if (auto R = parseRegister()) {
3055     assert(R->isReg());
3056     Operands.push_back(std::move(R));
3057     return ParseStatus::Success;
3058   }
3059   return ParseStatus::Failure;
3060 }
3061 
3062 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3063                                            bool HasSP3AbsMod, bool HasLit) {
3064   ParseStatus Res = parseReg(Operands);
3065   if (!Res.isNoMatch())
3066     return Res;
3067   if (isModifier())
3068     return ParseStatus::NoMatch;
3069   return parseImm(Operands, HasSP3AbsMod, HasLit);
3070 }
3071 
3072 bool
3073 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3074   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3075     const auto &str = Token.getString();
3076     return str == "abs" || str == "neg" || str == "sext";
3077   }
3078   return false;
3079 }
3080 
3081 bool
3082 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3083   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3084 }
3085 
3086 bool
3087 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3088   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3089 }
3090 
3091 bool
3092 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3093   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3094 }
3095 
3096 // Check if this is an operand modifier or an opcode modifier
3097 // which may look like an expression but is not. We should
3098 // avoid parsing these modifiers as expressions. Currently
3099 // recognized sequences are:
3100 //   |...|
3101 //   abs(...)
3102 //   neg(...)
3103 //   sext(...)
3104 //   -reg
3105 //   -|...|
3106 //   -abs(...)
3107 //   name:...
3108 //
3109 bool
3110 AMDGPUAsmParser::isModifier() {
3111 
3112   AsmToken Tok = getToken();
3113   AsmToken NextToken[2];
3114   peekTokens(NextToken);
3115 
3116   return isOperandModifier(Tok, NextToken[0]) ||
3117          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3118          isOpcodeModifierWithVal(Tok, NextToken[0]);
3119 }
3120 
3121 // Check if the current token is an SP3 'neg' modifier.
3122 // Currently this modifier is allowed in the following contexts:
3123 //
3124 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3125 // 2. Before an 'abs' modifier: -abs(...)
3126 // 3. Before an SP3 'abs' modifier: -|...|
3127 //
3128 // In all other cases "-" is handled as a part
3129 // of an expression that follows the sign.
3130 //
3131 // Note: When "-" is followed by an integer literal,
3132 // this is interpreted as integer negation rather
3133 // than a floating-point NEG modifier applied to N.
3134 // Besides being counter-intuitive, such use of the floating-point
3135 // NEG modifier would have resulted in different meanings
3136 // of integer literals used with VOP1/2/C and VOP3,
3137 // for example:
3138 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3139 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3140 // Negative fp literals with a preceding "-" are
3141 // handled likewise, for uniformity.
3142 //
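// Illustrative examples (assumed SP3-style syntax, derived from the rules
// above):
//    v_add_f32 v0, -v1, v2         // case 1: neg of a register
//    v_add_f32 v0, -abs(v1), v2    // case 2: neg of an 'abs' modifier
//    v_add_f32 v0, -|v1|, v2       // case 3: neg of an SP3 'abs'
//    v_add_f32 v0, -2, v2          // "-" is part of the literal -2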
3143 bool
3144 AMDGPUAsmParser::parseSP3NegModifier() {
3145 
3146   AsmToken NextToken[2];
3147   peekTokens(NextToken);
3148 
3149   if (isToken(AsmToken::Minus) &&
3150       (isRegister(NextToken[0], NextToken[1]) ||
3151        NextToken[0].is(AsmToken::Pipe) ||
3152        isId(NextToken[0], "abs"))) {
3153     lex();
3154     return true;
3155   }
3156 
3157   return false;
3158 }
3159 
3160 ParseStatus
3161 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3162                                               bool AllowImm) {
3163   bool Neg, SP3Neg;
3164   bool Abs, SP3Abs;
3165   bool Lit;
3166   SMLoc Loc;
3167 
3168   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3169   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3170     return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3171 
3172   SP3Neg = parseSP3NegModifier();
3173 
3174   Loc = getLoc();
3175   Neg = trySkipId("neg");
3176   if (Neg && SP3Neg)
3177     return Error(Loc, "expected register or immediate");
3178   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3179     return ParseStatus::Failure;
3180 
3181   Abs = trySkipId("abs");
3182   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3183     return ParseStatus::Failure;
3184 
3185   Lit = trySkipId("lit");
3186   if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3187     return ParseStatus::Failure;
3188 
3189   Loc = getLoc();
3190   SP3Abs = trySkipToken(AsmToken::Pipe);
3191   if (Abs && SP3Abs)
3192     return Error(Loc, "expected register or immediate");
3193 
3194   ParseStatus Res;
3195   if (AllowImm) {
3196     Res = parseRegOrImm(Operands, SP3Abs, Lit);
3197   } else {
3198     Res = parseReg(Operands);
3199   }
3200   if (!Res.isSuccess())
3201     return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3202 
3203   if (Lit && !Operands.back()->isImm())
3204     Error(Loc, "expected immediate with lit modifier");
3205 
3206   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3207     return ParseStatus::Failure;
3208   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3209     return ParseStatus::Failure;
3210   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3211     return ParseStatus::Failure;
3212   if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3213     return ParseStatus::Failure;
3214 
3215   AMDGPUOperand::Modifiers Mods;
3216   Mods.Abs = Abs || SP3Abs;
3217   Mods.Neg = Neg || SP3Neg;
3218   Mods.Lit = Lit;
3219 
3220   if (Mods.hasFPModifiers() || Lit) {
3221     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3222     if (Op.isExpr())
3223       return Error(Op.getStartLoc(), "expected an absolute expression");
3224     Op.setModifiers(Mods);
3225   }
3226   return ParseStatus::Success;
3227 }
3228 
3229 ParseStatus
3230 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3231                                                bool AllowImm) {
3232   bool Sext = trySkipId("sext");
3233   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3234     return ParseStatus::Failure;
3235 
3236   ParseStatus Res;
3237   if (AllowImm) {
3238     Res = parseRegOrImm(Operands);
3239   } else {
3240     Res = parseReg(Operands);
3241   }
3242   if (!Res.isSuccess())
3243     return Sext ? ParseStatus::Failure : Res;
3244 
3245   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3246     return ParseStatus::Failure;
3247 
3248   AMDGPUOperand::Modifiers Mods;
3249   Mods.Sext = Sext;
3250 
3251   if (Mods.hasIntModifiers()) {
3252     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3253     if (Op.isExpr())
3254       return Error(Op.getStartLoc(), "expected an absolute expression");
3255     Op.setModifiers(Mods);
3256   }
3257 
3258   return ParseStatus::Success;
3259 }
3260 
3261 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3262   return parseRegOrImmWithFPInputMods(Operands, false);
3263 }
3264 
3265 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3266   return parseRegOrImmWithIntInputMods(Operands, false);
3267 }
3268 
3269 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3270   auto Loc = getLoc();
3271   if (trySkipId("off")) {
3272     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3273                                                 AMDGPUOperand::ImmTyOff, false));
3274     return ParseStatus::Success;
3275   }
3276 
3277   if (!isRegister())
3278     return ParseStatus::NoMatch;
3279 
3280   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3281   if (Reg) {
3282     Operands.push_back(std::move(Reg));
3283     return ParseStatus::Success;
3284   }
3285 
3286   return ParseStatus::Failure;
3287 }
3288 
3289 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3290   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3291 
3292   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3293       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3294       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3295       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3296     return Match_InvalidOperand;
3297 
3298   if ((TSFlags & SIInstrFlags::VOP3) &&
3299       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3300       getForcedEncodingSize() != 64)
3301     return Match_PreferE32;
3302 
3303   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3304       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3305     // v_mac_f32/16 allow only dst_sel == DWORD;
3306     auto OpNum =
3307         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3308     const auto &Op = Inst.getOperand(OpNum);
3309     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3310       return Match_InvalidOperand;
3311     }
3312   }
3313 
3314   return Match_Success;
3315 }
3316 
3317 static ArrayRef<unsigned> getAllVariants() {
3318   static const unsigned Variants[] = {
3319     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3320     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3321     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3322   };
3323 
3324   return ArrayRef(Variants);
3325 }
3326 
3327 // Which asm variants we should check
3328 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3329   if (isForcedDPP() && isForcedVOP3()) {
3330     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3331     return ArrayRef(Variants);
3332   }
3333   if (getForcedEncodingSize() == 32) {
3334     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3335     return ArrayRef(Variants);
3336   }
3337 
3338   if (isForcedVOP3()) {
3339     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3340     return ArrayRef(Variants);
3341   }
3342 
3343   if (isForcedSDWA()) {
3344     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3345                                         AMDGPUAsmVariants::SDWA9};
3346     return ArrayRef(Variants);
3347   }
3348 
3349   if (isForcedDPP()) {
3350     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3351     return ArrayRef(Variants);
3352   }
3353 
3354   return getAllVariants();
3355 }
3356 
3357 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3358   if (isForcedDPP() && isForcedVOP3())
3359     return "e64_dpp";
3360 
3361   if (getForcedEncodingSize() == 32)
3362     return "e32";
3363 
3364   if (isForcedVOP3())
3365     return "e64";
3366 
3367   if (isForcedSDWA())
3368     return "sdwa";
3369 
3370   if (isForcedDPP())
3371     return "dpp";
3372 
3373   return "";
3374 }
3375 
3376 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3377   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3378   for (MCPhysReg Reg : Desc.implicit_uses()) {
3379     switch (Reg) {
3380     case AMDGPU::FLAT_SCR:
3381     case AMDGPU::VCC:
3382     case AMDGPU::VCC_LO:
3383     case AMDGPU::VCC_HI:
3384     case AMDGPU::M0:
3385       return Reg;
3386     default:
3387       break;
3388     }
3389   }
3390   return AMDGPU::NoRegister;
3391 }
3392 
3393 // NB: This code is correct only when used to check constant
3394 // bus limitations because GFX7 supports no f16 inline constants.
3395 // Note that there are no cases when a GFX7 opcode violates
3396 // constant bus limitations due to the use of an f16 constant.
3397 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3398                                        unsigned OpIdx) const {
3399   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3400 
3401   if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3402       AMDGPU::isKImmOperand(Desc, OpIdx)) {
3403     return false;
3404   }
3405 
3406   const MCOperand &MO = Inst.getOperand(OpIdx);
3407 
3408   int64_t Val = MO.getImm();
3409   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3410 
3411   switch (OpSize) { // expected operand size
3412   case 8:
3413     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3414   case 4:
3415     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3416   case 2: {
3417     const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3418     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3419         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3420         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3421       return AMDGPU::isInlinableIntLiteral(Val);
3422 
3423     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3424         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3425         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3426       return AMDGPU::isInlinableLiteralV2I16(Val);
3427 
3428     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3429         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3430         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3431       return AMDGPU::isInlinableLiteralV2F16(Val);
3432 
3433     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3434   }
3435   default:
3436     llvm_unreachable("invalid operand size");
3437   }
3438 }
3439 
3440 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3441   if (!isGFX10Plus())
3442     return 1;
3443 
3444   switch (Opcode) {
3445   // 64-bit shift instructions can use only one scalar value input
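  // For illustration (assumed syntax): "v_lshlrev_b64 v[0:1], s0, v[2:3]"
  // uses its single allowed scalar source; a second SGPR or literal source
  // would exceed the limit.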
3446   case AMDGPU::V_LSHLREV_B64_e64:
3447   case AMDGPU::V_LSHLREV_B64_gfx10:
3448   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3449   case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3450   case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3451   case AMDGPU::V_LSHRREV_B64_e64:
3452   case AMDGPU::V_LSHRREV_B64_gfx10:
3453   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3454   case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3455   case AMDGPU::V_ASHRREV_I64_e64:
3456   case AMDGPU::V_ASHRREV_I64_gfx10:
3457   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3458   case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3459   case AMDGPU::V_LSHL_B64_e64:
3460   case AMDGPU::V_LSHR_B64_e64:
3461   case AMDGPU::V_ASHR_I64_e64:
3462     return 1;
3463   default:
3464     return 2;
3465   }
3466 }
3467 
3468 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3469 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3470 
3471 // Get regular operand indices in the same order as specified
3472 // in the instruction (but append mandatory literals to the end).
3473 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3474                                            bool AddMandatoryLiterals = false) {
3475 
3476   int16_t ImmIdx =
3477       AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3478 
3479   if (isVOPD(Opcode)) {
3480     int16_t ImmDeferredIdx =
3481         AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3482                              : -1;
3483 
3484     return {getNamedOperandIdx(Opcode, OpName::src0X),
3485             getNamedOperandIdx(Opcode, OpName::vsrc1X),
3486             getNamedOperandIdx(Opcode, OpName::src0Y),
3487             getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3488             ImmDeferredIdx,
3489             ImmIdx};
3490   }
3491 
3492   return {getNamedOperandIdx(Opcode, OpName::src0),
3493           getNamedOperandIdx(Opcode, OpName::src1),
3494           getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3495 }
3496 
3497 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3498   const MCOperand &MO = Inst.getOperand(OpIdx);
3499   if (MO.isImm()) {
3500     return !isInlineConstant(Inst, OpIdx);
3501   } else if (MO.isReg()) {
3502     auto Reg = MO.getReg();
3503     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3504     auto PReg = mc2PseudoReg(Reg);
3505     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3506   } else {
3507     return true;
3508   }
3509 }
3510 
3511 bool AMDGPUAsmParser::validateConstantBusLimitations(
3512     const MCInst &Inst, const OperandVector &Operands) {
3513   const unsigned Opcode = Inst.getOpcode();
3514   const MCInstrDesc &Desc = MII.get(Opcode);
3515   unsigned LastSGPR = AMDGPU::NoRegister;
3516   unsigned ConstantBusUseCount = 0;
3517   unsigned NumLiterals = 0;
3518   unsigned LiteralSize;
3519 
3520   if (!(Desc.TSFlags &
3521         (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3522          SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3523       !isVOPD(Opcode))
3524     return true;
3525 
3526   // Check special imm operands (used by madmk, etc)
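  // For illustration (assumed syntax): "v_madmk_f32 v0, v1, 0x41200000, v2"
  // always encodes a 32-bit literal, so it is pre-counted here.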
3527   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3528     ++NumLiterals;
3529     LiteralSize = 4;
3530   }
3531 
3532   SmallDenseSet<unsigned> SGPRsUsed;
3533   unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3534   if (SGPRUsed != AMDGPU::NoRegister) {
3535     SGPRsUsed.insert(SGPRUsed);
3536     ++ConstantBusUseCount;
3537   }
3538 
3539   OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3540 
3541   for (int OpIdx : OpIndices) {
3542     if (OpIdx == -1)
3543       continue;
3544 
3545     const MCOperand &MO = Inst.getOperand(OpIdx);
3546     if (usesConstantBus(Inst, OpIdx)) {
3547       if (MO.isReg()) {
3548         LastSGPR = mc2PseudoReg(MO.getReg());
3549         // Pairs of registers with partial intersections like these
3550         //   s0, s[0:1]
3551         //   flat_scratch_lo, flat_scratch
3552         //   flat_scratch_lo, flat_scratch_hi
3553         // are theoretically valid but they are disabled anyway.
3554         // Note that this code mimics SIInstrInfo::verifyInstruction
3555         if (SGPRsUsed.insert(LastSGPR).second) {
3556           ++ConstantBusUseCount;
3557         }
3558       } else { // Expression or a literal
3559 
3560         if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3561           continue; // special operand like VINTERP attr_chan
3562 
3563         // An instruction may use only one literal.
3564         // This has been validated in a previous step.
3565         // See validateVOPLiteral.
3566         // This literal may be used as more than one operand.
3567         // If all these operands are of the same size,
3568         // this literal counts as one scalar value.
3569         // Otherwise it counts as 2 scalar values.
3570         // See "GFX10 Shader Programming", section 3.6.2.3.
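        // Illustrative example (assumed, not taken from the ISA text): a
        // 32-bit literal such as 0x3f800000 used for both src0 and src1
        // counts as one constant bus use, while the same value reused by a
        // 32-bit and a 64-bit operand would count as two.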
3571 
3572         unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3573         if (Size < 4)
3574           Size = 4;
3575 
3576         if (NumLiterals == 0) {
3577           NumLiterals = 1;
3578           LiteralSize = Size;
3579         } else if (LiteralSize != Size) {
3580           NumLiterals = 2;
3581         }
3582       }
3583     }
3584   }
3585   ConstantBusUseCount += NumLiterals;
3586 
3587   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3588     return true;
3589 
3590   SMLoc LitLoc = getLitLoc(Operands);
3591   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3592   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3593   Error(Loc, "invalid operand (violates constant bus restrictions)");
3594   return false;
3595 }
3596 
3597 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3598     const MCInst &Inst, const OperandVector &Operands) {
3599 
3600   const unsigned Opcode = Inst.getOpcode();
3601   if (!isVOPD(Opcode))
3602     return true;
3603 
3604   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3605 
3606   auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3607     const MCOperand &Opr = Inst.getOperand(OperandIdx);
3608     return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3609                ? Opr.getReg()
3610                : MCRegister::NoRegister;
3611   };
3612 
3613   // On GFX12, if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2 source-cache.
3614   bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3615 
3616   const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3617   auto InvalidCompOprIdx =
3618       InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3619   if (!InvalidCompOprIdx)
3620     return true;
3621 
3622   auto CompOprIdx = *InvalidCompOprIdx;
3623   auto ParsedIdx =
3624       std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3625                InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3626   assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3627 
3628   auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3629   if (CompOprIdx == VOPD::Component::DST) {
3630     Error(Loc, "one dst register must be even and the other odd");
3631   } else {
3632     auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3633     Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3634                    " operands must use different VGPR banks");
3635   }
3636 
3637   return false;
3638 }
3639 
3640 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3641 
3642   const unsigned Opc = Inst.getOpcode();
3643   const MCInstrDesc &Desc = MII.get(Opc);
3644 
3645   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3646     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3647     assert(ClampIdx != -1);
3648     return Inst.getOperand(ClampIdx).getImm() == 0;
3649   }
3650 
3651   return true;
3652 }
3653 
3654 constexpr uint64_t MIMGFlags =
3655     SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3656 
3657 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3658                                            const SMLoc &IDLoc) {
3659 
3660   const unsigned Opc = Inst.getOpcode();
3661   const MCInstrDesc &Desc = MII.get(Opc);
3662 
3663   if ((Desc.TSFlags & MIMGFlags) == 0)
3664     return true;
3665 
3666   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3667   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3668   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3669 
3670   assert(VDataIdx != -1);
3671 
3672   if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3673     return true;
3674 
3675   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3676   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3677   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3678   if (DMask == 0)
3679     DMask = 1;
3680 
3681   bool IsPackedD16 = false;
3682   unsigned DataSize =
3683       (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3684   if (hasPackedD16()) {
3685     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3686     IsPackedD16 = D16Idx >= 0;
3687     if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3688       DataSize = (DataSize + 1) / 2;
3689   }
3690 
3691   if ((VDataSize / 4) == DataSize + TFESize)
3692     return true;
3693 
3694   StringRef Modifiers;
3695   if (isGFX90A())
3696     Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3697   else
3698     Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3699 
3700   Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3701   return false;
3702 }
3703 
3704 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3705                                            const SMLoc &IDLoc) {
3706   const unsigned Opc = Inst.getOpcode();
3707   const MCInstrDesc &Desc = MII.get(Opc);
3708 
3709   if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3710     return true;
3711 
3712   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3713 
3714   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3715       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3716   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3717   int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
3718                                                      : AMDGPU::OpName::rsrc;
3719   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3720   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3721   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3722 
3723   assert(VAddr0Idx != -1);
3724   assert(SrsrcIdx != -1);
3725   assert(SrsrcIdx > VAddr0Idx);
3726 
3727   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3728   if (BaseOpcode->BVH) {
3729     if (IsA16 == BaseOpcode->A16)
3730       return true;
3731     Error(IDLoc, "image address size does not match a16");
3732     return false;
3733   }
3734 
3735   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3736   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3737   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3738   unsigned ActualAddrSize =
3739       IsNSA ? SrsrcIdx - VAddr0Idx
3740             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3741 
3742   unsigned ExpectedAddrSize =
3743       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3744 
3745   if (IsNSA) {
3746     if (hasPartialNSAEncoding() &&
3747         ExpectedAddrSize >
3748             getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3749       int VAddrLastIdx = SrsrcIdx - 1;
3750       unsigned VAddrLastSize =
3751           AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3752 
3753       ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3754     }
3755   } else {
3756     if (ExpectedAddrSize > 12)
3757       ExpectedAddrSize = 16;
3758 
3759     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3760     // This provides backward compatibility for assembly created
3761     // before 160b/192b/224b types were directly supported.
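    // For illustration: an instruction that needs only 6 address registers
    // may still be written with an 8-register vaddr such as v[0:7] (an
    // assumed example based on the check below).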
3762     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3763       return true;
3764   }
3765 
3766   if (ActualAddrSize == ExpectedAddrSize)
3767     return true;
3768 
3769   Error(IDLoc, "image address size does not match dim and a16");
3770   return false;
3771 }
3772 
3773 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3774 
3775   const unsigned Opc = Inst.getOpcode();
3776   const MCInstrDesc &Desc = MII.get(Opc);
3777 
3778   if ((Desc.TSFlags & MIMGFlags) == 0)
3779     return true;
3780   if (!Desc.mayLoad() || !Desc.mayStore())
3781     return true; // Not atomic
3782 
3783   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3784   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3785 
3786   // This is an incomplete check because image_atomic_cmpswap
3787   // may only use 0x3 and 0xf while other atomic operations
3788   // may use 0x1 and 0x3. However, these limitations are
3789   // verified when we check that dmask matches the dst size.
3790   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3791 }
3792 
3793 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3794 
3795   const unsigned Opc = Inst.getOpcode();
3796   const MCInstrDesc &Desc = MII.get(Opc);
3797 
3798   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3799     return true;
3800 
3801   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3802   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3803 
3804   // GATHER4 instructions use dmask in a different fashion compared to
3805   // other MIMG instructions. The only useful DMASK values are
3806   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3807   // (red,red,red,red) etc.) The ISA document doesn't mention
3808   // this.
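  // For illustration: dmask:0x2 makes image_gather4 return the green
  // component of the four sampled texels in the four channels of the
  // result, analogous to the dmask:0x1 example above.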
3809   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3810 }
3811 
3812 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3813   const unsigned Opc = Inst.getOpcode();
3814   const MCInstrDesc &Desc = MII.get(Opc);
3815 
3816   if ((Desc.TSFlags & MIMGFlags) == 0)
3817     return true;
3818 
3819   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3820   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3821       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3822 
3823   if (!BaseOpcode->MSAA)
3824     return true;
3825 
3826   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3827   assert(DimIdx != -1);
3828 
3829   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3830   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3831 
3832   return DimInfo->MSAA;
3833 }
3834 
3835 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3836 {
3837   switch (Opcode) {
3838   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3839   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3840   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3841     return true;
3842   default:
3843     return false;
3844   }
3845 }
3846 
3847 // movrels* opcodes should only allow VGPRs as src0.
3848 // This is specified in the .td description for vop1/vop3,
3849 // but sdwa is handled differently. See isSDWAOperand.
3850 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3851                                       const OperandVector &Operands) {
3852 
3853   const unsigned Opc = Inst.getOpcode();
3854   const MCInstrDesc &Desc = MII.get(Opc);
3855 
3856   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3857     return true;
3858 
3859   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3860   assert(Src0Idx != -1);
3861 
3862   SMLoc ErrLoc;
3863   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3864   if (Src0.isReg()) {
3865     auto Reg = mc2PseudoReg(Src0.getReg());
3866     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3867     if (!isSGPR(Reg, TRI))
3868       return true;
3869     ErrLoc = getRegLoc(Reg, Operands);
3870   } else {
3871     ErrLoc = getConstLoc(Operands);
3872   }
3873 
3874   Error(ErrLoc, "source operand must be a VGPR");
3875   return false;
3876 }
3877 
3878 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3879                                           const OperandVector &Operands) {
3880 
3881   const unsigned Opc = Inst.getOpcode();
3882 
3883   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3884     return true;
3885 
3886   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3887   assert(Src0Idx != -1);
3888 
3889   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3890   if (!Src0.isReg())
3891     return true;
3892 
3893   auto Reg = mc2PseudoReg(Src0.getReg());
3894   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3895   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3896     Error(getRegLoc(Reg, Operands),
3897           "source operand must be either a VGPR or an inline constant");
3898     return false;
3899   }
3900 
3901   return true;
3902 }
3903 
3904 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3905                                       const OperandVector &Operands) {
3906   unsigned Opcode = Inst.getOpcode();
3907   const MCInstrDesc &Desc = MII.get(Opcode);
3908 
3909   if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3910       !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3911     return true;
3912 
3913   const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3914   if (Src2Idx == -1)
3915     return true;
3916 
3917   if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3918     Error(getConstLoc(Operands),
3919           "inline constants are not allowed for this operand");
3920     return false;
3921   }
3922 
3923   return true;
3924 }
3925 
3926 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3927                                    const OperandVector &Operands) {
3928   const unsigned Opc = Inst.getOpcode();
3929   const MCInstrDesc &Desc = MII.get(Opc);
3930 
3931   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3932     return true;
3933 
3934   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3935   if (Src2Idx == -1)
3936     return true;
3937 
3938   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3939   if (!Src2.isReg())
3940     return true;
3941 
3942   MCRegister Src2Reg = Src2.getReg();
3943   MCRegister DstReg = Inst.getOperand(0).getReg();
3944   if (Src2Reg == DstReg)
3945     return true;
3946 
3947   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3948   if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3949     return true;
3950 
3951   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3952     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3953           "source 2 operand must not partially overlap with dst");
3954     return false;
3955   }
3956 
3957   return true;
3958 }
3959 
3960 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3961   switch (Inst.getOpcode()) {
3962   default:
3963     return true;
3964   case V_DIV_SCALE_F32_gfx6_gfx7:
3965   case V_DIV_SCALE_F32_vi:
3966   case V_DIV_SCALE_F32_gfx10:
3967   case V_DIV_SCALE_F64_gfx6_gfx7:
3968   case V_DIV_SCALE_F64_vi:
3969   case V_DIV_SCALE_F64_gfx10:
3970     break;
3971   }
3972 
3973   // TODO: Check that src0 = src1 or src2.
3974 
3975   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3976                     AMDGPU::OpName::src1_modifiers,
3977                     AMDGPU::OpName::src2_modifiers}) {
3978     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3979             .getImm() &
3980         SISrcMods::ABS) {
3981       return false;
3982     }
3983   }
3984 
3985   return true;
3986 }
3987 
3988 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3989 
3990   const unsigned Opc = Inst.getOpcode();
3991   const MCInstrDesc &Desc = MII.get(Opc);
3992 
3993   if ((Desc.TSFlags & MIMGFlags) == 0)
3994     return true;
3995 
3996   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3997   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3998     if (isCI() || isSI())
3999       return false;
4000   }
4001 
4002   return true;
4003 }
4004 
4005 static bool IsRevOpcode(const unsigned Opcode)
4006 {
4007   switch (Opcode) {
4008   case AMDGPU::V_SUBREV_F32_e32:
4009   case AMDGPU::V_SUBREV_F32_e64:
4010   case AMDGPU::V_SUBREV_F32_e32_gfx10:
4011   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4012   case AMDGPU::V_SUBREV_F32_e32_vi:
4013   case AMDGPU::V_SUBREV_F32_e64_gfx10:
4014   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4015   case AMDGPU::V_SUBREV_F32_e64_vi:
4016 
4017   case AMDGPU::V_SUBREV_CO_U32_e32:
4018   case AMDGPU::V_SUBREV_CO_U32_e64:
4019   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4020   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4021 
4022   case AMDGPU::V_SUBBREV_U32_e32:
4023   case AMDGPU::V_SUBBREV_U32_e64:
4024   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4025   case AMDGPU::V_SUBBREV_U32_e32_vi:
4026   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4027   case AMDGPU::V_SUBBREV_U32_e64_vi:
4028 
4029   case AMDGPU::V_SUBREV_U32_e32:
4030   case AMDGPU::V_SUBREV_U32_e64:
4031   case AMDGPU::V_SUBREV_U32_e32_gfx9:
4032   case AMDGPU::V_SUBREV_U32_e32_vi:
4033   case AMDGPU::V_SUBREV_U32_e64_gfx9:
4034   case AMDGPU::V_SUBREV_U32_e64_vi:
4035 
4036   case AMDGPU::V_SUBREV_F16_e32:
4037   case AMDGPU::V_SUBREV_F16_e64:
4038   case AMDGPU::V_SUBREV_F16_e32_gfx10:
4039   case AMDGPU::V_SUBREV_F16_e32_vi:
4040   case AMDGPU::V_SUBREV_F16_e64_gfx10:
4041   case AMDGPU::V_SUBREV_F16_e64_vi:
4042 
4043   case AMDGPU::V_SUBREV_U16_e32:
4044   case AMDGPU::V_SUBREV_U16_e64:
4045   case AMDGPU::V_SUBREV_U16_e32_vi:
4046   case AMDGPU::V_SUBREV_U16_e64_vi:
4047 
4048   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4049   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4050   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4051 
4052   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4053   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4054 
4055   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4056   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4057 
4058   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4059   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4060 
4061   case AMDGPU::V_LSHRREV_B32_e32:
4062   case AMDGPU::V_LSHRREV_B32_e64:
4063   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4064   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4065   case AMDGPU::V_LSHRREV_B32_e32_vi:
4066   case AMDGPU::V_LSHRREV_B32_e64_vi:
4067   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4068   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4069 
4070   case AMDGPU::V_ASHRREV_I32_e32:
4071   case AMDGPU::V_ASHRREV_I32_e64:
4072   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4073   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4074   case AMDGPU::V_ASHRREV_I32_e32_vi:
4075   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4076   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4077   case AMDGPU::V_ASHRREV_I32_e64_vi:
4078 
4079   case AMDGPU::V_LSHLREV_B32_e32:
4080   case AMDGPU::V_LSHLREV_B32_e64:
4081   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4082   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4083   case AMDGPU::V_LSHLREV_B32_e32_vi:
4084   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4085   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4086   case AMDGPU::V_LSHLREV_B32_e64_vi:
4087 
4088   case AMDGPU::V_LSHLREV_B16_e32:
4089   case AMDGPU::V_LSHLREV_B16_e64:
4090   case AMDGPU::V_LSHLREV_B16_e32_vi:
4091   case AMDGPU::V_LSHLREV_B16_e64_vi:
4092   case AMDGPU::V_LSHLREV_B16_gfx10:
4093 
4094   case AMDGPU::V_LSHRREV_B16_e32:
4095   case AMDGPU::V_LSHRREV_B16_e64:
4096   case AMDGPU::V_LSHRREV_B16_e32_vi:
4097   case AMDGPU::V_LSHRREV_B16_e64_vi:
4098   case AMDGPU::V_LSHRREV_B16_gfx10:
4099 
4100   case AMDGPU::V_ASHRREV_I16_e32:
4101   case AMDGPU::V_ASHRREV_I16_e64:
4102   case AMDGPU::V_ASHRREV_I16_e32_vi:
4103   case AMDGPU::V_ASHRREV_I16_e64_vi:
4104   case AMDGPU::V_ASHRREV_I16_gfx10:
4105 
4106   case AMDGPU::V_LSHLREV_B64_e64:
4107   case AMDGPU::V_LSHLREV_B64_gfx10:
4108   case AMDGPU::V_LSHLREV_B64_vi:
4109 
4110   case AMDGPU::V_LSHRREV_B64_e64:
4111   case AMDGPU::V_LSHRREV_B64_gfx10:
4112   case AMDGPU::V_LSHRREV_B64_vi:
4113 
4114   case AMDGPU::V_ASHRREV_I64_e64:
4115   case AMDGPU::V_ASHRREV_I64_gfx10:
4116   case AMDGPU::V_ASHRREV_I64_vi:
4117 
4118   case AMDGPU::V_PK_LSHLREV_B16:
4119   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4120   case AMDGPU::V_PK_LSHLREV_B16_vi:
4121 
4122   case AMDGPU::V_PK_LSHRREV_B16:
4123   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4124   case AMDGPU::V_PK_LSHRREV_B16_vi:
4125   case AMDGPU::V_PK_ASHRREV_I16:
4126   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4127   case AMDGPU::V_PK_ASHRREV_I16_vi:
4128     return true;
4129   default:
4130     return false;
4131   }
4132 }
4133 
4134 std::optional<StringRef>
4135 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4136 
4137   using namespace SIInstrFlags;
4138   const unsigned Opcode = Inst.getOpcode();
4139   const MCInstrDesc &Desc = MII.get(Opcode);
4140 
4141   // The lds_direct register is defined so that it can be used
4142   // with 9-bit operands only. Ignore encodings that do not accept these.
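  // For illustration (assumed syntax): "v_mov_b32 v0, lds_direct" uses
  // lds_direct as src0 of a VOP1 encoding; the checks below reject any
  // other placement.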
4143   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4144   if ((Desc.TSFlags & Enc) == 0)
4145     return std::nullopt;
4146 
4147   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4148     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4149     if (SrcIdx == -1)
4150       break;
4151     const auto &Src = Inst.getOperand(SrcIdx);
4152     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4153 
4154       if (isGFX90A() || isGFX11Plus())
4155         return StringRef("lds_direct is not supported on this GPU");
4156 
4157       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4158         return StringRef("lds_direct cannot be used with this instruction");
4159 
4160       if (SrcName != OpName::src0)
4161         return StringRef("lds_direct may be used as src0 only");
4162     }
4163   }
4164 
4165   return std::nullopt;
4166 }
4167 
4168 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4169   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4170     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4171     if (Op.isFlatOffset())
4172       return Op.getStartLoc();
4173   }
4174   return getLoc();
4175 }
4176 
4177 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4178                                      const OperandVector &Operands) {
4179   auto Opcode = Inst.getOpcode();
4180   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4181   if (OpNum == -1)
4182     return true;
4183 
4184   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4185   if ((TSFlags & SIInstrFlags::FLAT))
4186     return validateFlatOffset(Inst, Operands);
4187 
4188   if ((TSFlags & SIInstrFlags::SMRD))
4189     return validateSMEMOffset(Inst, Operands);
4190 
4191   const auto &Op = Inst.getOperand(OpNum);
4192   if (isGFX12Plus() &&
4193       (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4194     const unsigned OffsetSize = 24;
4195     if (!isIntN(OffsetSize, Op.getImm())) {
4196       Error(getFlatOffsetLoc(Operands),
4197             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4198       return false;
4199     }
4200   } else {
4201     const unsigned OffsetSize = 16;
4202     if (!isUIntN(OffsetSize, Op.getImm())) {
4203       Error(getFlatOffsetLoc(Operands),
4204             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4205       return false;
4206     }
4207   }
4208   return true;
4209 }
4210 
4211 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4212                                          const OperandVector &Operands) {
4213   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4214   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4215     return true;
4216 
4217   auto Opcode = Inst.getOpcode();
4218   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4219   assert(OpNum != -1);
4220 
4221   const auto &Op = Inst.getOperand(OpNum);
4222   if (!hasFlatOffsets() && Op.getImm() != 0) {
4223     Error(getFlatOffsetLoc(Operands),
4224           "flat offset modifier is not supported on this GPU");
4225     return false;
4226   }
4227 
4228   // For pre-GFX12 FLAT instructions the offset must be positive;
4229   // the MSB is ignored and forced to zero.
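  // For illustration: with an N-bit offset field, plain FLAT accepts offsets
  // in [0, 2^(N-1) - 1], while global/scratch (and GFX12+) accept signed
  // offsets in [-2^(N-1), 2^(N-1) - 1], matching the check below.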
4230   unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4231   bool AllowNegative =
4232       (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4233       isGFX12Plus();
4234   if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4235     Error(getFlatOffsetLoc(Operands),
4236           Twine("expected a ") +
4237               (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4238                              : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4239     return false;
4240   }
4241 
4242   return true;
4243 }
4244 
4245 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4246   // Start with second operand because SMEM Offset cannot be dst or src0.
4247   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4248     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4249     if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4250       return Op.getStartLoc();
4251   }
4252   return getLoc();
4253 }
4254 
4255 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4256                                          const OperandVector &Operands) {
4257   if (isCI() || isSI())
4258     return true;
4259 
4260   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4261   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4262     return true;
4263 
4264   auto Opcode = Inst.getOpcode();
4265   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4266   if (OpNum == -1)
4267     return true;
4268 
4269   const auto &Op = Inst.getOperand(OpNum);
4270   if (!Op.isImm())
4271     return true;
4272 
4273   uint64_t Offset = Op.getImm();
4274   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4275   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4276       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4277     return true;
4278 
4279   Error(getSMEMOffsetLoc(Operands),
4280         isGFX12Plus()          ? "expected a 24-bit signed offset"
4281         : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4282                                : "expected a 21-bit signed offset");
4283 
4284   return false;
4285 }
4286 
4287 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4288   unsigned Opcode = Inst.getOpcode();
4289   const MCInstrDesc &Desc = MII.get(Opcode);
4290   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4291     return true;
4292 
4293   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4294   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4295 
4296   const int OpIndices[] = { Src0Idx, Src1Idx };
4297 
4298   unsigned NumExprs = 0;
4299   unsigned NumLiterals = 0;
4300   uint32_t LiteralValue;
4301 
4302   for (int OpIdx : OpIndices) {
4303     if (OpIdx == -1) break;
4304 
4305     const MCOperand &MO = Inst.getOperand(OpIdx);
4306     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4307     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4308       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4309         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4310         if (NumLiterals == 0 || LiteralValue != Value) {
4311           LiteralValue = Value;
4312           ++NumLiterals;
4313         }
4314       } else if (MO.isExpr()) {
4315         ++NumExprs;
4316       }
4317     }
4318   }
4319 
4320   return NumLiterals + NumExprs <= 1;
4321 }
4322 
4323 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4324   const unsigned Opc = Inst.getOpcode();
4325   if (isPermlane16(Opc)) {
4326     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4327     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4328 
4329     if (OpSel & ~3)
4330       return false;
4331   }
4332 
4333   uint64_t TSFlags = MII.get(Opc).TSFlags;
4334 
4335   if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4336     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4337     if (OpSelIdx != -1) {
4338       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4339         return false;
4340     }
4341     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4342     if (OpSelHiIdx != -1) {
4343       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4344         return false;
4345     }
4346   }
4347 
4348   // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4349   if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4350       (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4351     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4352     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4353     if (OpSel & 3)
4354       return false;
4355   }
4356 
4357   return true;
4358 }
4359 
4360 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4361                                   const OperandVector &Operands) {
4362   const unsigned Opc = Inst.getOpcode();
4363   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4364   if (DppCtrlIdx >= 0) {
4365     unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4366 
4367     if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4368         AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4369       // DP ALU DPP is supported for row_newbcast only on GFX9*
4370       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4371       Error(S, "DP ALU dpp only supports row_newbcast");
4372       return false;
4373     }
4374   }
4375 
4376   int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4377   bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4378 
4379   if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4380     int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4381     if (Src1Idx >= 0) {
4382       const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4383       const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4384       if (Src1.isImm() ||
4385           (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
4386         AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
4387         Error(Op.getStartLoc(), "invalid operand for instruction");
4388         return false;
4389       }
4390     }
4391   }
4392 
4393   return true;
4394 }
4395 
4396 // Check if VCC register matches wavefront size
4397 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4398   auto FB = getFeatureBits();
4399   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4400     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4401 }
4402 
4403 // Only one unique literal can be used. A VOP3 literal is allowed only on GFX10+.
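// For illustration (assumed examples): on GFX10+ "v_add_f32_e64 v0, 0x12345678, v1"
// is accepted, and the same literal value may appear in more than one source
// operand, but two different literal values in one instruction are rejected.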
4404 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4405                                          const OperandVector &Operands) {
4406   unsigned Opcode = Inst.getOpcode();
4407   const MCInstrDesc &Desc = MII.get(Opcode);
4408   bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4409   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4410       !HasMandatoryLiteral && !isVOPD(Opcode))
4411     return true;
4412 
4413   OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4414 
4415   unsigned NumExprs = 0;
4416   unsigned NumLiterals = 0;
4417   uint32_t LiteralValue;
4418 
4419   for (int OpIdx : OpIndices) {
4420     if (OpIdx == -1)
4421       continue;
4422 
4423     const MCOperand &MO = Inst.getOperand(OpIdx);
4424     if (!MO.isImm() && !MO.isExpr())
4425       continue;
4426     if (!isSISrcOperand(Desc, OpIdx))
4427       continue;
4428 
4429     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4430       uint64_t Value = static_cast<uint64_t>(MO.getImm());
4431       bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4432                     AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4433       bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4434 
4435       if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4436         Error(getLitLoc(Operands), "invalid operand for instruction");
4437         return false;
4438       }
4439 
4440       if (IsFP64 && IsValid32Op)
4441         Value = Hi_32(Value);
4442 
4443       if (NumLiterals == 0 || LiteralValue != Value) {
4444         LiteralValue = Value;
4445         ++NumLiterals;
4446       }
4447     } else if (MO.isExpr()) {
4448       ++NumExprs;
4449     }
4450   }
4451   NumLiterals += NumExprs;
4452 
4453   if (!NumLiterals)
4454     return true;
4455 
4456   if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4457     Error(getLitLoc(Operands), "literal operands are not supported");
4458     return false;
4459   }
4460 
4461   if (NumLiterals > 1) {
4462     Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4463     return false;
4464   }
4465 
4466   return true;
4467 }
4468 
4469 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4470 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4471                          const MCRegisterInfo *MRI) {
4472   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4473   if (OpIdx < 0)
4474     return -1;
4475 
4476   const MCOperand &Op = Inst.getOperand(OpIdx);
4477   if (!Op.isReg())
4478     return -1;
4479 
4480   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4481   auto Reg = Sub ? Sub : Op.getReg();
4482   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4483   return AGPR32.contains(Reg) ? 1 : 0;
4484 }
4485 
4486 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4487   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4488   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4489                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4490                   SIInstrFlags::DS)) == 0)
4491     return true;
4492 
4493   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4494                                                       : AMDGPU::OpName::vdata;
4495 
4496   const MCRegisterInfo *MRI = getMRI();
4497   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4498   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4499 
4500   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4501     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4502     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4503       return false;
4504   }
4505 
4506   auto FB = getFeatureBits();
4507   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4508     if (DataAreg < 0 || DstAreg < 0)
4509       return true;
4510     return DstAreg == DataAreg;
4511   }
4512 
4513   return DstAreg < 1 && DataAreg < 1;
4514 }
4515 
4516 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4517   auto FB = getFeatureBits();
4518   if (!FB[AMDGPU::FeatureGFX90AInsts])
4519     return true;
4520 
4521   const MCRegisterInfo *MRI = getMRI();
4522   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4523   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4524   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4525     const MCOperand &Op = Inst.getOperand(I);
4526     if (!Op.isReg())
4527       continue;
4528 
4529     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4530     if (!Sub)
4531       continue;
4532 
4533     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4534       return false;
4535     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4536       return false;
4537   }
4538 
4539   return true;
4540 }
4541 
4542 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4543   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4544     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4545     if (Op.isBLGP())
4546       return Op.getStartLoc();
4547   }
4548   return SMLoc();
4549 }
4550 
4551 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4552                                    const OperandVector &Operands) {
4553   unsigned Opc = Inst.getOpcode();
4554   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4555   if (BlgpIdx == -1)
4556     return true;
4557   SMLoc BLGPLoc = getBLGPLoc(Operands);
4558   if (!BLGPLoc.isValid())
4559     return true;
4560   bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4561   auto FB = getFeatureBits();
4562   bool UsesNeg = false;
4563   if (FB[AMDGPU::FeatureGFX940Insts]) {
4564     switch (Opc) {
4565     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4566     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4567     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4568     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4569       UsesNeg = true;
4570     }
4571   }
4572 
4573   if (IsNeg == UsesNeg)
4574     return true;
4575 
4576   Error(BLGPLoc,
4577         UsesNeg ? "invalid modifier: blgp is not supported"
4578                 : "invalid modifier: neg is not supported");
4579 
4580   return false;
4581 }
4582 
4583 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4584                                       const OperandVector &Operands) {
4585   if (!isGFX11Plus())
4586     return true;
4587 
4588   unsigned Opc = Inst.getOpcode();
4589   if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4590       Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4591       Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4592       Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4593     return true;
4594 
4595   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4596   assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4597   auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4598   if (Reg == AMDGPU::SGPR_NULL)
4599     return true;
4600 
4601   SMLoc RegLoc = getRegLoc(Reg, Operands);
4602   Error(RegLoc, "src0 must be null");
4603   return false;
4604 }
4605 
4606 bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4607                                  const OperandVector &Operands) {
4608   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4609   if ((TSFlags & SIInstrFlags::DS) == 0)
4610     return true;
4611   if (TSFlags & SIInstrFlags::GWS)
4612     return validateGWS(Inst, Operands);
4613   // Only validate GDS for non-GWS instructions.
4614   if (hasGDS())
4615     return true;
4616   int GDSIdx =
4617       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4618   if (GDSIdx < 0)
4619     return true;
4620   unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4621   if (GDS) {
4622     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4623     Error(S, "gds modifier is not supported on this GPU");
4624     return false;
4625   }
4626   return true;
4627 }
4628 
4629 // gfx90a has an undocumented limitation:
4630 // DS_GWS opcodes must use even-aligned registers.
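// For illustration (assumed syntax): on gfx90a "ds_gws_init v2 gds" is
// accepted, while the same instruction with v3 is rejected because the data
// register is odd-aligned.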
4631 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4632                                   const OperandVector &Operands) {
4633   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4634     return true;
4635 
4636   int Opc = Inst.getOpcode();
4637   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4638       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4639     return true;
4640 
4641   const MCRegisterInfo *MRI = getMRI();
4642   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4643   int Data0Pos =
4644       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4645   assert(Data0Pos != -1);
4646   auto Reg = Inst.getOperand(Data0Pos).getReg();
4647   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4648   if (RegIdx & 1) {
4649     SMLoc RegLoc = getRegLoc(Reg, Operands);
4650     Error(RegLoc, "vgpr must be even aligned");
4651     return false;
4652   }
4653 
4654   return true;
4655 }
4656 
4657 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4658                                             const OperandVector &Operands,
4659                                             const SMLoc &IDLoc) {
4660   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4661                                            AMDGPU::OpName::cpol);
4662   if (CPolPos == -1)
4663     return true;
4664 
4665   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4666 
4667   if (isGFX12Plus())
4668     return validateTHAndScopeBits(Inst, Operands, CPol);
4669 
4670   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4671   if (TSFlags & SIInstrFlags::SMRD) {
4672     if (CPol && (isSI() || isCI())) {
4673       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4674       Error(S, "cache policy is not supported for SMRD instructions");
4675       return false;
4676     }
4677     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4678       Error(IDLoc, "invalid cache policy for SMEM instruction");
4679       return false;
4680     }
4681   }
4682 
4683   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4684     const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4685                                       SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4686                                       SIInstrFlags::FLAT;
4687     if (!(TSFlags & AllowSCCModifier)) {
4688       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4689       StringRef CStr(S.getPointer());
4690       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4691       Error(S,
4692             "scc modifier is not supported for this instruction on this GPU");
4693       return false;
4694     }
4695   }
4696 
4697   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4698     return true;
4699 
4700   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4701     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4702       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4703                               : "instruction must use glc");
4704       return false;
4705     }
4706   } else {
4707     if (CPol & CPol::GLC) {
4708       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4709       StringRef CStr(S.getPointer());
4710       S = SMLoc::getFromPointer(
4711           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4712       Error(S, isGFX940() ? "instruction must not use sc0"
4713                           : "instruction must not use glc");
4714       return false;
4715     }
4716   }
4717 
4718   return true;
4719 }
4720 
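     // GFX12+ expresses cache policy as th:* (temporal hint) and scope:* values.
     // The checks below require, for example, th:TH_ATOMIC_RETURN on returning
     // FLAT/BUF atomics and reject a store-type th value on a load instruction.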
4721 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4722                                              const OperandVector &Operands,
4723                                              const unsigned CPol) {
4724   const unsigned TH = CPol & AMDGPU::CPol::TH;
4725   const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4726 
4727   const unsigned Opcode = Inst.getOpcode();
4728   const MCInstrDesc &TID = MII.get(Opcode);
4729 
4730   auto PrintError = [&](StringRef Msg) {
4731     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4732     Error(S, Msg);
4733     return false;
4734   };
4735 
4736   if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4737       (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
4738       (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
4739     return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4740 
4741   if (TH == 0)
4742     return true;
4743 
4744   if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4745       ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4746        (TH == AMDGPU::CPol::TH_NT_HT)))
4747     return PrintError("invalid th value for SMEM instruction");
4748 
4749   if (TH == AMDGPU::CPol::TH_BYPASS) {
4750     if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
4751          CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
4752         (Scope == AMDGPU::CPol::SCOPE_SYS &&
4753          !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
4754       return PrintError("scope and th combination is not valid");
4755   }
4756 
4757   bool IsStore = TID.mayStore();
4758   bool IsAtomic =
4759       TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
4760 
4761   if (IsAtomic) {
4762     if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
4763       return PrintError("invalid th value for atomic instructions");
4764   } else if (IsStore) {
4765     if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
4766       return PrintError("invalid th value for store instructions");
4767   } else {
4768     if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
4769       return PrintError("invalid th value for load instructions");
4770   }
4771 
4772   return true;
4773 }
4774 
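     // execz and vccz are not readable as source operands on GFX11+.
     // Illustrative: 's_mov_b32 s0, src_vccz' assembles on GFX10 but is
     // rejected here on GFX11+.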
4775 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4776   if (!isGFX11Plus())
4777     return true;
4778   for (auto &Operand : Operands) {
4779     if (!Operand->isReg())
4780       continue;
4781     unsigned Reg = Operand->getReg();
4782     if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4783       Error(getRegLoc(Reg, Operands),
4784             "execz and vccz are not supported on this GPU");
4785       return false;
4786     }
4787   }
4788   return true;
4789 }
4790 
4791 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4792                                   const OperandVector &Operands) {
4793   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4794   if (Desc.mayStore() &&
4795       (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4796     SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4797     if (Loc != getInstLoc(Operands)) {
4798       Error(Loc, "TFE modifier has no meaning for store instructions");
4799       return false;
4800     }
4801   }
4802 
4803   return true;
4804 }
4805 
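     // Top-level semantic validation, run after a successful encoding match and
     // before emission. Each helper either reports its own diagnostic or has one
     // attached here.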
4806 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4807                                           const SMLoc &IDLoc,
4808                                           const OperandVector &Operands) {
4809   if (auto ErrMsg = validateLdsDirect(Inst)) {
4810     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4811     return false;
4812   }
4813   if (!validateSOPLiteral(Inst)) {
4814     Error(getLitLoc(Operands),
4815       "only one unique literal operand is allowed");
4816     return false;
4817   }
4818   if (!validateVOPLiteral(Inst, Operands)) {
4819     return false;
4820   }
4821   if (!validateConstantBusLimitations(Inst, Operands)) {
4822     return false;
4823   }
4824   if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4825     return false;
4826   }
4827   if (!validateIntClampSupported(Inst)) {
4828     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4829       "integer clamping is not supported on this GPU");
4830     return false;
4831   }
4832   if (!validateOpSel(Inst)) {
4833     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4834       "invalid op_sel operand");
4835     return false;
4836   }
4837   if (!validateDPP(Inst, Operands)) {
4838     return false;
4839   }
4840   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4841   if (!validateMIMGD16(Inst)) {
4842     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4843       "d16 modifier is not supported on this GPU");
4844     return false;
4845   }
4846   if (!validateMIMGMSAA(Inst)) {
4847     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4848           "invalid dim; must be MSAA type");
4849     return false;
4850   }
4851   if (!validateMIMGDataSize(Inst, IDLoc)) {
4852     return false;
4853   }
4854   if (!validateMIMGAddrSize(Inst, IDLoc))
4855     return false;
4856   if (!validateMIMGAtomicDMask(Inst)) {
4857     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4858       "invalid atomic image dmask");
4859     return false;
4860   }
4861   if (!validateMIMGGatherDMask(Inst)) {
4862     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4863       "invalid image_gather dmask: only one bit must be set");
4864     return false;
4865   }
4866   if (!validateMovrels(Inst, Operands)) {
4867     return false;
4868   }
4869   if (!validateOffset(Inst, Operands)) {
4870     return false;
4871   }
4872   if (!validateMAIAccWrite(Inst, Operands)) {
4873     return false;
4874   }
4875   if (!validateMAISrc2(Inst, Operands)) {
4876     return false;
4877   }
4878   if (!validateMFMA(Inst, Operands)) {
4879     return false;
4880   }
4881   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4882     return false;
4883   }
4884 
4885   if (!validateAGPRLdSt(Inst)) {
4886     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4887     ? "invalid register class: data and dst should be all VGPR or AGPR"
4888     : "invalid register class: agpr loads and stores not supported on this GPU"
4889     );
4890     return false;
4891   }
4892   if (!validateVGPRAlign(Inst)) {
4893     Error(IDLoc,
4894       "invalid register class: vgpr tuples must be 64 bit aligned");
4895     return false;
4896   }
4897   if (!validateDS(Inst, Operands)) {
4898     return false;
4899   }
4900 
4901   if (!validateBLGP(Inst, Operands)) {
4902     return false;
4903   }
4904 
4905   if (!validateDivScale(Inst)) {
4906     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4907     return false;
4908   }
4909   if (!validateWaitCnt(Inst, Operands)) {
4910     return false;
4911   }
4912   if (!validateExeczVcczOperands(Operands)) {
4913     return false;
4914   }
4915   if (!validateTFE(Inst, Operands)) {
4916     return false;
4917   }
4918 
4919   return true;
4920 }
4921 
4922 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4923                                             const FeatureBitset &FBS,
4924                                             unsigned VariantID = 0);
4925 
4926 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4927                                 const FeatureBitset &AvailableFeatures,
4928                                 unsigned VariantID);
4929 
4930 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4931                                        const FeatureBitset &FBS) {
4932   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4933 }
4934 
4935 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4936                                        const FeatureBitset &FBS,
4937                                        ArrayRef<unsigned> Variants) {
4938   for (auto Variant : Variants) {
4939     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4940       return true;
4941   }
4942 
4943   return false;
4944 }
4945 
4946 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4947                                                   const SMLoc &IDLoc) {
4948   FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4949 
4950   // Check if requested instruction variant is supported.
4951   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4952     return false;
4953 
4954   // This instruction is not supported.
4955   // Clear any other pending errors because they are no longer relevant.
4956   getParser().clearPendingErrors();
4957 
4958   // Requested instruction variant is not supported.
4959   // Check if any other variants are supported.
4960   StringRef VariantName = getMatchedVariantName();
4961   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4962     return Error(IDLoc,
4963                  Twine(VariantName,
4964                        " variant of this instruction is not supported"));
4965   }
4966 
4967   // Check if this instruction may be used with a different wavesize.
4968   if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4969       !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4970 
4971     FeatureBitset FeaturesWS32 = getFeatureBits();
4972     FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4973         .flip(AMDGPU::FeatureWavefrontSize32);
4974     FeatureBitset AvailableFeaturesWS32 =
4975         ComputeAvailableFeatures(FeaturesWS32);
4976 
4977     if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4978       return Error(IDLoc, "instruction requires wavesize=32");
4979   }
4980 
4981   // Finally check if this instruction is supported on any other GPU.
4982   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4983     return Error(IDLoc, "instruction not supported on this GPU");
4984   }
4985 
4986   // Instruction not supported on any GPU. Probably a typo.
4987   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4988   return Error(IDLoc, "invalid instruction" + Suggestion);
4989 }
4990 
4991 static bool isInvalidVOPDY(const OperandVector &Operands,
4992                            uint64_t InvalidOprIdx) {
4993   assert(InvalidOprIdx < Operands.size());
4994   const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4995   if (Op.isToken() && InvalidOprIdx > 1) {
4996     const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4997     return PrevOp.isToken() && PrevOp.getToken() == "::";
4998   }
4999   return false;
5000 }
5001 
5002 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5003                                               OperandVector &Operands,
5004                                               MCStreamer &Out,
5005                                               uint64_t &ErrorInfo,
5006                                               bool MatchingInlineAsm) {
5007   MCInst Inst;
5008   unsigned Result = Match_Success;
5009   for (auto Variant : getMatchedVariants()) {
5010     uint64_t EI;
5011     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5012                                   Variant);
5013     // We order match statuses from least to most specific and use the most
5014     // specific status as the result:
5015     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
5016     if ((R == Match_Success) ||
5017         (R == Match_PreferE32) ||
5018         (R == Match_MissingFeature && Result != Match_PreferE32) ||
5019         (R == Match_InvalidOperand && Result != Match_MissingFeature
5020                                    && Result != Match_PreferE32) ||
5021         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
5022                                    && Result != Match_MissingFeature
5023                                    && Result != Match_PreferE32)) {
5024       Result = R;
5025       ErrorInfo = EI;
5026     }
5027     if (R == Match_Success)
5028       break;
5029   }
5030 
5031   if (Result == Match_Success) {
5032     if (!validateInstruction(Inst, IDLoc, Operands)) {
5033       return true;
5034     }
5035     Inst.setLoc(IDLoc);
5036     Out.emitInstruction(Inst, getSTI());
5037     return false;
5038   }
5039 
5040   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5041   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5042     return true;
5043   }
5044 
5045   switch (Result) {
5046   default: break;
5047   case Match_MissingFeature:
5048     // It has been verified that the specified instruction
5049     // mnemonic is valid. A match was found but it requires
5050     // features which are not supported on this GPU.
5051     return Error(IDLoc, "operands are not valid for this GPU or mode");
5052 
5053   case Match_InvalidOperand: {
5054     SMLoc ErrorLoc = IDLoc;
5055     if (ErrorInfo != ~0ULL) {
5056       if (ErrorInfo >= Operands.size()) {
5057         return Error(IDLoc, "too few operands for instruction");
5058       }
5059       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5060       if (ErrorLoc == SMLoc())
5061         ErrorLoc = IDLoc;
5062 
5063       if (isInvalidVOPDY(Operands, ErrorInfo))
5064         return Error(ErrorLoc, "invalid VOPDY instruction");
5065     }
5066     return Error(ErrorLoc, "invalid operand for instruction");
5067   }
5068 
5069   case Match_PreferE32:
5070     return Error(IDLoc, "internal error: instruction without _e64 suffix "
5071                         "should be encoded as e32");
5072   case Match_MnemonicFail:
5073     llvm_unreachable("Invalid instructions should have been handled already");
5074   }
5075   llvm_unreachable("Implement any new match types added!");
5076 }
5077 
5078 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5079   int64_t Tmp = -1;
5080   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5081     return true;
5082   }
5083   if (getParser().parseAbsoluteExpression(Tmp)) {
5084     return true;
5085   }
5086   Ret = static_cast<uint32_t>(Tmp);
5087   return false;
5088 }
5089 
5090 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
5091                                                uint32_t &Minor) {
5092   if (ParseAsAbsoluteExpression(Major))
5093     return TokError("invalid major version");
5094 
5095   if (!trySkipToken(AsmToken::Comma))
5096     return TokError("minor version number required, comma expected");
5097 
5098   if (ParseAsAbsoluteExpression(Minor))
5099     return TokError("invalid minor version");
5100 
5101   return false;
5102 }
5103 
5104 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5105   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5106     return TokError("directive only supported for amdgcn architecture");
5107 
5108   std::string TargetIDDirective;
5109   SMLoc TargetStart = getTok().getLoc();
5110   if (getParser().parseEscapedString(TargetIDDirective))
5111     return true;
5112 
5113   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5114   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5115     return getParser().Error(TargetRange.Start,
5116         (Twine(".amdgcn_target directive's target id ") +
5117          Twine(TargetIDDirective) +
5118          Twine(" does not match the specified target id ") +
5119          Twine(getTargetStreamer().getTargetID()->toString())).str());
5120 
5121   return false;
5122 }
5123 
5124 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5125   return Error(Range.Start, "value out of range", Range);
5126 }
5127 
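     // Register counts are rounded up to the hardware allocation granule and
     // encoded as "blocks" (granule count minus one). Illustrative arithmetic,
     // assuming a 4-VGPR granule: 37 VGPRs -> ceil(37/4) = 10 granules -> block
     // value 9. The actual granule depends on the target and wavefront size.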
5128 bool AMDGPUAsmParser::calculateGPRBlocks(
5129     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
5130     bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5131     unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
5132     SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
5133   // TODO(scott.linder): These calculations are duplicated from
5134   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5135   IsaVersion Version = getIsaVersion(getSTI().getCPU());
5136 
5137   unsigned NumVGPRs = NextFreeVGPR;
5138   unsigned NumSGPRs = NextFreeSGPR;
5139 
5140   if (Version.Major >= 10)
5141     NumSGPRs = 0;
5142   else {
5143     unsigned MaxAddressableNumSGPRs =
5144         IsaInfo::getAddressableNumSGPRs(&getSTI());
5145 
5146     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
5147         NumSGPRs > MaxAddressableNumSGPRs)
5148       return OutOfRangeError(SGPRRange);
5149 
5150     NumSGPRs +=
5151         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5152 
5153     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5154         NumSGPRs > MaxAddressableNumSGPRs)
5155       return OutOfRangeError(SGPRRange);
5156 
5157     if (Features.test(FeatureSGPRInitBug))
5158       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
5159   }
5160 
5161   VGPRBlocks =
5162       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
5163   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5164 
5165   return false;
5166 }
5167 
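     // Parses a block of the following form (illustrative, field set abridged):
     //   .amdhsa_kernel my_kernel
     //     .amdhsa_next_free_vgpr 32
     //     .amdhsa_next_free_sgpr 16
     //     .amdhsa_user_sgpr_kernarg_segment_ptr 1
     //   .end_amdhsa_kernel
     // .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory; the
     // remaining fields default to the values in the default kernel descriptor.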
5168 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5169   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5170     return TokError("directive only supported for amdgcn architecture");
5171 
5172   if (!isHsaAbi(getSTI()))
5173     return TokError("directive only supported for amdhsa OS");
5174 
5175   StringRef KernelName;
5176   if (getParser().parseIdentifier(KernelName))
5177     return true;
5178 
5179   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
5180 
5181   StringSet<> Seen;
5182 
5183   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5184 
5185   SMRange VGPRRange;
5186   uint64_t NextFreeVGPR = 0;
5187   uint64_t AccumOffset = 0;
5188   uint64_t SharedVGPRCount = 0;
5189   uint64_t PreloadLength = 0;
5190   uint64_t PreloadOffset = 0;
5191   SMRange SGPRRange;
5192   uint64_t NextFreeSGPR = 0;
5193 
5194   // Count the number of user SGPRs implied from the enabled feature bits.
5195   unsigned ImpliedUserSGPRCount = 0;
5196 
5197   // Track if the asm explicitly contains the directive for the user SGPR
5198   // count.
5199   std::optional<unsigned> ExplicitUserSGPRCount;
5200   bool ReserveVCC = true;
5201   bool ReserveFlatScr = true;
5202   std::optional<bool> EnableWavefrontSize32;
5203 
5204   while (true) {
5205     while (trySkipToken(AsmToken::EndOfStatement));
5206 
5207     StringRef ID;
5208     SMRange IDRange = getTok().getLocRange();
5209     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5210       return true;
5211 
5212     if (ID == ".end_amdhsa_kernel")
5213       break;
5214 
5215     if (!Seen.insert(ID).second)
5216       return TokError(".amdhsa_ directives cannot be repeated");
5217 
5218     SMLoc ValStart = getLoc();
5219     int64_t IVal;
5220     if (getParser().parseAbsoluteExpression(IVal))
5221       return true;
5222     SMLoc ValEnd = getLoc();
5223     SMRange ValRange = SMRange(ValStart, ValEnd);
5224 
5225     if (IVal < 0)
5226       return OutOfRangeError(ValRange);
5227 
5228     uint64_t Val = IVal;
5229 
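     // Range-check VALUE against ENTRY's bit width, then set the corresponding
     // bitfield of FIELD.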
5230 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
5231   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
5232     return OutOfRangeError(RANGE);                                             \
5233   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
5234 
5235     if (ID == ".amdhsa_group_segment_fixed_size") {
5236       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5237         return OutOfRangeError(ValRange);
5238       KD.group_segment_fixed_size = Val;
5239     } else if (ID == ".amdhsa_private_segment_fixed_size") {
5240       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5241         return OutOfRangeError(ValRange);
5242       KD.private_segment_fixed_size = Val;
5243     } else if (ID == ".amdhsa_kernarg_size") {
5244       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5245         return OutOfRangeError(ValRange);
5246       KD.kernarg_size = Val;
5247     } else if (ID == ".amdhsa_user_sgpr_count") {
5248       ExplicitUserSGPRCount = Val;
5249     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5250       if (hasArchitectedFlatScratch())
5251         return Error(IDRange.Start,
5252                      "directive is not supported with architected flat scratch",
5253                      IDRange);
5254       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5255                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5256                        Val, ValRange);
5257       if (Val)
5258         ImpliedUserSGPRCount += 4;
5259     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5260       if (!hasKernargPreload())
5261         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5262 
5263       if (Val > getMaxNumUserSGPRs())
5264         return OutOfRangeError(ValRange);
5265       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
5266                        ValRange);
5267       if (Val) {
5268         ImpliedUserSGPRCount += Val;
5269         PreloadLength = Val;
5270       }
5271     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5272       if (!hasKernargPreload())
5273         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5274 
5275       if (Val >= 1024)
5276         return OutOfRangeError(ValRange);
5277       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
5278                        ValRange);
5279       if (Val)
5280         PreloadOffset = Val;
5281     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5282       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5283                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5284                        ValRange);
5285       if (Val)
5286         ImpliedUserSGPRCount += 2;
5287     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5288       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5289                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5290                        ValRange);
5291       if (Val)
5292         ImpliedUserSGPRCount += 2;
5293     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5294       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5295                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5296                        Val, ValRange);
5297       if (Val)
5298         ImpliedUserSGPRCount += 2;
5299     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5300       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5301                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5302                        ValRange);
5303       if (Val)
5304         ImpliedUserSGPRCount += 2;
5305     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5306       if (hasArchitectedFlatScratch())
5307         return Error(IDRange.Start,
5308                      "directive is not supported with architected flat scratch",
5309                      IDRange);
5310       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5311                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5312                        ValRange);
5313       if (Val)
5314         ImpliedUserSGPRCount += 2;
5315     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5316       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5317                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5318                        Val, ValRange);
5319       if (Val)
5320         ImpliedUserSGPRCount += 1;
5321     } else if (ID == ".amdhsa_wavefront_size32") {
5322       if (IVersion.Major < 10)
5323         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5324       EnableWavefrontSize32 = Val;
5325       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5326                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5327                        Val, ValRange);
5328     } else if (ID == ".amdhsa_uses_dynamic_stack") {
5329       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5330                        KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5331     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5332       if (hasArchitectedFlatScratch())
5333         return Error(IDRange.Start,
5334                      "directive is not supported with architected flat scratch",
5335                      IDRange);
5336       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5337                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5338     } else if (ID == ".amdhsa_enable_private_segment") {
5339       if (!hasArchitectedFlatScratch())
5340         return Error(
5341             IDRange.Start,
5342             "directive is not supported without architected flat scratch",
5343             IDRange);
5344       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5345                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5346     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5347       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5348                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5349                        ValRange);
5350     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5351       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5352                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5353                        ValRange);
5354     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5355       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5356                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5357                        ValRange);
5358     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5359       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5360                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5361                        ValRange);
5362     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5363       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5364                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5365                        ValRange);
5366     } else if (ID == ".amdhsa_next_free_vgpr") {
5367       VGPRRange = ValRange;
5368       NextFreeVGPR = Val;
5369     } else if (ID == ".amdhsa_next_free_sgpr") {
5370       SGPRRange = ValRange;
5371       NextFreeSGPR = Val;
5372     } else if (ID == ".amdhsa_accum_offset") {
5373       if (!isGFX90A())
5374         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5375       AccumOffset = Val;
5376     } else if (ID == ".amdhsa_reserve_vcc") {
5377       if (!isUInt<1>(Val))
5378         return OutOfRangeError(ValRange);
5379       ReserveVCC = Val;
5380     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5381       if (IVersion.Major < 7)
5382         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5383       if (hasArchitectedFlatScratch())
5384         return Error(IDRange.Start,
5385                      "directive is not supported with architected flat scratch",
5386                      IDRange);
5387       if (!isUInt<1>(Val))
5388         return OutOfRangeError(ValRange);
5389       ReserveFlatScr = Val;
5390     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5391       if (IVersion.Major < 8)
5392         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5393       if (!isUInt<1>(Val))
5394         return OutOfRangeError(ValRange);
5395       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5396         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5397                                  IDRange);
5398     } else if (ID == ".amdhsa_float_round_mode_32") {
5399       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5400                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5401     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5402       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5403                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5404     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5405       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5406                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5407     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5408       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5409                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5410                        ValRange);
5411     } else if (ID == ".amdhsa_dx10_clamp") {
5412       if (IVersion.Major >= 12)
5413         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5414       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5415                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
5416                        ValRange);
5417     } else if (ID == ".amdhsa_ieee_mode") {
5418       if (IVersion.Major >= 12)
5419         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5420       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5421                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
5422                        ValRange);
5423     } else if (ID == ".amdhsa_fp16_overflow") {
5424       if (IVersion.Major < 9)
5425         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5426       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
5427                        ValRange);
5428     } else if (ID == ".amdhsa_tg_split") {
5429       if (!isGFX90A())
5430         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5431       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5432                        ValRange);
5433     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5434       if (IVersion.Major < 10)
5435         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5436       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
5437                        ValRange);
5438     } else if (ID == ".amdhsa_memory_ordered") {
5439       if (IVersion.Major < 10)
5440         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5441       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
5442                        ValRange);
5443     } else if (ID == ".amdhsa_forward_progress") {
5444       if (IVersion.Major < 10)
5445         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5446       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
5447                        ValRange);
5448     } else if (ID == ".amdhsa_shared_vgpr_count") {
5449       if (IVersion.Major < 10 || IVersion.Major >= 12)
5450         return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5451                      IDRange);
5452       SharedVGPRCount = Val;
5453       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5454                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val,
5455                        ValRange);
5456     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5457       PARSE_BITS_ENTRY(
5458           KD.compute_pgm_rsrc2,
5459           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5460           ValRange);
5461     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5462       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5463                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5464                        Val, ValRange);
5465     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5466       PARSE_BITS_ENTRY(
5467           KD.compute_pgm_rsrc2,
5468           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5469           ValRange);
5470     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5471       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5472                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5473                        Val, ValRange);
5474     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5475       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5476                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5477                        Val, ValRange);
5478     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5479       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5480                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5481                        Val, ValRange);
5482     } else if (ID == ".amdhsa_exception_int_div_zero") {
5483       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5484                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5485                        Val, ValRange);
5486     } else if (ID == ".amdhsa_round_robin_scheduling") {
5487       if (IVersion.Major < 12)
5488         return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5489       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5490                        COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
5491                        ValRange);
5492     } else {
5493       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5494     }
5495 
5496 #undef PARSE_BITS_ENTRY
5497   }
5498 
5499   if (!Seen.contains(".amdhsa_next_free_vgpr"))
5500     return TokError(".amdhsa_next_free_vgpr directive is required");
5501 
5502   if (!Seen.contains(".amdhsa_next_free_sgpr"))
5503     return TokError(".amdhsa_next_free_sgpr directive is required");
5504 
5505   unsigned VGPRBlocks;
5506   unsigned SGPRBlocks;
5507   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5508                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5509                          EnableWavefrontSize32, NextFreeVGPR,
5510                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5511                          SGPRBlocks))
5512     return true;
5513 
5514   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5515           VGPRBlocks))
5516     return OutOfRangeError(VGPRRange);
5517   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5518                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5519 
5520   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5521           SGPRBlocks))
5522     return OutOfRangeError(SGPRRange);
5523   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5524                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5525                   SGPRBlocks);
5526 
5527   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5528     return TokError("amdgpu_user_sgpr_count smaller than implied by "
5529                     "enabled user SGPRs");
5530 
5531   unsigned UserSGPRCount =
5532       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5533 
5534   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5535     return TokError("too many user SGPRs enabled");
5536   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5537                   UserSGPRCount);
5538 
5539   if (PreloadLength && KD.kernarg_size &&
5540       (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
5541     return TokError("kernarg preload length + offset is larger than the "
5542                     "kernarg segment size");
5543 
5544   if (isGFX90A()) {
5545     if (!Seen.contains(".amdhsa_accum_offset"))
5546       return TokError(".amdhsa_accum_offset directive is required");
5547     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5548       return TokError("accum_offset should be in range [4..256] in "
5549                       "increments of 4");
5550     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5551       return TokError("accum_offset exceeds total VGPR allocation");
5552     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5553                     (AccumOffset / 4 - 1));
5554   }
5555 
5556   if (IVersion.Major >= 10 && IVersion.Major < 12) {
5557     // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5558     if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5559       return TokError("shared_vgpr_count directive not valid on "
5560                       "wavefront size 32");
5561     }
5562     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5563       return TokError("shared_vgpr_count*2 + "
5564                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5565                       "exceed 63");
5566     }
5567   }
5568 
5569   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5570       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5571       ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion());
5572   return false;
5573 }
5574 
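     // Legacy (pre-v3 code object) directive, e.g. (illustrative):
     //   .hsa_code_object_version 2,1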
5575 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5576   uint32_t Major;
5577   uint32_t Minor;
5578 
5579   if (ParseDirectiveMajorMinor(Major, Minor))
5580     return true;
5581 
5582   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5583   return false;
5584 }
5585 
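     // With no arguments, the ISA version of the targeted GPU is emitted.
     // Otherwise the expected form is (illustrative values):
     //   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"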
5586 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5587   uint32_t Major;
5588   uint32_t Minor;
5589   uint32_t Stepping;
5590   StringRef VendorName;
5591   StringRef ArchName;
5592 
5593   // If this directive has no arguments, then use the ISA version for the
5594   // targeted GPU.
5595   if (isToken(AsmToken::EndOfStatement)) {
5596     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5597     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5598                                                         ISA.Stepping,
5599                                                         "AMD", "AMDGPU");
5600     return false;
5601   }
5602 
5603   if (ParseDirectiveMajorMinor(Major, Minor))
5604     return true;
5605 
5606   if (!trySkipToken(AsmToken::Comma))
5607     return TokError("stepping version number required, comma expected");
5608 
5609   if (ParseAsAbsoluteExpression(Stepping))
5610     return TokError("invalid stepping version");
5611 
5612   if (!trySkipToken(AsmToken::Comma))
5613     return TokError("vendor name required, comma expected");
5614 
5615   if (!parseString(VendorName, "invalid vendor name"))
5616     return true;
5617 
5618   if (!trySkipToken(AsmToken::Comma))
5619     return TokError("arch name required, comma expected");
5620 
5621   if (!parseString(ArchName, "invalid arch name"))
5622     return true;
5623 
5624   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5625                                                       VendorName, ArchName);
5626   return false;
5627 }
5628 
5629 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5630                                                amd_kernel_code_t &Header) {
5631   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5632   // assembly for backwards compatibility.
5633   if (ID == "max_scratch_backing_memory_byte_size") {
5634     Parser.eatToEndOfStatement();
5635     return false;
5636   }
5637 
5638   SmallString<40> ErrStr;
5639   raw_svector_ostream Err(ErrStr);
5640   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5641     return TokError(Err.str());
5642   }
5643   Lex();
5644 
5645   if (ID == "enable_dx10_clamp") {
5646     if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
5647         isGFX12Plus())
5648       return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
5649   }
5650 
5651   if (ID == "enable_ieee_mode") {
5652     if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
5653         isGFX12Plus())
5654       return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
5655   }
5656 
5657   if (ID == "enable_wavefront_size32") {
5658     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5659       if (!isGFX10Plus())
5660         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5661       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5662         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5663     } else {
5664       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5665         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5666     }
5667   }
5668 
5669   if (ID == "wavefront_size") {
5670     if (Header.wavefront_size == 5) {
5671       if (!isGFX10Plus())
5672         return TokError("wavefront_size=5 is only allowed on GFX10+");
5673       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5674         return TokError("wavefront_size=5 requires +WavefrontSize32");
5675     } else if (Header.wavefront_size == 6) {
5676       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5677         return TokError("wavefront_size=6 requires +WavefrontSize64");
5678     }
5679   }
5680 
5681   if (ID == "enable_wgp_mode") {
5682     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5683         !isGFX10Plus())
5684       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5685   }
5686 
5687   if (ID == "enable_mem_ordered") {
5688     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5689         !isGFX10Plus())
5690       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5691   }
5692 
5693   if (ID == "enable_fwd_progress") {
5694     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5695         !isGFX10Plus())
5696       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5697   }
5698 
5699   return false;
5700 }
5701 
5702 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5703   amd_kernel_code_t Header;
5704   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5705 
5706   while (true) {
5707     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5708     // will set the current token to EndOfStatement.
5709     while (trySkipToken(AsmToken::EndOfStatement));
5710 
5711     StringRef ID;
5712     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5713       return true;
5714 
5715     if (ID == ".end_amd_kernel_code_t")
5716       break;
5717 
5718     if (ParseAMDKernelCodeTValue(ID, Header))
5719       return true;
5720   }
5721 
5722   getTargetStreamer().EmitAMDKernelCodeT(Header);
5723 
5724   return false;
5725 }
5726 
5727 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5728   StringRef KernelName;
5729   if (!parseId(KernelName, "expected symbol name"))
5730     return true;
5731 
5732   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5733                                            ELF::STT_AMDGPU_HSA_KERNEL);
5734 
5735   KernelScope.initialize(getContext());
5736   return false;
5737 }
5738 
5739 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5740   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5741     return Error(getLoc(),
5742                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5743                  "architectures");
5744   }
5745 
5746   auto TargetIDDirective = getLexer().getTok().getStringContents();
5747   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5748     return Error(getParser().getTok().getLoc(), "target id must match options");
5749 
5750   getTargetStreamer().EmitISAVersion();
5751   Lex();
5752 
5753   return false;
5754 }
5755 
5756 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5757   assert(isHsaAbi(getSTI()));
5758 
5759   std::string HSAMetadataString;
5760   if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
5761                           HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
5762     return true;
5763 
5764   if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5765     return Error(getLoc(), "invalid HSA metadata");
5766 
5767   return false;
5768 }
5769 
5770 /// Common code to parse out a block of text (typically YAML) between start and
5771 /// end directives.
5772 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5773                                           const char *AssemblerDirectiveEnd,
5774                                           std::string &CollectString) {
5775 
5776   raw_string_ostream CollectStream(CollectString);
5777 
5778   getLexer().setSkipSpace(false);
5779 
5780   bool FoundEnd = false;
5781   while (!isToken(AsmToken::Eof)) {
5782     while (isToken(AsmToken::Space)) {
5783       CollectStream << getTokenStr();
5784       Lex();
5785     }
5786 
5787     if (trySkipId(AssemblerDirectiveEnd)) {
5788       FoundEnd = true;
5789       break;
5790     }
5791 
5792     CollectStream << Parser.parseStringToEndOfStatement()
5793                   << getContext().getAsmInfo()->getSeparatorString();
5794 
5795     Parser.eatToEndOfStatement();
5796   }
5797 
5798   getLexer().setSkipSpace(true);
5799 
5800   if (isToken(AsmToken::Eof) && !FoundEnd) {
5801     return TokError(Twine("expected directive ") +
5802                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5803   }
5804 
5805   CollectStream.flush();
5806   return false;
5807 }
5808 
5809 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5810 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5811   std::string String;
5812   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5813                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5814     return true;
5815 
5816   auto PALMetadata = getTargetStreamer().getPALMetadata();
5817   if (!PALMetadata->setFromString(String))
5818     return Error(getLoc(), "invalid PAL metadata");
5819   return false;
5820 }
5821 
5822 /// Parse the assembler directive for old linear-format PAL metadata.
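     /// The directive body is a comma-separated list of alternating register/value
     /// pairs; e.g. (illustrative) "0x2c0a, 0x42, 0x2c0b, 0x0" sets two registers.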
5823 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5824   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5825     return Error(getLoc(),
5826                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5827                  "not available on non-amdpal OSes")).str());
5828   }
5829 
5830   auto PALMetadata = getTargetStreamer().getPALMetadata();
5831   PALMetadata->setLegacy();
5832   for (;;) {
5833     uint32_t Key, Value;
5834     if (ParseAsAbsoluteExpression(Key)) {
5835       return TokError(Twine("invalid value in ") +
5836                       Twine(PALMD::AssemblerDirective));
5837     }
5838     if (!trySkipToken(AsmToken::Comma)) {
5839       return TokError(Twine("expected an even number of values in ") +
5840                       Twine(PALMD::AssemblerDirective));
5841     }
5842     if (ParseAsAbsoluteExpression(Value)) {
5843       return TokError(Twine("invalid value in ") +
5844                       Twine(PALMD::AssemblerDirective));
5845     }
5846     PALMetadata->setRegister(Key, Value);
5847     if (!trySkipToken(AsmToken::Comma))
5848       break;
5849   }
5850   return false;
5851 }
5852 
5853 /// ParseDirectiveAMDGPULDS
5854 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
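     ///  e.g. (illustrative): .amdgpu_lds lds_buf, 4096, 16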
5855 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5856   if (getParser().checkForValidSection())
5857     return true;
5858 
5859   StringRef Name;
5860   SMLoc NameLoc = getLoc();
5861   if (getParser().parseIdentifier(Name))
5862     return TokError("expected identifier in directive");
5863 
5864   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5865   if (getParser().parseComma())
5866     return true;
5867 
5868   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5869 
5870   int64_t Size;
5871   SMLoc SizeLoc = getLoc();
5872   if (getParser().parseAbsoluteExpression(Size))
5873     return true;
5874   if (Size < 0)
5875     return Error(SizeLoc, "size must be non-negative");
5876   if (Size > LocalMemorySize)
5877     return Error(SizeLoc, "size is too large");
5878 
5879   int64_t Alignment = 4;
5880   if (trySkipToken(AsmToken::Comma)) {
5881     SMLoc AlignLoc = getLoc();
5882     if (getParser().parseAbsoluteExpression(Alignment))
5883       return true;
5884     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5885       return Error(AlignLoc, "alignment must be a power of two");
5886 
5887     // Alignment larger than the size of LDS is possible in theory, as long
5888     // as the linker manages to place the symbol at address 0, but we do want
5889     // to make sure the alignment fits nicely into a 32-bit integer.
5890     if (Alignment >= 1u << 31)
5891       return Error(AlignLoc, "alignment is too large");
5892   }
5893 
5894   if (parseEOL())
5895     return true;
5896 
5897   Symbol->redefineIfPossible();
5898   if (!Symbol->isUndefined())
5899     return Error(NameLoc, "invalid symbol redefinition");
5900 
5901   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5902   return false;
5903 }
5904 
5905 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5906   StringRef IDVal = DirectiveID.getString();
5907 
5908   if (isHsaAbi(getSTI())) {
5909     if (IDVal == ".amdhsa_kernel")
5910      return ParseDirectiveAMDHSAKernel();
5911 
5912     // TODO: Restructure/combine with PAL metadata directive.
5913     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5914       return ParseDirectiveHSAMetadata();
5915   } else {
5916     if (IDVal == ".hsa_code_object_version")
5917       return ParseDirectiveHSACodeObjectVersion();
5918 
5919     if (IDVal == ".hsa_code_object_isa")
5920       return ParseDirectiveHSACodeObjectISA();
5921 
5922     if (IDVal == ".amd_kernel_code_t")
5923       return ParseDirectiveAMDKernelCodeT();
5924 
5925     if (IDVal == ".amdgpu_hsa_kernel")
5926       return ParseDirectiveAMDGPUHsaKernel();
5927 
5928     if (IDVal == ".amd_amdgpu_isa")
5929       return ParseDirectiveISAVersion();
5930 
5931     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
5932       return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
5933                               Twine(" directive is "
5934                                     "not available on non-amdhsa OSes"))
5935                                  .str());
5936     }
5937   }
5938 
5939   if (IDVal == ".amdgcn_target")
5940     return ParseDirectiveAMDGCNTarget();
5941 
5942   if (IDVal == ".amdgpu_lds")
5943     return ParseDirectiveAMDGPULDS();
5944 
5945   if (IDVal == PALMD::AssemblerDirectiveBegin)
5946     return ParseDirectivePALMetadataBegin();
5947 
5948   if (IDVal == PALMD::AssemblerDirective)
5949     return ParseDirectivePALMetadata();
5950 
5951   return true;
5952 }
5953 
5954 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5955                                            unsigned RegNo) {
5956 
5957   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5958     return isGFX9Plus();
5959 
5960   // GFX10+ has 2 more SGPRs 104 and 105.
5961   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5962     return hasSGPR104_SGPR105();
5963 
5964   switch (RegNo) {
5965   case AMDGPU::SRC_SHARED_BASE_LO:
5966   case AMDGPU::SRC_SHARED_BASE:
5967   case AMDGPU::SRC_SHARED_LIMIT_LO:
5968   case AMDGPU::SRC_SHARED_LIMIT:
5969   case AMDGPU::SRC_PRIVATE_BASE_LO:
5970   case AMDGPU::SRC_PRIVATE_BASE:
5971   case AMDGPU::SRC_PRIVATE_LIMIT_LO:
5972   case AMDGPU::SRC_PRIVATE_LIMIT:
5973     return isGFX9Plus();
5974   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5975     return isGFX9Plus() && !isGFX11Plus();
5976   case AMDGPU::TBA:
5977   case AMDGPU::TBA_LO:
5978   case AMDGPU::TBA_HI:
5979   case AMDGPU::TMA:
5980   case AMDGPU::TMA_LO:
5981   case AMDGPU::TMA_HI:
5982     return !isGFX9Plus();
5983   case AMDGPU::XNACK_MASK:
5984   case AMDGPU::XNACK_MASK_LO:
5985   case AMDGPU::XNACK_MASK_HI:
5986     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5987   case AMDGPU::SGPR_NULL:
5988     return isGFX10Plus();
5989   default:
5990     break;
5991   }
5992 
5993   if (isCI())
5994     return true;
5995 
5996   if (isSI() || isGFX10Plus()) {
5997     // No flat_scr on SI.
5998     // On GFX10Plus flat scratch is not a valid register operand and can only be
5999     // accessed with s_setreg/s_getreg.
6000     switch (RegNo) {
6001     case AMDGPU::FLAT_SCR:
6002     case AMDGPU::FLAT_SCR_LO:
6003     case AMDGPU::FLAT_SCR_HI:
6004       return false;
6005     default:
6006       return true;
6007     }
6008   }
6009 
6010   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6011   // SI/CI have.
6012   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6013     return hasSGPR102_SGPR103();
6014 
6015   return true;
6016 }
6017 
6018 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6019                                           StringRef Mnemonic,
6020                                           OperandMode Mode) {
6021   ParseStatus Res = parseVOPD(Operands);
6022   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6023     return Res;
6024 
6025   // Try to parse with a custom parser
6026   Res = MatchOperandParserImpl(Operands, Mnemonic);
6027 
6028   // If we successfully parsed the operand or if there was an error parsing,
6029   // we are done.
6030   //
6031   // If we are parsing after we reach EndOfStatement then this means we
6032   // are appending default values to the Operands list.  This is only done
6033   // by a custom parser, so we shouldn't continue on to the generic parsing.
6034   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6035     return Res;
6036 
6037   SMLoc RBraceLoc;
6038   SMLoc LBraceLoc = getLoc();
6039   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6040     unsigned Prefix = Operands.size();
6041 
6042     for (;;) {
6043       auto Loc = getLoc();
6044       Res = parseReg(Operands);
6045       if (Res.isNoMatch())
6046         Error(Loc, "expected a register");
6047       if (!Res.isSuccess())
6048         return ParseStatus::Failure;
6049 
6050       RBraceLoc = getLoc();
6051       if (trySkipToken(AsmToken::RBrac))
6052         break;
6053 
6054       if (!skipToken(AsmToken::Comma,
6055                      "expected a comma or a closing square bracket"))
6056         return ParseStatus::Failure;
6057     }
6058 
6059     if (Operands.size() - Prefix > 1) {
6060       Operands.insert(Operands.begin() + Prefix,
6061                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6062       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6063     }
6064 
6065     return ParseStatus::Success;
6066   }
6067 
6068   return parseRegOrImm(Operands);
6069 }
6070 
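// Strip a forced-encoding suffix from the mnemonic and record it, e.g.
// "v_add_f32_e64" is parsed as "v_add_f32" with the 64-bit (VOP3) encoding
// forced, and a "_sdwa" suffix forces the SDWA form.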
6071 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6072   // Clear any forced encodings from the previous instruction.
6073   setForcedEncodingSize(0);
6074   setForcedDPP(false);
6075   setForcedSDWA(false);
6076 
6077   if (Name.ends_with("_e64_dpp")) {
6078     setForcedDPP(true);
6079     setForcedEncodingSize(64);
6080     return Name.substr(0, Name.size() - 8);
6081   } else if (Name.ends_with("_e64")) {
6082     setForcedEncodingSize(64);
6083     return Name.substr(0, Name.size() - 4);
6084   } else if (Name.ends_with("_e32")) {
6085     setForcedEncodingSize(32);
6086     return Name.substr(0, Name.size() - 4);
6087   } else if (Name.ends_with("_dpp")) {
6088     setForcedDPP(true);
6089     return Name.substr(0, Name.size() - 4);
6090   } else if (Name.ends_with("_sdwa")) {
6091     setForcedSDWA(true);
6092     return Name.substr(0, Name.size() - 5);
6093   }
6094   return Name;
6095 }
6096 
6097 static void applyMnemonicAliases(StringRef &Mnemonic,
6098                                  const FeatureBitset &Features,
6099                                  unsigned VariantID);
6100 
6101 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6102                                        StringRef Name,
6103                                        SMLoc NameLoc, OperandVector &Operands) {
6104   // Add the instruction mnemonic
6105   Name = parseMnemonicSuffix(Name);
6106 
6107   // If the target architecture uses MnemonicAlias, call it here to parse
6108   // operands correctly.
6109   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6110 
6111   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6112 
6113   bool IsMIMG = Name.starts_with("image_");
6114 
6115   while (!trySkipToken(AsmToken::EndOfStatement)) {
6116     OperandMode Mode = OperandMode_Default;
6117     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6118       Mode = OperandMode_NSA;
6119     ParseStatus Res = parseOperand(Operands, Name, Mode);
6120 
6121     if (!Res.isSuccess()) {
6122       checkUnsupportedInstruction(Name, NameLoc);
6123       if (!Parser.hasPendingError()) {
6124         // FIXME: use real operand location rather than the current location.
6125         StringRef Msg = Res.isFailure() ? "failed parsing operand."
6126                                         : "not a valid operand.";
6127         Error(getLoc(), Msg);
6128       }
6129       while (!trySkipToken(AsmToken::EndOfStatement)) {
6130         lex();
6131       }
6132       return true;
6133     }
6134 
6135     // Eat the comma or space if there is one.
6136     trySkipToken(AsmToken::Comma);
6137   }
6138 
6139   return false;
6140 }
6141 
6142 //===----------------------------------------------------------------------===//
6143 // Utility functions
6144 //===----------------------------------------------------------------------===//
6145 
6146 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6147                                           OperandVector &Operands) {
6148   SMLoc S = getLoc();
6149   if (!trySkipId(Name))
6150     return ParseStatus::NoMatch;
6151 
6152   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6153   return ParseStatus::Success;
6154 }
6155 
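// Parse an operand of the form "<prefix>:<expr>", e.g. "offset:16", and
// return its integer value.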
6156 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6157                                                 int64_t &IntVal) {
6158 
6159   if (!trySkipId(Prefix, AsmToken::Colon))
6160     return ParseStatus::NoMatch;
6161 
6162   return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6163 }
6164 
6165 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6166     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6167     std::function<bool(int64_t &)> ConvertResult) {
6168   SMLoc S = getLoc();
6169   int64_t Value = 0;
6170 
6171   ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6172   if (!Res.isSuccess())
6173     return Res;
6174 
6175   if (ConvertResult && !ConvertResult(Value)) {
6176     Error(S, "invalid " + StringRef(Prefix) + " value.");
6177   }
6178 
6179   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6180   return ParseStatus::Success;
6181 }
6182 
6183 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6184     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6185     bool (*ConvertResult)(int64_t &)) {
6186   SMLoc S = getLoc();
6187   if (!trySkipId(Prefix, AsmToken::Colon))
6188     return ParseStatus::NoMatch;
6189 
6190   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6191     return ParseStatus::Failure;
6192 
6193   unsigned Val = 0;
6194   const unsigned MaxSize = 4;
6195 
6196   // FIXME: How to verify the number of elements matches the number of src
6197   // operands?
6198   for (int I = 0; ; ++I) {
6199     int64_t Op;
6200     SMLoc Loc = getLoc();
6201     if (!parseExpr(Op))
6202       return ParseStatus::Failure;
6203 
6204     if (Op != 0 && Op != 1)
6205       return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6206 
6207     Val |= (Op << I);
6208 
6209     if (trySkipToken(AsmToken::RBrac))
6210       break;
6211 
6212     if (I + 1 == MaxSize)
6213       return Error(getLoc(), "expected a closing square bracket");
6214 
6215     if (!skipToken(AsmToken::Comma, "expected a comma"))
6216       return ParseStatus::Failure;
6217   }
6218 
6219   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6220   return ParseStatus::Success;
6221 }
6222 
6223 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6224                                            OperandVector &Operands,
6225                                            AMDGPUOperand::ImmTy ImmTy) {
6226   int64_t Bit;
6227   SMLoc S = getLoc();
6228 
6229   if (trySkipId(Name)) {
6230     Bit = 1;
6231   } else if (trySkipId("no", Name)) {
6232     Bit = 0;
6233   } else {
6234     return ParseStatus::NoMatch;
6235   }
6236 
6237   if (Name == "r128" && !hasMIMG_R128())
6238     return Error(S, "r128 modifier is not supported on this GPU");
6239   if (Name == "a16" && !hasA16())
6240     return Error(S, "a16 modifier is not supported on this GPU");
6241 
6242   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6243     ImmTy = AMDGPUOperand::ImmTyR128A16;
6244 
6245   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6246   return ParseStatus::Success;
6247 }
6248 
6249 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6250                                       bool &Disabling) const {
6251   Disabling = Id.consume_front("no");
6252 
6253   if (isGFX940() && !Mnemo.starts_with("s_")) {
6254     return StringSwitch<unsigned>(Id)
6255         .Case("nt", AMDGPU::CPol::NT)
6256         .Case("sc0", AMDGPU::CPol::SC0)
6257         .Case("sc1", AMDGPU::CPol::SC1)
6258         .Default(0);
6259   }
6260 
6261   return StringSwitch<unsigned>(Id)
6262       .Case("dlc", AMDGPU::CPol::DLC)
6263       .Case("glc", AMDGPU::CPol::GLC)
6264       .Case("scc", AMDGPU::CPol::SCC)
6265       .Case("slc", AMDGPU::CPol::SLC)
6266       .Default(0);
6267 }
6268 
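// Parse the cache-policy operand. On GFX12+ it is a combination of a temporal
// hint and a scope, e.g. "th:TH_LOAD_NT scope:SCOPE_SYS"; on earlier targets
// it is a set of named bits such as "glc slc dlc" (or "sc0 sc1 nt" on GFX940
// for non-SMEM instructions).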
6269 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6270   if (isGFX12Plus()) {
6271     SMLoc StringLoc = getLoc();
6272 
6273     int64_t CPolVal = 0;
6274     ParseStatus ResTH = ParseStatus::NoMatch;
6275     ParseStatus ResScope = ParseStatus::NoMatch;
6276 
6277     for (;;) {
6278       if (ResTH.isNoMatch()) {
6279         int64_t TH;
6280         ResTH = parseTH(Operands, TH);
6281         if (ResTH.isFailure())
6282           return ResTH;
6283         if (ResTH.isSuccess()) {
6284           CPolVal |= TH;
6285           continue;
6286         }
6287       }
6288 
6289       if (ResScope.isNoMatch()) {
6290         int64_t Scope;
6291         ResScope = parseScope(Operands, Scope);
6292         if (ResScope.isFailure())
6293           return ResScope;
6294         if (ResScope.isSuccess()) {
6295           CPolVal |= Scope;
6296           continue;
6297         }
6298       }
6299 
6300       break;
6301     }
6302 
6303     if (ResTH.isNoMatch() && ResScope.isNoMatch())
6304       return ParseStatus::NoMatch;
6305 
6306     Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6307                                                 AMDGPUOperand::ImmTyCPol));
6308     return ParseStatus::Success;
6309   }
6310 
6311   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6312   SMLoc OpLoc = getLoc();
6313   unsigned Enabled = 0, Seen = 0;
6314   for (;;) {
6315     SMLoc S = getLoc();
6316     bool Disabling;
6317     unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6318     if (!CPol)
6319       break;
6320 
6321     lex();
6322 
6323     if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6324       return Error(S, "dlc modifier is not supported on this GPU");
6325 
6326     if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6327       return Error(S, "scc modifier is not supported on this GPU");
6328 
6329     if (Seen & CPol)
6330       return Error(S, "duplicate cache policy modifier");
6331 
6332     if (!Disabling)
6333       Enabled |= CPol;
6334 
6335     Seen |= CPol;
6336   }
6337 
6338   if (!Seen)
6339     return ParseStatus::NoMatch;
6340 
6341   Operands.push_back(
6342       AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6343   return ParseStatus::Success;
6344 }
6345 
6346 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6347                                         int64_t &Scope) {
6348   Scope = AMDGPU::CPol::SCOPE_CU; // default
6349 
6350   StringRef Value;
6351   SMLoc StringLoc;
6352   ParseStatus Res;
6353 
6354   Res = parseStringWithPrefix("scope", Value, StringLoc);
6355   if (!Res.isSuccess())
6356     return Res;
6357 
6358   Scope = StringSwitch<int64_t>(Value)
6359               .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6360               .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6361               .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6362               .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6363               .Default(0xffffffff);
6364 
6365   if (Scope == 0xffffffff)
6366     return Error(StringLoc, "invalid scope value");
6367 
6368   return ParseStatus::Success;
6369 }
6370 
6371 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6372   TH = AMDGPU::CPol::TH_RT; // default
6373 
6374   StringRef Value;
6375   SMLoc StringLoc;
6376   ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6377   if (!Res.isSuccess())
6378     return Res;
6379 
6380   if (Value == "TH_DEFAULT")
6381     TH = AMDGPU::CPol::TH_RT;
6382   else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6383            Value == "TH_LOAD_NT_WB") {
6384     return Error(StringLoc, "invalid th value");
6385   } else if (Value.starts_with("TH_ATOMIC_")) {
6386     Value = Value.drop_front(10);
6387     TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6388   } else if (Value.starts_with("TH_LOAD_")) {
6389     Value = Value.drop_front(8);
6390     TH = AMDGPU::CPol::TH_TYPE_LOAD;
6391   } else if (Value.starts_with("TH_STORE_")) {
6392     Value = Value.drop_front(9);
6393     TH = AMDGPU::CPol::TH_TYPE_STORE;
6394   } else {
6395     return Error(StringLoc, "invalid th value");
6396   }
6397 
6398   if (Value == "BYPASS")
6399     TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6400 
6401   if (TH != 0) {
6402     if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
6403       TH |= StringSwitch<int64_t>(Value)
6404                 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6405                 .Case("RT", AMDGPU::CPol::TH_RT)
6406                 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6407                 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
6408                 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6409                                        AMDGPU::CPol::TH_ATOMIC_RETURN)
6410                 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
6411                 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6412                                         AMDGPU::CPol::TH_ATOMIC_NT)
6413                 .Default(0xffffffff);
6414     else
6415       TH |= StringSwitch<int64_t>(Value)
6416                 .Case("RT", AMDGPU::CPol::TH_RT)
6417                 .Case("NT", AMDGPU::CPol::TH_NT)
6418                 .Case("HT", AMDGPU::CPol::TH_HT)
6419                 .Case("LU", AMDGPU::CPol::TH_LU)
6420                 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6421                 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6422                 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6423                 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6424                 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6425                 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6426                 .Default(0xffffffff);
6427   }
6428 
6429   if (TH == 0xffffffff)
6430     return Error(StringLoc, "invalid th value");
6431 
6432   return ParseStatus::Success;
6433 }
6434 
6435 static void addOptionalImmOperand(
6436   MCInst& Inst, const OperandVector& Operands,
6437   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6438   AMDGPUOperand::ImmTy ImmT,
6439   int64_t Default = 0) {
6440   auto i = OptionalIdx.find(ImmT);
6441   if (i != OptionalIdx.end()) {
6442     unsigned Idx = i->second;
6443     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6444   } else {
6445     Inst.addOperand(MCOperand::createImm(Default));
6446   }
6447 }
6448 
6449 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6450                                                    StringRef &Value,
6451                                                    SMLoc &StringLoc) {
6452   if (!trySkipId(Prefix, AsmToken::Colon))
6453     return ParseStatus::NoMatch;
6454 
6455   StringLoc = getLoc();
6456   return parseId(Value, "expected an identifier") ? ParseStatus::Success
6457                                                   : ParseStatus::Failure;
6458 }
6459 
6460 //===----------------------------------------------------------------------===//
6461 // MTBUF format
6462 //===----------------------------------------------------------------------===//
6463 
6464 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6465                                   int64_t MaxVal,
6466                                   int64_t &Fmt) {
6467   int64_t Val;
6468   SMLoc Loc = getLoc();
6469 
6470   auto Res = parseIntWithPrefix(Pref, Val);
6471   if (Res.isFailure())
6472     return false;
6473   if (Res.isNoMatch())
6474     return true;
6475 
6476   if (Val < 0 || Val > MaxVal) {
6477     Error(Loc, Twine("out of range ", StringRef(Pref)));
6478     return false;
6479   }
6480 
6481   Fmt = Val;
6482   return true;
6483 }
6484 
6485 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6486 // values to live in a joint format operand in the MCInst encoding.
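// The two fields may appear in either order, e.g. "dfmt:4, nfmt:2" or
// "nfmt:2, dfmt:4", and either may be omitted; a missing field takes its
// default value.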
6487 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6488   using namespace llvm::AMDGPU::MTBUFFormat;
6489 
6490   int64_t Dfmt = DFMT_UNDEF;
6491   int64_t Nfmt = NFMT_UNDEF;
6492 
6493   // dfmt and nfmt can appear in either order, and each is optional.
6494   for (int I = 0; I < 2; ++I) {
6495     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6496       return ParseStatus::Failure;
6497 
6498     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6499       return ParseStatus::Failure;
6500 
6501     // Skip optional comma between dfmt/nfmt
6502     // but guard against 2 commas following each other.
6503     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6504         !peekToken().is(AsmToken::Comma)) {
6505       trySkipToken(AsmToken::Comma);
6506     }
6507   }
6508 
6509   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6510     return ParseStatus::NoMatch;
6511 
6512   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6513   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6514 
6515   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6516   return ParseStatus::Success;
6517 }
6518 
6519 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6520   using namespace llvm::AMDGPU::MTBUFFormat;
6521 
6522   int64_t Fmt = UFMT_UNDEF;
6523 
6524   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6525     return ParseStatus::Failure;
6526 
6527   if (Fmt == UFMT_UNDEF)
6528     return ParseStatus::NoMatch;
6529 
6530   Format = Fmt;
6531   return ParseStatus::Success;
6532 }
6533 
6534 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6535                                     int64_t &Nfmt,
6536                                     StringRef FormatStr,
6537                                     SMLoc Loc) {
6538   using namespace llvm::AMDGPU::MTBUFFormat;
6539   int64_t Format;
6540 
6541   Format = getDfmt(FormatStr);
6542   if (Format != DFMT_UNDEF) {
6543     Dfmt = Format;
6544     return true;
6545   }
6546 
6547   Format = getNfmt(FormatStr, getSTI());
6548   if (Format != NFMT_UNDEF) {
6549     Nfmt = Format;
6550     return true;
6551   }
6552 
6553   Error(Loc, "unsupported format");
6554   return false;
6555 }
6556 
6557 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6558                                                       SMLoc FormatLoc,
6559                                                       int64_t &Format) {
6560   using namespace llvm::AMDGPU::MTBUFFormat;
6561 
6562   int64_t Dfmt = DFMT_UNDEF;
6563   int64_t Nfmt = NFMT_UNDEF;
6564   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6565     return ParseStatus::Failure;
6566 
6567   if (trySkipToken(AsmToken::Comma)) {
6568     StringRef Str;
6569     SMLoc Loc = getLoc();
6570     if (!parseId(Str, "expected a format string") ||
6571         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6572       return ParseStatus::Failure;
6573     if (Dfmt == DFMT_UNDEF)
6574       return Error(Loc, "duplicate numeric format");
6575     if (Nfmt == NFMT_UNDEF)
6576       return Error(Loc, "duplicate data format");
6577   }
6578 
6579   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6580   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6581 
6582   if (isGFX10Plus()) {
6583     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6584     if (Ufmt == UFMT_UNDEF)
6585       return Error(FormatLoc, "unsupported format");
6586     Format = Ufmt;
6587   } else {
6588     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6589   }
6590 
6591   return ParseStatus::Success;
6592 }
6593 
6594 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6595                                                         SMLoc Loc,
6596                                                         int64_t &Format) {
6597   using namespace llvm::AMDGPU::MTBUFFormat;
6598 
6599   auto Id = getUnifiedFormat(FormatStr, getSTI());
6600   if (Id == UFMT_UNDEF)
6601     return ParseStatus::NoMatch;
6602 
6603   if (!isGFX10Plus())
6604     return Error(Loc, "unified format is not supported on this GPU");
6605 
6606   Format = Id;
6607   return ParseStatus::Success;
6608 }
6609 
6610 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6611   using namespace llvm::AMDGPU::MTBUFFormat;
6612   SMLoc Loc = getLoc();
6613 
6614   if (!parseExpr(Format))
6615     return ParseStatus::Failure;
6616   if (!isValidFormatEncoding(Format, getSTI()))
6617     return Error(Loc, "out of range format");
6618 
6619   return ParseStatus::Success;
6620 }
6621 
6622 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6623   using namespace llvm::AMDGPU::MTBUFFormat;
6624 
6625   if (!trySkipId("format", AsmToken::Colon))
6626     return ParseStatus::NoMatch;
6627 
6628   if (trySkipToken(AsmToken::LBrac)) {
6629     StringRef FormatStr;
6630     SMLoc Loc = getLoc();
6631     if (!parseId(FormatStr, "expected a format string"))
6632       return ParseStatus::Failure;
6633 
6634     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6635     if (Res.isNoMatch())
6636       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6637     if (!Res.isSuccess())
6638       return Res;
6639 
6640     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6641       return ParseStatus::Failure;
6642 
6643     return ParseStatus::Success;
6644   }
6645 
6646   return parseNumericFormat(Format);
6647 }
6648 
6649 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6650   using namespace llvm::AMDGPU::MTBUFFormat;
6651 
6652   int64_t Format = getDefaultFormatEncoding(getSTI());
6653   ParseStatus Res;
6654   SMLoc Loc = getLoc();
6655 
6656   // Parse legacy format syntax.
6657   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6658   if (Res.isFailure())
6659     return Res;
6660 
6661   bool FormatFound = Res.isSuccess();
6662 
6663   Operands.push_back(
6664     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6665 
6666   if (FormatFound)
6667     trySkipToken(AsmToken::Comma);
6668 
6669   if (isToken(AsmToken::EndOfStatement)) {
6670     // We are expecting an soffset operand,
6671     // but let the matcher handle the error.
6672     return ParseStatus::Success;
6673   }
6674 
6675   // Parse soffset.
6676   Res = parseRegOrImm(Operands);
6677   if (!Res.isSuccess())
6678     return Res;
6679 
6680   trySkipToken(AsmToken::Comma);
6681 
6682   if (!FormatFound) {
6683     Res = parseSymbolicOrNumericFormat(Format);
6684     if (Res.isFailure())
6685       return Res;
6686     if (Res.isSuccess()) {
6687       auto Size = Operands.size();
6688       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6689       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6690       Op.setImm(Format);
6691     }
6692     return ParseStatus::Success;
6693   }
6694 
6695   if (isId("format") && peekToken().is(AsmToken::Colon))
6696     return Error(getLoc(), "duplicate format");
6697   return ParseStatus::Success;
6698 }
6699 
6700 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6701   ParseStatus Res =
6702       parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6703   if (Res.isNoMatch()) {
6704     Res = parseIntWithPrefix("inst_offset", Operands,
6705                              AMDGPUOperand::ImmTyInstOffset);
6706   }
6707   return Res;
6708 }
6709 
6710 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6711   ParseStatus Res =
6712       parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6713   if (Res.isNoMatch())
6714     Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6715   return Res;
6716 }
6717 
6718 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6719   ParseStatus Res =
6720       parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6721   if (Res.isNoMatch()) {
6722     Res =
6723         parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
6724   }
6725   return Res;
6726 }
6727 
6728 //===----------------------------------------------------------------------===//
6729 // Exp
6730 //===----------------------------------------------------------------------===//
6731 
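// Convert a parsed export instruction. Each source that is a register
// contributes to the 'en' mask (one bit per source, or a 2-bit pair per
// remaining source in compressed mode); a source written as "off" is encoded
// as a null register and leaves its enable bits clear.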
6732 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6733   OptionalImmIndexMap OptionalIdx;
6734 
6735   unsigned OperandIdx[4];
6736   unsigned EnMask = 0;
6737   int SrcIdx = 0;
6738 
6739   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6740     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6741 
6742     // Add the register arguments
6743     if (Op.isReg()) {
6744       assert(SrcIdx < 4);
6745       OperandIdx[SrcIdx] = Inst.size();
6746       Op.addRegOperands(Inst, 1);
6747       ++SrcIdx;
6748       continue;
6749     }
6750 
6751     if (Op.isOff()) {
6752       assert(SrcIdx < 4);
6753       OperandIdx[SrcIdx] = Inst.size();
6754       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6755       ++SrcIdx;
6756       continue;
6757     }
6758 
6759     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6760       Op.addImmOperands(Inst, 1);
6761       continue;
6762     }
6763 
6764     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6765       continue;
6766 
6767     // Handle optional arguments
6768     OptionalIdx[Op.getImmTy()] = i;
6769   }
6770 
6771   assert(SrcIdx == 4);
6772 
6773   bool Compr = false;
6774   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6775     Compr = true;
6776     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6777     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6778     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6779   }
6780 
6781   for (auto i = 0; i < SrcIdx; ++i) {
6782     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6783       EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
6784     }
6785   }
6786 
6787   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6788   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6789 
6790   Inst.addOperand(MCOperand::createImm(EnMask));
6791 }
6792 
6793 //===----------------------------------------------------------------------===//
6794 // s_waitcnt
6795 //===----------------------------------------------------------------------===//
6796 
6797 static bool
6798 encodeCnt(
6799   const AMDGPU::IsaVersion ISA,
6800   int64_t &IntVal,
6801   int64_t CntVal,
6802   bool Saturate,
6803   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6804   unsigned (*decode)(const IsaVersion &Version, unsigned))
6805 {
6806   bool Failed = false;
6807 
6808   IntVal = encode(ISA, IntVal, CntVal);
6809   if (CntVal != decode(ISA, IntVal)) {
6810     if (Saturate) {
6811       IntVal = encode(ISA, IntVal, -1);
6812     } else {
6813       Failed = true;
6814     }
6815   }
6816   return Failed;
6817 }
6818 
6819 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6820 
6821   SMLoc CntLoc = getLoc();
6822   StringRef CntName = getTokenStr();
6823 
6824   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6825       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6826     return false;
6827 
6828   int64_t CntVal;
6829   SMLoc ValLoc = getLoc();
6830   if (!parseExpr(CntVal))
6831     return false;
6832 
6833   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6834 
6835   bool Failed = true;
6836   bool Sat = CntName.ends_with("_sat");
6837 
6838   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6839     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6840   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6841     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6842   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6843     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6844   } else {
6845     Error(CntLoc, "invalid counter name " + CntName);
6846     return false;
6847   }
6848 
6849   if (Failed) {
6850     Error(ValLoc, "too large value for " + CntName);
6851     return false;
6852   }
6853 
6854   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6855     return false;
6856 
6857   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6858     if (isToken(AsmToken::EndOfStatement)) {
6859       Error(getLoc(), "expected a counter name");
6860       return false;
6861     }
6862   }
6863 
6864   return true;
6865 }
6866 
6867 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
6868   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6869   int64_t Waitcnt = getWaitcntBitMask(ISA);
6870   SMLoc S = getLoc();
6871 
6872   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6873     while (!isToken(AsmToken::EndOfStatement)) {
6874       if (!parseCnt(Waitcnt))
6875         return ParseStatus::Failure;
6876     }
6877   } else {
6878     if (!parseExpr(Waitcnt))
6879       return ParseStatus::Failure;
6880   }
6881 
6882   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6883   return ParseStatus::Success;
6884 }
6885 
6886 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6887   SMLoc FieldLoc = getLoc();
6888   StringRef FieldName = getTokenStr();
6889   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6890       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6891     return false;
6892 
6893   SMLoc ValueLoc = getLoc();
6894   StringRef ValueName = getTokenStr();
6895   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6896       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6897     return false;
6898 
6899   unsigned Shift;
6900   if (FieldName == "instid0") {
6901     Shift = 0;
6902   } else if (FieldName == "instskip") {
6903     Shift = 4;
6904   } else if (FieldName == "instid1") {
6905     Shift = 7;
6906   } else {
6907     Error(FieldLoc, "invalid field name " + FieldName);
6908     return false;
6909   }
6910 
6911   int Value;
6912   if (Shift == 4) {
6913     // Parse values for instskip.
6914     Value = StringSwitch<int>(ValueName)
6915                 .Case("SAME", 0)
6916                 .Case("NEXT", 1)
6917                 .Case("SKIP_1", 2)
6918                 .Case("SKIP_2", 3)
6919                 .Case("SKIP_3", 4)
6920                 .Case("SKIP_4", 5)
6921                 .Default(-1);
6922   } else {
6923     // Parse values for instid0 and instid1.
6924     Value = StringSwitch<int>(ValueName)
6925                 .Case("NO_DEP", 0)
6926                 .Case("VALU_DEP_1", 1)
6927                 .Case("VALU_DEP_2", 2)
6928                 .Case("VALU_DEP_3", 3)
6929                 .Case("VALU_DEP_4", 4)
6930                 .Case("TRANS32_DEP_1", 5)
6931                 .Case("TRANS32_DEP_2", 6)
6932                 .Case("TRANS32_DEP_3", 7)
6933                 .Case("FMA_ACCUM_CYCLE_1", 8)
6934                 .Case("SALU_CYCLE_1", 9)
6935                 .Case("SALU_CYCLE_2", 10)
6936                 .Case("SALU_CYCLE_3", 11)
6937                 .Default(-1);
6938   }
6939   if (Value < 0) {
6940     Error(ValueLoc, "invalid value name " + ValueName);
6941     return false;
6942   }
6943 
6944   Delay |= Value << Shift;
6945   return true;
6946 }
6947 
6948 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
6949   int64_t Delay = 0;
6950   SMLoc S = getLoc();
6951 
6952   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6953     do {
6954       if (!parseDelay(Delay))
6955         return ParseStatus::Failure;
6956     } while (trySkipToken(AsmToken::Pipe));
6957   } else {
6958     if (!parseExpr(Delay))
6959       return ParseStatus::Failure;
6960   }
6961 
6962   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6963   return ParseStatus::Success;
6964 }
6965 
6966 bool
6967 AMDGPUOperand::isSWaitCnt() const {
6968   return isImm();
6969 }
6970 
6971 bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
6972 
6973 //===----------------------------------------------------------------------===//
6974 // DepCtr
6975 //===----------------------------------------------------------------------===//
6976 
6977 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6978                                   StringRef DepCtrName) {
6979   switch (ErrorId) {
6980   case OPR_ID_UNKNOWN:
6981     Error(Loc, Twine("invalid counter name ", DepCtrName));
6982     return;
6983   case OPR_ID_UNSUPPORTED:
6984     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6985     return;
6986   case OPR_ID_DUPLICATE:
6987     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6988     return;
6989   case OPR_VAL_INVALID:
6990     Error(Loc, Twine("invalid value for ", DepCtrName));
6991     return;
6992   default:
6993     assert(false);
6994   }
6995 }
6996 
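// Parse one counter term of an s_waitcnt_depctr operand, written as
// "<name>(<value>)", e.g. "depctr_va_vdst(0)"; terms may be separated by '&'
// or ','. (The counter name shown is illustrative; the valid set is
// target-dependent.)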
6997 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6998 
6999   using namespace llvm::AMDGPU::DepCtr;
7000 
7001   SMLoc DepCtrLoc = getLoc();
7002   StringRef DepCtrName = getTokenStr();
7003 
7004   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7005       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7006     return false;
7007 
7008   int64_t ExprVal;
7009   if (!parseExpr(ExprVal))
7010     return false;
7011 
7012   unsigned PrevOprMask = UsedOprMask;
7013   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7014 
7015   if (CntVal < 0) {
7016     depCtrError(DepCtrLoc, CntVal, DepCtrName);
7017     return false;
7018   }
7019 
7020   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7021     return false;
7022 
7023   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7024     if (isToken(AsmToken::EndOfStatement)) {
7025       Error(getLoc(), "expected a counter name");
7026       return false;
7027     }
7028   }
7029 
7030   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7031   DepCtr = (DepCtr & ~CntValMask) | CntVal;
7032   return true;
7033 }
7034 
7035 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7036   using namespace llvm::AMDGPU::DepCtr;
7037 
7038   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7039   SMLoc Loc = getLoc();
7040 
7041   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7042     unsigned UsedOprMask = 0;
7043     while (!isToken(AsmToken::EndOfStatement)) {
7044       if (!parseDepCtr(DepCtr, UsedOprMask))
7045         return ParseStatus::Failure;
7046     }
7047   } else {
7048     if (!parseExpr(DepCtr))
7049       return ParseStatus::Failure;
7050   }
7051 
7052   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7053   return ParseStatus::Success;
7054 }
7055 
7056 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7057 
7058 //===----------------------------------------------------------------------===//
7059 // hwreg
7060 //===----------------------------------------------------------------------===//
7061 
7062 bool
7063 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
7064                                 OperandInfoTy &Offset,
7065                                 OperandInfoTy &Width) {
7066   using namespace llvm::AMDGPU::Hwreg;
7067 
7068   // The register may be specified by name or using a numeric code
7069   HwReg.Loc = getLoc();
7070   if (isToken(AsmToken::Identifier) &&
7071       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7072     HwReg.IsSymbolic = true;
7073     lex(); // skip register name
7074   } else if (!parseExpr(HwReg.Id, "a register name")) {
7075     return false;
7076   }
7077 
7078   if (trySkipToken(AsmToken::RParen))
7079     return true;
7080 
7081   // parse optional params
7082   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7083     return false;
7084 
7085   Offset.Loc = getLoc();
7086   if (!parseExpr(Offset.Id))
7087     return false;
7088 
7089   if (!skipToken(AsmToken::Comma, "expected a comma"))
7090     return false;
7091 
7092   Width.Loc = getLoc();
7093   return parseExpr(Width.Id) &&
7094          skipToken(AsmToken::RParen, "expected a closing parenthesis");
7095 }
7096 
7097 bool
7098 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
7099                                const OperandInfoTy &Offset,
7100                                const OperandInfoTy &Width) {
7101 
7102   using namespace llvm::AMDGPU::Hwreg;
7103 
7104   if (HwReg.IsSymbolic) {
7105     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
7106       Error(HwReg.Loc,
7107             "specified hardware register is not supported on this GPU");
7108       return false;
7109     }
7110   } else {
7111     if (!isValidHwreg(HwReg.Id)) {
7112       Error(HwReg.Loc,
7113             "invalid code of hardware register: only 6-bit values are legal");
7114       return false;
7115     }
7116   }
7117   if (!isValidHwregOffset(Offset.Id)) {
7118     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
7119     return false;
7120   }
7121   if (!isValidHwregWidth(Width.Id)) {
7122     Error(Width.Loc,
7123           "invalid bitfield width: only values from 1 to 32 are legal");
7124     return false;
7125   }
7126   return true;
7127 }
7128 
7129 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7130   using namespace llvm::AMDGPU::Hwreg;
7131 
7132   int64_t ImmVal = 0;
7133   SMLoc Loc = getLoc();
7134 
7135   if (trySkipId("hwreg", AsmToken::LParen)) {
7136     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
7137     OperandInfoTy Offset(OFFSET_DEFAULT_);
7138     OperandInfoTy Width(WIDTH_DEFAULT_);
7139     if (parseHwregBody(HwReg, Offset, Width) &&
7140         validateHwreg(HwReg, Offset, Width)) {
7141       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
7142     } else {
7143       return ParseStatus::Failure;
7144     }
7145   } else if (parseExpr(ImmVal, "a hwreg macro")) {
7146     if (ImmVal < 0 || !isUInt<16>(ImmVal))
7147       return Error(Loc, "invalid immediate: only 16-bit values are legal");
7148   } else {
7149     return ParseStatus::Failure;
7150   }
7151 
7152   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7153   return ParseStatus::Success;
7154 }
7155 
7156 bool AMDGPUOperand::isHwreg() const {
7157   return isImmTy(ImmTyHwreg);
7158 }
7159 
7160 //===----------------------------------------------------------------------===//
7161 // sendmsg
7162 //===----------------------------------------------------------------------===//
7163 
7164 bool
7165 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7166                                   OperandInfoTy &Op,
7167                                   OperandInfoTy &Stream) {
7168   using namespace llvm::AMDGPU::SendMsg;
7169 
7170   Msg.Loc = getLoc();
7171   if (isToken(AsmToken::Identifier) &&
7172       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7173     Msg.IsSymbolic = true;
7174     lex(); // skip message name
7175   } else if (!parseExpr(Msg.Id, "a message name")) {
7176     return false;
7177   }
7178 
7179   if (trySkipToken(AsmToken::Comma)) {
7180     Op.IsDefined = true;
7181     Op.Loc = getLoc();
7182     if (isToken(AsmToken::Identifier) &&
7183         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
7184       lex(); // skip operation name
7185     } else if (!parseExpr(Op.Id, "an operation name")) {
7186       return false;
7187     }
7188 
7189     if (trySkipToken(AsmToken::Comma)) {
7190       Stream.IsDefined = true;
7191       Stream.Loc = getLoc();
7192       if (!parseExpr(Stream.Id))
7193         return false;
7194     }
7195   }
7196 
7197   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7198 }
7199 
7200 bool
7201 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7202                                  const OperandInfoTy &Op,
7203                                  const OperandInfoTy &Stream) {
7204   using namespace llvm::AMDGPU::SendMsg;
7205 
7206   // Validation strictness depends on whether the message is specified
7207   // in symbolic or in numeric form. In the latter case
7208   // only the possibility of encoding the value is checked.
7209   bool Strict = Msg.IsSymbolic;
7210 
7211   if (Strict) {
7212     if (Msg.Id == OPR_ID_UNSUPPORTED) {
7213       Error(Msg.Loc, "specified message id is not supported on this GPU");
7214       return false;
7215     }
7216   } else {
7217     if (!isValidMsgId(Msg.Id, getSTI())) {
7218       Error(Msg.Loc, "invalid message id");
7219       return false;
7220     }
7221   }
7222   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
7223     if (Op.IsDefined) {
7224       Error(Op.Loc, "message does not support operations");
7225     } else {
7226       Error(Msg.Loc, "missing message operation");
7227     }
7228     return false;
7229   }
7230   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
7231     Error(Op.Loc, "invalid operation id");
7232     return false;
7233   }
7234   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
7235       Stream.IsDefined) {
7236     Error(Stream.Loc, "message operation does not support streams");
7237     return false;
7238   }
7239   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
7240     Error(Stream.Loc, "invalid message stream id");
7241     return false;
7242   }
7243   return true;
7244 }
7245 
7246 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7247   using namespace llvm::AMDGPU::SendMsg;
7248 
7249   int64_t ImmVal = 0;
7250   SMLoc Loc = getLoc();
7251 
7252   if (trySkipId("sendmsg", AsmToken::LParen)) {
7253     OperandInfoTy Msg(OPR_ID_UNKNOWN);
7254     OperandInfoTy Op(OP_NONE_);
7255     OperandInfoTy Stream(STREAM_ID_NONE_);
7256     if (parseSendMsgBody(Msg, Op, Stream) &&
7257         validateSendMsg(Msg, Op, Stream)) {
7258       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
7259     } else {
7260       return ParseStatus::Failure;
7261     }
7262   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7263     if (ImmVal < 0 || !isUInt<16>(ImmVal))
7264       return Error(Loc, "invalid immediate: only 16-bit values are legal");
7265   } else {
7266     return ParseStatus::Failure;
7267   }
7268 
7269   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7270   return ParseStatus::Success;
7271 }
7272 
7273 bool AMDGPUOperand::isSendMsg() const {
7274   return isImmTy(ImmTySendMsg);
7275 }
7276 
7277 //===----------------------------------------------------------------------===//
7278 // v_interp
7279 //===----------------------------------------------------------------------===//
7280 
7281 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7282   StringRef Str;
7283   SMLoc S = getLoc();
7284 
7285   if (!parseId(Str))
7286     return ParseStatus::NoMatch;
7287 
7288   int Slot = StringSwitch<int>(Str)
7289     .Case("p10", 0)
7290     .Case("p20", 1)
7291     .Case("p0", 2)
7292     .Default(-1);
7293 
7294   if (Slot == -1)
7295     return Error(S, "invalid interpolation slot");
7296 
7297   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7298                                               AMDGPUOperand::ImmTyInterpSlot));
7299   return ParseStatus::Success;
7300 }
7301 
7302 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7303   StringRef Str;
7304   SMLoc S = getLoc();
7305 
7306   if (!parseId(Str))
7307     return ParseStatus::NoMatch;
7308 
7309   if (!Str.starts_with("attr"))
7310     return Error(S, "invalid interpolation attribute");
7311 
7312   StringRef Chan = Str.take_back(2);
7313   int AttrChan = StringSwitch<int>(Chan)
7314     .Case(".x", 0)
7315     .Case(".y", 1)
7316     .Case(".z", 2)
7317     .Case(".w", 3)
7318     .Default(-1);
7319   if (AttrChan == -1)
7320     return Error(S, "invalid or missing interpolation attribute channel");
7321 
7322   Str = Str.drop_back(2).drop_front(4);
7323 
7324   uint8_t Attr;
7325   if (Str.getAsInteger(10, Attr))
7326     return Error(S, "invalid or missing interpolation attribute number");
7327 
7328   if (Attr > 32)
7329     return Error(S, "out of bounds interpolation attribute number");
7330 
7331   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7332 
7333   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7334                                               AMDGPUOperand::ImmTyInterpAttr));
7335   Operands.push_back(AMDGPUOperand::CreateImm(
7336       this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7337   return ParseStatus::Success;
7338 }
7339 
7340 //===----------------------------------------------------------------------===//
7341 // exp
7342 //===----------------------------------------------------------------------===//
7343 
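// Parse an export target name, e.g. "mrt0", "mrtz", "null", "pos0" or
// "param0"; the set of supported targets depends on the subtarget.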
7344 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7345   using namespace llvm::AMDGPU::Exp;
7346 
7347   StringRef Str;
7348   SMLoc S = getLoc();
7349 
7350   if (!parseId(Str))
7351     return ParseStatus::NoMatch;
7352 
7353   unsigned Id = getTgtId(Str);
7354   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7355     return Error(S, (Id == ET_INVALID)
7356                         ? "invalid exp target"
7357                         : "exp target is not supported on this GPU");
7358 
7359   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7360                                               AMDGPUOperand::ImmTyExpTgt));
7361   return ParseStatus::Success;
7362 }
7363 
7364 //===----------------------------------------------------------------------===//
7365 // parser helpers
7366 //===----------------------------------------------------------------------===//
7367 
7368 bool
7369 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7370   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7371 }
7372 
7373 bool
7374 AMDGPUAsmParser::isId(const StringRef Id) const {
7375   return isId(getToken(), Id);
7376 }
7377 
7378 bool
7379 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7380   return getTokenKind() == Kind;
7381 }
7382 
7383 StringRef AMDGPUAsmParser::getId() const {
7384   return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7385 }
7386 
7387 bool
7388 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7389   if (isId(Id)) {
7390     lex();
7391     return true;
7392   }
7393   return false;
7394 }
7395 
7396 bool
7397 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7398   if (isToken(AsmToken::Identifier)) {
7399     StringRef Tok = getTokenStr();
7400     if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7401       lex();
7402       return true;
7403     }
7404   }
7405   return false;
7406 }
7407 
7408 bool
7409 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7410   if (isId(Id) && peekToken().is(Kind)) {
7411     lex();
7412     lex();
7413     return true;
7414   }
7415   return false;
7416 }
7417 
7418 bool
7419 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7420   if (isToken(Kind)) {
7421     lex();
7422     return true;
7423   }
7424   return false;
7425 }
7426 
7427 bool
7428 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7429                            const StringRef ErrMsg) {
7430   if (!trySkipToken(Kind)) {
7431     Error(getLoc(), ErrMsg);
7432     return false;
7433   }
7434   return true;
7435 }
7436 
7437 bool
7438 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7439   SMLoc S = getLoc();
7440 
7441   const MCExpr *Expr;
7442   if (Parser.parseExpression(Expr))
7443     return false;
7444 
7445   if (Expr->evaluateAsAbsolute(Imm))
7446     return true;
7447 
7448   if (Expected.empty()) {
7449     Error(S, "expected absolute expression");
7450   } else {
7451     Error(S, Twine("expected ", Expected) +
7452              Twine(" or an absolute expression"));
7453   }
7454   return false;
7455 }
7456 
7457 bool
7458 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7459   SMLoc S = getLoc();
7460 
7461   const MCExpr *Expr;
7462   if (Parser.parseExpression(Expr))
7463     return false;
7464 
7465   int64_t IntVal;
7466   if (Expr->evaluateAsAbsolute(IntVal)) {
7467     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7468   } else {
7469     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7470   }
7471   return true;
7472 }
7473 
7474 bool
7475 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7476   if (isToken(AsmToken::String)) {
7477     Val = getToken().getStringContents();
7478     lex();
7479     return true;
7480   } else {
7481     Error(getLoc(), ErrMsg);
7482     return false;
7483   }
7484 }
7485 
7486 bool
7487 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7488   if (isToken(AsmToken::Identifier)) {
7489     Val = getTokenStr();
7490     lex();
7491     return true;
7492   } else {
7493     if (!ErrMsg.empty())
7494       Error(getLoc(), ErrMsg);
7495     return false;
7496   }
7497 }
7498 
7499 AsmToken
7500 AMDGPUAsmParser::getToken() const {
7501   return Parser.getTok();
7502 }
7503 
7504 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7505   return isToken(AsmToken::EndOfStatement)
7506              ? getToken()
7507              : getLexer().peekTok(ShouldSkipSpace);
7508 }
7509 
7510 void
7511 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7512   auto TokCount = getLexer().peekTokens(Tokens);
7513 
7514   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7515     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7516 }
7517 
7518 AsmToken::TokenKind
7519 AMDGPUAsmParser::getTokenKind() const {
7520   return getLexer().getKind();
7521 }
7522 
7523 SMLoc
7524 AMDGPUAsmParser::getLoc() const {
7525   return getToken().getLoc();
7526 }
7527 
7528 StringRef
7529 AMDGPUAsmParser::getTokenStr() const {
7530   return getToken().getString();
7531 }
7532 
7533 void
7534 AMDGPUAsmParser::lex() {
7535   Parser.Lex();
7536 }
7537 
7538 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7539   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7540 }
7541 
7542 SMLoc
7543 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7544                                const OperandVector &Operands) const {
7545   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7546     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7547     if (Test(Op))
7548       return Op.getStartLoc();
7549   }
7550   return getInstLoc(Operands);
7551 }
7552 
7553 SMLoc
7554 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7555                            const OperandVector &Operands) const {
7556   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7557   return getOperandLoc(Test, Operands);
7558 }
7559 
7560 SMLoc
7561 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7562                            const OperandVector &Operands) const {
7563   auto Test = [=](const AMDGPUOperand& Op) {
7564     return Op.isRegKind() && Op.getReg() == Reg;
7565   };
7566   return getOperandLoc(Test, Operands);
7567 }
7568 
7569 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7570                                  bool SearchMandatoryLiterals) const {
7571   auto Test = [](const AMDGPUOperand& Op) {
7572     return Op.IsImmKindLiteral() || Op.isExpr();
7573   };
7574   SMLoc Loc = getOperandLoc(Test, Operands);
7575   if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7576     Loc = getMandatoryLitLoc(Operands);
7577   return Loc;
7578 }
7579 
7580 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7581   auto Test = [](const AMDGPUOperand &Op) {
7582     return Op.IsImmKindMandatoryLiteral();
7583   };
7584   return getOperandLoc(Test, Operands);
7585 }
7586 
7587 SMLoc
7588 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7589   auto Test = [](const AMDGPUOperand& Op) {
7590     return Op.isImmKindConst();
7591   };
7592   return getOperandLoc(Test, Operands);
7593 }
7594 
7595 //===----------------------------------------------------------------------===//
7596 // swizzle
7597 //===----------------------------------------------------------------------===//
7598 
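// A ds_swizzle_b32 pattern can be written either as a raw 16-bit offset or as
// one of the swizzle() macros parsed below, e.g.
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 16)
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
// BROADCAST, SWAP and REVERSE are all encoded as special cases of
// BITMASK_PERM.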
7599 LLVM_READNONE
7600 static unsigned
7601 encodeBitmaskPerm(const unsigned AndMask,
7602                   const unsigned OrMask,
7603                   const unsigned XorMask) {
7604   using namespace llvm::AMDGPU::Swizzle;
7605 
7606   return BITMASK_PERM_ENC |
7607          (AndMask << BITMASK_AND_SHIFT) |
7608          (OrMask  << BITMASK_OR_SHIFT)  |
7609          (XorMask << BITMASK_XOR_SHIFT);
7610 }
7611 
7612 bool
7613 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7614                                      const unsigned MinVal,
7615                                      const unsigned MaxVal,
7616                                      const StringRef ErrMsg,
7617                                      SMLoc &Loc) {
7618   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7619     return false;
7620   }
7621   Loc = getLoc();
7622   if (!parseExpr(Op)) {
7623     return false;
7624   }
7625   if (Op < MinVal || Op > MaxVal) {
7626     Error(Loc, ErrMsg);
7627     return false;
7628   }
7629 
7630   return true;
7631 }
7632 
7633 bool
7634 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7635                                       const unsigned MinVal,
7636                                       const unsigned MaxVal,
7637                                       const StringRef ErrMsg) {
7638   SMLoc Loc;
7639   for (unsigned i = 0; i < OpNum; ++i) {
7640     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7641       return false;
7642   }
7643 
7644   return true;
7645 }
7646 
7647 bool
7648 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7649   using namespace llvm::AMDGPU::Swizzle;
7650 
7651   int64_t Lane[LANE_NUM];
7652   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7653                            "expected a 2-bit lane id")) {
7654     Imm = QUAD_PERM_ENC;
7655     for (unsigned I = 0; I < LANE_NUM; ++I) {
7656       Imm |= Lane[I] << (LANE_SHIFT * I);
7657     }
7658     return true;
7659   }
7660   return false;
7661 }
7662 
7663 bool
7664 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7665   using namespace llvm::AMDGPU::Swizzle;
7666 
7667   SMLoc Loc;
7668   int64_t GroupSize;
7669   int64_t LaneIdx;
7670 
7671   if (!parseSwizzleOperand(GroupSize,
7672                            2, 32,
7673                            "group size must be in the interval [2,32]",
7674                            Loc)) {
7675     return false;
7676   }
7677   if (!isPowerOf2_64(GroupSize)) {
7678     Error(Loc, "group size must be a power of two");
7679     return false;
7680   }
7681   if (parseSwizzleOperand(LaneIdx,
7682                           0, GroupSize - 1,
7683                           "lane id must be in the interval [0,group size - 1]",
7684                           Loc)) {
7685     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7686     return true;
7687   }
7688   return false;
7689 }
7690 
7691 bool
7692 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7693   using namespace llvm::AMDGPU::Swizzle;
7694 
7695   SMLoc Loc;
7696   int64_t GroupSize;
7697 
7698   if (!parseSwizzleOperand(GroupSize,
7699                            2, 32,
7700                            "group size must be in the interval [2,32]",
7701                            Loc)) {
7702     return false;
7703   }
7704   if (!isPowerOf2_64(GroupSize)) {
7705     Error(Loc, "group size must be a power of two");
7706     return false;
7707   }
7708 
7709   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7710   return true;
7711 }
7712 
7713 bool
7714 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7715   using namespace llvm::AMDGPU::Swizzle;
7716 
7717   SMLoc Loc;
7718   int64_t GroupSize;
7719 
7720   if (!parseSwizzleOperand(GroupSize,
7721                            1, 16,
7722                            "group size must be in the interval [1,16]",
7723                            Loc)) {
7724     return false;
7725   }
7726   if (!isPowerOf2_64(GroupSize)) {
7727     Error(Loc, "group size must be a power of two");
7728     return false;
7729   }
7730 
7731   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7732   return true;
7733 }
7734 
7735 bool
7736 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7737   using namespace llvm::AMDGPU::Swizzle;
7738 
7739   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7740     return false;
7741   }
7742 
7743   StringRef Ctl;
7744   SMLoc StrLoc = getLoc();
7745   if (!parseString(Ctl)) {
7746     return false;
7747   }
7748   if (Ctl.size() != BITMASK_WIDTH) {
7749     Error(StrLoc, "expected a 5-character mask");
7750     return false;
7751   }
7752 
7753   unsigned AndMask = 0;
7754   unsigned OrMask = 0;
7755   unsigned XorMask = 0;
7756 
7757   for (size_t i = 0; i < Ctl.size(); ++i) {
7758     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7759     switch(Ctl[i]) {
7760     default:
7761       Error(StrLoc, "invalid mask");
7762       return false;
7763     case '0':
7764       break;
7765     case '1':
7766       OrMask |= Mask;
7767       break;
7768     case 'p':
7769       AndMask |= Mask;
7770       break;
7771     case 'i':
7772       AndMask |= Mask;
7773       XorMask |= Mask;
7774       break;
7775     }
7776   }
7777 
7778   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7779   return true;
7780 }
7781 
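// Parse a literal swizzle offset; it must fit in 16 bits.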
7782 bool
7783 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7784 
7785   SMLoc OffsetLoc = getLoc();
7786 
7787   if (!parseExpr(Imm, "a swizzle macro")) {
7788     return false;
7789   }
7790   if (!isUInt<16>(Imm)) {
7791     Error(OffsetLoc, "expected a 16-bit offset");
7792     return false;
7793   }
7794   return true;
7795 }
7796 
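// Parse a swizzle(...) macro and dispatch to the handler for the requested mode.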
7797 bool
7798 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7799   using namespace llvm::AMDGPU::Swizzle;
7800 
7801   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7802 
7803     SMLoc ModeLoc = getLoc();
7804     bool Ok = false;
7805 
7806     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7807       Ok = parseSwizzleQuadPerm(Imm);
7808     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7809       Ok = parseSwizzleBitmaskPerm(Imm);
7810     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7811       Ok = parseSwizzleBroadcast(Imm);
7812     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7813       Ok = parseSwizzleSwap(Imm);
7814     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7815       Ok = parseSwizzleReverse(Imm);
7816     } else {
7817       Error(ModeLoc, "expected a swizzle mode");
7818     }
7819 
7820     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7821   }
7822 
7823   return false;
7824 }
7825 
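// Parse an "offset:" operand: either a 16-bit literal offset or a swizzle(...) macro.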
7826 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
7827   SMLoc S = getLoc();
7828   int64_t Imm = 0;
7829 
7830   if (trySkipId("offset")) {
7831 
7832     bool Ok = false;
7833     if (skipToken(AsmToken::Colon, "expected a colon")) {
7834       if (trySkipId("swizzle")) {
7835         Ok = parseSwizzleMacro(Imm);
7836       } else {
7837         Ok = parseSwizzleOffset(Imm);
7838       }
7839     }
7840 
7841     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7842 
7843     return Ok ? ParseStatus::Success : ParseStatus::Failure;
7844   }
7845   return ParseStatus::NoMatch;
7846 }
7847 
7848 bool
7849 AMDGPUOperand::isSwizzle() const {
7850   return isImmTy(ImmTySwizzle);
7851 }
7852 
7853 //===----------------------------------------------------------------------===//
7854 // VGPR Index Mode
7855 //===----------------------------------------------------------------------===//
7856 
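// Parse the body of a gpr_idx(...) macro: a comma-separated list of unique
// VGPR index modes, or an immediately closing parenthesis meaning OFF.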
7857 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7858 
7859   using namespace llvm::AMDGPU::VGPRIndexMode;
7860 
7861   if (trySkipToken(AsmToken::RParen)) {
7862     return OFF;
7863   }
7864 
7865   int64_t Imm = 0;
7866 
7867   while (true) {
7868     unsigned Mode = 0;
7869     SMLoc S = getLoc();
7870 
7871     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7872       if (trySkipId(IdSymbolic[ModeId])) {
7873         Mode = 1 << ModeId;
7874         break;
7875       }
7876     }
7877 
7878     if (Mode == 0) {
7879       Error(S, (Imm == 0)?
7880                "expected a VGPR index mode or a closing parenthesis" :
7881                "expected a VGPR index mode");
7882       return UNDEF;
7883     }
7884 
7885     if (Imm & Mode) {
7886       Error(S, "duplicate VGPR index mode");
7887       return UNDEF;
7888     }
7889     Imm |= Mode;
7890 
7891     if (trySkipToken(AsmToken::RParen))
7892       break;
7893     if (!skipToken(AsmToken::Comma,
7894                    "expected a comma or a closing parenthesis"))
7895       return UNDEF;
7896   }
7897 
7898   return Imm;
7899 }
7900 
7901 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7902 
7903   using namespace llvm::AMDGPU::VGPRIndexMode;
7904 
7905   int64_t Imm = 0;
7906   SMLoc S = getLoc();
7907 
7908   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7909     Imm = parseGPRIdxMacro();
7910     if (Imm == UNDEF)
7911       return ParseStatus::Failure;
7912   } else {
7913     if (getParser().parseAbsoluteExpression(Imm))
7914       return ParseStatus::Failure;
7915     if (Imm < 0 || !isUInt<4>(Imm))
7916       return Error(S, "invalid immediate: only 4-bit values are legal");
7917   }
7918 
7919   Operands.push_back(
7920       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7921   return ParseStatus::Success;
7922 }
7923 
7924 bool AMDGPUOperand::isGPRIdxMode() const {
7925   return isImmTy(ImmTyGprIdxMode);
7926 }
7927 
7928 //===----------------------------------------------------------------------===//
7929 // sopp branch targets
7930 //===----------------------------------------------------------------------===//
7931 
7932 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
7933 
7934   // Make sure we are not parsing something
7935   // that looks like a label or an expression but is not.
7936   // This will improve error messages.
7937   if (isRegister() || isModifier())
7938     return ParseStatus::NoMatch;
7939 
7940   if (!parseExpr(Operands))
7941     return ParseStatus::Failure;
7942 
7943   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7944   assert(Opr.isImm() || Opr.isExpr());
7945   SMLoc Loc = Opr.getStartLoc();
7946 
7947   // Currently we do not support arbitrary expressions as branch targets.
7948   // Only labels and absolute expressions are accepted.
7949   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7950     Error(Loc, "expected an absolute expression or a label");
7951   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7952     Error(Loc, "expected a 16-bit signed jump offset");
7953   }
7954 
7955   return ParseStatus::Success;
7956 }
7957 
7958 //===----------------------------------------------------------------------===//
7959 // Boolean holding registers
7960 //===----------------------------------------------------------------------===//
7961 
7962 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7963   return parseReg(Operands);
7964 }
7965 
7966 //===----------------------------------------------------------------------===//
7967 // mubuf
7968 //===----------------------------------------------------------------------===//
7969 
7970 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7971                                    const OperandVector &Operands,
7972                                    bool IsAtomic) {
7973   OptionalImmIndexMap OptionalIdx;
7974   unsigned FirstOperandIdx = 1;
7975   bool IsAtomicReturn = false;
7976 
7977   if (IsAtomic) {
7978     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7979       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7980       if (!Op.isCPol())
7981         continue;
7982       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7983       break;
7984     }
7985 
7986     if (!IsAtomicReturn) {
7987       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7988       if (NewOpc != -1)
7989         Inst.setOpcode(NewOpc);
7990     }
7991 
7992     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7993                       SIInstrFlags::IsAtomicRet;
7994   }
7995 
7996   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7997     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7998 
7999     // Add the register arguments
8000     if (Op.isReg()) {
8001       Op.addRegOperands(Inst, 1);
8002       // Insert a tied src for atomic return dst.
8003       // This cannot be postponed as subsequent calls to
8004       // addImmOperands rely on correct number of MC operands.
8005       if (IsAtomicReturn && i == FirstOperandIdx)
8006         Op.addRegOperands(Inst, 1);
8007       continue;
8008     }
8009 
8010     // Handle the case where soffset is an immediate
8011     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8012       Op.addImmOperands(Inst, 1);
8013       continue;
8014     }
8015 
8016     // Handle tokens like 'offen' which are sometimes hard-coded into the
8017     // asm string.  There are no MCInst operands for these.
8018     if (Op.isToken()) {
8019       continue;
8020     }
8021     assert(Op.isImm());
8022 
8023     // Handle optional arguments
8024     OptionalIdx[Op.getImmTy()] = i;
8025   }
8026 
8027   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8028   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8029 }
8030 
8031 //===----------------------------------------------------------------------===//
8032 // smrd
8033 //===----------------------------------------------------------------------===//
8034 
8035 bool AMDGPUOperand::isSMRDOffset8() const {
8036   return isImmLiteral() && isUInt<8>(getImm());
8037 }
8038 
8039 bool AMDGPUOperand::isSMEMOffset() const {
8040   // Offset range is checked later by validator.
8041   return isImmLiteral();
8042 }
8043 
8044 bool AMDGPUOperand::isSMRDLiteralOffset() const {
8045   // 32-bit literals are only supported on CI, and we only want to use them
8046   // when the offset is larger than 8 bits.
8047   return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8048 }
8049 
8050 //===----------------------------------------------------------------------===//
8051 // vop3
8052 //===----------------------------------------------------------------------===//
8053 
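// Convert an omod "mul" value (1, 2 or 4) to its encoding (0, 1 or 2).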
8054 static bool ConvertOmodMul(int64_t &Mul) {
8055   if (Mul != 1 && Mul != 2 && Mul != 4)
8056     return false;
8057 
8058   Mul >>= 1;
8059   return true;
8060 }
8061 
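// Convert an omod "div" value (1 or 2) to its encoding (0 or 3).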
8062 static bool ConvertOmodDiv(int64_t &Div) {
8063   if (Div == 1) {
8064     Div = 0;
8065     return true;
8066   }
8067 
8068   if (Div == 2) {
8069     Div = 3;
8070     return true;
8071   }
8072 
8073   return false;
8074 }
8075 
8076 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8077 // This is intentional and ensures compatibility with sp3.
8078 // See bug 35397 for details.
8079 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8080   if (BoundCtrl == 0 || BoundCtrl == 1) {
8081     if (!isGFX11Plus())
8082       BoundCtrl = 1;
8083     return true;
8084   }
8085   return false;
8086 }
8087 
8088 void AMDGPUAsmParser::onBeginOfFile() {
8089   if (!getParser().getStreamer().getTargetStreamer() ||
8090       getSTI().getTargetTriple().getArch() == Triple::r600)
8091     return;
8092 
8093   if (!getTargetStreamer().getTargetID())
8094     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(),
8095         // TODO: Should try to check code object version from directive???
8096         AMDGPU::getAmdhsaCodeObjectVersion());
8097 
8098   if (isHsaAbi(getSTI()))
8099     getTargetStreamer().EmitDirectiveAMDGCNTarget();
8100 }
8101 
8102 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8103   StringRef Name = getTokenStr();
8104   if (Name == "mul") {
8105     return parseIntWithPrefix("mul", Operands,
8106                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8107   }
8108 
8109   if (Name == "div") {
8110     return parseIntWithPrefix("div", Operands,
8111                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8112   }
8113 
8114   return ParseStatus::NoMatch;
8115 }
8116 
8117 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8118 // the number of src operands present, then copies that bit into src0_modifiers.
8119 void cvtVOP3DstOpSelOnly(MCInst &Inst) {
8120   int Opc = Inst.getOpcode();
8121   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8122   if (OpSelIdx == -1)
8123     return;
8124 
8125   int SrcNum;
8126   const int Ops[] = { AMDGPU::OpName::src0,
8127                       AMDGPU::OpName::src1,
8128                       AMDGPU::OpName::src2 };
8129   for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8130        ++SrcNum)
8131     ;
8132   assert(SrcNum > 0);
8133 
8134   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8135 
8136   if ((OpSel & (1 << SrcNum)) != 0) {
8137     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8138     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8139     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8140   }
8141 }
8142 
8143 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8144                                    const OperandVector &Operands) {
8145   cvtVOP3P(Inst, Operands);
8146   cvtVOP3DstOpSelOnly(Inst);
8147 }
8148 
8149 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8150                                    OptionalImmIndexMap &OptionalIdx) {
8151   cvtVOP3P(Inst, Operands, OptionalIdx);
8152   cvtVOP3DstOpSelOnly(Inst);
8153 }
8154 
8155 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8156   return
8157       // 1. This operand is input modifiers
8158       Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8159       // 2. This is not last operand
8160       && Desc.NumOperands > (OpNum + 1)
8161       // 3. Next operand is register class
8162       && Desc.operands()[OpNum + 1].RegClass != -1
8163       // 4. Next register is not tied to any other operand
8164       && Desc.getOperandConstraint(OpNum + 1,
8165                                    MCOI::OperandConstraint::TIED_TO) == -1;
8166 }
8167 
8168 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8169 {
8170   OptionalImmIndexMap OptionalIdx;
8171   unsigned Opc = Inst.getOpcode();
8172 
8173   unsigned I = 1;
8174   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8175   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8176     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8177   }
8178 
8179   for (unsigned E = Operands.size(); I != E; ++I) {
8180     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8181     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8182       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8183     } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8184                Op.isInterpAttrChan()) {
8185       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8186     } else if (Op.isImmModifier()) {
8187       OptionalIdx[Op.getImmTy()] = I;
8188     } else {
8189       llvm_unreachable("unhandled operand type");
8190     }
8191   }
8192 
8193   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8194     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8195                           AMDGPUOperand::ImmTyHigh);
8196 
8197   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8198     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8199                           AMDGPUOperand::ImmTyClampSI);
8200 
8201   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8202     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8203                           AMDGPUOperand::ImmTyOModSI);
8204 }
8205 
8206 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8207 {
8208   OptionalImmIndexMap OptionalIdx;
8209   unsigned Opc = Inst.getOpcode();
8210 
8211   unsigned I = 1;
8212   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8213   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8214     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8215   }
8216 
8217   for (unsigned E = Operands.size(); I != E; ++I) {
8218     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8219     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8220       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8221     } else if (Op.isImmModifier()) {
8222       OptionalIdx[Op.getImmTy()] = I;
8223     } else {
8224       llvm_unreachable("unhandled operand type");
8225     }
8226   }
8227 
8228   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8229 
8230   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8231   if (OpSelIdx != -1)
8232     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8233 
8234   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8235 
8236   if (OpSelIdx == -1)
8237     return;
8238 
8239   const int Ops[] = { AMDGPU::OpName::src0,
8240                       AMDGPU::OpName::src1,
8241                       AMDGPU::OpName::src2 };
8242   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8243                          AMDGPU::OpName::src1_modifiers,
8244                          AMDGPU::OpName::src2_modifiers };
8245 
8246   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8247 
8248   for (int J = 0; J < 3; ++J) {
8249     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8250     if (OpIdx == -1)
8251       break;
8252 
8253     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8254     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8255 
8256     if ((OpSel & (1 << J)) != 0)
8257       ModVal |= SISrcMods::OP_SEL_0;
8258     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8259         (OpSel & (1 << 3)) != 0)
8260       ModVal |= SISrcMods::DST_OP_SEL;
8261 
8262     Inst.getOperand(ModIdx).setImm(ModVal);
8263   }
8264 }
8265 
8266 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8267                               OptionalImmIndexMap &OptionalIdx) {
8268   unsigned Opc = Inst.getOpcode();
8269 
8270   unsigned I = 1;
8271   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8272   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8273     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8274   }
8275 
8276   for (unsigned E = Operands.size(); I != E; ++I) {
8277     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8278     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8279       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8280     } else if (Op.isImmModifier()) {
8281       OptionalIdx[Op.getImmTy()] = I;
8282     } else if (Op.isRegOrImm()) {
8283       Op.addRegOrImmOperands(Inst, 1);
8284     } else {
8285       llvm_unreachable("unhandled operand type");
8286     }
8287   }
8288 
8289   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8290     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8291                           AMDGPUOperand::ImmTyClampSI);
8292 
8293   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8294     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8295                           AMDGPUOperand::ImmTyOModSI);
8296 
8297   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8298   // they have a src2 register operand that is tied to the dst operand.
8299   // We don't allow modifiers for this operand in the assembler, so
8300   // src2_modifiers should be 0.
8301   if (isMAC(Opc)) {
8302     auto it = Inst.begin();
8303     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8304     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8305     ++it;
8306     // Copy the operand to ensure it's not invalidated when Inst grows.
8307     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8308   }
8309 }
8310 
8311 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8312   OptionalImmIndexMap OptionalIdx;
8313   cvtVOP3(Inst, Operands, OptionalIdx);
8314 }
8315 
8316 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8317                                OptionalImmIndexMap &OptIdx) {
8318   const int Opc = Inst.getOpcode();
8319   const MCInstrDesc &Desc = MII.get(Opc);
8320 
8321   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8322 
8323   if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8324       Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
8325     Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8326     Inst.addOperand(Inst.getOperand(0));
8327   }
8328 
8329   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) {
8330     assert(!IsPacked);
8331     Inst.addOperand(Inst.getOperand(0));
8332   }
8333 
8334   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
8335   // instruction, and then figure out where to actually put the modifiers.
8336 
8337   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8338   if (OpSelIdx != -1) {
8339     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8340   }
8341 
8342   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8343   if (OpSelHiIdx != -1) {
8344     int DefaultVal = IsPacked ? -1 : 0;
8345     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8346                           DefaultVal);
8347   }
8348 
8349   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8350   if (NegLoIdx != -1) {
8351     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8352     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8353   }
8354 
8355   const int Ops[] = { AMDGPU::OpName::src0,
8356                       AMDGPU::OpName::src1,
8357                       AMDGPU::OpName::src2 };
8358   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8359                          AMDGPU::OpName::src1_modifiers,
8360                          AMDGPU::OpName::src2_modifiers };
8361 
8362   unsigned OpSel = 0;
8363   unsigned OpSelHi = 0;
8364   unsigned NegLo = 0;
8365   unsigned NegHi = 0;
8366 
8367   if (OpSelIdx != -1)
8368     OpSel = Inst.getOperand(OpSelIdx).getImm();
8369 
8370   if (OpSelHiIdx != -1)
8371     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8372 
8373   if (NegLoIdx != -1) {
8374     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8375     NegLo = Inst.getOperand(NegLoIdx).getImm();
8376     NegHi = Inst.getOperand(NegHiIdx).getImm();
8377   }
8378 
8379   for (int J = 0; J < 3; ++J) {
8380     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8381     if (OpIdx == -1)
8382       break;
8383 
8384     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8385 
8386     if (ModIdx == -1)
8387       continue;
8388 
8389     uint32_t ModVal = 0;
8390 
8391     if ((OpSel & (1 << J)) != 0)
8392       ModVal |= SISrcMods::OP_SEL_0;
8393 
8394     if ((OpSelHi & (1 << J)) != 0)
8395       ModVal |= SISrcMods::OP_SEL_1;
8396 
8397     if ((NegLo & (1 << J)) != 0)
8398       ModVal |= SISrcMods::NEG;
8399 
8400     if ((NegHi & (1 << J)) != 0)
8401       ModVal |= SISrcMods::NEG_HI;
8402 
8403     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8404   }
8405 }
8406 
8407 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8408   OptionalImmIndexMap OptIdx;
8409   cvtVOP3(Inst, Operands, OptIdx);
8410   cvtVOP3P(Inst, Operands, OptIdx);
8411 }
8412 
8413 //===----------------------------------------------------------------------===//
8414 // VOPD
8415 //===----------------------------------------------------------------------===//
8416 
8417 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8418   if (!hasVOPD(getSTI()))
8419     return ParseStatus::NoMatch;
8420 
8421   if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8422     SMLoc S = getLoc();
8423     lex();
8424     lex();
8425     Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8426     SMLoc OpYLoc = getLoc();
8427     StringRef OpYName;
8428     if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8429       Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8430       return ParseStatus::Success;
8431     }
8432     return Error(OpYLoc, "expected a VOPDY instruction after ::");
8433   }
8434   return ParseStatus::NoMatch;
8435 }
8436 
8437 // Create VOPD MCInst operands using parsed assembler operands.
8438 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8439   auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8440     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8441     if (Op.isReg()) {
8442       Op.addRegOperands(Inst, 1);
8443       return;
8444     }
8445     if (Op.isImm()) {
8446       Op.addImmOperands(Inst, 1);
8447       return;
8448     }
8449     llvm_unreachable("Unhandled operand type in cvtVOPD");
8450   };
8451 
8452   const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8453 
8454   // MCInst operands are ordered as follows:
8455   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8456 
8457   for (auto CompIdx : VOPD::COMPONENTS) {
8458     addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8459   }
8460 
8461   for (auto CompIdx : VOPD::COMPONENTS) {
8462     const auto &CInfo = InstInfo[CompIdx];
8463     auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8464     for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8465       addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8466     if (CInfo.hasSrc2Acc())
8467       addOp(CInfo.getIndexOfDstInParsedOperands());
8468   }
8469 }
8470 
8471 //===----------------------------------------------------------------------===//
8472 // dpp
8473 //===----------------------------------------------------------------------===//
8474 
8475 bool AMDGPUOperand::isDPP8() const {
8476   return isImmTy(ImmTyDPP8);
8477 }
8478 
8479 bool AMDGPUOperand::isDPPCtrl() const {
8480   using namespace AMDGPU::DPP;
8481 
8482   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8483   if (result) {
8484     int64_t Imm = getImm();
8485     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8486            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8487            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8488            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8489            (Imm == DppCtrl::WAVE_SHL1) ||
8490            (Imm == DppCtrl::WAVE_ROL1) ||
8491            (Imm == DppCtrl::WAVE_SHR1) ||
8492            (Imm == DppCtrl::WAVE_ROR1) ||
8493            (Imm == DppCtrl::ROW_MIRROR) ||
8494            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8495            (Imm == DppCtrl::BCAST15) ||
8496            (Imm == DppCtrl::BCAST31) ||
8497            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8498            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8499   }
8500   return false;
8501 }
8502 
8503 //===----------------------------------------------------------------------===//
8504 // mAI
8505 //===----------------------------------------------------------------------===//
8506 
8507 bool AMDGPUOperand::isBLGP() const {
8508   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8509 }
8510 
8511 bool AMDGPUOperand::isCBSZ() const {
8512   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8513 }
8514 
8515 bool AMDGPUOperand::isABID() const {
8516   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8517 }
8518 
8519 bool AMDGPUOperand::isS16Imm() const {
8520   return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8521 }
8522 
8523 bool AMDGPUOperand::isU16Imm() const {
8524   return isImmLiteral() && isUInt<16>(getImm());
8525 }
8526 
8527 //===----------------------------------------------------------------------===//
8528 // dim
8529 //===----------------------------------------------------------------------===//
8530 
8531 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8532   // We want to allow "dim:1D" etc.,
8533   // but the initial 1 is tokenized as an integer.
8534   std::string Token;
8535   if (isToken(AsmToken::Integer)) {
8536     SMLoc Loc = getToken().getEndLoc();
8537     Token = std::string(getTokenStr());
8538     lex();
8539     if (getLoc() != Loc)
8540       return false;
8541   }
8542 
8543   StringRef Suffix;
8544   if (!parseId(Suffix))
8545     return false;
8546   Token += Suffix;
8547 
8548   StringRef DimId = Token;
8549   if (DimId.starts_with("SQ_RSRC_IMG_"))
8550     DimId = DimId.drop_front(12);
8551 
8552   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8553   if (!DimInfo)
8554     return false;
8555 
8556   Encoding = DimInfo->Encoding;
8557   return true;
8558 }
8559 
8560 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8561   if (!isGFX10Plus())
8562     return ParseStatus::NoMatch;
8563 
8564   SMLoc S = getLoc();
8565 
8566   if (!trySkipId("dim", AsmToken::Colon))
8567     return ParseStatus::NoMatch;
8568 
8569   unsigned Encoding;
8570   SMLoc Loc = getLoc();
8571   if (!parseDimId(Encoding))
8572     return Error(Loc, "invalid dim value");
8573 
8574   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8575                                               AMDGPUOperand::ImmTyDim));
8576   return ParseStatus::Success;
8577 }
8578 
8579 //===----------------------------------------------------------------------===//
8580 // dpp
8581 //===----------------------------------------------------------------------===//
8582 
8583 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8584   SMLoc S = getLoc();
8585 
8586   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8587     return ParseStatus::NoMatch;
8588 
8589   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8590 
8591   int64_t Sels[8];
8592 
8593   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8594     return ParseStatus::Failure;
8595 
8596   for (size_t i = 0; i < 8; ++i) {
8597     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8598       return ParseStatus::Failure;
8599 
8600     SMLoc Loc = getLoc();
8601     if (getParser().parseAbsoluteExpression(Sels[i]))
8602       return ParseStatus::Failure;
8603     if (0 > Sels[i] || 7 < Sels[i])
8604       return Error(Loc, "expected a 3-bit value");
8605   }
8606 
8607   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8608     return ParseStatus::Failure;
8609 
8610   unsigned DPP8 = 0;
8611   for (size_t i = 0; i < 8; ++i)
8612     DPP8 |= (Sels[i] << (i * 3));
8613 
8614   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8615   return ParseStatus::Success;
8616 }
8617 
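// Check whether the given dpp_ctrl identifier is supported on the current subtarget.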
8618 bool
8619 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8620                                     const OperandVector &Operands) {
8621   if (Ctrl == "row_newbcast")
8622     return isGFX90A();
8623 
8624   if (Ctrl == "row_share" ||
8625       Ctrl == "row_xmask")
8626     return isGFX10Plus();
8627 
8628   if (Ctrl == "wave_shl" ||
8629       Ctrl == "wave_shr" ||
8630       Ctrl == "wave_rol" ||
8631       Ctrl == "wave_ror" ||
8632       Ctrl == "row_bcast")
8633     return isVI() || isGFX9();
8634 
8635   return Ctrl == "row_mirror" ||
8636          Ctrl == "row_half_mirror" ||
8637          Ctrl == "quad_perm" ||
8638          Ctrl == "row_shl" ||
8639          Ctrl == "row_shr" ||
8640          Ctrl == "row_ror";
8641 }
8642 
8643 int64_t
8644 AMDGPUAsmParser::parseDPPCtrlPerm() {
8645   // quad_perm:[%d,%d,%d,%d]
8646 
8647   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8648     return -1;
8649 
8650   int64_t Val = 0;
8651   for (int i = 0; i < 4; ++i) {
8652     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8653       return -1;
8654 
8655     int64_t Temp;
8656     SMLoc Loc = getLoc();
8657     if (getParser().parseAbsoluteExpression(Temp))
8658       return -1;
8659     if (Temp < 0 || Temp > 3) {
8660       Error(Loc, "expected a 2-bit value");
8661       return -1;
8662     }
8663 
8664     Val += (Temp << i * 2);
8665   }
8666 
8667   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8668     return -1;
8669 
8670   return Val;
8671 }
8672 
8673 int64_t
8674 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8675   using namespace AMDGPU::DPP;
8676 
8677   // sel:%d
8678 
8679   int64_t Val;
8680   SMLoc Loc = getLoc();
8681 
8682   if (getParser().parseAbsoluteExpression(Val))
8683     return -1;
8684 
8685   struct DppCtrlCheck {
8686     int64_t Ctrl;
8687     int Lo;
8688     int Hi;
8689   };
8690 
8691   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8692     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8693     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8694     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8695     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8696     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8697     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8698     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8699     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8700     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8701     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8702     .Default({-1, 0, 0});
8703 
8704   bool Valid;
8705   if (Check.Ctrl == -1) {
8706     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8707     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8708   } else {
8709     Valid = Check.Lo <= Val && Val <= Check.Hi;
8710     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8711   }
8712 
8713   if (!Valid) {
8714     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8715     return -1;
8716   }
8717 
8718   return Val;
8719 }
8720 
8721 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8722   using namespace AMDGPU::DPP;
8723 
8724   if (!isToken(AsmToken::Identifier) ||
8725       !isSupportedDPPCtrl(getTokenStr(), Operands))
8726     return ParseStatus::NoMatch;
8727 
8728   SMLoc S = getLoc();
8729   int64_t Val = -1;
8730   StringRef Ctrl;
8731 
8732   parseId(Ctrl);
8733 
8734   if (Ctrl == "row_mirror") {
8735     Val = DppCtrl::ROW_MIRROR;
8736   } else if (Ctrl == "row_half_mirror") {
8737     Val = DppCtrl::ROW_HALF_MIRROR;
8738   } else {
8739     if (skipToken(AsmToken::Colon, "expected a colon")) {
8740       if (Ctrl == "quad_perm") {
8741         Val = parseDPPCtrlPerm();
8742       } else {
8743         Val = parseDPPCtrlSel(Ctrl);
8744       }
8745     }
8746   }
8747 
8748   if (Val == -1)
8749     return ParseStatus::Failure;
8750 
8751   Operands.push_back(
8752     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8753   return ParseStatus::Success;
8754 }
8755 
8756 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
8757                                  bool IsDPP8) {
8758   OptionalImmIndexMap OptionalIdx;
8759   unsigned Opc = Inst.getOpcode();
8760   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8761 
8762   // MAC instructions are special because they have an 'old'
8763   // operand which is not tied to dst (but assumed to be).
8764   // They also have a dummy, unused src2_modifiers operand.
8765   int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
8766   int Src2ModIdx =
8767       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
8768   bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
8769                Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
8770 
8771   unsigned I = 1;
8772   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8773     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8774   }
8775 
8776   int Fi = 0;
8777   for (unsigned E = Operands.size(); I != E; ++I) {
8778 
8779     if (IsMAC) {
8780       int NumOperands = Inst.getNumOperands();
8781       if (OldIdx == NumOperands) {
8782         // Handle old operand
8783         constexpr int DST_IDX = 0;
8784         Inst.addOperand(Inst.getOperand(DST_IDX));
8785       } else if (Src2ModIdx == NumOperands) {
8786         // Add unused dummy src2_modifiers
8787         Inst.addOperand(MCOperand::createImm(0));
8788       }
8789     }
8790 
8791     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8792                                             MCOI::TIED_TO);
8793     if (TiedTo != -1) {
8794       assert((unsigned)TiedTo < Inst.getNumOperands());
8795       // handle tied old or src2 for MAC instructions
8796       Inst.addOperand(Inst.getOperand(TiedTo));
8797     }
8798     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8799     // Add the register arguments
8800     if (IsDPP8 && Op.isDppFI()) {
8801       Fi = Op.getImm();
8802     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8803       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8804     } else if (Op.isReg()) {
8805       Op.addRegOperands(Inst, 1);
8806     } else if (Op.isImm() &&
8807                Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
8808       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8809       Op.addImmOperands(Inst, 1);
8810     } else if (Op.isImm()) {
8811       OptionalIdx[Op.getImmTy()] = I;
8812     } else {
8813       llvm_unreachable("unhandled operand type");
8814     }
8815   }
8816   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8817     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8818 
8819   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8820     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8821 
8822   if (Desc.TSFlags & SIInstrFlags::VOP3P)
8823     cvtVOP3P(Inst, Operands, OptionalIdx);
8824   else if (Desc.TSFlags & SIInstrFlags::VOP3)
8825     cvtVOP3OpSel(Inst, Operands, OptionalIdx);
8826   else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
8827     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8828   }
8829 
8830   if (IsDPP8) {
8831     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8832     using namespace llvm::AMDGPU::DPP;
8833     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8834   } else {
8835     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8836     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8837     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8838     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8839 
8840     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
8841       addOptionalImmOperand(Inst, Operands, OptionalIdx,
8842                             AMDGPUOperand::ImmTyDppFI);
8843   }
8844 }
8845 
8846 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8847   OptionalImmIndexMap OptionalIdx;
8848 
8849   unsigned I = 1;
8850   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8851   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8852     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8853   }
8854 
8855   int Fi = 0;
8856   for (unsigned E = Operands.size(); I != E; ++I) {
8857     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8858                                             MCOI::TIED_TO);
8859     if (TiedTo != -1) {
8860       assert((unsigned)TiedTo < Inst.getNumOperands());
8861       // handle tied old or src2 for MAC instructions
8862       Inst.addOperand(Inst.getOperand(TiedTo));
8863     }
8864     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8865     // Add the register arguments
8866     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8867       // VOP2b DPP (v_add_u32, v_sub_u32 ...) uses the "vcc" token.
8868       // Skip it.
8869       continue;
8870     }
8871 
8872     if (IsDPP8) {
8873       if (Op.isDPP8()) {
8874         Op.addImmOperands(Inst, 1);
8875       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8876         Op.addRegWithFPInputModsOperands(Inst, 2);
8877       } else if (Op.isDppFI()) {
8878         Fi = Op.getImm();
8879       } else if (Op.isReg()) {
8880         Op.addRegOperands(Inst, 1);
8881       } else {
8882         llvm_unreachable("Invalid operand type");
8883       }
8884     } else {
8885       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8886         Op.addRegWithFPInputModsOperands(Inst, 2);
8887       } else if (Op.isReg()) {
8888         Op.addRegOperands(Inst, 1);
8889       } else if (Op.isDPPCtrl()) {
8890         Op.addImmOperands(Inst, 1);
8891       } else if (Op.isImm()) {
8892         // Handle optional arguments
8893         OptionalIdx[Op.getImmTy()] = I;
8894       } else {
8895         llvm_unreachable("Invalid operand type");
8896       }
8897     }
8898   }
8899 
8900   if (IsDPP8) {
8901     using namespace llvm::AMDGPU::DPP;
8902     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8903   } else {
8904     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8905     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8906     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8907     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
8908       addOptionalImmOperand(Inst, Operands, OptionalIdx,
8909                             AMDGPUOperand::ImmTyDppFI);
8910     }
8911   }
8912 }
8913 
8914 //===----------------------------------------------------------------------===//
8915 // sdwa
8916 //===----------------------------------------------------------------------===//
8917 
8918 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
8919                                           StringRef Prefix,
8920                                           AMDGPUOperand::ImmTy Type) {
8921   using namespace llvm::AMDGPU::SDWA;
8922 
8923   SMLoc S = getLoc();
8924   StringRef Value;
8925 
8926   SMLoc StringLoc;
8927   ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
8928   if (!Res.isSuccess())
8929     return Res;
8930 
8931   int64_t Int;
8932   Int = StringSwitch<int64_t>(Value)
8933         .Case("BYTE_0", SdwaSel::BYTE_0)
8934         .Case("BYTE_1", SdwaSel::BYTE_1)
8935         .Case("BYTE_2", SdwaSel::BYTE_2)
8936         .Case("BYTE_3", SdwaSel::BYTE_3)
8937         .Case("WORD_0", SdwaSel::WORD_0)
8938         .Case("WORD_1", SdwaSel::WORD_1)
8939         .Case("DWORD", SdwaSel::DWORD)
8940         .Default(0xffffffff);
8941 
8942   if (Int == 0xffffffff)
8943     return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8944 
8945   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8946   return ParseStatus::Success;
8947 }
8948 
8949 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8950   using namespace llvm::AMDGPU::SDWA;
8951 
8952   SMLoc S = getLoc();
8953   StringRef Value;
8954 
8955   SMLoc StringLoc;
8956   ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8957   if (!Res.isSuccess())
8958     return Res;
8959 
8960   int64_t Int;
8961   Int = StringSwitch<int64_t>(Value)
8962         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8963         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8964         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8965         .Default(0xffffffff);
8966 
8967   if (Int == 0xffffffff)
8968     return Error(StringLoc, "invalid dst_unused value");
8969 
8970   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
8971   return ParseStatus::Success;
8972 }
8973 
8974 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8975   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8976 }
8977 
8978 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8979   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8980 }
8981 
8982 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8983   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8984 }
8985 
8986 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8987   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8988 }
8989 
8990 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8991   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8992 }
8993 
8994 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8995                               uint64_t BasicInstType,
8996                               bool SkipDstVcc,
8997                               bool SkipSrcVcc) {
8998   using namespace llvm::AMDGPU::SDWA;
8999 
9000   OptionalImmIndexMap OptionalIdx;
9001   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9002   bool SkippedVcc = false;
9003 
9004   unsigned I = 1;
9005   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9006   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9007     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9008   }
9009 
9010   for (unsigned E = Operands.size(); I != E; ++I) {
9011     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9012     if (SkipVcc && !SkippedVcc && Op.isReg() &&
9013         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9014       // VOP2b SDWA (v_add_u32, v_sub_u32 ...) uses the "vcc" token as dst.
9015       // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9016       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9017       // Skip VCC only if we didn't skip it on the previous iteration.
9018       // Note that src0 and src1 occupy 2 slots each because of modifiers.
9019       if (BasicInstType == SIInstrFlags::VOP2 &&
9020           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9021            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9022         SkippedVcc = true;
9023         continue;
9024       } else if (BasicInstType == SIInstrFlags::VOPC &&
9025                  Inst.getNumOperands() == 0) {
9026         SkippedVcc = true;
9027         continue;
9028       }
9029     }
9030     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9031       Op.addRegOrImmWithInputModsOperands(Inst, 2);
9032     } else if (Op.isImm()) {
9033       // Handle optional arguments
9034       OptionalIdx[Op.getImmTy()] = I;
9035     } else {
9036       llvm_unreachable("Invalid operand type");
9037     }
9038     SkippedVcc = false;
9039   }
9040 
9041   const unsigned Opc = Inst.getOpcode();
9042   if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9043       Opc != AMDGPU::V_NOP_sdwa_vi) {
9044     // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
9045     switch (BasicInstType) {
9046     case SIInstrFlags::VOP1:
9047       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9048         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9049                               AMDGPUOperand::ImmTyClampSI, 0);
9050 
9051       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9052         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9053                               AMDGPUOperand::ImmTyOModSI, 0);
9054 
9055       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9056         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9057                               AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9058 
9059       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9060         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9061                               AMDGPUOperand::ImmTySDWADstUnused,
9062                               DstUnused::UNUSED_PRESERVE);
9063 
9064       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9065       break;
9066 
9067     case SIInstrFlags::VOP2:
9068       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9069 
9070       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9071         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9072 
9073       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9074       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9075       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9076       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9077       break;
9078 
9079     case SIInstrFlags::VOPC:
9080       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9081         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9082       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9083       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9084       break;
9085 
9086     default:
9087       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9088     }
9089   }
9090 
9091   // special case v_mac_{f16, f32}:
9092   // it has src2 register operand that is tied to dst operand
9093   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9094       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
9095     auto it = Inst.begin();
9096     std::advance(
9097       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9098     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9099   }
9100 }
9101 
9102 /// Force static initialization.
9103 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9104   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9105   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9106 }
9107 
9108 #define GET_REGISTER_MATCHER
9109 #define GET_MATCHER_IMPLEMENTATION
9110 #define GET_MNEMONIC_SPELL_CHECKER
9111 #define GET_MNEMONIC_CHECKER
9112 #include "AMDGPUGenAsmMatcher.inc"
9113 
9114 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9115                                                 unsigned MCK) {
9116   switch (MCK) {
9117   case MCK_addr64:
9118     return parseTokenOp("addr64", Operands);
9119   case MCK_done:
9120     return parseTokenOp("done", Operands);
9121   case MCK_idxen:
9122     return parseTokenOp("idxen", Operands);
9123   case MCK_lds:
9124     return parseTokenOp("lds", Operands);
9125   case MCK_offen:
9126     return parseTokenOp("offen", Operands);
9127   case MCK_off:
9128     return parseTokenOp("off", Operands);
9129   case MCK_row_95_en:
9130     return parseTokenOp("row_en", Operands);
9131   case MCK_gds:
9132     return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9133   case MCK_tfe:
9134     return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9135   }
9136   return tryCustomParseOperand(Operands, MCK);
9137 }
9138 
9139 // This function should be defined after the auto-generated include so that the
9140 // MatchClassKind enum is defined.
9141 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9142                                                      unsigned Kind) {
9143   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9144   // But MatchInstructionImpl() expects to meet a token and fails to validate
9145   // the operand. This method checks whether we were given an immediate operand
9146   // when the matcher expects the corresponding token.
9147   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9148   switch (Kind) {
9149   case MCK_addr64:
9150     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9151   case MCK_gds:
9152     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9153   case MCK_lds:
9154     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9155   case MCK_idxen:
9156     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9157   case MCK_offen:
9158     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9159   case MCK_tfe:
9160     return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9161   case MCK_SSrcB32:
9162     // When operands have expression values, they will return true for isToken,
9163     // because it is not possible to distinguish between a token and an
9164     // expression at parse time. MatchInstructionImpl() will always try to
9165     // match an operand as a token, when isToken returns true, and when the
9166     // name of the expression is not a valid token, the match will fail,
9167     // so we need to handle it here.
9168     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9169   case MCK_SSrcF32:
9170     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9171   case MCK_SOPPBrTarget:
9172     return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9173   case MCK_VReg32OrOff:
9174     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9175   case MCK_InterpSlot:
9176     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9177   case MCK_InterpAttr:
9178     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9179   case MCK_InterpAttrChan:
9180     return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9181   case MCK_SReg_64:
9182   case MCK_SReg_64_XEXEC:
9183     // Null is defined as a 32-bit register but
9184     // it should also be enabled with 64-bit operands.
9185     // The following code enables it for SReg_64 operands
9186     // used as source and destination. Remaining source
9187     // operands are handled in isInlinableImm.
9188     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9189   default:
9190     return Match_InvalidOperand;
9191   }
9192 }
9193 
9194 //===----------------------------------------------------------------------===//
9195 // endpgm
9196 //===----------------------------------------------------------------------===//
9197 
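// s_endpgm takes an optional 16-bit immediate operand; it defaults to 0 when omitted.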
9198 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9199   SMLoc S = getLoc();
9200   int64_t Imm = 0;
9201 
9202   if (!parseExpr(Imm)) {
9203     // The operand is optional, if not present default to 0
9204     Imm = 0;
9205   }
9206 
9207   if (!isUInt<16>(Imm))
9208     return Error(S, "expected a 16-bit value");
9209 
9210   Operands.push_back(
9211       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9212   return ParseStatus::Success;
9213 }
9214 
9215 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9216 
9217 //===----------------------------------------------------------------------===//
9218 // LDSDIR
9219 //===----------------------------------------------------------------------===//
9220 
9221 bool AMDGPUOperand::isWaitVDST() const {
9222   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9223 }
9224 
9225 bool AMDGPUOperand::isWaitVAVDst() const {
9226   return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
9227 }
9228 
9229 bool AMDGPUOperand::isWaitVMVSrc() const {
9230   return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
9231 }
9232 
9233 //===----------------------------------------------------------------------===//
9234 // VINTERP
9235 //===----------------------------------------------------------------------===//
9236 
9237 bool AMDGPUOperand::isWaitEXP() const {
9238   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9239 }
9240 
9241 //===----------------------------------------------------------------------===//
9242 // Split Barrier
9243 //===----------------------------------------------------------------------===//
9244 
9245 bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
9246