xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (revision 258a0d760aa8b42899a000e30f610f900a402556)
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCInstrDesc.h"
29 #include "llvm/MC/MCParser/MCAsmLexer.h"
30 #include "llvm/MC/MCParser/MCAsmParser.h"
31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
32 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/AMDGPUMetadata.h"
36 #include "llvm/Support/AMDHSAKernelDescriptor.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/MachineValueType.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/TargetParser.h"
41 #include <optional>
42 
43 using namespace llvm;
44 using namespace llvm::AMDGPU;
45 using namespace llvm::amdhsa;
46 
47 namespace {
48 
49 class AMDGPUAsmParser;
50 
/// Tag identifying the kind of a register. IS_UNKNOWN is the zero value;
/// the remaining enumerators follow in declaration order.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
52 
53 //===----------------------------------------------------------------------===//
54 // Operand
55 //===----------------------------------------------------------------------===//
56 
57 class AMDGPUOperand : public MCParsedAsmOperand {
58   enum KindTy {
59     Token,
60     Immediate,
61     Register,
62     Expression
63   } Kind;
64 
65   SMLoc StartLoc, EndLoc;
66   const AMDGPUAsmParser *AsmParser;
67 
68 public:
69   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70       : Kind(Kind_), AsmParser(AsmParser_) {}
71 
72   using Ptr = std::unique_ptr<AMDGPUOperand>;
73 
74   struct Modifiers {
75     bool Abs = false;
76     bool Neg = false;
77     bool Sext = false;
78 
79     bool hasFPModifiers() const { return Abs || Neg; }
80     bool hasIntModifiers() const { return Sext; }
81     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
82 
83     int64_t getFPModifiersOperand() const {
84       int64_t Operand = 0;
85       Operand |= Abs ? SISrcMods::ABS : 0u;
86       Operand |= Neg ? SISrcMods::NEG : 0u;
87       return Operand;
88     }
89 
90     int64_t getIntModifiersOperand() const {
91       int64_t Operand = 0;
92       Operand |= Sext ? SISrcMods::SEXT : 0u;
93       return Operand;
94     }
95 
96     int64_t getModifiersOperand() const {
97       assert(!(hasFPModifiers() && hasIntModifiers())
98            && "fp and int modifiers should not be used simultaneously");
99       if (hasFPModifiers()) {
100         return getFPModifiersOperand();
101       } else if (hasIntModifiers()) {
102         return getIntModifiersOperand();
103       } else {
104         return 0;
105       }
106     }
107 
108     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
109   };
110 
111   enum ImmTy {
112     ImmTyNone,
113     ImmTyGDS,
114     ImmTyLDS,
115     ImmTyOffen,
116     ImmTyIdxen,
117     ImmTyAddr64,
118     ImmTyOffset,
119     ImmTyInstOffset,
120     ImmTyOffset0,
121     ImmTyOffset1,
122     ImmTyCPol,
123     ImmTySWZ,
124     ImmTyTFE,
125     ImmTyD16,
126     ImmTyClampSI,
127     ImmTyOModSI,
128     ImmTySdwaDstSel,
129     ImmTySdwaSrc0Sel,
130     ImmTySdwaSrc1Sel,
131     ImmTySdwaDstUnused,
132     ImmTyDMask,
133     ImmTyDim,
134     ImmTyUNorm,
135     ImmTyDA,
136     ImmTyR128A16,
137     ImmTyA16,
138     ImmTyLWE,
139     ImmTyExpTgt,
140     ImmTyExpCompr,
141     ImmTyExpVM,
142     ImmTyFORMAT,
143     ImmTyHwreg,
144     ImmTyOff,
145     ImmTySendMsg,
146     ImmTyInterpSlot,
147     ImmTyInterpAttr,
148     ImmTyAttrChan,
149     ImmTyOpSel,
150     ImmTyOpSelHi,
151     ImmTyNegLo,
152     ImmTyNegHi,
153     ImmTyDPP8,
154     ImmTyDppCtrl,
155     ImmTyDppRowMask,
156     ImmTyDppBankMask,
157     ImmTyDppBoundCtrl,
158     ImmTyDppFi,
159     ImmTySwizzle,
160     ImmTyGprIdxMode,
161     ImmTyHigh,
162     ImmTyBLGP,
163     ImmTyCBSZ,
164     ImmTyABID,
165     ImmTyEndpgm,
166     ImmTyWaitVDST,
167     ImmTyWaitEXP,
168   };
169 
170   // Immediate operand kind.
171   // It helps to identify the location of an offending operand after an error.
172   // Note that regular literals and mandatory literals (KImm) must be handled
173   // differently. When looking for an offending operand, we should usually
174   // ignore mandatory literals because they are part of the instruction and
175   // cannot be changed. Report location of mandatory operands only for VOPD,
176   // when both OpX and OpY have a KImm and there are no other literals.
177   enum ImmKindTy {
178     ImmKindTyNone,
179     ImmKindTyLiteral,
180     ImmKindTyMandatoryLiteral,
181     ImmKindTyConst,
182   };
183 
184 private:
185   struct TokOp {
186     const char *Data;
187     unsigned Length;
188   };
189 
190   struct ImmOp {
191     int64_t Val;
192     ImmTy Type;
193     bool IsFPImm;
194     mutable ImmKindTy Kind;
195     Modifiers Mods;
196   };
197 
198   struct RegOp {
199     unsigned RegNo;
200     Modifiers Mods;
201   };
202 
203   union {
204     TokOp Tok;
205     ImmOp Imm;
206     RegOp Reg;
207     const MCExpr *Expr;
208   };
209 
210 public:
211   bool isToken() const override { return Kind == Token; }
212 
213   bool isSymbolRefExpr() const {
214     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
215   }
216 
217   bool isImm() const override {
218     return Kind == Immediate;
219   }
220 
221   void setImmKindNone() const {
222     assert(isImm());
223     Imm.Kind = ImmKindTyNone;
224   }
225 
226   void setImmKindLiteral() const {
227     assert(isImm());
228     Imm.Kind = ImmKindTyLiteral;
229   }
230 
231   void setImmKindMandatoryLiteral() const {
232     assert(isImm());
233     Imm.Kind = ImmKindTyMandatoryLiteral;
234   }
235 
236   void setImmKindConst() const {
237     assert(isImm());
238     Imm.Kind = ImmKindTyConst;
239   }
240 
241   bool IsImmKindLiteral() const {
242     return isImm() && Imm.Kind == ImmKindTyLiteral;
243   }
244 
245   bool IsImmKindMandatoryLiteral() const {
246     return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
247   }
248 
249   bool isImmKindConst() const {
250     return isImm() && Imm.Kind == ImmKindTyConst;
251   }
252 
253   bool isInlinableImm(MVT type) const;
254   bool isLiteralImm(MVT type) const;
255 
256   bool isRegKind() const {
257     return Kind == Register;
258   }
259 
260   bool isReg() const override {
261     return isRegKind() && !hasModifiers();
262   }
263 
264   bool isRegOrInline(unsigned RCID, MVT type) const {
265     return isRegClass(RCID) || isInlinableImm(type);
266   }
267 
268   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
269     return isRegOrInline(RCID, type) || isLiteralImm(type);
270   }
271 
272   bool isRegOrImmWithInt16InputMods() const {
273     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
274   }
275 
276   bool isRegOrImmWithInt32InputMods() const {
277     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
278   }
279 
280   bool isRegOrInlineImmWithInt16InputMods() const {
281     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
282   }
283 
284   bool isRegOrInlineImmWithInt32InputMods() const {
285     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
286   }
287 
288   bool isRegOrImmWithInt64InputMods() const {
289     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
290   }
291 
292   bool isRegOrImmWithFP16InputMods() const {
293     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
294   }
295 
296   bool isRegOrImmWithFP32InputMods() const {
297     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
298   }
299 
300   bool isRegOrImmWithFP64InputMods() const {
301     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
302   }
303 
304   bool isRegOrInlineImmWithFP16InputMods() const {
305     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
306   }
307 
308   bool isRegOrInlineImmWithFP32InputMods() const {
309     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
310   }
311 
312 
313   bool isVReg() const {
314     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
315            isRegClass(AMDGPU::VReg_64RegClassID) ||
316            isRegClass(AMDGPU::VReg_96RegClassID) ||
317            isRegClass(AMDGPU::VReg_128RegClassID) ||
318            isRegClass(AMDGPU::VReg_160RegClassID) ||
319            isRegClass(AMDGPU::VReg_192RegClassID) ||
320            isRegClass(AMDGPU::VReg_256RegClassID) ||
321            isRegClass(AMDGPU::VReg_512RegClassID) ||
322            isRegClass(AMDGPU::VReg_1024RegClassID);
323   }
324 
325   bool isVReg32() const {
326     return isRegClass(AMDGPU::VGPR_32RegClassID);
327   }
328 
329   bool isVReg32OrOff() const {
330     return isOff() || isVReg32();
331   }
332 
333   bool isNull() const {
334     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
335   }
336 
337   bool isVRegWithInputMods() const;
338   bool isT16VRegWithInputMods() const;
339 
340   bool isSDWAOperand(MVT type) const;
341   bool isSDWAFP16Operand() const;
342   bool isSDWAFP32Operand() const;
343   bool isSDWAInt16Operand() const;
344   bool isSDWAInt32Operand() const;
345 
346   bool isImmTy(ImmTy ImmT) const {
347     return isImm() && Imm.Type == ImmT;
348   }
349 
350   bool isImmModifier() const {
351     return isImm() && Imm.Type != ImmTyNone;
352   }
353 
354   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
355   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
356   bool isDMask() const { return isImmTy(ImmTyDMask); }
357   bool isDim() const { return isImmTy(ImmTyDim); }
358   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
359   bool isDA() const { return isImmTy(ImmTyDA); }
360   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
361   bool isA16() const { return isImmTy(ImmTyA16); }
362   bool isLWE() const { return isImmTy(ImmTyLWE); }
363   bool isOff() const { return isImmTy(ImmTyOff); }
364   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
365   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
366   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
367   bool isOffen() const { return isImmTy(ImmTyOffen); }
368   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
369   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
370   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
371   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
372   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
373 
374   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
375   bool isGDS() const { return isImmTy(ImmTyGDS); }
376   bool isLDS() const { return isImmTy(ImmTyLDS); }
377   bool isCPol() const { return isImmTy(ImmTyCPol); }
378   bool isSWZ() const { return isImmTy(ImmTySWZ); }
379   bool isTFE() const { return isImmTy(ImmTyTFE); }
380   bool isD16() const { return isImmTy(ImmTyD16); }
381   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
382   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
383   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
384   bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
385   bool isFI() const { return isImmTy(ImmTyDppFi); }
386   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
387   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
388   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
389   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
390   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
391   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
392   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
393   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
394   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
395   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
396   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
397   bool isHigh() const { return isImmTy(ImmTyHigh); }
398 
399   bool isRegOrImm() const {
400     return isReg() || isImm();
401   }
402 
403   bool isRegClass(unsigned RCID) const;
404 
405   bool isInlineValue() const;
406 
407   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
408     return isRegOrInline(RCID, type) && !hasModifiers();
409   }
410 
411   bool isSCSrcB16() const {
412     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
413   }
414 
415   bool isSCSrcV2B16() const {
416     return isSCSrcB16();
417   }
418 
419   bool isSCSrcB32() const {
420     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
421   }
422 
423   bool isSCSrcB64() const {
424     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
425   }
426 
427   bool isBoolReg() const;
428 
429   bool isSCSrcF16() const {
430     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
431   }
432 
433   bool isSCSrcV2F16() const {
434     return isSCSrcF16();
435   }
436 
437   bool isSCSrcF32() const {
438     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
439   }
440 
441   bool isSCSrcF64() const {
442     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
443   }
444 
445   bool isSSrcB32() const {
446     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
447   }
448 
449   bool isSSrcB16() const {
450     return isSCSrcB16() || isLiteralImm(MVT::i16);
451   }
452 
453   bool isSSrcV2B16() const {
454     llvm_unreachable("cannot happen");
455     return isSSrcB16();
456   }
457 
458   bool isSSrcB64() const {
459     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
460     // See isVSrc64().
461     return isSCSrcB64() || isLiteralImm(MVT::i64);
462   }
463 
464   bool isSSrcF32() const {
465     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
466   }
467 
468   bool isSSrcF64() const {
469     return isSCSrcB64() || isLiteralImm(MVT::f64);
470   }
471 
472   bool isSSrcF16() const {
473     return isSCSrcB16() || isLiteralImm(MVT::f16);
474   }
475 
476   bool isSSrcV2F16() const {
477     llvm_unreachable("cannot happen");
478     return isSSrcF16();
479   }
480 
481   bool isSSrcV2FP32() const {
482     llvm_unreachable("cannot happen");
483     return isSSrcF32();
484   }
485 
486   bool isSCSrcV2FP32() const {
487     llvm_unreachable("cannot happen");
488     return isSCSrcF32();
489   }
490 
491   bool isSSrcV2INT32() const {
492     llvm_unreachable("cannot happen");
493     return isSSrcB32();
494   }
495 
496   bool isSCSrcV2INT32() const {
497     llvm_unreachable("cannot happen");
498     return isSCSrcB32();
499   }
500 
501   bool isSSrcOrLdsB32() const {
502     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
503            isLiteralImm(MVT::i32) || isExpr();
504   }
505 
506   bool isVCSrcB32() const {
507     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
508   }
509 
510   bool isVCSrcB64() const {
511     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
512   }
513 
514   bool isVCSrcTB16_Lo128() const {
515     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
516   }
517 
518   bool isVCSrcB16() const {
519     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
520   }
521 
522   bool isVCSrcV2B16() const {
523     return isVCSrcB16();
524   }
525 
526   bool isVCSrcF32() const {
527     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
528   }
529 
530   bool isVCSrcF64() const {
531     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
532   }
533 
534   bool isVCSrcTF16_Lo128() const {
535     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
536   }
537 
538   bool isVCSrcF16() const {
539     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
540   }
541 
542   bool isVCSrcV2F16() const {
543     return isVCSrcF16();
544   }
545 
546   bool isVSrcB32() const {
547     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
548   }
549 
550   bool isVSrcB64() const {
551     return isVCSrcF64() || isLiteralImm(MVT::i64);
552   }
553 
554   bool isVSrcTB16_Lo128() const {
555     return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
556   }
557 
558   bool isVSrcB16() const {
559     return isVCSrcB16() || isLiteralImm(MVT::i16);
560   }
561 
562   bool isVSrcV2B16() const {
563     return isVSrcB16() || isLiteralImm(MVT::v2i16);
564   }
565 
566   bool isVCSrcV2FP32() const {
567     return isVCSrcF64();
568   }
569 
570   bool isVSrcV2FP32() const {
571     return isVSrcF64() || isLiteralImm(MVT::v2f32);
572   }
573 
574   bool isVCSrcV2INT32() const {
575     return isVCSrcB64();
576   }
577 
578   bool isVSrcV2INT32() const {
579     return isVSrcB64() || isLiteralImm(MVT::v2i32);
580   }
581 
582   bool isVSrcF32() const {
583     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
584   }
585 
586   bool isVSrcF64() const {
587     return isVCSrcF64() || isLiteralImm(MVT::f64);
588   }
589 
590   bool isVSrcTF16_Lo128() const {
591     return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
592   }
593 
594   bool isVSrcF16() const {
595     return isVCSrcF16() || isLiteralImm(MVT::f16);
596   }
597 
598   bool isVSrcV2F16() const {
599     return isVSrcF16() || isLiteralImm(MVT::v2f16);
600   }
601 
602   bool isVISrcB32() const {
603     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
604   }
605 
606   bool isVISrcB16() const {
607     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
608   }
609 
610   bool isVISrcV2B16() const {
611     return isVISrcB16();
612   }
613 
614   bool isVISrcF32() const {
615     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
616   }
617 
618   bool isVISrcF16() const {
619     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
620   }
621 
622   bool isVISrcV2F16() const {
623     return isVISrcF16() || isVISrcB32();
624   }
625 
626   bool isVISrc_64B64() const {
627     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
628   }
629 
630   bool isVISrc_64F64() const {
631     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
632   }
633 
634   bool isVISrc_64V2FP32() const {
635     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
636   }
637 
638   bool isVISrc_64V2INT32() const {
639     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
640   }
641 
642   bool isVISrc_256B64() const {
643     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
644   }
645 
646   bool isVISrc_256F64() const {
647     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
648   }
649 
650   bool isVISrc_128B16() const {
651     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
652   }
653 
654   bool isVISrc_128V2B16() const {
655     return isVISrc_128B16();
656   }
657 
658   bool isVISrc_128B32() const {
659     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
660   }
661 
662   bool isVISrc_128F32() const {
663     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
664   }
665 
666   bool isVISrc_256V2FP32() const {
667     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
668   }
669 
670   bool isVISrc_256V2INT32() const {
671     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
672   }
673 
674   bool isVISrc_512B32() const {
675     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
676   }
677 
678   bool isVISrc_512B16() const {
679     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
680   }
681 
682   bool isVISrc_512V2B16() const {
683     return isVISrc_512B16();
684   }
685 
686   bool isVISrc_512F32() const {
687     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
688   }
689 
690   bool isVISrc_512F16() const {
691     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
692   }
693 
694   bool isVISrc_512V2F16() const {
695     return isVISrc_512F16() || isVISrc_512B32();
696   }
697 
698   bool isVISrc_1024B32() const {
699     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
700   }
701 
702   bool isVISrc_1024B16() const {
703     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
704   }
705 
706   bool isVISrc_1024V2B16() const {
707     return isVISrc_1024B16();
708   }
709 
710   bool isVISrc_1024F32() const {
711     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
712   }
713 
714   bool isVISrc_1024F16() const {
715     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
716   }
717 
718   bool isVISrc_1024V2F16() const {
719     return isVISrc_1024F16() || isVISrc_1024B32();
720   }
721 
722   bool isAISrcB32() const {
723     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
724   }
725 
726   bool isAISrcB16() const {
727     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
728   }
729 
730   bool isAISrcV2B16() const {
731     return isAISrcB16();
732   }
733 
734   bool isAISrcF32() const {
735     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
736   }
737 
738   bool isAISrcF16() const {
739     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
740   }
741 
742   bool isAISrcV2F16() const {
743     return isAISrcF16() || isAISrcB32();
744   }
745 
746   bool isAISrc_64B64() const {
747     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
748   }
749 
750   bool isAISrc_64F64() const {
751     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
752   }
753 
754   bool isAISrc_128B32() const {
755     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
756   }
757 
758   bool isAISrc_128B16() const {
759     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
760   }
761 
762   bool isAISrc_128V2B16() const {
763     return isAISrc_128B16();
764   }
765 
766   bool isAISrc_128F32() const {
767     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
768   }
769 
770   bool isAISrc_128F16() const {
771     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
772   }
773 
774   bool isAISrc_128V2F16() const {
775     return isAISrc_128F16() || isAISrc_128B32();
776   }
777 
778   bool isVISrc_128F16() const {
779     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
780   }
781 
782   bool isVISrc_128V2F16() const {
783     return isVISrc_128F16() || isVISrc_128B32();
784   }
785 
786   bool isAISrc_256B64() const {
787     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
788   }
789 
790   bool isAISrc_256F64() const {
791     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
792   }
793 
794   bool isAISrc_512B32() const {
795     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
796   }
797 
798   bool isAISrc_512B16() const {
799     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
800   }
801 
802   bool isAISrc_512V2B16() const {
803     return isAISrc_512B16();
804   }
805 
806   bool isAISrc_512F32() const {
807     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
808   }
809 
810   bool isAISrc_512F16() const {
811     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
812   }
813 
814   bool isAISrc_512V2F16() const {
815     return isAISrc_512F16() || isAISrc_512B32();
816   }
817 
818   bool isAISrc_1024B32() const {
819     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
820   }
821 
822   bool isAISrc_1024B16() const {
823     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
824   }
825 
826   bool isAISrc_1024V2B16() const {
827     return isAISrc_1024B16();
828   }
829 
830   bool isAISrc_1024F32() const {
831     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
832   }
833 
834   bool isAISrc_1024F16() const {
835     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
836   }
837 
838   bool isAISrc_1024V2F16() const {
839     return isAISrc_1024F16() || isAISrc_1024B32();
840   }
841 
842   bool isKImmFP32() const {
843     return isLiteralImm(MVT::f32);
844   }
845 
846   bool isKImmFP16() const {
847     return isLiteralImm(MVT::f16);
848   }
849 
850   bool isMem() const override {
851     return false;
852   }
853 
854   bool isExpr() const {
855     return Kind == Expression;
856   }
857 
858   bool isSoppBrTarget() const {
859     return isExpr() || isImm();
860   }
861 
862   bool isSWaitCnt() const;
863   bool isDepCtr() const;
864   bool isSDelayAlu() const;
865   bool isHwreg() const;
866   bool isSendMsg() const;
867   bool isSwizzle() const;
868   bool isSMRDOffset8() const;
869   bool isSMEMOffset() const;
870   bool isSMRDLiteralOffset() const;
871   bool isDPP8() const;
872   bool isDPPCtrl() const;
873   bool isBLGP() const;
874   bool isCBSZ() const;
875   bool isABID() const;
876   bool isGPRIdxMode() const;
877   bool isS16Imm() const;
878   bool isU16Imm() const;
879   bool isEndpgm() const;
880   bool isWaitVDST() const;
881   bool isWaitEXP() const;
882 
883   StringRef getToken() const {
884     assert(isToken());
885     return StringRef(Tok.Data, Tok.Length);
886   }
887 
888   int64_t getImm() const {
889     assert(isImm());
890     return Imm.Val;
891   }
892 
893   void setImm(int64_t Val) {
894     assert(isImm());
895     Imm.Val = Val;
896   }
897 
898   ImmTy getImmTy() const {
899     assert(isImm());
900     return Imm.Type;
901   }
902 
903   unsigned getReg() const override {
904     assert(isRegKind());
905     return Reg.RegNo;
906   }
907 
908   SMLoc getStartLoc() const override {
909     return StartLoc;
910   }
911 
912   SMLoc getEndLoc() const override {
913     return EndLoc;
914   }
915 
916   SMRange getLocRange() const {
917     return SMRange(StartLoc, EndLoc);
918   }
919 
920   Modifiers getModifiers() const {
921     assert(isRegKind() || isImmTy(ImmTyNone));
922     return isRegKind() ? Reg.Mods : Imm.Mods;
923   }
924 
925   void setModifiers(Modifiers Mods) {
926     assert(isRegKind() || isImmTy(ImmTyNone));
927     if (isRegKind())
928       Reg.Mods = Mods;
929     else
930       Imm.Mods = Mods;
931   }
932 
933   bool hasModifiers() const {
934     return getModifiers().hasModifiers();
935   }
936 
937   bool hasFPModifiers() const {
938     return getModifiers().hasFPModifiers();
939   }
940 
941   bool hasIntModifiers() const {
942     return getModifiers().hasIntModifiers();
943   }
944 
945   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
946 
947   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
948 
949   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
950 
951   template <unsigned Bitwidth>
952   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
953 
954   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
955     addKImmFPOperands<16>(Inst, N);
956   }
957 
958   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
959     addKImmFPOperands<32>(Inst, N);
960   }
961 
962   void addRegOperands(MCInst &Inst, unsigned N) const;
963 
964   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
965     addRegOperands(Inst, N);
966   }
967 
968   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
969     if (isRegKind())
970       addRegOperands(Inst, N);
971     else if (isExpr())
972       Inst.addOperand(MCOperand::createExpr(Expr));
973     else
974       addImmOperands(Inst, N);
975   }
976 
977   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
978     Modifiers Mods = getModifiers();
979     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
980     if (isRegKind()) {
981       addRegOperands(Inst, N);
982     } else {
983       addImmOperands(Inst, N, false);
984     }
985   }
986 
987   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
988     assert(!hasIntModifiers());
989     addRegOrImmWithInputModsOperands(Inst, N);
990   }
991 
992   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
993     assert(!hasFPModifiers());
994     addRegOrImmWithInputModsOperands(Inst, N);
995   }
996 
997   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
998     Modifiers Mods = getModifiers();
999     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1000     assert(isRegKind());
1001     addRegOperands(Inst, N);
1002   }
1003 
1004   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1005     assert(!hasIntModifiers());
1006     addRegWithInputModsOperands(Inst, N);
1007   }
1008 
1009   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1010     assert(!hasFPModifiers());
1011     addRegWithInputModsOperands(Inst, N);
1012   }
1013 
1014   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
1015     if (isImm())
1016       addImmOperands(Inst, N);
1017     else {
1018       assert(isExpr());
1019       Inst.addOperand(MCOperand::createExpr(Expr));
1020     }
1021   }
1022 
1023   static void printImmTy(raw_ostream& OS, ImmTy Type) {
1024     switch (Type) {
1025     case ImmTyNone: OS << "None"; break;
1026     case ImmTyGDS: OS << "GDS"; break;
1027     case ImmTyLDS: OS << "LDS"; break;
1028     case ImmTyOffen: OS << "Offen"; break;
1029     case ImmTyIdxen: OS << "Idxen"; break;
1030     case ImmTyAddr64: OS << "Addr64"; break;
1031     case ImmTyOffset: OS << "Offset"; break;
1032     case ImmTyInstOffset: OS << "InstOffset"; break;
1033     case ImmTyOffset0: OS << "Offset0"; break;
1034     case ImmTyOffset1: OS << "Offset1"; break;
1035     case ImmTyCPol: OS << "CPol"; break;
1036     case ImmTySWZ: OS << "SWZ"; break;
1037     case ImmTyTFE: OS << "TFE"; break;
1038     case ImmTyD16: OS << "D16"; break;
1039     case ImmTyFORMAT: OS << "FORMAT"; break;
1040     case ImmTyClampSI: OS << "ClampSI"; break;
1041     case ImmTyOModSI: OS << "OModSI"; break;
1042     case ImmTyDPP8: OS << "DPP8"; break;
1043     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1044     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1045     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1046     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1047     case ImmTyDppFi: OS << "FI"; break;
1048     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1049     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1050     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1051     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1052     case ImmTyDMask: OS << "DMask"; break;
1053     case ImmTyDim: OS << "Dim"; break;
1054     case ImmTyUNorm: OS << "UNorm"; break;
1055     case ImmTyDA: OS << "DA"; break;
1056     case ImmTyR128A16: OS << "R128A16"; break;
1057     case ImmTyA16: OS << "A16"; break;
1058     case ImmTyLWE: OS << "LWE"; break;
1059     case ImmTyOff: OS << "Off"; break;
1060     case ImmTyExpTgt: OS << "ExpTgt"; break;
1061     case ImmTyExpCompr: OS << "ExpCompr"; break;
1062     case ImmTyExpVM: OS << "ExpVM"; break;
1063     case ImmTyHwreg: OS << "Hwreg"; break;
1064     case ImmTySendMsg: OS << "SendMsg"; break;
1065     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1066     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1067     case ImmTyAttrChan: OS << "AttrChan"; break;
1068     case ImmTyOpSel: OS << "OpSel"; break;
1069     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1070     case ImmTyNegLo: OS << "NegLo"; break;
1071     case ImmTyNegHi: OS << "NegHi"; break;
1072     case ImmTySwizzle: OS << "Swizzle"; break;
1073     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1074     case ImmTyHigh: OS << "High"; break;
1075     case ImmTyBLGP: OS << "BLGP"; break;
1076     case ImmTyCBSZ: OS << "CBSZ"; break;
1077     case ImmTyABID: OS << "ABID"; break;
1078     case ImmTyEndpgm: OS << "Endpgm"; break;
1079     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1080     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1081     }
1082   }
1083 
1084   void print(raw_ostream &OS) const override {
1085     switch (Kind) {
1086     case Register:
1087       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1088       break;
1089     case Immediate:
1090       OS << '<' << getImm();
1091       if (getImmTy() != ImmTyNone) {
1092         OS << " type: "; printImmTy(OS, getImmTy());
1093       }
1094       OS << " mods: " << Imm.Mods << '>';
1095       break;
1096     case Token:
1097       OS << '\'' << getToken() << '\'';
1098       break;
1099     case Expression:
1100       OS << "<expr " << *Expr << '>';
1101       break;
1102     }
1103   }
1104 
1105   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1106                                       int64_t Val, SMLoc Loc,
1107                                       ImmTy Type = ImmTyNone,
1108                                       bool IsFPImm = false) {
1109     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1110     Op->Imm.Val = Val;
1111     Op->Imm.IsFPImm = IsFPImm;
1112     Op->Imm.Kind = ImmKindTyNone;
1113     Op->Imm.Type = Type;
1114     Op->Imm.Mods = Modifiers();
1115     Op->StartLoc = Loc;
1116     Op->EndLoc = Loc;
1117     return Op;
1118   }
1119 
1120   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1121                                         StringRef Str, SMLoc Loc,
1122                                         bool HasExplicitEncodingSize = true) {
1123     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1124     Res->Tok.Data = Str.data();
1125     Res->Tok.Length = Str.size();
1126     Res->StartLoc = Loc;
1127     Res->EndLoc = Loc;
1128     return Res;
1129   }
1130 
1131   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1132                                       unsigned RegNo, SMLoc S,
1133                                       SMLoc E) {
1134     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1135     Op->Reg.RegNo = RegNo;
1136     Op->Reg.Mods = Modifiers();
1137     Op->StartLoc = S;
1138     Op->EndLoc = E;
1139     return Op;
1140   }
1141 
1142   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1143                                        const class MCExpr *Expr, SMLoc S) {
1144     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1145     Op->Expr = Expr;
1146     Op->StartLoc = S;
1147     Op->EndLoc = S;
1148     return Op;
1149   }
1150 };
1151 
1152 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1153   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1154   return OS;
1155 }
1156 
1157 //===----------------------------------------------------------------------===//
1158 // AsmParser
1159 //===----------------------------------------------------------------------===//
1160 
1161 // Holds info related to the current kernel, e.g. count of SGPRs used.
1162 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1163 // .amdgpu_hsa_kernel or at EOF.
1164 class KernelScopeInfo {
1165   int SgprIndexUnusedMin = -1;
1166   int VgprIndexUnusedMin = -1;
1167   int AgprIndexUnusedMin = -1;
1168   MCContext *Ctx = nullptr;
1169   MCSubtargetInfo const *MSTI = nullptr;
1170 
1171   void usesSgprAt(int i) {
1172     if (i >= SgprIndexUnusedMin) {
1173       SgprIndexUnusedMin = ++i;
1174       if (Ctx) {
1175         MCSymbol* const Sym =
1176           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1177         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1178       }
1179     }
1180   }
1181 
1182   void usesVgprAt(int i) {
1183     if (i >= VgprIndexUnusedMin) {
1184       VgprIndexUnusedMin = ++i;
1185       if (Ctx) {
1186         MCSymbol* const Sym =
1187           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1188         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1189                                          VgprIndexUnusedMin);
1190         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1191       }
1192     }
1193   }
1194 
1195   void usesAgprAt(int i) {
1196     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1197     if (!hasMAIInsts(*MSTI))
1198       return;
1199 
1200     if (i >= AgprIndexUnusedMin) {
1201       AgprIndexUnusedMin = ++i;
1202       if (Ctx) {
1203         MCSymbol* const Sym =
1204           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1205         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1206 
1207         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1208         MCSymbol* const vSym =
1209           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1210         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1211                                          VgprIndexUnusedMin);
1212         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1213       }
1214     }
1215   }
1216 
1217 public:
1218   KernelScopeInfo() = default;
1219 
1220   void initialize(MCContext &Context) {
1221     Ctx = &Context;
1222     MSTI = Ctx->getSubtargetInfo();
1223 
1224     usesSgprAt(SgprIndexUnusedMin = -1);
1225     usesVgprAt(VgprIndexUnusedMin = -1);
1226     if (hasMAIInsts(*MSTI)) {
1227       usesAgprAt(AgprIndexUnusedMin = -1);
1228     }
1229   }
1230 
1231   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1232                     unsigned RegWidth) {
1233     switch (RegKind) {
1234     case IS_SGPR:
1235       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1236       break;
1237     case IS_AGPR:
1238       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1239       break;
1240     case IS_VGPR:
1241       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1242       break;
1243     default:
1244       break;
1245     }
1246   }
1247 };
1248 
1249 class AMDGPUAsmParser : public MCTargetAsmParser {
1250   MCAsmParser &Parser;
1251 
1252   unsigned ForcedEncodingSize = 0;
1253   bool ForcedDPP = false;
1254   bool ForcedSDWA = false;
1255   KernelScopeInfo KernelScope;
1256 
1257   /// @name Auto-generated Match Functions
1258   /// {
1259 
1260 #define GET_ASSEMBLER_HEADER
1261 #include "AMDGPUGenAsmMatcher.inc"
1262 
1263   /// }
1264 
1265 private:
1266   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1267   bool OutOfRangeError(SMRange Range);
1268   /// Calculate VGPR/SGPR blocks required for given target, reserved
1269   /// registers, and user-specified NextFreeXGPR values.
1270   ///
1271   /// \param Features [in] Target features, used for bug corrections.
1272   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1273   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1274   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1275   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1276   /// descriptor field, if valid.
1277   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1278   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1279   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1280   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1281   /// \param VGPRBlocks [out] Result VGPR block count.
1282   /// \param SGPRBlocks [out] Result SGPR block count.
1283   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1284                           bool FlatScrUsed, bool XNACKUsed,
1285                           std::optional<bool> EnableWavefrontSize32,
1286                           unsigned NextFreeVGPR, SMRange VGPRRange,
1287                           unsigned NextFreeSGPR, SMRange SGPRRange,
1288                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1289   bool ParseDirectiveAMDGCNTarget();
1290   bool ParseDirectiveAMDHSAKernel();
1291   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1292   bool ParseDirectiveHSACodeObjectVersion();
1293   bool ParseDirectiveHSACodeObjectISA();
1294   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1295   bool ParseDirectiveAMDKernelCodeT();
1296   // TODO: Possibly make subtargetHasRegister const.
1297   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1298   bool ParseDirectiveAMDGPUHsaKernel();
1299 
1300   bool ParseDirectiveISAVersion();
1301   bool ParseDirectiveHSAMetadata();
1302   bool ParseDirectivePALMetadataBegin();
1303   bool ParseDirectivePALMetadata();
1304   bool ParseDirectiveAMDGPULDS();
1305 
1306   /// Common code to parse out a block of text (typically YAML) between start and
1307   /// end directives.
1308   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1309                            const char *AssemblerDirectiveEnd,
1310                            std::string &CollectString);
1311 
1312   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1313                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1314   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1315                            unsigned &RegNum, unsigned &RegWidth,
1316                            bool RestoreOnFailure = false);
1317   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1318                            unsigned &RegNum, unsigned &RegWidth,
1319                            SmallVectorImpl<AsmToken> &Tokens);
1320   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1321                            unsigned &RegWidth,
1322                            SmallVectorImpl<AsmToken> &Tokens);
1323   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1324                            unsigned &RegWidth,
1325                            SmallVectorImpl<AsmToken> &Tokens);
1326   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1327                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1328   bool ParseRegRange(unsigned& Num, unsigned& Width);
1329   unsigned getRegularReg(RegisterKind RegKind,
1330                          unsigned RegNum,
1331                          unsigned RegWidth,
1332                          SMLoc Loc);
1333 
1334   bool isRegister();
1335   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1336   std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1337   void initializeGprCountSymbol(RegisterKind RegKind);
1338   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1339                              unsigned RegWidth);
1340   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1341                     bool IsAtomic);
1342   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1343                  bool IsGdsHardcoded);
1344 
1345 public:
1346   enum AMDGPUMatchResultTy {
1347     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1348   };
1349   enum OperandMode {
1350     OperandMode_Default,
1351     OperandMode_NSA,
1352   };
1353 
1354   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1355 
1356   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1357                const MCInstrInfo &MII,
1358                const MCTargetOptions &Options)
1359       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1360     MCAsmParserExtension::Initialize(Parser);
1361 
1362     if (getFeatureBits().none()) {
1363       // Set default features.
1364       copySTI().ToggleFeature("southern-islands");
1365     }
1366 
1367     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1368 
1369     {
1370       // TODO: make those pre-defined variables read-only.
1371       // Currently there is none suitable machinery in the core llvm-mc for this.
1372       // MCSymbol::isRedefinable is intended for another purpose, and
1373       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1374       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1375       MCContext &Ctx = getContext();
1376       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1377         MCSymbol *Sym =
1378             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1379         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1380         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1381         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1382         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1383         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1384       } else {
1385         MCSymbol *Sym =
1386             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1387         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1388         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1389         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1390         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1391         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1392       }
1393       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1394         initializeGprCountSymbol(IS_VGPR);
1395         initializeGprCountSymbol(IS_SGPR);
1396       } else
1397         KernelScope.initialize(getContext());
1398     }
1399   }
1400 
1401   bool hasMIMG_R128() const {
1402     return AMDGPU::hasMIMG_R128(getSTI());
1403   }
1404 
1405   bool hasPackedD16() const {
1406     return AMDGPU::hasPackedD16(getSTI());
1407   }
1408 
1409   bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1410 
1411   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1412 
1413   bool isSI() const {
1414     return AMDGPU::isSI(getSTI());
1415   }
1416 
1417   bool isCI() const {
1418     return AMDGPU::isCI(getSTI());
1419   }
1420 
1421   bool isVI() const {
1422     return AMDGPU::isVI(getSTI());
1423   }
1424 
1425   bool isGFX9() const {
1426     return AMDGPU::isGFX9(getSTI());
1427   }
1428 
1429   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1430   bool isGFX90A() const {
1431     return AMDGPU::isGFX90A(getSTI());
1432   }
1433 
1434   bool isGFX940() const {
1435     return AMDGPU::isGFX940(getSTI());
1436   }
1437 
1438   bool isGFX9Plus() const {
1439     return AMDGPU::isGFX9Plus(getSTI());
1440   }
1441 
1442   bool isGFX10() const {
1443     return AMDGPU::isGFX10(getSTI());
1444   }
1445 
1446   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1447 
1448   bool isGFX11() const {
1449     return AMDGPU::isGFX11(getSTI());
1450   }
1451 
1452   bool isGFX11Plus() const {
1453     return AMDGPU::isGFX11Plus(getSTI());
1454   }
1455 
1456   bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1457 
1458   bool isGFX10_BEncoding() const {
1459     return AMDGPU::isGFX10_BEncoding(getSTI());
1460   }
1461 
1462   bool hasInv2PiInlineImm() const {
1463     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1464   }
1465 
1466   bool hasFlatOffsets() const {
1467     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1468   }
1469 
1470   bool hasArchitectedFlatScratch() const {
1471     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1472   }
1473 
1474   bool hasSGPR102_SGPR103() const {
1475     return !isVI() && !isGFX9();
1476   }
1477 
1478   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1479 
1480   bool hasIntClamp() const {
1481     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1482   }
1483 
1484   AMDGPUTargetStreamer &getTargetStreamer() {
1485     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1486     return static_cast<AMDGPUTargetStreamer &>(TS);
1487   }
1488 
1489   const MCRegisterInfo *getMRI() const {
1490     // We need this const_cast because for some reason getContext() is not const
1491     // in MCAsmParser.
1492     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1493   }
1494 
1495   const MCInstrInfo *getMII() const {
1496     return &MII;
1497   }
1498 
1499   const FeatureBitset &getFeatureBits() const {
1500     return getSTI().getFeatureBits();
1501   }
1502 
1503   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1504   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1505   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1506 
1507   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1508   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1509   bool isForcedDPP() const { return ForcedDPP; }
1510   bool isForcedSDWA() const { return ForcedSDWA; }
1511   ArrayRef<unsigned> getMatchedVariants() const;
1512   StringRef getMatchedVariantName() const;
1513 
1514   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1515   bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1516                      bool RestoreOnFailure);
1517   bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1518                      SMLoc &EndLoc) override;
1519   OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1520                                         SMLoc &EndLoc) override;
1521   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1522   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1523                                       unsigned Kind) override;
1524   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1525                                OperandVector &Operands, MCStreamer &Out,
1526                                uint64_t &ErrorInfo,
1527                                bool MatchingInlineAsm) override;
1528   bool ParseDirective(AsmToken DirectiveID) override;
1529   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1530                                     OperandMode Mode = OperandMode_Default);
1531   StringRef parseMnemonicSuffix(StringRef Name);
1532   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1533                         SMLoc NameLoc, OperandVector &Operands) override;
1534   //bool ProcessInstruction(MCInst &Inst);
1535 
1536   OperandMatchResultTy parseTokenOp(StringRef Name, OperandVector &Operands);
1537 
1538   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1539 
1540   OperandMatchResultTy
1541   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1542                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1543                      bool (*ConvertResult)(int64_t &) = nullptr);
1544 
1545   OperandMatchResultTy
1546   parseOperandArrayWithPrefix(const char *Prefix,
1547                               OperandVector &Operands,
1548                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1549                               bool (*ConvertResult)(int64_t&) = nullptr);
1550 
1551   OperandMatchResultTy
1552   parseNamedBit(StringRef Name, OperandVector &Operands,
1553                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1554   unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1555   OperandMatchResultTy parseCPol(OperandVector &Operands);
1556   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1557                                              StringRef &Value,
1558                                              SMLoc &StringLoc);
1559 
1560   bool isModifier();
1561   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1562   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1563   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1564   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1565   bool parseSP3NegModifier();
1566   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1567   OperandMatchResultTy parseReg(OperandVector &Operands);
1568   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1569   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1570   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1571   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1572   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1573   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1574   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1575   OperandMatchResultTy parseUfmt(int64_t &Format);
1576   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1577   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1578   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1579   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1580   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1581   OperandMatchResultTy parseFlatOffset(OperandVector &Operands);
1582   OperandMatchResultTy parseR128A16(OperandVector &Operands);
1583   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1584   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1585 
1586   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1587   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1588   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1589   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1590 
1591   bool parseCnt(int64_t &IntVal);
1592   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1593 
1594   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1595   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1596   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1597 
1598   bool parseDelay(int64_t &Delay);
1599   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1600 
1601   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1602 
1603 private:
1604   struct OperandInfoTy {
1605     SMLoc Loc;
1606     int64_t Id;
1607     bool IsSymbolic = false;
1608     bool IsDefined = false;
1609 
1610     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1611   };
1612 
1613   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1614   bool validateSendMsg(const OperandInfoTy &Msg,
1615                        const OperandInfoTy &Op,
1616                        const OperandInfoTy &Stream);
1617 
1618   bool parseHwregBody(OperandInfoTy &HwReg,
1619                       OperandInfoTy &Offset,
1620                       OperandInfoTy &Width);
1621   bool validateHwreg(const OperandInfoTy &HwReg,
1622                      const OperandInfoTy &Offset,
1623                      const OperandInfoTy &Width);
1624 
1625   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1626   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1627   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1628 
1629   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1630                       const OperandVector &Operands) const;
1631   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1632   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1633   SMLoc getLitLoc(const OperandVector &Operands,
1634                   bool SearchMandatoryLiterals = false) const;
1635   SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1636   SMLoc getConstLoc(const OperandVector &Operands) const;
1637   SMLoc getInstLoc(const OperandVector &Operands) const;
1638 
1639   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1640   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1641   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1642   bool validateSOPLiteral(const MCInst &Inst) const;
1643   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1644   bool validateVOPDRegBankConstraints(const MCInst &Inst,
1645                                       const OperandVector &Operands);
1646   bool validateIntClampSupported(const MCInst &Inst);
1647   bool validateMIMGAtomicDMask(const MCInst &Inst);
1648   bool validateMIMGGatherDMask(const MCInst &Inst);
1649   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1650   bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1651   bool validateMIMGAddrSize(const MCInst &Inst);
1652   bool validateMIMGD16(const MCInst &Inst);
1653   bool validateMIMGMSAA(const MCInst &Inst);
1654   bool validateOpSel(const MCInst &Inst);
1655   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1656   bool validateVccOperand(unsigned Reg) const;
1657   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1658   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1659   bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1660   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1661   bool validateAGPRLdSt(const MCInst &Inst) const;
1662   bool validateVGPRAlign(const MCInst &Inst) const;
1663   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1664   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1665   bool validateDivScale(const MCInst &Inst);
1666   bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1667   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1668                              const SMLoc &IDLoc);
1669   bool validateExeczVcczOperands(const OperandVector &Operands);
1670   bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1671   std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1672   unsigned getConstantBusLimit(unsigned Opcode) const;
1673   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1674   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1675   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1676 
1677   bool isSupportedMnemo(StringRef Mnemo,
1678                         const FeatureBitset &FBS);
1679   bool isSupportedMnemo(StringRef Mnemo,
1680                         const FeatureBitset &FBS,
1681                         ArrayRef<unsigned> Variants);
1682   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1683 
1684   bool isId(const StringRef Id) const;
1685   bool isId(const AsmToken &Token, const StringRef Id) const;
1686   bool isToken(const AsmToken::TokenKind Kind) const;
1687   StringRef getId() const;
1688   bool trySkipId(const StringRef Id);
1689   bool trySkipId(const StringRef Pref, const StringRef Id);
1690   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1691   bool trySkipToken(const AsmToken::TokenKind Kind);
1692   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1693   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1694   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1695 
1696   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1697   AsmToken::TokenKind getTokenKind() const;
1698   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1699   bool parseExpr(OperandVector &Operands);
1700   StringRef getTokenStr() const;
1701   AsmToken peekToken(bool ShouldSkipSpace = true);
1702   AsmToken getToken() const;
1703   SMLoc getLoc() const;
1704   void lex();
1705 
1706 public:
1707   void onBeginOfFile() override;
1708 
1709   OperandMatchResultTy parseCustomOperand(OperandVector &Operands,
1710                                           unsigned MCK);
1711 
1712   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1713   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1714   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1715   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1716   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1717   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1718 
1719   bool parseSwizzleOperand(int64_t &Op,
1720                            const unsigned MinVal,
1721                            const unsigned MaxVal,
1722                            const StringRef ErrMsg,
1723                            SMLoc &Loc);
1724   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1725                             const unsigned MinVal,
1726                             const unsigned MaxVal,
1727                             const StringRef ErrMsg);
1728   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1729   bool parseSwizzleOffset(int64_t &Imm);
1730   bool parseSwizzleMacro(int64_t &Imm);
1731   bool parseSwizzleQuadPerm(int64_t &Imm);
1732   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1733   bool parseSwizzleBroadcast(int64_t &Imm);
1734   bool parseSwizzleSwap(int64_t &Imm);
1735   bool parseSwizzleReverse(int64_t &Imm);
1736 
1737   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1738   int64_t parseGPRIdxMacro();
1739 
1740   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1741   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1742   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1743 
1744   AMDGPUOperand::Ptr defaultCPol() const;
1745 
1746   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1747   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1748   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1749   AMDGPUOperand::Ptr defaultFlatOffset() const;
1750 
1751   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1752 
1753   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1754                OptionalImmIndexMap &OptionalIdx);
1755   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1756   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1757   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1758   void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1759   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1760                     OptionalImmIndexMap &OptionalIdx);
1761   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1762                 OptionalImmIndexMap &OptionalIdx);
1763 
1764   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1765   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1766 
1767   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1768                bool IsAtomic = false);
1769   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1770   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1771 
1772   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1773 
1774   bool parseDimId(unsigned &Encoding);
1775   OperandMatchResultTy parseDim(OperandVector &Operands);
1776   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1777   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1778   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1779   int64_t parseDPPCtrlSel(StringRef Ctrl);
1780   int64_t parseDPPCtrlPerm();
1781   AMDGPUOperand::Ptr defaultRowMask() const;
1782   AMDGPUOperand::Ptr defaultBankMask() const;
1783   AMDGPUOperand::Ptr defaultDppBoundCtrl() const;
1784   AMDGPUOperand::Ptr defaultFI() const;
1785   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1786   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1787     cvtDPP(Inst, Operands, true);
1788   }
1789   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1790                   bool IsDPP8 = false);
1791   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1792     cvtVOP3DPP(Inst, Operands, true);
1793   }
1794 
1795   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1796                                     AMDGPUOperand::ImmTy Type);
1797   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1798   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1799   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1800   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1801   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1802   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1803   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1804                uint64_t BasicInstType,
1805                bool SkipDstVcc = false,
1806                bool SkipSrcVcc = false);
1807 
1808   AMDGPUOperand::Ptr defaultBLGP() const;
1809   AMDGPUOperand::Ptr defaultCBSZ() const;
1810   AMDGPUOperand::Ptr defaultABID() const;
1811 
1812   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1813   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1814 
1815   AMDGPUOperand::Ptr defaultWaitVDST() const;
1816   AMDGPUOperand::Ptr defaultWaitEXP() const;
1817   OperandMatchResultTy parseVOPD(OperandVector &Operands);
1818 };
1819 
1820 } // end anonymous namespace
1821 
1822 // May be called with integer type with equivalent bitwidth.
1823 static const fltSemantics *getFltSemantics(unsigned Size) {
1824   switch (Size) {
1825   case 4:
1826     return &APFloat::IEEEsingle();
1827   case 8:
1828     return &APFloat::IEEEdouble();
1829   case 2:
1830     return &APFloat::IEEEhalf();
1831   default:
1832     llvm_unreachable("unsupported fp type");
1833   }
1834 }
1835 
1836 static const fltSemantics *getFltSemantics(MVT VT) {
1837   return getFltSemantics(VT.getSizeInBits() / 8);
1838 }
1839 
1840 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1841   switch (OperandType) {
1842   case AMDGPU::OPERAND_REG_IMM_INT32:
1843   case AMDGPU::OPERAND_REG_IMM_FP32:
1844   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1845   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1846   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1847   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1848   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1849   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1850   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1851   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1852   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1853   case AMDGPU::OPERAND_KIMM32:
1854     return &APFloat::IEEEsingle();
1855   case AMDGPU::OPERAND_REG_IMM_INT64:
1856   case AMDGPU::OPERAND_REG_IMM_FP64:
1857   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1858   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1859   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1860     return &APFloat::IEEEdouble();
1861   case AMDGPU::OPERAND_REG_IMM_INT16:
1862   case AMDGPU::OPERAND_REG_IMM_FP16:
1863   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1864   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1865   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1866   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1867   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1868   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1869   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1870   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1871   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1872   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1873   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1874   case AMDGPU::OPERAND_KIMM16:
1875     return &APFloat::IEEEhalf();
1876   default:
1877     llvm_unreachable("unsupported fp type");
1878   }
1879 }
1880 
1881 //===----------------------------------------------------------------------===//
1882 // Operand
1883 //===----------------------------------------------------------------------===//
1884 
1885 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1886   bool Lost;
1887 
1888   // Convert literal to single precision
1889   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1890                                                APFloat::rmNearestTiesToEven,
1891                                                &Lost);
1892   // We allow precision lost but not overflow or underflow
1893   if (Status != APFloat::opOK &&
1894       Lost &&
1895       ((Status & APFloat::opOverflow)  != 0 ||
1896        (Status & APFloat::opUnderflow) != 0)) {
1897     return false;
1898   }
1899 
1900   return true;
1901 }
1902 
1903 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1904   return isUIntN(Size, Val) || isIntN(Size, Val);
1905 }
1906 
1907 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1908   if (VT.getScalarType() == MVT::i16) {
1909     // FP immediate values are broken.
1910     return isInlinableIntLiteral(Val);
1911   }
1912 
1913   // f16/v2f16 operands work correctly for all values.
1914   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1915 }
1916 
1917 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1918 
1919   // This is a hack to enable named inline values like
1920   // shared_base with both 32-bit and 64-bit operands.
1921   // Note that these values are defined as
1922   // 32-bit operands only.
1923   if (isInlineValue()) {
1924     return true;
1925   }
1926 
1927   if (!isImmTy(ImmTyNone)) {
1928     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1929     return false;
1930   }
1931   // TODO: We should avoid using host float here. It would be better to
1932   // check the float bit values which is what a few other places do.
1933   // We've had bot failures before due to weird NaN support on mips hosts.
1934 
1935   APInt Literal(64, Imm.Val);
1936 
1937   if (Imm.IsFPImm) { // We got fp literal token
1938     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1939       return AMDGPU::isInlinableLiteral64(Imm.Val,
1940                                           AsmParser->hasInv2PiInlineImm());
1941     }
1942 
1943     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1944     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1945       return false;
1946 
1947     if (type.getScalarSizeInBits() == 16) {
1948       return isInlineableLiteralOp16(
1949         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1950         type, AsmParser->hasInv2PiInlineImm());
1951     }
1952 
1953     // Check if single precision literal is inlinable
1954     return AMDGPU::isInlinableLiteral32(
1955       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1956       AsmParser->hasInv2PiInlineImm());
1957   }
1958 
1959   // We got int literal token.
1960   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1961     return AMDGPU::isInlinableLiteral64(Imm.Val,
1962                                         AsmParser->hasInv2PiInlineImm());
1963   }
1964 
1965   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1966     return false;
1967   }
1968 
1969   if (type.getScalarSizeInBits() == 16) {
1970     return isInlineableLiteralOp16(
1971       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1972       type, AsmParser->hasInv2PiInlineImm());
1973   }
1974 
1975   return AMDGPU::isInlinableLiteral32(
1976     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1977     AsmParser->hasInv2PiInlineImm());
1978 }
1979 
1980 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1981   // Check that this immediate can be added as literal
1982   if (!isImmTy(ImmTyNone)) {
1983     return false;
1984   }
1985 
1986   if (!Imm.IsFPImm) {
1987     // We got int literal token.
1988 
1989     if (type == MVT::f64 && hasFPModifiers()) {
1990       // Cannot apply fp modifiers to int literals preserving the same semantics
1991       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1992       // disable these cases.
1993       return false;
1994     }
1995 
1996     unsigned Size = type.getSizeInBits();
1997     if (Size == 64)
1998       Size = 32;
1999 
2000     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2001     // types.
2002     return isSafeTruncation(Imm.Val, Size);
2003   }
2004 
2005   // We got fp literal token
2006   if (type == MVT::f64) { // Expected 64-bit fp operand
2007     // We would set low 64-bits of literal to zeroes but we accept this literals
2008     return true;
2009   }
2010 
2011   if (type == MVT::i64) { // Expected 64-bit int operand
2012     // We don't allow fp literals in 64-bit integer instructions. It is
2013     // unclear how we should encode them.
2014     return false;
2015   }
2016 
2017   // We allow fp literals with f16x2 operands assuming that the specified
2018   // literal goes into the lower half and the upper half is zero. We also
2019   // require that the literal may be losslessly converted to f16.
2020   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2021                      (type == MVT::v2i16)? MVT::i16 :
2022                      (type == MVT::v2f32)? MVT::f32 : type;
2023 
2024   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2025   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2026 }
2027 
2028 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2029   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2030 }
2031 
2032 bool AMDGPUOperand::isVRegWithInputMods() const {
2033   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2034          // GFX90A allows DPP on 64-bit operands.
2035          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2036           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2037 }
2038 
2039 bool AMDGPUOperand::isT16VRegWithInputMods() const {
2040   return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
2041 }
2042 
2043 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2044   if (AsmParser->isVI())
2045     return isVReg32();
2046   else if (AsmParser->isGFX9Plus())
2047     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2048   else
2049     return false;
2050 }
2051 
2052 bool AMDGPUOperand::isSDWAFP16Operand() const {
2053   return isSDWAOperand(MVT::f16);
2054 }
2055 
2056 bool AMDGPUOperand::isSDWAFP32Operand() const {
2057   return isSDWAOperand(MVT::f32);
2058 }
2059 
2060 bool AMDGPUOperand::isSDWAInt16Operand() const {
2061   return isSDWAOperand(MVT::i16);
2062 }
2063 
2064 bool AMDGPUOperand::isSDWAInt32Operand() const {
2065   return isSDWAOperand(MVT::i32);
2066 }
2067 
2068 bool AMDGPUOperand::isBoolReg() const {
2069   auto FB = AsmParser->getFeatureBits();
2070   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2071                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2072 }
2073 
2074 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2075 {
2076   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2077   assert(Size == 2 || Size == 4 || Size == 8);
2078 
2079   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2080 
2081   if (Imm.Mods.Abs) {
2082     Val &= ~FpSignMask;
2083   }
2084   if (Imm.Mods.Neg) {
2085     Val ^= FpSignMask;
2086   }
2087 
2088   return Val;
2089 }
2090 
2091 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2092   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2093                              Inst.getNumOperands())) {
2094     addLiteralImmOperand(Inst, Imm.Val,
2095                          ApplyModifiers &
2096                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2097   } else {
2098     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2099     Inst.addOperand(MCOperand::createImm(Imm.Val));
2100     setImmKindNone();
2101   }
2102 }
2103 
2104 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2105   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2106   auto OpNum = Inst.getNumOperands();
2107   // Check that this operand accepts literals
2108   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2109 
2110   if (ApplyModifiers) {
2111     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2112     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2113     Val = applyInputFPModifiers(Val, Size);
2114   }
2115 
2116   APInt Literal(64, Val);
2117   uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2118 
2119   if (Imm.IsFPImm) { // We got fp literal token
2120     switch (OpTy) {
2121     case AMDGPU::OPERAND_REG_IMM_INT64:
2122     case AMDGPU::OPERAND_REG_IMM_FP64:
2123     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2124     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2125     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2126       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2127                                        AsmParser->hasInv2PiInlineImm())) {
2128         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2129         setImmKindConst();
2130         return;
2131       }
2132 
2133       // Non-inlineable
2134       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2135         // For fp operands we check if low 32 bits are zeros
2136         if (Literal.getLoBits(32) != 0) {
2137           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2138           "Can't encode literal as exact 64-bit floating-point operand. "
2139           "Low 32-bits will be set to zero");
2140         }
2141 
2142         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2143         setImmKindLiteral();
2144         return;
2145       }
2146 
2147       // We don't allow fp literals in 64-bit integer instructions. It is
2148       // unclear how we should encode them. This case should be checked earlier
2149       // in predicate methods (isLiteralImm())
2150       llvm_unreachable("fp literal in 64-bit integer instruction.");
2151 
2152     case AMDGPU::OPERAND_REG_IMM_INT32:
2153     case AMDGPU::OPERAND_REG_IMM_FP32:
2154     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2155     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2156     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2157     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2158     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2159     case AMDGPU::OPERAND_REG_IMM_INT16:
2160     case AMDGPU::OPERAND_REG_IMM_FP16:
2161     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2162     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2163     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2164     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2165     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2166     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2167     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2168     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2169     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2170     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2171     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2172     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2173     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2174     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2175     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2176     case AMDGPU::OPERAND_KIMM32:
2177     case AMDGPU::OPERAND_KIMM16: {
2178       bool lost;
2179       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2180       // Convert literal to single precision
2181       FPLiteral.convert(*getOpFltSemantics(OpTy),
2182                         APFloat::rmNearestTiesToEven, &lost);
2183       // We allow precision lost but not overflow or underflow. This should be
2184       // checked earlier in isLiteralImm()
2185 
2186       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2187       Inst.addOperand(MCOperand::createImm(ImmVal));
2188       if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2189         setImmKindMandatoryLiteral();
2190       } else {
2191         setImmKindLiteral();
2192       }
2193       return;
2194     }
2195     default:
2196       llvm_unreachable("invalid operand size");
2197     }
2198 
2199     return;
2200   }
2201 
2202   // We got int literal token.
2203   // Only sign extend inline immediates.
2204   switch (OpTy) {
2205   case AMDGPU::OPERAND_REG_IMM_INT32:
2206   case AMDGPU::OPERAND_REG_IMM_FP32:
2207   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2208   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2209   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2210   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2211   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2212   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2213   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2214   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2215   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2216   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2217   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2218     if (isSafeTruncation(Val, 32) &&
2219         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2220                                      AsmParser->hasInv2PiInlineImm())) {
2221       Inst.addOperand(MCOperand::createImm(Val));
2222       setImmKindConst();
2223       return;
2224     }
2225 
2226     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2227     setImmKindLiteral();
2228     return;
2229 
2230   case AMDGPU::OPERAND_REG_IMM_INT64:
2231   case AMDGPU::OPERAND_REG_IMM_FP64:
2232   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2233   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2234   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2235     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2236       Inst.addOperand(MCOperand::createImm(Val));
2237       setImmKindConst();
2238       return;
2239     }
2240 
2241     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2242     setImmKindLiteral();
2243     return;
2244 
2245   case AMDGPU::OPERAND_REG_IMM_INT16:
2246   case AMDGPU::OPERAND_REG_IMM_FP16:
2247   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2248   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2249   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2250   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2251   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2252     if (isSafeTruncation(Val, 16) &&
2253         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2254                                      AsmParser->hasInv2PiInlineImm())) {
2255       Inst.addOperand(MCOperand::createImm(Val));
2256       setImmKindConst();
2257       return;
2258     }
2259 
2260     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2261     setImmKindLiteral();
2262     return;
2263 
2264   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2265   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2266   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2267   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2268     assert(isSafeTruncation(Val, 16));
2269     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2270                                         AsmParser->hasInv2PiInlineImm()));
2271 
2272     Inst.addOperand(MCOperand::createImm(Val));
2273     return;
2274   }
2275   case AMDGPU::OPERAND_KIMM32:
2276     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2277     setImmKindMandatoryLiteral();
2278     return;
2279   case AMDGPU::OPERAND_KIMM16:
2280     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2281     setImmKindMandatoryLiteral();
2282     return;
2283   default:
2284     llvm_unreachable("invalid operand size");
2285   }
2286 }
2287 
2288 template <unsigned Bitwidth>
2289 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2290   APInt Literal(64, Imm.Val);
2291   setImmKindMandatoryLiteral();
2292 
2293   if (!Imm.IsFPImm) {
2294     // We got int literal token.
2295     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2296     return;
2297   }
2298 
2299   bool Lost;
2300   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2301   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2302                     APFloat::rmNearestTiesToEven, &Lost);
2303   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2304 }
2305 
2306 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2307   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2308 }
2309 
2310 bool AMDGPUOperand::isInlineValue() const {
2311   return isRegKind() && ::isInlineValue(getReg());
2312 }
2313 
2314 //===----------------------------------------------------------------------===//
2315 // AsmParser
2316 //===----------------------------------------------------------------------===//
2317 
2318 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2319   if (Is == IS_VGPR) {
2320     switch (RegWidth) {
2321       default: return -1;
2322       case 32:
2323         return AMDGPU::VGPR_32RegClassID;
2324       case 64:
2325         return AMDGPU::VReg_64RegClassID;
2326       case 96:
2327         return AMDGPU::VReg_96RegClassID;
2328       case 128:
2329         return AMDGPU::VReg_128RegClassID;
2330       case 160:
2331         return AMDGPU::VReg_160RegClassID;
2332       case 192:
2333         return AMDGPU::VReg_192RegClassID;
2334       case 224:
2335         return AMDGPU::VReg_224RegClassID;
2336       case 256:
2337         return AMDGPU::VReg_256RegClassID;
2338       case 288:
2339         return AMDGPU::VReg_288RegClassID;
2340       case 320:
2341         return AMDGPU::VReg_320RegClassID;
2342       case 352:
2343         return AMDGPU::VReg_352RegClassID;
2344       case 384:
2345         return AMDGPU::VReg_384RegClassID;
2346       case 512:
2347         return AMDGPU::VReg_512RegClassID;
2348       case 1024:
2349         return AMDGPU::VReg_1024RegClassID;
2350     }
2351   } else if (Is == IS_TTMP) {
2352     switch (RegWidth) {
2353       default: return -1;
2354       case 32:
2355         return AMDGPU::TTMP_32RegClassID;
2356       case 64:
2357         return AMDGPU::TTMP_64RegClassID;
2358       case 128:
2359         return AMDGPU::TTMP_128RegClassID;
2360       case 256:
2361         return AMDGPU::TTMP_256RegClassID;
2362       case 512:
2363         return AMDGPU::TTMP_512RegClassID;
2364     }
2365   } else if (Is == IS_SGPR) {
2366     switch (RegWidth) {
2367       default: return -1;
2368       case 32:
2369         return AMDGPU::SGPR_32RegClassID;
2370       case 64:
2371         return AMDGPU::SGPR_64RegClassID;
2372       case 96:
2373         return AMDGPU::SGPR_96RegClassID;
2374       case 128:
2375         return AMDGPU::SGPR_128RegClassID;
2376       case 160:
2377         return AMDGPU::SGPR_160RegClassID;
2378       case 192:
2379         return AMDGPU::SGPR_192RegClassID;
2380       case 224:
2381         return AMDGPU::SGPR_224RegClassID;
2382       case 256:
2383         return AMDGPU::SGPR_256RegClassID;
2384       case 288:
2385         return AMDGPU::SGPR_288RegClassID;
2386       case 320:
2387         return AMDGPU::SGPR_320RegClassID;
2388       case 352:
2389         return AMDGPU::SGPR_352RegClassID;
2390       case 384:
2391         return AMDGPU::SGPR_384RegClassID;
2392       case 512:
2393         return AMDGPU::SGPR_512RegClassID;
2394     }
2395   } else if (Is == IS_AGPR) {
2396     switch (RegWidth) {
2397       default: return -1;
2398       case 32:
2399         return AMDGPU::AGPR_32RegClassID;
2400       case 64:
2401         return AMDGPU::AReg_64RegClassID;
2402       case 96:
2403         return AMDGPU::AReg_96RegClassID;
2404       case 128:
2405         return AMDGPU::AReg_128RegClassID;
2406       case 160:
2407         return AMDGPU::AReg_160RegClassID;
2408       case 192:
2409         return AMDGPU::AReg_192RegClassID;
2410       case 224:
2411         return AMDGPU::AReg_224RegClassID;
2412       case 256:
2413         return AMDGPU::AReg_256RegClassID;
2414       case 288:
2415         return AMDGPU::AReg_288RegClassID;
2416       case 320:
2417         return AMDGPU::AReg_320RegClassID;
2418       case 352:
2419         return AMDGPU::AReg_352RegClassID;
2420       case 384:
2421         return AMDGPU::AReg_384RegClassID;
2422       case 512:
2423         return AMDGPU::AReg_512RegClassID;
2424       case 1024:
2425         return AMDGPU::AReg_1024RegClassID;
2426     }
2427   }
2428   return -1;
2429 }
2430 
2431 static unsigned getSpecialRegForName(StringRef RegName) {
2432   return StringSwitch<unsigned>(RegName)
2433     .Case("exec", AMDGPU::EXEC)
2434     .Case("vcc", AMDGPU::VCC)
2435     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2436     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2437     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2438     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2439     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2440     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2441     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2442     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2443     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2444     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2445     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2446     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2447     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2448     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2449     .Case("m0", AMDGPU::M0)
2450     .Case("vccz", AMDGPU::SRC_VCCZ)
2451     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2452     .Case("execz", AMDGPU::SRC_EXECZ)
2453     .Case("src_execz", AMDGPU::SRC_EXECZ)
2454     .Case("scc", AMDGPU::SRC_SCC)
2455     .Case("src_scc", AMDGPU::SRC_SCC)
2456     .Case("tba", AMDGPU::TBA)
2457     .Case("tma", AMDGPU::TMA)
2458     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2459     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2460     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2461     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2462     .Case("vcc_lo", AMDGPU::VCC_LO)
2463     .Case("vcc_hi", AMDGPU::VCC_HI)
2464     .Case("exec_lo", AMDGPU::EXEC_LO)
2465     .Case("exec_hi", AMDGPU::EXEC_HI)
2466     .Case("tma_lo", AMDGPU::TMA_LO)
2467     .Case("tma_hi", AMDGPU::TMA_HI)
2468     .Case("tba_lo", AMDGPU::TBA_LO)
2469     .Case("tba_hi", AMDGPU::TBA_HI)
2470     .Case("pc", AMDGPU::PC_REG)
2471     .Case("null", AMDGPU::SGPR_NULL)
2472     .Default(AMDGPU::NoRegister);
2473 }
2474 
2475 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2476                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2477   auto R = parseRegister();
2478   if (!R) return true;
2479   assert(R->isReg());
2480   RegNo = R->getReg();
2481   StartLoc = R->getStartLoc();
2482   EndLoc = R->getEndLoc();
2483   return false;
2484 }
2485 
2486 bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2487                                     SMLoc &EndLoc) {
2488   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2489 }
2490 
2491 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(MCRegister &RegNo,
2492                                                        SMLoc &StartLoc,
2493                                                        SMLoc &EndLoc) {
2494   bool Result =
2495       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2496   bool PendingErrors = getParser().hasPendingError();
2497   getParser().clearPendingErrors();
2498   if (PendingErrors)
2499     return MatchOperand_ParseFail;
2500   if (Result)
2501     return MatchOperand_NoMatch;
2502   return MatchOperand_Success;
2503 }
2504 
2505 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2506                                             RegisterKind RegKind, unsigned Reg1,
2507                                             SMLoc Loc) {
2508   switch (RegKind) {
2509   case IS_SPECIAL:
2510     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2511       Reg = AMDGPU::EXEC;
2512       RegWidth = 64;
2513       return true;
2514     }
2515     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2516       Reg = AMDGPU::FLAT_SCR;
2517       RegWidth = 64;
2518       return true;
2519     }
2520     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2521       Reg = AMDGPU::XNACK_MASK;
2522       RegWidth = 64;
2523       return true;
2524     }
2525     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2526       Reg = AMDGPU::VCC;
2527       RegWidth = 64;
2528       return true;
2529     }
2530     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2531       Reg = AMDGPU::TBA;
2532       RegWidth = 64;
2533       return true;
2534     }
2535     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2536       Reg = AMDGPU::TMA;
2537       RegWidth = 64;
2538       return true;
2539     }
2540     Error(Loc, "register does not fit in the list");
2541     return false;
2542   case IS_VGPR:
2543   case IS_SGPR:
2544   case IS_AGPR:
2545   case IS_TTMP:
2546     if (Reg1 != Reg + RegWidth / 32) {
2547       Error(Loc, "registers in a list must have consecutive indices");
2548       return false;
2549     }
2550     RegWidth += 32;
2551     return true;
2552   default:
2553     llvm_unreachable("unexpected register kind");
2554   }
2555 }
2556 
2557 struct RegInfo {
2558   StringLiteral Name;
2559   RegisterKind Kind;
2560 };
2561 
2562 static constexpr RegInfo RegularRegisters[] = {
2563   {{"v"},    IS_VGPR},
2564   {{"s"},    IS_SGPR},
2565   {{"ttmp"}, IS_TTMP},
2566   {{"acc"},  IS_AGPR},
2567   {{"a"},    IS_AGPR},
2568 };
2569 
2570 static bool isRegularReg(RegisterKind Kind) {
2571   return Kind == IS_VGPR ||
2572          Kind == IS_SGPR ||
2573          Kind == IS_TTMP ||
2574          Kind == IS_AGPR;
2575 }
2576 
2577 static const RegInfo* getRegularRegInfo(StringRef Str) {
2578   for (const RegInfo &Reg : RegularRegisters)
2579     if (Str.startswith(Reg.Name))
2580       return &Reg;
2581   return nullptr;
2582 }
2583 
2584 static bool getRegNum(StringRef Str, unsigned& Num) {
2585   return !Str.getAsInteger(10, Num);
2586 }
2587 
2588 bool
2589 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2590                             const AsmToken &NextToken) const {
2591 
2592   // A list of consecutive registers: [s0,s1,s2,s3]
2593   if (Token.is(AsmToken::LBrac))
2594     return true;
2595 
2596   if (!Token.is(AsmToken::Identifier))
2597     return false;
2598 
2599   // A single register like s0 or a range of registers like s[0:1]
2600 
2601   StringRef Str = Token.getString();
2602   const RegInfo *Reg = getRegularRegInfo(Str);
2603   if (Reg) {
2604     StringRef RegName = Reg->Name;
2605     StringRef RegSuffix = Str.substr(RegName.size());
2606     if (!RegSuffix.empty()) {
2607       unsigned Num;
2608       // A single register with an index: rXX
2609       if (getRegNum(RegSuffix, Num))
2610         return true;
2611     } else {
2612       // A range of registers: r[XX:YY].
2613       if (NextToken.is(AsmToken::LBrac))
2614         return true;
2615     }
2616   }
2617 
2618   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2619 }
2620 
2621 bool
2622 AMDGPUAsmParser::isRegister()
2623 {
2624   return isRegister(getToken(), peekToken());
2625 }
2626 
2627 unsigned
2628 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2629                                unsigned RegNum,
2630                                unsigned RegWidth,
2631                                SMLoc Loc) {
2632 
2633   assert(isRegularReg(RegKind));
2634 
2635   unsigned AlignSize = 1;
2636   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2637     // SGPR and TTMP registers must be aligned.
2638     // Max required alignment is 4 dwords.
2639     AlignSize = std::min(RegWidth / 32, 4u);
2640   }
2641 
2642   if (RegNum % AlignSize != 0) {
2643     Error(Loc, "invalid register alignment");
2644     return AMDGPU::NoRegister;
2645   }
2646 
2647   unsigned RegIdx = RegNum / AlignSize;
2648   int RCID = getRegClass(RegKind, RegWidth);
2649   if (RCID == -1) {
2650     Error(Loc, "invalid or unsupported register size");
2651     return AMDGPU::NoRegister;
2652   }
2653 
2654   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2655   const MCRegisterClass RC = TRI->getRegClass(RCID);
2656   if (RegIdx >= RC.getNumRegs()) {
2657     Error(Loc, "register index is out of range");
2658     return AMDGPU::NoRegister;
2659   }
2660 
2661   return RC.getRegister(RegIdx);
2662 }
2663 
2664 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2665   int64_t RegLo, RegHi;
2666   if (!skipToken(AsmToken::LBrac, "missing register index"))
2667     return false;
2668 
2669   SMLoc FirstIdxLoc = getLoc();
2670   SMLoc SecondIdxLoc;
2671 
2672   if (!parseExpr(RegLo))
2673     return false;
2674 
2675   if (trySkipToken(AsmToken::Colon)) {
2676     SecondIdxLoc = getLoc();
2677     if (!parseExpr(RegHi))
2678       return false;
2679   } else {
2680     RegHi = RegLo;
2681   }
2682 
2683   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2684     return false;
2685 
2686   if (!isUInt<32>(RegLo)) {
2687     Error(FirstIdxLoc, "invalid register index");
2688     return false;
2689   }
2690 
2691   if (!isUInt<32>(RegHi)) {
2692     Error(SecondIdxLoc, "invalid register index");
2693     return false;
2694   }
2695 
2696   if (RegLo > RegHi) {
2697     Error(FirstIdxLoc, "first register index should not exceed second index");
2698     return false;
2699   }
2700 
2701   Num = static_cast<unsigned>(RegLo);
2702   RegWidth = 32 * ((RegHi - RegLo) + 1);
2703   return true;
2704 }
2705 
2706 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2707                                           unsigned &RegNum, unsigned &RegWidth,
2708                                           SmallVectorImpl<AsmToken> &Tokens) {
2709   assert(isToken(AsmToken::Identifier));
2710   unsigned Reg = getSpecialRegForName(getTokenStr());
2711   if (Reg) {
2712     RegNum = 0;
2713     RegWidth = 32;
2714     RegKind = IS_SPECIAL;
2715     Tokens.push_back(getToken());
2716     lex(); // skip register name
2717   }
2718   return Reg;
2719 }
2720 
2721 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2722                                           unsigned &RegNum, unsigned &RegWidth,
2723                                           SmallVectorImpl<AsmToken> &Tokens) {
2724   assert(isToken(AsmToken::Identifier));
2725   StringRef RegName = getTokenStr();
2726   auto Loc = getLoc();
2727 
2728   const RegInfo *RI = getRegularRegInfo(RegName);
2729   if (!RI) {
2730     Error(Loc, "invalid register name");
2731     return AMDGPU::NoRegister;
2732   }
2733 
2734   Tokens.push_back(getToken());
2735   lex(); // skip register name
2736 
2737   RegKind = RI->Kind;
2738   StringRef RegSuffix = RegName.substr(RI->Name.size());
2739   if (!RegSuffix.empty()) {
2740     // Single 32-bit register: vXX.
2741     if (!getRegNum(RegSuffix, RegNum)) {
2742       Error(Loc, "invalid register index");
2743       return AMDGPU::NoRegister;
2744     }
2745     RegWidth = 32;
2746   } else {
2747     // Range of registers: v[XX:YY]. ":YY" is optional.
2748     if (!ParseRegRange(RegNum, RegWidth))
2749       return AMDGPU::NoRegister;
2750   }
2751 
2752   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2753 }
2754 
2755 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2756                                        unsigned &RegWidth,
2757                                        SmallVectorImpl<AsmToken> &Tokens) {
2758   unsigned Reg = AMDGPU::NoRegister;
2759   auto ListLoc = getLoc();
2760 
2761   if (!skipToken(AsmToken::LBrac,
2762                  "expected a register or a list of registers")) {
2763     return AMDGPU::NoRegister;
2764   }
2765 
2766   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2767 
2768   auto Loc = getLoc();
2769   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2770     return AMDGPU::NoRegister;
2771   if (RegWidth != 32) {
2772     Error(Loc, "expected a single 32-bit register");
2773     return AMDGPU::NoRegister;
2774   }
2775 
2776   for (; trySkipToken(AsmToken::Comma); ) {
2777     RegisterKind NextRegKind;
2778     unsigned NextReg, NextRegNum, NextRegWidth;
2779     Loc = getLoc();
2780 
2781     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2782                              NextRegNum, NextRegWidth,
2783                              Tokens)) {
2784       return AMDGPU::NoRegister;
2785     }
2786     if (NextRegWidth != 32) {
2787       Error(Loc, "expected a single 32-bit register");
2788       return AMDGPU::NoRegister;
2789     }
2790     if (NextRegKind != RegKind) {
2791       Error(Loc, "registers in a list must be of the same kind");
2792       return AMDGPU::NoRegister;
2793     }
2794     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2795       return AMDGPU::NoRegister;
2796   }
2797 
2798   if (!skipToken(AsmToken::RBrac,
2799                  "expected a comma or a closing square bracket")) {
2800     return AMDGPU::NoRegister;
2801   }
2802 
2803   if (isRegularReg(RegKind))
2804     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2805 
2806   return Reg;
2807 }
2808 
2809 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2810                                           unsigned &RegNum, unsigned &RegWidth,
2811                                           SmallVectorImpl<AsmToken> &Tokens) {
2812   auto Loc = getLoc();
2813   Reg = AMDGPU::NoRegister;
2814 
2815   if (isToken(AsmToken::Identifier)) {
2816     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2817     if (Reg == AMDGPU::NoRegister)
2818       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2819   } else {
2820     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2821   }
2822 
2823   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2824   if (Reg == AMDGPU::NoRegister) {
2825     assert(Parser.hasPendingError());
2826     return false;
2827   }
2828 
2829   if (!subtargetHasRegister(*TRI, Reg)) {
2830     if (Reg == AMDGPU::SGPR_NULL) {
2831       Error(Loc, "'null' operand is not supported on this GPU");
2832     } else {
2833       Error(Loc, "register not available on this GPU");
2834     }
2835     return false;
2836   }
2837 
2838   return true;
2839 }
2840 
2841 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2842                                           unsigned &RegNum, unsigned &RegWidth,
2843                                           bool RestoreOnFailure /*=false*/) {
2844   Reg = AMDGPU::NoRegister;
2845 
2846   SmallVector<AsmToken, 1> Tokens;
2847   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2848     if (RestoreOnFailure) {
2849       while (!Tokens.empty()) {
2850         getLexer().UnLex(Tokens.pop_back_val());
2851       }
2852     }
2853     return true;
2854   }
2855   return false;
2856 }
2857 
2858 std::optional<StringRef>
2859 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2860   switch (RegKind) {
2861   case IS_VGPR:
2862     return StringRef(".amdgcn.next_free_vgpr");
2863   case IS_SGPR:
2864     return StringRef(".amdgcn.next_free_sgpr");
2865   default:
2866     return std::nullopt;
2867   }
2868 }
2869 
2870 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2871   auto SymbolName = getGprCountSymbolName(RegKind);
2872   assert(SymbolName && "initializing invalid register kind");
2873   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2874   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2875 }
2876 
2877 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2878                                             unsigned DwordRegIndex,
2879                                             unsigned RegWidth) {
2880   // Symbols are only defined for GCN targets
2881   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2882     return true;
2883 
2884   auto SymbolName = getGprCountSymbolName(RegKind);
2885   if (!SymbolName)
2886     return true;
2887   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2888 
2889   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2890   int64_t OldCount;
2891 
2892   if (!Sym->isVariable())
2893     return !Error(getLoc(),
2894                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2895   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2896     return !Error(
2897         getLoc(),
2898         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2899 
2900   if (OldCount <= NewMax)
2901     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2902 
2903   return true;
2904 }
2905 
2906 std::unique_ptr<AMDGPUOperand>
2907 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2908   const auto &Tok = getToken();
2909   SMLoc StartLoc = Tok.getLoc();
2910   SMLoc EndLoc = Tok.getEndLoc();
2911   RegisterKind RegKind;
2912   unsigned Reg, RegNum, RegWidth;
2913 
2914   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2915     return nullptr;
2916   }
2917   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2918     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2919       return nullptr;
2920   } else
2921     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2922   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2923 }
2924 
2925 OperandMatchResultTy
2926 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2927   // TODO: add syntactic sugar for 1/(2*PI)
2928 
2929   if (isRegister())
2930     return MatchOperand_NoMatch;
2931   assert(!isModifier());
2932 
2933   const auto& Tok = getToken();
2934   const auto& NextTok = peekToken();
2935   bool IsReal = Tok.is(AsmToken::Real);
2936   SMLoc S = getLoc();
2937   bool Negate = false;
2938 
2939   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2940     lex();
2941     IsReal = true;
2942     Negate = true;
2943   }
2944 
2945   if (IsReal) {
2946     // Floating-point expressions are not supported.
2947     // Can only allow floating-point literals with an
2948     // optional sign.
2949 
2950     StringRef Num = getTokenStr();
2951     lex();
2952 
2953     APFloat RealVal(APFloat::IEEEdouble());
2954     auto roundMode = APFloat::rmNearestTiesToEven;
2955     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2956       return MatchOperand_ParseFail;
2957     }
2958     if (Negate)
2959       RealVal.changeSign();
2960 
2961     Operands.push_back(
2962       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2963                                AMDGPUOperand::ImmTyNone, true));
2964 
2965     return MatchOperand_Success;
2966 
2967   } else {
2968     int64_t IntVal;
2969     const MCExpr *Expr;
2970     SMLoc S = getLoc();
2971 
2972     if (HasSP3AbsModifier) {
2973       // This is a workaround for handling expressions
2974       // as arguments of SP3 'abs' modifier, for example:
2975       //     |1.0|
2976       //     |-1|
2977       //     |1+x|
2978       // This syntax is not compatible with syntax of standard
2979       // MC expressions (due to the trailing '|').
2980       SMLoc EndLoc;
2981       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2982         return MatchOperand_ParseFail;
2983     } else {
2984       if (Parser.parseExpression(Expr))
2985         return MatchOperand_ParseFail;
2986     }
2987 
2988     if (Expr->evaluateAsAbsolute(IntVal)) {
2989       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2990     } else {
2991       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2992     }
2993 
2994     return MatchOperand_Success;
2995   }
2996 
2997   return MatchOperand_NoMatch;
2998 }
2999 
3000 OperandMatchResultTy
3001 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3002   if (!isRegister())
3003     return MatchOperand_NoMatch;
3004 
3005   if (auto R = parseRegister()) {
3006     assert(R->isReg());
3007     Operands.push_back(std::move(R));
3008     return MatchOperand_Success;
3009   }
3010   return MatchOperand_ParseFail;
3011 }
3012 
3013 OperandMatchResultTy
3014 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
3015   auto res = parseReg(Operands);
3016   if (res != MatchOperand_NoMatch) {
3017     return res;
3018   } else if (isModifier()) {
3019     return MatchOperand_NoMatch;
3020   } else {
3021     return parseImm(Operands, HasSP3AbsMod);
3022   }
3023 }
3024 
3025 bool
3026 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3027   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3028     const auto &str = Token.getString();
3029     return str == "abs" || str == "neg" || str == "sext";
3030   }
3031   return false;
3032 }
3033 
3034 bool
3035 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3036   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3037 }
3038 
3039 bool
3040 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3041   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3042 }
3043 
3044 bool
3045 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3046   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3047 }
3048 
3049 // Check if this is an operand modifier or an opcode modifier
3050 // which may look like an expression but it is not. We should
3051 // avoid parsing these modifiers as expressions. Currently
3052 // recognized sequences are:
3053 //   |...|
3054 //   abs(...)
3055 //   neg(...)
3056 //   sext(...)
3057 //   -reg
3058 //   -|...|
3059 //   -abs(...)
3060 //   name:...
3061 //
3062 bool
3063 AMDGPUAsmParser::isModifier() {
3064 
3065   AsmToken Tok = getToken();
3066   AsmToken NextToken[2];
3067   peekTokens(NextToken);
3068 
3069   return isOperandModifier(Tok, NextToken[0]) ||
3070          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3071          isOpcodeModifierWithVal(Tok, NextToken[0]);
3072 }
3073 
3074 // Check if the current token is an SP3 'neg' modifier.
3075 // Currently this modifier is allowed in the following context:
3076 //
3077 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3078 // 2. Before an 'abs' modifier: -abs(...)
3079 // 3. Before an SP3 'abs' modifier: -|...|
3080 //
3081 // In all other cases "-" is handled as a part
3082 // of an expression that follows the sign.
3083 //
3084 // Note: When "-" is followed by an integer literal,
3085 // this is interpreted as integer negation rather
3086 // than a floating-point NEG modifier applied to N.
3087 // Beside being contr-intuitive, such use of floating-point
3088 // NEG modifier would have resulted in different meaning
3089 // of integer literals used with VOP1/2/C and VOP3,
3090 // for example:
3091 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3092 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3093 // Negative fp literals with preceding "-" are
3094 // handled likewise for uniformity
3095 //
3096 bool
3097 AMDGPUAsmParser::parseSP3NegModifier() {
3098 
3099   AsmToken NextToken[2];
3100   peekTokens(NextToken);
3101 
3102   if (isToken(AsmToken::Minus) &&
3103       (isRegister(NextToken[0], NextToken[1]) ||
3104        NextToken[0].is(AsmToken::Pipe) ||
3105        isId(NextToken[0], "abs"))) {
3106     lex();
3107     return true;
3108   }
3109 
3110   return false;
3111 }
3112 
3113 OperandMatchResultTy
3114 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3115                                               bool AllowImm) {
3116   bool Neg, SP3Neg;
3117   bool Abs, SP3Abs;
3118   SMLoc Loc;
3119 
3120   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3121   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3122     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3123     return MatchOperand_ParseFail;
3124   }
3125 
3126   SP3Neg = parseSP3NegModifier();
3127 
3128   Loc = getLoc();
3129   Neg = trySkipId("neg");
3130   if (Neg && SP3Neg) {
3131     Error(Loc, "expected register or immediate");
3132     return MatchOperand_ParseFail;
3133   }
3134   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3135     return MatchOperand_ParseFail;
3136 
3137   Abs = trySkipId("abs");
3138   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3139     return MatchOperand_ParseFail;
3140 
3141   Loc = getLoc();
3142   SP3Abs = trySkipToken(AsmToken::Pipe);
3143   if (Abs && SP3Abs) {
3144     Error(Loc, "expected register or immediate");
3145     return MatchOperand_ParseFail;
3146   }
3147 
3148   OperandMatchResultTy Res;
3149   if (AllowImm) {
3150     Res = parseRegOrImm(Operands, SP3Abs);
3151   } else {
3152     Res = parseReg(Operands);
3153   }
3154   if (Res != MatchOperand_Success) {
3155     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3156   }
3157 
3158   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3159     return MatchOperand_ParseFail;
3160   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3161     return MatchOperand_ParseFail;
3162   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3163     return MatchOperand_ParseFail;
3164 
3165   AMDGPUOperand::Modifiers Mods;
3166   Mods.Abs = Abs || SP3Abs;
3167   Mods.Neg = Neg || SP3Neg;
3168 
3169   if (Mods.hasFPModifiers()) {
3170     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3171     if (Op.isExpr()) {
3172       Error(Op.getStartLoc(), "expected an absolute expression");
3173       return MatchOperand_ParseFail;
3174     }
3175     Op.setModifiers(Mods);
3176   }
3177   return MatchOperand_Success;
3178 }
3179 
3180 OperandMatchResultTy
3181 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3182                                                bool AllowImm) {
3183   bool Sext = trySkipId("sext");
3184   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3185     return MatchOperand_ParseFail;
3186 
3187   OperandMatchResultTy Res;
3188   if (AllowImm) {
3189     Res = parseRegOrImm(Operands);
3190   } else {
3191     Res = parseReg(Operands);
3192   }
3193   if (Res != MatchOperand_Success) {
3194     return Sext? MatchOperand_ParseFail : Res;
3195   }
3196 
3197   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3198     return MatchOperand_ParseFail;
3199 
3200   AMDGPUOperand::Modifiers Mods;
3201   Mods.Sext = Sext;
3202 
3203   if (Mods.hasIntModifiers()) {
3204     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3205     if (Op.isExpr()) {
3206       Error(Op.getStartLoc(), "expected an absolute expression");
3207       return MatchOperand_ParseFail;
3208     }
3209     Op.setModifiers(Mods);
3210   }
3211 
3212   return MatchOperand_Success;
3213 }
3214 
3215 OperandMatchResultTy
3216 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3217   return parseRegOrImmWithFPInputMods(Operands, false);
3218 }
3219 
3220 OperandMatchResultTy
3221 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3222   return parseRegOrImmWithIntInputMods(Operands, false);
3223 }
3224 
3225 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3226   auto Loc = getLoc();
3227   if (trySkipId("off")) {
3228     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3229                                                 AMDGPUOperand::ImmTyOff, false));
3230     return MatchOperand_Success;
3231   }
3232 
3233   if (!isRegister())
3234     return MatchOperand_NoMatch;
3235 
3236   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3237   if (Reg) {
3238     Operands.push_back(std::move(Reg));
3239     return MatchOperand_Success;
3240   }
3241 
3242   return MatchOperand_ParseFail;
3243 
3244 }
3245 
3246 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3247   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3248 
3249   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3250       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3251       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3252       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3253     return Match_InvalidOperand;
3254 
3255   if ((TSFlags & SIInstrFlags::VOP3) &&
3256       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3257       getForcedEncodingSize() != 64)
3258     return Match_PreferE32;
3259 
3260   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3261       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3262     // v_mac_f32/16 allow only dst_sel == DWORD;
3263     auto OpNum =
3264         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3265     const auto &Op = Inst.getOperand(OpNum);
3266     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3267       return Match_InvalidOperand;
3268     }
3269   }
3270 
3271   return Match_Success;
3272 }
3273 
3274 static ArrayRef<unsigned> getAllVariants() {
3275   static const unsigned Variants[] = {
3276     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3277     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3278     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3279   };
3280 
3281   return ArrayRef(Variants);
3282 }
3283 
3284 // What asm variants we should check
3285 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3286   if (isForcedDPP() && isForcedVOP3()) {
3287     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3288     return ArrayRef(Variants);
3289   }
3290   if (getForcedEncodingSize() == 32) {
3291     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3292     return ArrayRef(Variants);
3293   }
3294 
3295   if (isForcedVOP3()) {
3296     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3297     return ArrayRef(Variants);
3298   }
3299 
3300   if (isForcedSDWA()) {
3301     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3302                                         AMDGPUAsmVariants::SDWA9};
3303     return ArrayRef(Variants);
3304   }
3305 
3306   if (isForcedDPP()) {
3307     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3308     return ArrayRef(Variants);
3309   }
3310 
3311   return getAllVariants();
3312 }
3313 
3314 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3315   if (isForcedDPP() && isForcedVOP3())
3316     return "e64_dpp";
3317 
3318   if (getForcedEncodingSize() == 32)
3319     return "e32";
3320 
3321   if (isForcedVOP3())
3322     return "e64";
3323 
3324   if (isForcedSDWA())
3325     return "sdwa";
3326 
3327   if (isForcedDPP())
3328     return "dpp";
3329 
3330   return "";
3331 }
3332 
3333 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3334   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3335   for (MCPhysReg Reg : Desc.implicit_uses()) {
3336     switch (Reg) {
3337     case AMDGPU::FLAT_SCR:
3338     case AMDGPU::VCC:
3339     case AMDGPU::VCC_LO:
3340     case AMDGPU::VCC_HI:
3341     case AMDGPU::M0:
3342       return Reg;
3343     default:
3344       break;
3345     }
3346   }
3347   return AMDGPU::NoRegister;
3348 }
3349 
3350 // NB: This code is correct only when used to check constant
3351 // bus limitations because GFX7 support no f16 inline constants.
3352 // Note that there are no cases when a GFX7 opcode violates
3353 // constant bus limitations due to the use of an f16 constant.
3354 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3355                                        unsigned OpIdx) const {
3356   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3357 
3358   if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3359       AMDGPU::isKImmOperand(Desc, OpIdx)) {
3360     return false;
3361   }
3362 
3363   const MCOperand &MO = Inst.getOperand(OpIdx);
3364 
3365   int64_t Val = MO.getImm();
3366   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3367 
3368   switch (OpSize) { // expected operand size
3369   case 8:
3370     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3371   case 4:
3372     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3373   case 2: {
3374     const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3375     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3376         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3377         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3378       return AMDGPU::isInlinableIntLiteral(Val);
3379 
3380     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3381         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3382         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3383       return AMDGPU::isInlinableIntLiteralV216(Val);
3384 
3385     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3386         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3387         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3388       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3389 
3390     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3391   }
3392   default:
3393     llvm_unreachable("invalid operand size");
3394   }
3395 }
3396 
3397 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3398   if (!isGFX10Plus())
3399     return 1;
3400 
3401   switch (Opcode) {
3402   // 64-bit shift instructions can use only one scalar value input
3403   case AMDGPU::V_LSHLREV_B64_e64:
3404   case AMDGPU::V_LSHLREV_B64_gfx10:
3405   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3406   case AMDGPU::V_LSHRREV_B64_e64:
3407   case AMDGPU::V_LSHRREV_B64_gfx10:
3408   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3409   case AMDGPU::V_ASHRREV_I64_e64:
3410   case AMDGPU::V_ASHRREV_I64_gfx10:
3411   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3412   case AMDGPU::V_LSHL_B64_e64:
3413   case AMDGPU::V_LSHR_B64_e64:
3414   case AMDGPU::V_ASHR_I64_e64:
3415     return 1;
3416   default:
3417     return 2;
3418   }
3419 }
3420 
3421 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3422 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3423 
3424 // Get regular operand indices in the same order as specified
3425 // in the instruction (but append mandatory literals to the end).
3426 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3427                                            bool AddMandatoryLiterals = false) {
3428 
3429   int16_t ImmIdx =
3430       AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3431 
3432   if (isVOPD(Opcode)) {
3433     int16_t ImmDeferredIdx =
3434         AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3435                              : -1;
3436 
3437     return {getNamedOperandIdx(Opcode, OpName::src0X),
3438             getNamedOperandIdx(Opcode, OpName::vsrc1X),
3439             getNamedOperandIdx(Opcode, OpName::src0Y),
3440             getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3441             ImmDeferredIdx,
3442             ImmIdx};
3443   }
3444 
3445   return {getNamedOperandIdx(Opcode, OpName::src0),
3446           getNamedOperandIdx(Opcode, OpName::src1),
3447           getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3448 }
3449 
3450 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3451   const MCOperand &MO = Inst.getOperand(OpIdx);
3452   if (MO.isImm()) {
3453     return !isInlineConstant(Inst, OpIdx);
3454   } else if (MO.isReg()) {
3455     auto Reg = MO.getReg();
3456     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3457     auto PReg = mc2PseudoReg(Reg);
3458     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3459   } else {
3460     return true;
3461   }
3462 }
3463 
3464 bool AMDGPUAsmParser::validateConstantBusLimitations(
3465     const MCInst &Inst, const OperandVector &Operands) {
3466   const unsigned Opcode = Inst.getOpcode();
3467   const MCInstrDesc &Desc = MII.get(Opcode);
3468   unsigned LastSGPR = AMDGPU::NoRegister;
3469   unsigned ConstantBusUseCount = 0;
3470   unsigned NumLiterals = 0;
3471   unsigned LiteralSize;
3472 
3473   if (!(Desc.TSFlags &
3474         (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3475          SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3476       !isVOPD(Opcode))
3477     return true;
3478 
3479   // Check special imm operands (used by madmk, etc)
3480   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3481     ++NumLiterals;
3482     LiteralSize = 4;
3483   }
3484 
3485   SmallDenseSet<unsigned> SGPRsUsed;
3486   unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3487   if (SGPRUsed != AMDGPU::NoRegister) {
3488     SGPRsUsed.insert(SGPRUsed);
3489     ++ConstantBusUseCount;
3490   }
3491 
3492   OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3493 
3494   for (int OpIdx : OpIndices) {
3495     if (OpIdx == -1)
3496       continue;
3497 
3498     const MCOperand &MO = Inst.getOperand(OpIdx);
3499     if (usesConstantBus(Inst, OpIdx)) {
3500       if (MO.isReg()) {
3501         LastSGPR = mc2PseudoReg(MO.getReg());
3502         // Pairs of registers with a partial intersections like these
3503         //   s0, s[0:1]
3504         //   flat_scratch_lo, flat_scratch
3505         //   flat_scratch_lo, flat_scratch_hi
3506         // are theoretically valid but they are disabled anyway.
3507         // Note that this code mimics SIInstrInfo::verifyInstruction
3508         if (SGPRsUsed.insert(LastSGPR).second) {
3509           ++ConstantBusUseCount;
3510         }
3511       } else { // Expression or a literal
3512 
3513         if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3514           continue; // special operand like VINTERP attr_chan
3515 
3516         // An instruction may use only one literal.
3517         // This has been validated on the previous step.
3518         // See validateVOPLiteral.
3519         // This literal may be used as more than one operand.
3520         // If all these operands are of the same size,
3521         // this literal counts as one scalar value.
3522         // Otherwise it counts as 2 scalar values.
3523         // See "GFX10 Shader Programming", section 3.6.2.3.
3524 
3525         unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3526         if (Size < 4)
3527           Size = 4;
3528 
3529         if (NumLiterals == 0) {
3530           NumLiterals = 1;
3531           LiteralSize = Size;
3532         } else if (LiteralSize != Size) {
3533           NumLiterals = 2;
3534         }
3535       }
3536     }
3537   }
3538   ConstantBusUseCount += NumLiterals;
3539 
3540   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3541     return true;
3542 
3543   SMLoc LitLoc = getLitLoc(Operands);
3544   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3545   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3546   Error(Loc, "invalid operand (violates constant bus restrictions)");
3547   return false;
3548 }
3549 
3550 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3551     const MCInst &Inst, const OperandVector &Operands) {
3552 
3553   const unsigned Opcode = Inst.getOpcode();
3554   if (!isVOPD(Opcode))
3555     return true;
3556 
3557   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3558 
3559   auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3560     const MCOperand &Opr = Inst.getOperand(OperandIdx);
3561     return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3562                ? Opr.getReg()
3563                : MCRegister::NoRegister;
3564   };
3565 
3566   const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3567   auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
3568   if (!InvalidCompOprIdx)
3569     return true;
3570 
3571   auto CompOprIdx = *InvalidCompOprIdx;
3572   auto ParsedIdx =
3573       std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3574                InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3575   assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3576 
3577   auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3578   if (CompOprIdx == VOPD::Component::DST) {
3579     Error(Loc, "one dst register must be even and the other odd");
3580   } else {
3581     auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3582     Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3583                    " operands must use different VGPR banks");
3584   }
3585 
3586   return false;
3587 }
3588 
3589 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3590 
3591   const unsigned Opc = Inst.getOpcode();
3592   const MCInstrDesc &Desc = MII.get(Opc);
3593 
3594   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3595     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3596     assert(ClampIdx != -1);
3597     return Inst.getOperand(ClampIdx).getImm() == 0;
3598   }
3599 
3600   return true;
3601 }
3602 
3603 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3604                                            const SMLoc &IDLoc) {
3605 
3606   const unsigned Opc = Inst.getOpcode();
3607   const MCInstrDesc &Desc = MII.get(Opc);
3608 
3609   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3610     return true;
3611 
3612   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3613   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3614   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3615 
3616   assert(VDataIdx != -1);
3617 
3618   if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3619     return true;
3620 
3621   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3622   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3623   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3624   if (DMask == 0)
3625     DMask = 1;
3626 
3627   bool IsPackedD16 = false;
3628   unsigned DataSize =
3629       (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3630   if (hasPackedD16()) {
3631     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3632     IsPackedD16 = D16Idx >= 0;
3633     if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3634       DataSize = (DataSize + 1) / 2;
3635   }
3636 
3637   if ((VDataSize / 4) == DataSize + TFESize)
3638     return true;
3639 
3640   StringRef Modifiers;
3641   if (isGFX90A())
3642     Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3643   else
3644     Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3645 
3646   Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3647   return false;
3648 }
3649 
3650 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3651   const unsigned Opc = Inst.getOpcode();
3652   const MCInstrDesc &Desc = MII.get(Opc);
3653 
3654   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3655     return true;
3656 
3657   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3658 
3659   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3660       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3661   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3662   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3663   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3664   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3665 
3666   assert(VAddr0Idx != -1);
3667   assert(SrsrcIdx != -1);
3668   assert(SrsrcIdx > VAddr0Idx);
3669 
3670   if (DimIdx == -1)
3671     return true; // intersect_ray
3672 
3673   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3674   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3675   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3676   unsigned ActualAddrSize =
3677       IsNSA ? SrsrcIdx - VAddr0Idx
3678             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3679   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3680 
3681   unsigned ExpectedAddrSize =
3682       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3683 
3684   if (!IsNSA) {
3685     if (ExpectedAddrSize > 12)
3686       ExpectedAddrSize = 16;
3687 
3688     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3689     // This provides backward compatibility for assembly created
3690     // before 160b/192b/224b types were directly supported.
3691     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3692       return true;
3693   }
3694 
3695   return ActualAddrSize == ExpectedAddrSize;
3696 }
3697 
3698 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3699 
3700   const unsigned Opc = Inst.getOpcode();
3701   const MCInstrDesc &Desc = MII.get(Opc);
3702 
3703   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3704     return true;
3705   if (!Desc.mayLoad() || !Desc.mayStore())
3706     return true; // Not atomic
3707 
3708   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3709   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3710 
3711   // This is an incomplete check because image_atomic_cmpswap
3712   // may only use 0x3 and 0xf while other atomic operations
3713   // may use 0x1 and 0x3. However these limitations are
3714   // verified when we check that dmask matches dst size.
3715   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3716 }
3717 
3718 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3719 
3720   const unsigned Opc = Inst.getOpcode();
3721   const MCInstrDesc &Desc = MII.get(Opc);
3722 
3723   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3724     return true;
3725 
3726   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3727   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3728 
3729   // GATHER4 instructions use dmask in a different fashion compared to
3730   // other MIMG instructions. The only useful DMASK values are
3731   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3732   // (red,red,red,red) etc.) The ISA document doesn't mention
3733   // this.
3734   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3735 }
3736 
3737 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3738   const unsigned Opc = Inst.getOpcode();
3739   const MCInstrDesc &Desc = MII.get(Opc);
3740 
3741   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3742     return true;
3743 
3744   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3745   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3746       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3747 
3748   if (!BaseOpcode->MSAA)
3749     return true;
3750 
3751   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3752   assert(DimIdx != -1);
3753 
3754   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3755   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3756 
3757   return DimInfo->MSAA;
3758 }
3759 
3760 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3761 {
3762   switch (Opcode) {
3763   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3764   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3765   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3766     return true;
3767   default:
3768     return false;
3769   }
3770 }
3771 
3772 // movrels* opcodes should only allow VGPRS as src0.
3773 // This is specified in .td description for vop1/vop3,
3774 // but sdwa is handled differently. See isSDWAOperand.
3775 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3776                                       const OperandVector &Operands) {
3777 
3778   const unsigned Opc = Inst.getOpcode();
3779   const MCInstrDesc &Desc = MII.get(Opc);
3780 
3781   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3782     return true;
3783 
3784   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3785   assert(Src0Idx != -1);
3786 
3787   SMLoc ErrLoc;
3788   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3789   if (Src0.isReg()) {
3790     auto Reg = mc2PseudoReg(Src0.getReg());
3791     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3792     if (!isSGPR(Reg, TRI))
3793       return true;
3794     ErrLoc = getRegLoc(Reg, Operands);
3795   } else {
3796     ErrLoc = getConstLoc(Operands);
3797   }
3798 
3799   Error(ErrLoc, "source operand must be a VGPR");
3800   return false;
3801 }
3802 
3803 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3804                                           const OperandVector &Operands) {
3805 
3806   const unsigned Opc = Inst.getOpcode();
3807 
3808   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3809     return true;
3810 
3811   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3812   assert(Src0Idx != -1);
3813 
3814   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3815   if (!Src0.isReg())
3816     return true;
3817 
3818   auto Reg = mc2PseudoReg(Src0.getReg());
3819   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3820   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3821     Error(getRegLoc(Reg, Operands),
3822           "source operand must be either a VGPR or an inline constant");
3823     return false;
3824   }
3825 
3826   return true;
3827 }
3828 
3829 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3830                                       const OperandVector &Operands) {
3831   unsigned Opcode = Inst.getOpcode();
3832   const MCInstrDesc &Desc = MII.get(Opcode);
3833 
3834   if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3835       !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3836     return true;
3837 
3838   const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3839   if (Src2Idx == -1)
3840     return true;
3841 
3842   if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3843     Error(getConstLoc(Operands),
3844           "inline constants are not allowed for this operand");
3845     return false;
3846   }
3847 
3848   return true;
3849 }
3850 
3851 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3852                                    const OperandVector &Operands) {
3853   const unsigned Opc = Inst.getOpcode();
3854   const MCInstrDesc &Desc = MII.get(Opc);
3855 
3856   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3857     return true;
3858 
3859   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3860   if (Src2Idx == -1)
3861     return true;
3862 
3863   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3864   if (!Src2.isReg())
3865     return true;
3866 
3867   MCRegister Src2Reg = Src2.getReg();
3868   MCRegister DstReg = Inst.getOperand(0).getReg();
3869   if (Src2Reg == DstReg)
3870     return true;
3871 
3872   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3873   if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3874     return true;
3875 
3876   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3877     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3878           "source 2 operand must not partially overlap with dst");
3879     return false;
3880   }
3881 
3882   return true;
3883 }
3884 
3885 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3886   switch (Inst.getOpcode()) {
3887   default:
3888     return true;
3889   case V_DIV_SCALE_F32_gfx6_gfx7:
3890   case V_DIV_SCALE_F32_vi:
3891   case V_DIV_SCALE_F32_gfx10:
3892   case V_DIV_SCALE_F64_gfx6_gfx7:
3893   case V_DIV_SCALE_F64_vi:
3894   case V_DIV_SCALE_F64_gfx10:
3895     break;
3896   }
3897 
3898   // TODO: Check that src0 = src1 or src2.
3899 
3900   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3901                     AMDGPU::OpName::src2_modifiers,
3902                     AMDGPU::OpName::src2_modifiers}) {
3903     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3904             .getImm() &
3905         SISrcMods::ABS) {
3906       return false;
3907     }
3908   }
3909 
3910   return true;
3911 }
3912 
3913 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3914 
3915   const unsigned Opc = Inst.getOpcode();
3916   const MCInstrDesc &Desc = MII.get(Opc);
3917 
3918   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3919     return true;
3920 
3921   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3922   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3923     if (isCI() || isSI())
3924       return false;
3925   }
3926 
3927   return true;
3928 }
3929 
3930 static bool IsRevOpcode(const unsigned Opcode)
3931 {
3932   switch (Opcode) {
3933   case AMDGPU::V_SUBREV_F32_e32:
3934   case AMDGPU::V_SUBREV_F32_e64:
3935   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3936   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3937   case AMDGPU::V_SUBREV_F32_e32_vi:
3938   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3939   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3940   case AMDGPU::V_SUBREV_F32_e64_vi:
3941 
3942   case AMDGPU::V_SUBREV_CO_U32_e32:
3943   case AMDGPU::V_SUBREV_CO_U32_e64:
3944   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3945   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3946 
3947   case AMDGPU::V_SUBBREV_U32_e32:
3948   case AMDGPU::V_SUBBREV_U32_e64:
3949   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3950   case AMDGPU::V_SUBBREV_U32_e32_vi:
3951   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3952   case AMDGPU::V_SUBBREV_U32_e64_vi:
3953 
3954   case AMDGPU::V_SUBREV_U32_e32:
3955   case AMDGPU::V_SUBREV_U32_e64:
3956   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3957   case AMDGPU::V_SUBREV_U32_e32_vi:
3958   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3959   case AMDGPU::V_SUBREV_U32_e64_vi:
3960 
3961   case AMDGPU::V_SUBREV_F16_e32:
3962   case AMDGPU::V_SUBREV_F16_e64:
3963   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3964   case AMDGPU::V_SUBREV_F16_e32_vi:
3965   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3966   case AMDGPU::V_SUBREV_F16_e64_vi:
3967 
3968   case AMDGPU::V_SUBREV_U16_e32:
3969   case AMDGPU::V_SUBREV_U16_e64:
3970   case AMDGPU::V_SUBREV_U16_e32_vi:
3971   case AMDGPU::V_SUBREV_U16_e64_vi:
3972 
3973   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3974   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3975   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3976 
3977   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3978   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3979 
3980   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3981   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3982 
3983   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3984   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3985 
3986   case AMDGPU::V_LSHRREV_B32_e32:
3987   case AMDGPU::V_LSHRREV_B32_e64:
3988   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3989   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3990   case AMDGPU::V_LSHRREV_B32_e32_vi:
3991   case AMDGPU::V_LSHRREV_B32_e64_vi:
3992   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3993   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3994 
3995   case AMDGPU::V_ASHRREV_I32_e32:
3996   case AMDGPU::V_ASHRREV_I32_e64:
3997   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3998   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3999   case AMDGPU::V_ASHRREV_I32_e32_vi:
4000   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4001   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4002   case AMDGPU::V_ASHRREV_I32_e64_vi:
4003 
4004   case AMDGPU::V_LSHLREV_B32_e32:
4005   case AMDGPU::V_LSHLREV_B32_e64:
4006   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4007   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4008   case AMDGPU::V_LSHLREV_B32_e32_vi:
4009   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4010   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4011   case AMDGPU::V_LSHLREV_B32_e64_vi:
4012 
4013   case AMDGPU::V_LSHLREV_B16_e32:
4014   case AMDGPU::V_LSHLREV_B16_e64:
4015   case AMDGPU::V_LSHLREV_B16_e32_vi:
4016   case AMDGPU::V_LSHLREV_B16_e64_vi:
4017   case AMDGPU::V_LSHLREV_B16_gfx10:
4018 
4019   case AMDGPU::V_LSHRREV_B16_e32:
4020   case AMDGPU::V_LSHRREV_B16_e64:
4021   case AMDGPU::V_LSHRREV_B16_e32_vi:
4022   case AMDGPU::V_LSHRREV_B16_e64_vi:
4023   case AMDGPU::V_LSHRREV_B16_gfx10:
4024 
4025   case AMDGPU::V_ASHRREV_I16_e32:
4026   case AMDGPU::V_ASHRREV_I16_e64:
4027   case AMDGPU::V_ASHRREV_I16_e32_vi:
4028   case AMDGPU::V_ASHRREV_I16_e64_vi:
4029   case AMDGPU::V_ASHRREV_I16_gfx10:
4030 
4031   case AMDGPU::V_LSHLREV_B64_e64:
4032   case AMDGPU::V_LSHLREV_B64_gfx10:
4033   case AMDGPU::V_LSHLREV_B64_vi:
4034 
4035   case AMDGPU::V_LSHRREV_B64_e64:
4036   case AMDGPU::V_LSHRREV_B64_gfx10:
4037   case AMDGPU::V_LSHRREV_B64_vi:
4038 
4039   case AMDGPU::V_ASHRREV_I64_e64:
4040   case AMDGPU::V_ASHRREV_I64_gfx10:
4041   case AMDGPU::V_ASHRREV_I64_vi:
4042 
4043   case AMDGPU::V_PK_LSHLREV_B16:
4044   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4045   case AMDGPU::V_PK_LSHLREV_B16_vi:
4046 
4047   case AMDGPU::V_PK_LSHRREV_B16:
4048   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4049   case AMDGPU::V_PK_LSHRREV_B16_vi:
4050   case AMDGPU::V_PK_ASHRREV_I16:
4051   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4052   case AMDGPU::V_PK_ASHRREV_I16_vi:
4053     return true;
4054   default:
4055     return false;
4056   }
4057 }
4058 
4059 std::optional<StringRef>
4060 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4061 
4062   using namespace SIInstrFlags;
4063   const unsigned Opcode = Inst.getOpcode();
4064   const MCInstrDesc &Desc = MII.get(Opcode);
4065 
4066   // lds_direct register is defined so that it can be used
4067   // with 9-bit operands only. Ignore encodings which do not accept these.
4068   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4069   if ((Desc.TSFlags & Enc) == 0)
4070     return std::nullopt;
4071 
4072   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4073     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4074     if (SrcIdx == -1)
4075       break;
4076     const auto &Src = Inst.getOperand(SrcIdx);
4077     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4078 
4079       if (isGFX90A() || isGFX11Plus())
4080         return StringRef("lds_direct is not supported on this GPU");
4081 
4082       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4083         return StringRef("lds_direct cannot be used with this instruction");
4084 
4085       if (SrcName != OpName::src0)
4086         return StringRef("lds_direct may be used as src0 only");
4087     }
4088   }
4089 
4090   return std::nullopt;
4091 }
4092 
4093 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4094   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4095     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4096     if (Op.isFlatOffset())
4097       return Op.getStartLoc();
4098   }
4099   return getLoc();
4100 }
4101 
4102 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4103                                          const OperandVector &Operands) {
4104   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4105   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4106     return true;
4107 
4108   auto Opcode = Inst.getOpcode();
4109   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4110   assert(OpNum != -1);
4111 
4112   const auto &Op = Inst.getOperand(OpNum);
4113   if (!hasFlatOffsets() && Op.getImm() != 0) {
4114     Error(getFlatOffsetLoc(Operands),
4115           "flat offset modifier is not supported on this GPU");
4116     return false;
4117   }
4118 
4119   // For FLAT segment the offset must be positive;
4120   // MSB is ignored and forced to zero.
4121   unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4122   bool AllowNegative =
4123       TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
4124   if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4125     Error(getFlatOffsetLoc(Operands),
4126           Twine("expected a ") +
4127               (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4128                              : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4129     return false;
4130   }
4131 
4132   return true;
4133 }
4134 
4135 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4136   // Start with second operand because SMEM Offset cannot be dst or src0.
4137   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4138     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4139     if (Op.isSMEMOffset())
4140       return Op.getStartLoc();
4141   }
4142   return getLoc();
4143 }
4144 
4145 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4146                                          const OperandVector &Operands) {
4147   if (isCI() || isSI())
4148     return true;
4149 
4150   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4151   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4152     return true;
4153 
4154   auto Opcode = Inst.getOpcode();
4155   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4156   if (OpNum == -1)
4157     return true;
4158 
4159   const auto &Op = Inst.getOperand(OpNum);
4160   if (!Op.isImm())
4161     return true;
4162 
4163   uint64_t Offset = Op.getImm();
4164   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4165   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4166       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4167     return true;
4168 
4169   Error(getSMEMOffsetLoc(Operands),
4170         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4171                                "expected a 21-bit signed offset");
4172 
4173   return false;
4174 }
4175 
4176 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4177   unsigned Opcode = Inst.getOpcode();
4178   const MCInstrDesc &Desc = MII.get(Opcode);
4179   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4180     return true;
4181 
4182   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4183   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4184 
4185   const int OpIndices[] = { Src0Idx, Src1Idx };
4186 
4187   unsigned NumExprs = 0;
4188   unsigned NumLiterals = 0;
4189   uint32_t LiteralValue;
4190 
4191   for (int OpIdx : OpIndices) {
4192     if (OpIdx == -1) break;
4193 
4194     const MCOperand &MO = Inst.getOperand(OpIdx);
4195     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4196     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4197       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4198         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4199         if (NumLiterals == 0 || LiteralValue != Value) {
4200           LiteralValue = Value;
4201           ++NumLiterals;
4202         }
4203       } else if (MO.isExpr()) {
4204         ++NumExprs;
4205       }
4206     }
4207   }
4208 
4209   return NumLiterals + NumExprs <= 1;
4210 }
4211 
4212 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4213   const unsigned Opc = Inst.getOpcode();
4214   if (isPermlane16(Opc)) {
4215     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4216     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4217 
4218     if (OpSel & ~3)
4219       return false;
4220   }
4221 
4222   uint64_t TSFlags = MII.get(Opc).TSFlags;
4223 
4224   if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4225     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4226     if (OpSelIdx != -1) {
4227       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4228         return false;
4229     }
4230     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4231     if (OpSelHiIdx != -1) {
4232       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4233         return false;
4234     }
4235   }
4236 
4237   // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4238   if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4239       (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4240     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4241     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4242     if (OpSel & 3)
4243       return false;
4244   }
4245 
4246   return true;
4247 }
4248 
4249 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4250                                   const OperandVector &Operands) {
4251   const unsigned Opc = Inst.getOpcode();
4252   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4253   if (DppCtrlIdx < 0)
4254     return true;
4255   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4256 
4257   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4258     // DPP64 is supported for row_newbcast only.
4259     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4260     if (Src0Idx >= 0 &&
4261         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4262       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4263       Error(S, "64 bit dpp only supports row_newbcast");
4264       return false;
4265     }
4266   }
4267 
4268   return true;
4269 }
4270 
4271 // Check if VCC register matches wavefront size
4272 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4273   auto FB = getFeatureBits();
4274   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4275     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4276 }
4277 
4278 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4279 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4280                                          const OperandVector &Operands) {
4281   unsigned Opcode = Inst.getOpcode();
4282   const MCInstrDesc &Desc = MII.get(Opcode);
4283   bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4284   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4285       !HasMandatoryLiteral && !isVOPD(Opcode))
4286     return true;
4287 
4288   OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4289 
4290   unsigned NumExprs = 0;
4291   unsigned NumLiterals = 0;
4292   uint32_t LiteralValue;
4293 
4294   for (int OpIdx : OpIndices) {
4295     if (OpIdx == -1)
4296       continue;
4297 
4298     const MCOperand &MO = Inst.getOperand(OpIdx);
4299     if (!MO.isImm() && !MO.isExpr())
4300       continue;
4301     if (!isSISrcOperand(Desc, OpIdx))
4302       continue;
4303 
4304     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4305       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4306       if (NumLiterals == 0 || LiteralValue != Value) {
4307         LiteralValue = Value;
4308         ++NumLiterals;
4309       }
4310     } else if (MO.isExpr()) {
4311       ++NumExprs;
4312     }
4313   }
4314   NumLiterals += NumExprs;
4315 
4316   if (!NumLiterals)
4317     return true;
4318 
4319   if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4320     Error(getLitLoc(Operands), "literal operands are not supported");
4321     return false;
4322   }
4323 
4324   if (NumLiterals > 1) {
4325     Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4326     return false;
4327   }
4328 
4329   return true;
4330 }
4331 
4332 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4333 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4334                          const MCRegisterInfo *MRI) {
4335   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4336   if (OpIdx < 0)
4337     return -1;
4338 
4339   const MCOperand &Op = Inst.getOperand(OpIdx);
4340   if (!Op.isReg())
4341     return -1;
4342 
4343   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4344   auto Reg = Sub ? Sub : Op.getReg();
4345   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4346   return AGPR32.contains(Reg) ? 1 : 0;
4347 }
4348 
4349 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4350   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4351   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4352                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4353                   SIInstrFlags::DS)) == 0)
4354     return true;
4355 
4356   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4357                                                       : AMDGPU::OpName::vdata;
4358 
4359   const MCRegisterInfo *MRI = getMRI();
4360   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4361   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4362 
4363   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4364     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4365     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4366       return false;
4367   }
4368 
4369   auto FB = getFeatureBits();
4370   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4371     if (DataAreg < 0 || DstAreg < 0)
4372       return true;
4373     return DstAreg == DataAreg;
4374   }
4375 
4376   return DstAreg < 1 && DataAreg < 1;
4377 }
4378 
4379 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4380   auto FB = getFeatureBits();
4381   if (!FB[AMDGPU::FeatureGFX90AInsts])
4382     return true;
4383 
4384   const MCRegisterInfo *MRI = getMRI();
4385   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4386   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4387   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4388     const MCOperand &Op = Inst.getOperand(I);
4389     if (!Op.isReg())
4390       continue;
4391 
4392     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4393     if (!Sub)
4394       continue;
4395 
4396     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4397       return false;
4398     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4399       return false;
4400   }
4401 
4402   return true;
4403 }
4404 
4405 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4406   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4407     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4408     if (Op.isBLGP())
4409       return Op.getStartLoc();
4410   }
4411   return SMLoc();
4412 }
4413 
4414 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4415                                    const OperandVector &Operands) {
4416   unsigned Opc = Inst.getOpcode();
4417   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4418   if (BlgpIdx == -1)
4419     return true;
4420   SMLoc BLGPLoc = getBLGPLoc(Operands);
4421   if (!BLGPLoc.isValid())
4422     return true;
4423   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4424   auto FB = getFeatureBits();
4425   bool UsesNeg = false;
4426   if (FB[AMDGPU::FeatureGFX940Insts]) {
4427     switch (Opc) {
4428     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4429     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4430     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4431     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4432       UsesNeg = true;
4433     }
4434   }
4435 
4436   if (IsNeg == UsesNeg)
4437     return true;
4438 
4439   Error(BLGPLoc,
4440         UsesNeg ? "invalid modifier: blgp is not supported"
4441                 : "invalid modifier: neg is not supported");
4442 
4443   return false;
4444 }
4445 
4446 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4447                                       const OperandVector &Operands) {
4448   if (!isGFX11Plus())
4449     return true;
4450 
4451   unsigned Opc = Inst.getOpcode();
4452   if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4453       Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4454       Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4455       Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4456     return true;
4457 
4458   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4459   assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4460   auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4461   if (Reg == AMDGPU::SGPR_NULL)
4462     return true;
4463 
4464   SMLoc RegLoc = getRegLoc(Reg, Operands);
4465   Error(RegLoc, "src0 must be null");
4466   return false;
4467 }
4468 
4469 // gfx90a has an undocumented limitation:
4470 // DS_GWS opcodes must use even aligned registers.
4471 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4472                                   const OperandVector &Operands) {
4473   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4474     return true;
4475 
4476   int Opc = Inst.getOpcode();
4477   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4478       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4479     return true;
4480 
4481   const MCRegisterInfo *MRI = getMRI();
4482   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4483   int Data0Pos =
4484       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4485   assert(Data0Pos != -1);
4486   auto Reg = Inst.getOperand(Data0Pos).getReg();
4487   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4488   if (RegIdx & 1) {
4489     SMLoc RegLoc = getRegLoc(Reg, Operands);
4490     Error(RegLoc, "vgpr must be even aligned");
4491     return false;
4492   }
4493 
4494   return true;
4495 }
4496 
4497 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4498                                             const OperandVector &Operands,
4499                                             const SMLoc &IDLoc) {
4500   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4501                                            AMDGPU::OpName::cpol);
4502   if (CPolPos == -1)
4503     return true;
4504 
4505   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4506 
4507   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4508   if (TSFlags & SIInstrFlags::SMRD) {
4509     if (CPol && (isSI() || isCI())) {
4510       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4511       Error(S, "cache policy is not supported for SMRD instructions");
4512       return false;
4513     }
4514     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4515       Error(IDLoc, "invalid cache policy for SMEM instruction");
4516       return false;
4517     }
4518   }
4519 
4520   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4521     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4522     StringRef CStr(S.getPointer());
4523     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4524     Error(S, "scc is not supported on this GPU");
4525     return false;
4526   }
4527 
4528   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4529     return true;
4530 
4531   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4532     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4533       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4534                               : "instruction must use glc");
4535       return false;
4536     }
4537   } else {
4538     if (CPol & CPol::GLC) {
4539       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4540       StringRef CStr(S.getPointer());
4541       S = SMLoc::getFromPointer(
4542           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4543       Error(S, isGFX940() ? "instruction must not use sc0"
4544                           : "instruction must not use glc");
4545       return false;
4546     }
4547   }
4548 
4549   return true;
4550 }
4551 
4552 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4553   if (!isGFX11Plus())
4554     return true;
4555   for (auto &Operand : Operands) {
4556     if (!Operand->isReg())
4557       continue;
4558     unsigned Reg = Operand->getReg();
4559     if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4560       Error(getRegLoc(Reg, Operands),
4561             "execz and vccz are not supported on this GPU");
4562       return false;
4563     }
4564   }
4565   return true;
4566 }
4567 
4568 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4569                                   const OperandVector &Operands) {
4570   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4571   if (Desc.mayStore() &&
4572       (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4573     SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4574     if (Loc != getInstLoc(Operands)) {
4575       Error(Loc, "TFE modifier has no meaning for store instructions");
4576       return false;
4577     }
4578   }
4579 
4580   return true;
4581 }
4582 
4583 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4584                                           const SMLoc &IDLoc,
4585                                           const OperandVector &Operands) {
4586   if (auto ErrMsg = validateLdsDirect(Inst)) {
4587     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4588     return false;
4589   }
4590   if (!validateSOPLiteral(Inst)) {
4591     Error(getLitLoc(Operands),
4592       "only one unique literal operand is allowed");
4593     return false;
4594   }
4595   if (!validateVOPLiteral(Inst, Operands)) {
4596     return false;
4597   }
4598   if (!validateConstantBusLimitations(Inst, Operands)) {
4599     return false;
4600   }
4601   if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4602     return false;
4603   }
4604   if (!validateIntClampSupported(Inst)) {
4605     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4606       "integer clamping is not supported on this GPU");
4607     return false;
4608   }
4609   if (!validateOpSel(Inst)) {
4610     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4611       "invalid op_sel operand");
4612     return false;
4613   }
4614   if (!validateDPP(Inst, Operands)) {
4615     return false;
4616   }
4617   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4618   if (!validateMIMGD16(Inst)) {
4619     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4620       "d16 modifier is not supported on this GPU");
4621     return false;
4622   }
4623   if (!validateMIMGMSAA(Inst)) {
4624     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4625           "invalid dim; must be MSAA type");
4626     return false;
4627   }
4628   if (!validateMIMGDataSize(Inst, IDLoc)) {
4629     return false;
4630   }
4631   if (!validateMIMGAddrSize(Inst)) {
4632     Error(IDLoc,
4633       "image address size does not match dim and a16");
4634     return false;
4635   }
4636   if (!validateMIMGAtomicDMask(Inst)) {
4637     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4638       "invalid atomic image dmask");
4639     return false;
4640   }
4641   if (!validateMIMGGatherDMask(Inst)) {
4642     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4643       "invalid image_gather dmask: only one bit must be set");
4644     return false;
4645   }
4646   if (!validateMovrels(Inst, Operands)) {
4647     return false;
4648   }
4649   if (!validateFlatOffset(Inst, Operands)) {
4650     return false;
4651   }
4652   if (!validateSMEMOffset(Inst, Operands)) {
4653     return false;
4654   }
4655   if (!validateMAIAccWrite(Inst, Operands)) {
4656     return false;
4657   }
4658   if (!validateMAISrc2(Inst, Operands)) {
4659     return false;
4660   }
4661   if (!validateMFMA(Inst, Operands)) {
4662     return false;
4663   }
4664   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4665     return false;
4666   }
4667 
4668   if (!validateAGPRLdSt(Inst)) {
4669     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4670     ? "invalid register class: data and dst should be all VGPR or AGPR"
4671     : "invalid register class: agpr loads and stores not supported on this GPU"
4672     );
4673     return false;
4674   }
4675   if (!validateVGPRAlign(Inst)) {
4676     Error(IDLoc,
4677       "invalid register class: vgpr tuples must be 64 bit aligned");
4678     return false;
4679   }
4680   if (!validateGWS(Inst, Operands)) {
4681     return false;
4682   }
4683 
4684   if (!validateBLGP(Inst, Operands)) {
4685     return false;
4686   }
4687 
4688   if (!validateDivScale(Inst)) {
4689     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4690     return false;
4691   }
4692   if (!validateWaitCnt(Inst, Operands)) {
4693     return false;
4694   }
4695   if (!validateExeczVcczOperands(Operands)) {
4696     return false;
4697   }
4698   if (!validateTFE(Inst, Operands)) {
4699     return false;
4700   }
4701 
4702   return true;
4703 }
4704 
4705 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4706                                             const FeatureBitset &FBS,
4707                                             unsigned VariantID = 0);
4708 
4709 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4710                                 const FeatureBitset &AvailableFeatures,
4711                                 unsigned VariantID);
4712 
4713 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4714                                        const FeatureBitset &FBS) {
4715   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4716 }
4717 
4718 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4719                                        const FeatureBitset &FBS,
4720                                        ArrayRef<unsigned> Variants) {
4721   for (auto Variant : Variants) {
4722     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4723       return true;
4724   }
4725 
4726   return false;
4727 }
4728 
4729 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4730                                                   const SMLoc &IDLoc) {
4731   FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4732 
4733   // Check if requested instruction variant is supported.
4734   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4735     return false;
4736 
4737   // This instruction is not supported.
4738   // Clear any other pending errors because they are no longer relevant.
4739   getParser().clearPendingErrors();
4740 
4741   // Requested instruction variant is not supported.
4742   // Check if any other variants are supported.
4743   StringRef VariantName = getMatchedVariantName();
4744   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4745     return Error(IDLoc,
4746                  Twine(VariantName,
4747                        " variant of this instruction is not supported"));
4748   }
4749 
4750   // Check if this instruction may be used with a different wavesize.
4751   if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4752       !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4753 
4754     FeatureBitset FeaturesWS32 = getFeatureBits();
4755     FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4756         .flip(AMDGPU::FeatureWavefrontSize32);
4757     FeatureBitset AvailableFeaturesWS32 =
4758         ComputeAvailableFeatures(FeaturesWS32);
4759 
4760     if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4761       return Error(IDLoc, "instruction requires wavesize=32");
4762   }
4763 
4764   // Finally check if this instruction is supported on any other GPU.
4765   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4766     return Error(IDLoc, "instruction not supported on this GPU");
4767   }
4768 
4769   // Instruction not supported on any GPU. Probably a typo.
4770   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4771   return Error(IDLoc, "invalid instruction" + Suggestion);
4772 }
4773 
4774 static bool isInvalidVOPDY(const OperandVector &Operands,
4775                            uint64_t InvalidOprIdx) {
4776   assert(InvalidOprIdx < Operands.size());
4777   const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4778   if (Op.isToken() && InvalidOprIdx > 1) {
4779     const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4780     return PrevOp.isToken() && PrevOp.getToken() == "::";
4781   }
4782   return false;
4783 }
4784 
4785 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4786                                               OperandVector &Operands,
4787                                               MCStreamer &Out,
4788                                               uint64_t &ErrorInfo,
4789                                               bool MatchingInlineAsm) {
4790   MCInst Inst;
4791   unsigned Result = Match_Success;
4792   for (auto Variant : getMatchedVariants()) {
4793     uint64_t EI;
4794     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4795                                   Variant);
4796     // We order match statuses from least to most specific. We use most specific
4797     // status as resulting
4798     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4799     if ((R == Match_Success) ||
4800         (R == Match_PreferE32) ||
4801         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4802         (R == Match_InvalidOperand && Result != Match_MissingFeature
4803                                    && Result != Match_PreferE32) ||
4804         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4805                                    && Result != Match_MissingFeature
4806                                    && Result != Match_PreferE32)) {
4807       Result = R;
4808       ErrorInfo = EI;
4809     }
4810     if (R == Match_Success)
4811       break;
4812   }
4813 
4814   if (Result == Match_Success) {
4815     if (!validateInstruction(Inst, IDLoc, Operands)) {
4816       return true;
4817     }
4818     Inst.setLoc(IDLoc);
4819     Out.emitInstruction(Inst, getSTI());
4820     return false;
4821   }
4822 
4823   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4824   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4825     return true;
4826   }
4827 
4828   switch (Result) {
4829   default: break;
4830   case Match_MissingFeature:
4831     // It has been verified that the specified instruction
4832     // mnemonic is valid. A match was found but it requires
4833     // features which are not supported on this GPU.
4834     return Error(IDLoc, "operands are not valid for this GPU or mode");
4835 
4836   case Match_InvalidOperand: {
4837     SMLoc ErrorLoc = IDLoc;
4838     if (ErrorInfo != ~0ULL) {
4839       if (ErrorInfo >= Operands.size()) {
4840         return Error(IDLoc, "too few operands for instruction");
4841       }
4842       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4843       if (ErrorLoc == SMLoc())
4844         ErrorLoc = IDLoc;
4845 
4846       if (isInvalidVOPDY(Operands, ErrorInfo))
4847         return Error(ErrorLoc, "invalid VOPDY instruction");
4848     }
4849     return Error(ErrorLoc, "invalid operand for instruction");
4850   }
4851 
4852   case Match_PreferE32:
4853     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4854                         "should be encoded as e32");
4855   case Match_MnemonicFail:
4856     llvm_unreachable("Invalid instructions should have been handled already");
4857   }
4858   llvm_unreachable("Implement any new match types added!");
4859 }
4860 
4861 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4862   int64_t Tmp = -1;
4863   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4864     return true;
4865   }
4866   if (getParser().parseAbsoluteExpression(Tmp)) {
4867     return true;
4868   }
4869   Ret = static_cast<uint32_t>(Tmp);
4870   return false;
4871 }
4872 
4873 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4874                                                uint32_t &Minor) {
4875   if (ParseAsAbsoluteExpression(Major))
4876     return TokError("invalid major version");
4877 
4878   if (!trySkipToken(AsmToken::Comma))
4879     return TokError("minor version number required, comma expected");
4880 
4881   if (ParseAsAbsoluteExpression(Minor))
4882     return TokError("invalid minor version");
4883 
4884   return false;
4885 }
4886 
4887 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4888   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4889     return TokError("directive only supported for amdgcn architecture");
4890 
4891   std::string TargetIDDirective;
4892   SMLoc TargetStart = getTok().getLoc();
4893   if (getParser().parseEscapedString(TargetIDDirective))
4894     return true;
4895 
4896   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4897   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4898     return getParser().Error(TargetRange.Start,
4899         (Twine(".amdgcn_target directive's target id ") +
4900          Twine(TargetIDDirective) +
4901          Twine(" does not match the specified target id ") +
4902          Twine(getTargetStreamer().getTargetID()->toString())).str());
4903 
4904   return false;
4905 }
4906 
4907 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4908   return Error(Range.Start, "value out of range", Range);
4909 }
4910 
4911 bool AMDGPUAsmParser::calculateGPRBlocks(
4912     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4913     bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
4914     unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
4915     SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4916   // TODO(scott.linder): These calculations are duplicated from
4917   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4918   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4919 
4920   unsigned NumVGPRs = NextFreeVGPR;
4921   unsigned NumSGPRs = NextFreeSGPR;
4922 
4923   if (Version.Major >= 10)
4924     NumSGPRs = 0;
4925   else {
4926     unsigned MaxAddressableNumSGPRs =
4927         IsaInfo::getAddressableNumSGPRs(&getSTI());
4928 
4929     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4930         NumSGPRs > MaxAddressableNumSGPRs)
4931       return OutOfRangeError(SGPRRange);
4932 
4933     NumSGPRs +=
4934         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4935 
4936     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4937         NumSGPRs > MaxAddressableNumSGPRs)
4938       return OutOfRangeError(SGPRRange);
4939 
4940     if (Features.test(FeatureSGPRInitBug))
4941       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4942   }
4943 
4944   VGPRBlocks =
4945       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4946   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4947 
4948   return false;
4949 }
4950 
4951 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4952   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4953     return TokError("directive only supported for amdgcn architecture");
4954 
4955   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4956     return TokError("directive only supported for amdhsa OS");
4957 
4958   StringRef KernelName;
4959   if (getParser().parseIdentifier(KernelName))
4960     return true;
4961 
4962   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4963 
4964   StringSet<> Seen;
4965 
4966   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4967 
4968   SMRange VGPRRange;
4969   uint64_t NextFreeVGPR = 0;
4970   uint64_t AccumOffset = 0;
4971   uint64_t SharedVGPRCount = 0;
4972   SMRange SGPRRange;
4973   uint64_t NextFreeSGPR = 0;
4974 
4975   // Count the number of user SGPRs implied from the enabled feature bits.
4976   unsigned ImpliedUserSGPRCount = 0;
4977 
4978   // Track if the asm explicitly contains the directive for the user SGPR
4979   // count.
4980   std::optional<unsigned> ExplicitUserSGPRCount;
4981   bool ReserveVCC = true;
4982   bool ReserveFlatScr = true;
4983   std::optional<bool> EnableWavefrontSize32;
4984 
4985   while (true) {
4986     while (trySkipToken(AsmToken::EndOfStatement));
4987 
4988     StringRef ID;
4989     SMRange IDRange = getTok().getLocRange();
4990     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4991       return true;
4992 
4993     if (ID == ".end_amdhsa_kernel")
4994       break;
4995 
4996     if (!Seen.insert(ID).second)
4997       return TokError(".amdhsa_ directives cannot be repeated");
4998 
4999     SMLoc ValStart = getLoc();
5000     int64_t IVal;
5001     if (getParser().parseAbsoluteExpression(IVal))
5002       return true;
5003     SMLoc ValEnd = getLoc();
5004     SMRange ValRange = SMRange(ValStart, ValEnd);
5005 
5006     if (IVal < 0)
5007       return OutOfRangeError(ValRange);
5008 
5009     uint64_t Val = IVal;
5010 
5011 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
5012   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
5013     return OutOfRangeError(RANGE);                                             \
5014   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
5015 
5016     if (ID == ".amdhsa_group_segment_fixed_size") {
5017       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5018         return OutOfRangeError(ValRange);
5019       KD.group_segment_fixed_size = Val;
5020     } else if (ID == ".amdhsa_private_segment_fixed_size") {
5021       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5022         return OutOfRangeError(ValRange);
5023       KD.private_segment_fixed_size = Val;
5024     } else if (ID == ".amdhsa_kernarg_size") {
5025       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5026         return OutOfRangeError(ValRange);
5027       KD.kernarg_size = Val;
5028     } else if (ID == ".amdhsa_user_sgpr_count") {
5029       ExplicitUserSGPRCount = Val;
5030     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5031       if (hasArchitectedFlatScratch())
5032         return Error(IDRange.Start,
5033                      "directive is not supported with architected flat scratch",
5034                      IDRange);
5035       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5036                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5037                        Val, ValRange);
5038       if (Val)
5039         ImpliedUserSGPRCount += 4;
5040     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5041       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5042                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5043                        ValRange);
5044       if (Val)
5045         ImpliedUserSGPRCount += 2;
5046     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5047       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5048                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5049                        ValRange);
5050       if (Val)
5051         ImpliedUserSGPRCount += 2;
5052     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5053       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5054                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5055                        Val, ValRange);
5056       if (Val)
5057         ImpliedUserSGPRCount += 2;
5058     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5059       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5060                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5061                        ValRange);
5062       if (Val)
5063         ImpliedUserSGPRCount += 2;
5064     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5065       if (hasArchitectedFlatScratch())
5066         return Error(IDRange.Start,
5067                      "directive is not supported with architected flat scratch",
5068                      IDRange);
5069       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5070                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5071                        ValRange);
5072       if (Val)
5073         ImpliedUserSGPRCount += 2;
5074     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5075       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5076                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5077                        Val, ValRange);
5078       if (Val)
5079         ImpliedUserSGPRCount += 1;
5080     } else if (ID == ".amdhsa_wavefront_size32") {
5081       if (IVersion.Major < 10)
5082         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5083       EnableWavefrontSize32 = Val;
5084       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5085                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5086                        Val, ValRange);
5087     } else if (ID == ".amdhsa_uses_dynamic_stack") {
5088       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5089                        KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5090     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5091       if (hasArchitectedFlatScratch())
5092         return Error(IDRange.Start,
5093                      "directive is not supported with architected flat scratch",
5094                      IDRange);
5095       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5096                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5097     } else if (ID == ".amdhsa_enable_private_segment") {
5098       if (!hasArchitectedFlatScratch())
5099         return Error(
5100             IDRange.Start,
5101             "directive is not supported without architected flat scratch",
5102             IDRange);
5103       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5104                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5105     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5106       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5107                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5108                        ValRange);
5109     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5110       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5111                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5112                        ValRange);
5113     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5114       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5115                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5116                        ValRange);
5117     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5118       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5119                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5120                        ValRange);
5121     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5122       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5123                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5124                        ValRange);
5125     } else if (ID == ".amdhsa_next_free_vgpr") {
5126       VGPRRange = ValRange;
5127       NextFreeVGPR = Val;
5128     } else if (ID == ".amdhsa_next_free_sgpr") {
5129       SGPRRange = ValRange;
5130       NextFreeSGPR = Val;
5131     } else if (ID == ".amdhsa_accum_offset") {
5132       if (!isGFX90A())
5133         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5134       AccumOffset = Val;
5135     } else if (ID == ".amdhsa_reserve_vcc") {
5136       if (!isUInt<1>(Val))
5137         return OutOfRangeError(ValRange);
5138       ReserveVCC = Val;
5139     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5140       if (IVersion.Major < 7)
5141         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5142       if (hasArchitectedFlatScratch())
5143         return Error(IDRange.Start,
5144                      "directive is not supported with architected flat scratch",
5145                      IDRange);
5146       if (!isUInt<1>(Val))
5147         return OutOfRangeError(ValRange);
5148       ReserveFlatScr = Val;
5149     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5150       if (IVersion.Major < 8)
5151         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5152       if (!isUInt<1>(Val))
5153         return OutOfRangeError(ValRange);
5154       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5155         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5156                                  IDRange);
5157     } else if (ID == ".amdhsa_float_round_mode_32") {
5158       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5159                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5160     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5161       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5162                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5163     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5164       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5165                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5166     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5167       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5168                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5169                        ValRange);
5170     } else if (ID == ".amdhsa_dx10_clamp") {
5171       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5172                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5173     } else if (ID == ".amdhsa_ieee_mode") {
5174       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5175                        Val, ValRange);
5176     } else if (ID == ".amdhsa_fp16_overflow") {
5177       if (IVersion.Major < 9)
5178         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5179       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5180                        ValRange);
5181     } else if (ID == ".amdhsa_tg_split") {
5182       if (!isGFX90A())
5183         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5184       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5185                        ValRange);
5186     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5187       if (IVersion.Major < 10)
5188         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5189       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5190                        ValRange);
5191     } else if (ID == ".amdhsa_memory_ordered") {
5192       if (IVersion.Major < 10)
5193         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5194       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5195                        ValRange);
5196     } else if (ID == ".amdhsa_forward_progress") {
5197       if (IVersion.Major < 10)
5198         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5199       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5200                        ValRange);
5201     } else if (ID == ".amdhsa_shared_vgpr_count") {
5202       if (IVersion.Major < 10)
5203         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5204       SharedVGPRCount = Val;
5205       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5206                        COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5207                        ValRange);
5208     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5209       PARSE_BITS_ENTRY(
5210           KD.compute_pgm_rsrc2,
5211           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5212           ValRange);
5213     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5214       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5215                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5216                        Val, ValRange);
5217     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5218       PARSE_BITS_ENTRY(
5219           KD.compute_pgm_rsrc2,
5220           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5221           ValRange);
5222     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5223       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5224                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5225                        Val, ValRange);
5226     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5227       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5228                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5229                        Val, ValRange);
5230     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5231       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5232                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5233                        Val, ValRange);
5234     } else if (ID == ".amdhsa_exception_int_div_zero") {
5235       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5236                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5237                        Val, ValRange);
5238     } else {
5239       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5240     }
5241 
5242 #undef PARSE_BITS_ENTRY
5243   }
5244 
5245   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5246     return TokError(".amdhsa_next_free_vgpr directive is required");
5247 
5248   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5249     return TokError(".amdhsa_next_free_sgpr directive is required");
5250 
5251   unsigned VGPRBlocks;
5252   unsigned SGPRBlocks;
5253   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5254                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5255                          EnableWavefrontSize32, NextFreeVGPR,
5256                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5257                          SGPRBlocks))
5258     return true;
5259 
5260   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5261           VGPRBlocks))
5262     return OutOfRangeError(VGPRRange);
5263   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5264                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5265 
5266   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5267           SGPRBlocks))
5268     return OutOfRangeError(SGPRRange);
5269   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5270                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5271                   SGPRBlocks);
5272 
5273   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5274     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5275                     "enabled user SGPRs");
5276 
5277   unsigned UserSGPRCount =
5278       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5279 
5280   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5281     return TokError("too many user SGPRs enabled");
5282   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5283                   UserSGPRCount);
5284 
5285   if (isGFX90A()) {
5286     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5287       return TokError(".amdhsa_accum_offset directive is required");
5288     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5289       return TokError("accum_offset should be in range [4..256] in "
5290                       "increments of 4");
5291     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5292       return TokError("accum_offset exceeds total VGPR allocation");
5293     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5294                     (AccumOffset / 4 - 1));
5295   }
5296 
5297   if (IVersion.Major == 10) {
5298     // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5299     if (SharedVGPRCount && EnableWavefrontSize32) {
5300       return TokError("shared_vgpr_count directive not valid on "
5301                       "wavefront size 32");
5302     }
5303     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5304       return TokError("shared_vgpr_count*2 + "
5305                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5306                       "exceed 63\n");
5307     }
5308   }
5309 
5310   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5311       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5312       ReserveFlatScr);
5313   return false;
5314 }
5315 
5316 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5317   uint32_t Major;
5318   uint32_t Minor;
5319 
5320   if (ParseDirectiveMajorMinor(Major, Minor))
5321     return true;
5322 
5323   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5324   return false;
5325 }
5326 
5327 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5328   uint32_t Major;
5329   uint32_t Minor;
5330   uint32_t Stepping;
5331   StringRef VendorName;
5332   StringRef ArchName;
5333 
5334   // If this directive has no arguments, then use the ISA version for the
5335   // targeted GPU.
5336   if (isToken(AsmToken::EndOfStatement)) {
5337     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5338     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5339                                                         ISA.Stepping,
5340                                                         "AMD", "AMDGPU");
5341     return false;
5342   }
5343 
5344   if (ParseDirectiveMajorMinor(Major, Minor))
5345     return true;
5346 
5347   if (!trySkipToken(AsmToken::Comma))
5348     return TokError("stepping version number required, comma expected");
5349 
5350   if (ParseAsAbsoluteExpression(Stepping))
5351     return TokError("invalid stepping version");
5352 
5353   if (!trySkipToken(AsmToken::Comma))
5354     return TokError("vendor name required, comma expected");
5355 
5356   if (!parseString(VendorName, "invalid vendor name"))
5357     return true;
5358 
5359   if (!trySkipToken(AsmToken::Comma))
5360     return TokError("arch name required, comma expected");
5361 
5362   if (!parseString(ArchName, "invalid arch name"))
5363     return true;
5364 
5365   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5366                                                       VendorName, ArchName);
5367   return false;
5368 }
5369 
5370 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5371                                                amd_kernel_code_t &Header) {
5372   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5373   // assembly for backwards compatibility.
5374   if (ID == "max_scratch_backing_memory_byte_size") {
5375     Parser.eatToEndOfStatement();
5376     return false;
5377   }
5378 
5379   SmallString<40> ErrStr;
5380   raw_svector_ostream Err(ErrStr);
5381   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5382     return TokError(Err.str());
5383   }
5384   Lex();
5385 
5386   if (ID == "enable_wavefront_size32") {
5387     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5388       if (!isGFX10Plus())
5389         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5390       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5391         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5392     } else {
5393       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5394         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5395     }
5396   }
5397 
5398   if (ID == "wavefront_size") {
5399     if (Header.wavefront_size == 5) {
5400       if (!isGFX10Plus())
5401         return TokError("wavefront_size=5 is only allowed on GFX10+");
5402       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5403         return TokError("wavefront_size=5 requires +WavefrontSize32");
5404     } else if (Header.wavefront_size == 6) {
5405       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5406         return TokError("wavefront_size=6 requires +WavefrontSize64");
5407     }
5408   }
5409 
5410   if (ID == "enable_wgp_mode") {
5411     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5412         !isGFX10Plus())
5413       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5414   }
5415 
5416   if (ID == "enable_mem_ordered") {
5417     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5418         !isGFX10Plus())
5419       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5420   }
5421 
5422   if (ID == "enable_fwd_progress") {
5423     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5424         !isGFX10Plus())
5425       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5426   }
5427 
5428   return false;
5429 }
5430 
5431 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5432   amd_kernel_code_t Header;
5433   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5434 
5435   while (true) {
5436     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5437     // will set the current token to EndOfStatement.
5438     while(trySkipToken(AsmToken::EndOfStatement));
5439 
5440     StringRef ID;
5441     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5442       return true;
5443 
5444     if (ID == ".end_amd_kernel_code_t")
5445       break;
5446 
5447     if (ParseAMDKernelCodeTValue(ID, Header))
5448       return true;
5449   }
5450 
5451   getTargetStreamer().EmitAMDKernelCodeT(Header);
5452 
5453   return false;
5454 }
5455 
5456 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5457   StringRef KernelName;
5458   if (!parseId(KernelName, "expected symbol name"))
5459     return true;
5460 
5461   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5462                                            ELF::STT_AMDGPU_HSA_KERNEL);
5463 
5464   KernelScope.initialize(getContext());
5465   return false;
5466 }
5467 
5468 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5469   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5470     return Error(getLoc(),
5471                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5472                  "architectures");
5473   }
5474 
5475   auto TargetIDDirective = getLexer().getTok().getStringContents();
5476   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5477     return Error(getParser().getTok().getLoc(), "target id must match options");
5478 
5479   getTargetStreamer().EmitISAVersion();
5480   Lex();
5481 
5482   return false;
5483 }
5484 
5485 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5486   const char *AssemblerDirectiveBegin;
5487   const char *AssemblerDirectiveEnd;
5488   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5489       isHsaAbiVersion3AndAbove(&getSTI())
5490           ? std::tuple(HSAMD::V3::AssemblerDirectiveBegin,
5491                        HSAMD::V3::AssemblerDirectiveEnd)
5492           : std::tuple(HSAMD::AssemblerDirectiveBegin,
5493                        HSAMD::AssemblerDirectiveEnd);
5494 
5495   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5496     return Error(getLoc(),
5497                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5498                  "not available on non-amdhsa OSes")).str());
5499   }
5500 
5501   std::string HSAMetadataString;
5502   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5503                           HSAMetadataString))
5504     return true;
5505 
5506   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5507     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5508       return Error(getLoc(), "invalid HSA metadata");
5509   } else {
5510     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5511       return Error(getLoc(), "invalid HSA metadata");
5512   }
5513 
5514   return false;
5515 }
5516 
5517 /// Common code to parse out a block of text (typically YAML) between start and
5518 /// end directives.
5519 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5520                                           const char *AssemblerDirectiveEnd,
5521                                           std::string &CollectString) {
5522 
5523   raw_string_ostream CollectStream(CollectString);
5524 
5525   getLexer().setSkipSpace(false);
5526 
5527   bool FoundEnd = false;
5528   while (!isToken(AsmToken::Eof)) {
5529     while (isToken(AsmToken::Space)) {
5530       CollectStream << getTokenStr();
5531       Lex();
5532     }
5533 
5534     if (trySkipId(AssemblerDirectiveEnd)) {
5535       FoundEnd = true;
5536       break;
5537     }
5538 
5539     CollectStream << Parser.parseStringToEndOfStatement()
5540                   << getContext().getAsmInfo()->getSeparatorString();
5541 
5542     Parser.eatToEndOfStatement();
5543   }
5544 
5545   getLexer().setSkipSpace(true);
5546 
5547   if (isToken(AsmToken::Eof) && !FoundEnd) {
5548     return TokError(Twine("expected directive ") +
5549                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5550   }
5551 
5552   CollectStream.flush();
5553   return false;
5554 }
5555 
5556 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5557 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5558   std::string String;
5559   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5560                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5561     return true;
5562 
5563   auto PALMetadata = getTargetStreamer().getPALMetadata();
5564   if (!PALMetadata->setFromString(String))
5565     return Error(getLoc(), "invalid PAL metadata");
5566   return false;
5567 }
5568 
5569 /// Parse the assembler directive for old linear-format PAL metadata.
5570 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5571   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5572     return Error(getLoc(),
5573                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5574                  "not available on non-amdpal OSes")).str());
5575   }
5576 
5577   auto PALMetadata = getTargetStreamer().getPALMetadata();
5578   PALMetadata->setLegacy();
5579   for (;;) {
5580     uint32_t Key, Value;
5581     if (ParseAsAbsoluteExpression(Key)) {
5582       return TokError(Twine("invalid value in ") +
5583                       Twine(PALMD::AssemblerDirective));
5584     }
5585     if (!trySkipToken(AsmToken::Comma)) {
5586       return TokError(Twine("expected an even number of values in ") +
5587                       Twine(PALMD::AssemblerDirective));
5588     }
5589     if (ParseAsAbsoluteExpression(Value)) {
5590       return TokError(Twine("invalid value in ") +
5591                       Twine(PALMD::AssemblerDirective));
5592     }
5593     PALMetadata->setRegister(Key, Value);
5594     if (!trySkipToken(AsmToken::Comma))
5595       break;
5596   }
5597   return false;
5598 }
5599 
5600 /// ParseDirectiveAMDGPULDS
5601 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5602 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5603   if (getParser().checkForValidSection())
5604     return true;
5605 
5606   StringRef Name;
5607   SMLoc NameLoc = getLoc();
5608   if (getParser().parseIdentifier(Name))
5609     return TokError("expected identifier in directive");
5610 
5611   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5612   if (parseToken(AsmToken::Comma, "expected ','"))
5613     return true;
5614 
5615   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5616 
5617   int64_t Size;
5618   SMLoc SizeLoc = getLoc();
5619   if (getParser().parseAbsoluteExpression(Size))
5620     return true;
5621   if (Size < 0)
5622     return Error(SizeLoc, "size must be non-negative");
5623   if (Size > LocalMemorySize)
5624     return Error(SizeLoc, "size is too large");
5625 
5626   int64_t Alignment = 4;
5627   if (trySkipToken(AsmToken::Comma)) {
5628     SMLoc AlignLoc = getLoc();
5629     if (getParser().parseAbsoluteExpression(Alignment))
5630       return true;
5631     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5632       return Error(AlignLoc, "alignment must be a power of two");
5633 
5634     // Alignment larger than the size of LDS is possible in theory, as long
5635     // as the linker manages to place to symbol at address 0, but we do want
5636     // to make sure the alignment fits nicely into a 32-bit integer.
5637     if (Alignment >= 1u << 31)
5638       return Error(AlignLoc, "alignment is too large");
5639   }
5640 
5641   if (parseEOL())
5642     return true;
5643 
5644   Symbol->redefineIfPossible();
5645   if (!Symbol->isUndefined())
5646     return Error(NameLoc, "invalid symbol redefinition");
5647 
5648   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5649   return false;
5650 }
5651 
5652 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5653   StringRef IDVal = DirectiveID.getString();
5654 
5655   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5656     if (IDVal == ".amdhsa_kernel")
5657      return ParseDirectiveAMDHSAKernel();
5658 
5659     // TODO: Restructure/combine with PAL metadata directive.
5660     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5661       return ParseDirectiveHSAMetadata();
5662   } else {
5663     if (IDVal == ".hsa_code_object_version")
5664       return ParseDirectiveHSACodeObjectVersion();
5665 
5666     if (IDVal == ".hsa_code_object_isa")
5667       return ParseDirectiveHSACodeObjectISA();
5668 
5669     if (IDVal == ".amd_kernel_code_t")
5670       return ParseDirectiveAMDKernelCodeT();
5671 
5672     if (IDVal == ".amdgpu_hsa_kernel")
5673       return ParseDirectiveAMDGPUHsaKernel();
5674 
5675     if (IDVal == ".amd_amdgpu_isa")
5676       return ParseDirectiveISAVersion();
5677 
5678     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5679       return ParseDirectiveHSAMetadata();
5680   }
5681 
5682   if (IDVal == ".amdgcn_target")
5683     return ParseDirectiveAMDGCNTarget();
5684 
5685   if (IDVal == ".amdgpu_lds")
5686     return ParseDirectiveAMDGPULDS();
5687 
5688   if (IDVal == PALMD::AssemblerDirectiveBegin)
5689     return ParseDirectivePALMetadataBegin();
5690 
5691   if (IDVal == PALMD::AssemblerDirective)
5692     return ParseDirectivePALMetadata();
5693 
5694   return true;
5695 }
5696 
5697 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5698                                            unsigned RegNo) {
5699 
5700   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5701     return isGFX9Plus();
5702 
5703   // GFX10+ has 2 more SGPRs 104 and 105.
5704   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5705     return hasSGPR104_SGPR105();
5706 
5707   switch (RegNo) {
5708   case AMDGPU::SRC_SHARED_BASE_LO:
5709   case AMDGPU::SRC_SHARED_BASE:
5710   case AMDGPU::SRC_SHARED_LIMIT_LO:
5711   case AMDGPU::SRC_SHARED_LIMIT:
5712   case AMDGPU::SRC_PRIVATE_BASE_LO:
5713   case AMDGPU::SRC_PRIVATE_BASE:
5714   case AMDGPU::SRC_PRIVATE_LIMIT_LO:
5715   case AMDGPU::SRC_PRIVATE_LIMIT:
5716     return isGFX9Plus();
5717   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5718     return isGFX9Plus() && !isGFX11Plus();
5719   case AMDGPU::TBA:
5720   case AMDGPU::TBA_LO:
5721   case AMDGPU::TBA_HI:
5722   case AMDGPU::TMA:
5723   case AMDGPU::TMA_LO:
5724   case AMDGPU::TMA_HI:
5725     return !isGFX9Plus();
5726   case AMDGPU::XNACK_MASK:
5727   case AMDGPU::XNACK_MASK_LO:
5728   case AMDGPU::XNACK_MASK_HI:
5729     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5730   case AMDGPU::SGPR_NULL:
5731     return isGFX10Plus();
5732   default:
5733     break;
5734   }
5735 
5736   if (isCI())
5737     return true;
5738 
5739   if (isSI() || isGFX10Plus()) {
5740     // No flat_scr on SI.
5741     // On GFX10Plus flat scratch is not a valid register operand and can only be
5742     // accessed with s_setreg/s_getreg.
5743     switch (RegNo) {
5744     case AMDGPU::FLAT_SCR:
5745     case AMDGPU::FLAT_SCR_LO:
5746     case AMDGPU::FLAT_SCR_HI:
5747       return false;
5748     default:
5749       return true;
5750     }
5751   }
5752 
5753   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5754   // SI/CI have.
5755   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5756     return hasSGPR102_SGPR103();
5757 
5758   return true;
5759 }
5760 
5761 OperandMatchResultTy
5762 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5763                               OperandMode Mode) {
5764   OperandMatchResultTy ResTy = parseVOPD(Operands);
5765   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5766       isToken(AsmToken::EndOfStatement))
5767     return ResTy;
5768 
5769   // Try to parse with a custom parser
5770   ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5771 
5772   // If we successfully parsed the operand or if there as an error parsing,
5773   // we are done.
5774   //
5775   // If we are parsing after we reach EndOfStatement then this means we
5776   // are appending default values to the Operands list.  This is only done
5777   // by custom parser, so we shouldn't continue on to the generic parsing.
5778   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5779       isToken(AsmToken::EndOfStatement))
5780     return ResTy;
5781 
5782   SMLoc RBraceLoc;
5783   SMLoc LBraceLoc = getLoc();
5784   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5785     unsigned Prefix = Operands.size();
5786 
5787     for (;;) {
5788       auto Loc = getLoc();
5789       ResTy = parseReg(Operands);
5790       if (ResTy == MatchOperand_NoMatch)
5791         Error(Loc, "expected a register");
5792       if (ResTy != MatchOperand_Success)
5793         return MatchOperand_ParseFail;
5794 
5795       RBraceLoc = getLoc();
5796       if (trySkipToken(AsmToken::RBrac))
5797         break;
5798 
5799       if (!skipToken(AsmToken::Comma,
5800                      "expected a comma or a closing square bracket")) {
5801         return MatchOperand_ParseFail;
5802       }
5803     }
5804 
5805     if (Operands.size() - Prefix > 1) {
5806       Operands.insert(Operands.begin() + Prefix,
5807                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5808       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5809     }
5810 
5811     return MatchOperand_Success;
5812   }
5813 
5814   return parseRegOrImm(Operands);
5815 }
5816 
5817 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5818   // Clear any forced encodings from the previous instruction.
5819   setForcedEncodingSize(0);
5820   setForcedDPP(false);
5821   setForcedSDWA(false);
5822 
5823   if (Name.endswith("_e64_dpp")) {
5824     setForcedDPP(true);
5825     setForcedEncodingSize(64);
5826     return Name.substr(0, Name.size() - 8);
5827   } else if (Name.endswith("_e64")) {
5828     setForcedEncodingSize(64);
5829     return Name.substr(0, Name.size() - 4);
5830   } else if (Name.endswith("_e32")) {
5831     setForcedEncodingSize(32);
5832     return Name.substr(0, Name.size() - 4);
5833   } else if (Name.endswith("_dpp")) {
5834     setForcedDPP(true);
5835     return Name.substr(0, Name.size() - 4);
5836   } else if (Name.endswith("_sdwa")) {
5837     setForcedSDWA(true);
5838     return Name.substr(0, Name.size() - 5);
5839   }
5840   return Name;
5841 }
5842 
5843 static void applyMnemonicAliases(StringRef &Mnemonic,
5844                                  const FeatureBitset &Features,
5845                                  unsigned VariantID);
5846 
5847 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5848                                        StringRef Name,
5849                                        SMLoc NameLoc, OperandVector &Operands) {
5850   // Add the instruction mnemonic
5851   Name = parseMnemonicSuffix(Name);
5852 
5853   // If the target architecture uses MnemonicAlias, call it here to parse
5854   // operands correctly.
5855   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5856 
5857   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5858 
5859   bool IsMIMG = Name.startswith("image_");
5860 
5861   while (!trySkipToken(AsmToken::EndOfStatement)) {
5862     OperandMode Mode = OperandMode_Default;
5863     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5864       Mode = OperandMode_NSA;
5865     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5866 
5867     if (Res != MatchOperand_Success) {
5868       checkUnsupportedInstruction(Name, NameLoc);
5869       if (!Parser.hasPendingError()) {
5870         // FIXME: use real operand location rather than the current location.
5871         StringRef Msg =
5872           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5873                                             "not a valid operand.";
5874         Error(getLoc(), Msg);
5875       }
5876       while (!trySkipToken(AsmToken::EndOfStatement)) {
5877         lex();
5878       }
5879       return true;
5880     }
5881 
5882     // Eat the comma or space if there is one.
5883     trySkipToken(AsmToken::Comma);
5884   }
5885 
5886   return false;
5887 }
5888 
5889 //===----------------------------------------------------------------------===//
5890 // Utility functions
5891 //===----------------------------------------------------------------------===//
5892 
5893 OperandMatchResultTy AMDGPUAsmParser::parseTokenOp(StringRef Name,
5894                                                    OperandVector &Operands) {
5895   SMLoc S = getLoc();
5896   if (!trySkipId(Name))
5897     return MatchOperand_NoMatch;
5898 
5899   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
5900   return MatchOperand_Success;
5901 }
5902 
5903 OperandMatchResultTy
5904 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5905 
5906   if (!trySkipId(Prefix, AsmToken::Colon))
5907     return MatchOperand_NoMatch;
5908 
5909   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5910 }
5911 
5912 OperandMatchResultTy
5913 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5914                                     AMDGPUOperand::ImmTy ImmTy,
5915                                     bool (*ConvertResult)(int64_t&)) {
5916   SMLoc S = getLoc();
5917   int64_t Value = 0;
5918 
5919   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5920   if (Res != MatchOperand_Success)
5921     return Res;
5922 
5923   if (ConvertResult && !ConvertResult(Value)) {
5924     Error(S, "invalid " + StringRef(Prefix) + " value.");
5925   }
5926 
5927   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5928   return MatchOperand_Success;
5929 }
5930 
5931 OperandMatchResultTy
5932 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5933                                              OperandVector &Operands,
5934                                              AMDGPUOperand::ImmTy ImmTy,
5935                                              bool (*ConvertResult)(int64_t&)) {
5936   SMLoc S = getLoc();
5937   if (!trySkipId(Prefix, AsmToken::Colon))
5938     return MatchOperand_NoMatch;
5939 
5940   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5941     return MatchOperand_ParseFail;
5942 
5943   unsigned Val = 0;
5944   const unsigned MaxSize = 4;
5945 
5946   // FIXME: How to verify the number of elements matches the number of src
5947   // operands?
5948   for (int I = 0; ; ++I) {
5949     int64_t Op;
5950     SMLoc Loc = getLoc();
5951     if (!parseExpr(Op))
5952       return MatchOperand_ParseFail;
5953 
5954     if (Op != 0 && Op != 1) {
5955       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5956       return MatchOperand_ParseFail;
5957     }
5958 
5959     Val |= (Op << I);
5960 
5961     if (trySkipToken(AsmToken::RBrac))
5962       break;
5963 
5964     if (I + 1 == MaxSize) {
5965       Error(getLoc(), "expected a closing square bracket");
5966       return MatchOperand_ParseFail;
5967     }
5968 
5969     if (!skipToken(AsmToken::Comma, "expected a comma"))
5970       return MatchOperand_ParseFail;
5971   }
5972 
5973   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5974   return MatchOperand_Success;
5975 }
5976 
5977 OperandMatchResultTy
5978 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5979                                AMDGPUOperand::ImmTy ImmTy) {
5980   int64_t Bit;
5981   SMLoc S = getLoc();
5982 
5983   if (trySkipId(Name)) {
5984     Bit = 1;
5985   } else if (trySkipId("no", Name)) {
5986     Bit = 0;
5987   } else {
5988     return MatchOperand_NoMatch;
5989   }
5990 
5991   if (Name == "r128" && !hasMIMG_R128()) {
5992     Error(S, "r128 modifier is not supported on this GPU");
5993     return MatchOperand_ParseFail;
5994   }
5995   if (Name == "a16" && !hasA16()) {
5996     Error(S, "a16 modifier is not supported on this GPU");
5997     return MatchOperand_ParseFail;
5998   }
5999 
6000   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6001     ImmTy = AMDGPUOperand::ImmTyR128A16;
6002 
6003   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6004   return MatchOperand_Success;
6005 }
6006 
6007 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6008                                       bool &Disabling) const {
6009   Disabling = Id.startswith("no");
6010 
6011   if (isGFX940() && !Mnemo.startswith("s_")) {
6012     return StringSwitch<unsigned>(Id)
6013         .Case("nt", AMDGPU::CPol::NT)
6014         .Case("nont", AMDGPU::CPol::NT)
6015         .Case("sc0", AMDGPU::CPol::SC0)
6016         .Case("nosc0", AMDGPU::CPol::SC0)
6017         .Case("sc1", AMDGPU::CPol::SC1)
6018         .Case("nosc1", AMDGPU::CPol::SC1)
6019         .Default(0);
6020   }
6021 
6022   return StringSwitch<unsigned>(Id)
6023       .Case("dlc", AMDGPU::CPol::DLC)
6024       .Case("nodlc", AMDGPU::CPol::DLC)
6025       .Case("glc", AMDGPU::CPol::GLC)
6026       .Case("noglc", AMDGPU::CPol::GLC)
6027       .Case("scc", AMDGPU::CPol::SCC)
6028       .Case("noscc", AMDGPU::CPol::SCC)
6029       .Case("slc", AMDGPU::CPol::SLC)
6030       .Case("noslc", AMDGPU::CPol::SLC)
6031       .Default(0);
6032 }
6033 
6034 OperandMatchResultTy
6035 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6036   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6037   SMLoc OpLoc = getLoc();
6038   unsigned Enabled = 0, Seen = 0;
6039   for (;;) {
6040     SMLoc S = getLoc();
6041     bool Disabling;
6042     unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6043     if (!CPol)
6044       break;
6045 
6046     lex();
6047 
6048     if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) {
6049       Error(S, "dlc modifier is not supported on this GPU");
6050       return MatchOperand_ParseFail;
6051     }
6052 
6053     if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) {
6054       Error(S, "scc modifier is not supported on this GPU");
6055       return MatchOperand_ParseFail;
6056     }
6057 
6058     if (Seen & CPol) {
6059       Error(S, "duplicate cache policy modifier");
6060       return MatchOperand_ParseFail;
6061     }
6062 
6063     if (!Disabling)
6064       Enabled |= CPol;
6065 
6066     Seen |= CPol;
6067   }
6068 
6069   if (!Seen)
6070     return MatchOperand_NoMatch;
6071 
6072   Operands.push_back(
6073       AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6074   return MatchOperand_Success;
6075 }
6076 
6077 static void addOptionalImmOperand(
6078   MCInst& Inst, const OperandVector& Operands,
6079   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6080   AMDGPUOperand::ImmTy ImmT,
6081   int64_t Default = 0) {
6082   auto i = OptionalIdx.find(ImmT);
6083   if (i != OptionalIdx.end()) {
6084     unsigned Idx = i->second;
6085     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6086   } else {
6087     Inst.addOperand(MCOperand::createImm(Default));
6088   }
6089 }
6090 
6091 OperandMatchResultTy
6092 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6093                                        StringRef &Value,
6094                                        SMLoc &StringLoc) {
6095   if (!trySkipId(Prefix, AsmToken::Colon))
6096     return MatchOperand_NoMatch;
6097 
6098   StringLoc = getLoc();
6099   return parseId(Value, "expected an identifier") ? MatchOperand_Success
6100                                                   : MatchOperand_ParseFail;
6101 }
6102 
6103 //===----------------------------------------------------------------------===//
6104 // MTBUF format
6105 //===----------------------------------------------------------------------===//
6106 
6107 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6108                                   int64_t MaxVal,
6109                                   int64_t &Fmt) {
6110   int64_t Val;
6111   SMLoc Loc = getLoc();
6112 
6113   auto Res = parseIntWithPrefix(Pref, Val);
6114   if (Res == MatchOperand_ParseFail)
6115     return false;
6116   if (Res == MatchOperand_NoMatch)
6117     return true;
6118 
6119   if (Val < 0 || Val > MaxVal) {
6120     Error(Loc, Twine("out of range ", StringRef(Pref)));
6121     return false;
6122   }
6123 
6124   Fmt = Val;
6125   return true;
6126 }
6127 
6128 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6129 // values to live in a joint format operand in the MCInst encoding.
6130 OperandMatchResultTy
6131 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6132   using namespace llvm::AMDGPU::MTBUFFormat;
6133 
6134   int64_t Dfmt = DFMT_UNDEF;
6135   int64_t Nfmt = NFMT_UNDEF;
6136 
6137   // dfmt and nfmt can appear in either order, and each is optional.
6138   for (int I = 0; I < 2; ++I) {
6139     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6140       return MatchOperand_ParseFail;
6141 
6142     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
6143       return MatchOperand_ParseFail;
6144     }
6145     // Skip optional comma between dfmt/nfmt
6146     // but guard against 2 commas following each other.
6147     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6148         !peekToken().is(AsmToken::Comma)) {
6149       trySkipToken(AsmToken::Comma);
6150     }
6151   }
6152 
6153   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6154     return MatchOperand_NoMatch;
6155 
6156   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6157   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6158 
6159   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6160   return MatchOperand_Success;
6161 }
6162 
6163 OperandMatchResultTy
6164 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6165   using namespace llvm::AMDGPU::MTBUFFormat;
6166 
6167   int64_t Fmt = UFMT_UNDEF;
6168 
6169   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6170     return MatchOperand_ParseFail;
6171 
6172   if (Fmt == UFMT_UNDEF)
6173     return MatchOperand_NoMatch;
6174 
6175   Format = Fmt;
6176   return MatchOperand_Success;
6177 }
6178 
6179 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6180                                     int64_t &Nfmt,
6181                                     StringRef FormatStr,
6182                                     SMLoc Loc) {
6183   using namespace llvm::AMDGPU::MTBUFFormat;
6184   int64_t Format;
6185 
6186   Format = getDfmt(FormatStr);
6187   if (Format != DFMT_UNDEF) {
6188     Dfmt = Format;
6189     return true;
6190   }
6191 
6192   Format = getNfmt(FormatStr, getSTI());
6193   if (Format != NFMT_UNDEF) {
6194     Nfmt = Format;
6195     return true;
6196   }
6197 
6198   Error(Loc, "unsupported format");
6199   return false;
6200 }
6201 
6202 OperandMatchResultTy
6203 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6204                                           SMLoc FormatLoc,
6205                                           int64_t &Format) {
6206   using namespace llvm::AMDGPU::MTBUFFormat;
6207 
6208   int64_t Dfmt = DFMT_UNDEF;
6209   int64_t Nfmt = NFMT_UNDEF;
6210   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6211     return MatchOperand_ParseFail;
6212 
6213   if (trySkipToken(AsmToken::Comma)) {
6214     StringRef Str;
6215     SMLoc Loc = getLoc();
6216     if (!parseId(Str, "expected a format string") ||
6217         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6218       return MatchOperand_ParseFail;
6219     }
6220     if (Dfmt == DFMT_UNDEF) {
6221       Error(Loc, "duplicate numeric format");
6222       return MatchOperand_ParseFail;
6223     } else if (Nfmt == NFMT_UNDEF) {
6224       Error(Loc, "duplicate data format");
6225       return MatchOperand_ParseFail;
6226     }
6227   }
6228 
6229   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6230   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6231 
6232   if (isGFX10Plus()) {
6233     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6234     if (Ufmt == UFMT_UNDEF) {
6235       Error(FormatLoc, "unsupported format");
6236       return MatchOperand_ParseFail;
6237     }
6238     Format = Ufmt;
6239   } else {
6240     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6241   }
6242 
6243   return MatchOperand_Success;
6244 }
6245 
6246 OperandMatchResultTy
6247 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6248                                             SMLoc Loc,
6249                                             int64_t &Format) {
6250   using namespace llvm::AMDGPU::MTBUFFormat;
6251 
6252   auto Id = getUnifiedFormat(FormatStr, getSTI());
6253   if (Id == UFMT_UNDEF)
6254     return MatchOperand_NoMatch;
6255 
6256   if (!isGFX10Plus()) {
6257     Error(Loc, "unified format is not supported on this GPU");
6258     return MatchOperand_ParseFail;
6259   }
6260 
6261   Format = Id;
6262   return MatchOperand_Success;
6263 }
6264 
6265 OperandMatchResultTy
6266 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6267   using namespace llvm::AMDGPU::MTBUFFormat;
6268   SMLoc Loc = getLoc();
6269 
6270   if (!parseExpr(Format))
6271     return MatchOperand_ParseFail;
6272   if (!isValidFormatEncoding(Format, getSTI())) {
6273     Error(Loc, "out of range format");
6274     return MatchOperand_ParseFail;
6275   }
6276 
6277   return MatchOperand_Success;
6278 }
6279 
6280 OperandMatchResultTy
6281 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6282   using namespace llvm::AMDGPU::MTBUFFormat;
6283 
6284   if (!trySkipId("format", AsmToken::Colon))
6285     return MatchOperand_NoMatch;
6286 
6287   if (trySkipToken(AsmToken::LBrac)) {
6288     StringRef FormatStr;
6289     SMLoc Loc = getLoc();
6290     if (!parseId(FormatStr, "expected a format string"))
6291       return MatchOperand_ParseFail;
6292 
6293     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6294     if (Res == MatchOperand_NoMatch)
6295       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6296     if (Res != MatchOperand_Success)
6297       return Res;
6298 
6299     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6300       return MatchOperand_ParseFail;
6301 
6302     return MatchOperand_Success;
6303   }
6304 
6305   return parseNumericFormat(Format);
6306 }
6307 
6308 OperandMatchResultTy
6309 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6310   using namespace llvm::AMDGPU::MTBUFFormat;
6311 
6312   int64_t Format = getDefaultFormatEncoding(getSTI());
6313   OperandMatchResultTy Res;
6314   SMLoc Loc = getLoc();
6315 
6316   // Parse legacy format syntax.
6317   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6318   if (Res == MatchOperand_ParseFail)
6319     return Res;
6320 
6321   bool FormatFound = (Res == MatchOperand_Success);
6322 
6323   Operands.push_back(
6324     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6325 
6326   if (FormatFound)
6327     trySkipToken(AsmToken::Comma);
6328 
6329   if (isToken(AsmToken::EndOfStatement)) {
6330     // We are expecting an soffset operand,
6331     // but let matcher handle the error.
6332     return MatchOperand_Success;
6333   }
6334 
6335   // Parse soffset.
6336   Res = parseRegOrImm(Operands);
6337   if (Res != MatchOperand_Success)
6338     return Res;
6339 
6340   trySkipToken(AsmToken::Comma);
6341 
6342   if (!FormatFound) {
6343     Res = parseSymbolicOrNumericFormat(Format);
6344     if (Res == MatchOperand_ParseFail)
6345       return Res;
6346     if (Res == MatchOperand_Success) {
6347       auto Size = Operands.size();
6348       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6349       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6350       Op.setImm(Format);
6351     }
6352     return MatchOperand_Success;
6353   }
6354 
6355   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6356     Error(getLoc(), "duplicate format");
6357     return MatchOperand_ParseFail;
6358   }
6359   return MatchOperand_Success;
6360 }
6361 
6362 OperandMatchResultTy AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6363   OperandMatchResultTy Res =
6364       parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6365   if (Res == MatchOperand_NoMatch) {
6366     Res = parseIntWithPrefix("inst_offset", Operands,
6367                              AMDGPUOperand::ImmTyInstOffset);
6368   }
6369   return Res;
6370 }
6371 
6372 OperandMatchResultTy AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6373   OperandMatchResultTy Res =
6374       parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6375   if (Res == MatchOperand_NoMatch)
6376     Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6377   return Res;
6378 }
6379 
6380 //===----------------------------------------------------------------------===//
6381 // ds
6382 //===----------------------------------------------------------------------===//
6383 
6384 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6385                                     const OperandVector &Operands) {
6386   OptionalImmIndexMap OptionalIdx;
6387 
6388   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6389     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6390 
6391     // Add the register arguments
6392     if (Op.isReg()) {
6393       Op.addRegOperands(Inst, 1);
6394       continue;
6395     }
6396 
6397     // Handle optional arguments
6398     OptionalIdx[Op.getImmTy()] = i;
6399   }
6400 
6401   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6402   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6403   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6404 
6405   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6406 }
6407 
6408 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6409                                 bool IsGdsHardcoded) {
6410   OptionalImmIndexMap OptionalIdx;
6411   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6412   AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset;
6413 
6414   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6415     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6416 
6417     auto TiedTo =
6418         Desc.getOperandConstraint(Inst.getNumOperands(), MCOI::TIED_TO);
6419 
6420     if (TiedTo != -1) {
6421       assert((unsigned)TiedTo < Inst.getNumOperands());
6422       Inst.addOperand(Inst.getOperand(TiedTo));
6423     }
6424 
6425     // Add the register arguments
6426     if (Op.isReg()) {
6427       Op.addRegOperands(Inst, 1);
6428       continue;
6429     }
6430 
6431     if (Op.isToken() && Op.getToken() == "gds") {
6432       IsGdsHardcoded = true;
6433       continue;
6434     }
6435 
6436     // Handle optional arguments
6437     OptionalIdx[Op.getImmTy()] = i;
6438 
6439     if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle)
6440       OffsetType = AMDGPUOperand::ImmTySwizzle;
6441   }
6442 
6443   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6444 
6445   if (!IsGdsHardcoded) {
6446     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6447   }
6448   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6449 }
6450 
6451 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6452   OptionalImmIndexMap OptionalIdx;
6453 
6454   unsigned OperandIdx[4];
6455   unsigned EnMask = 0;
6456   int SrcIdx = 0;
6457 
6458   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6459     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6460 
6461     // Add the register arguments
6462     if (Op.isReg()) {
6463       assert(SrcIdx < 4);
6464       OperandIdx[SrcIdx] = Inst.size();
6465       Op.addRegOperands(Inst, 1);
6466       ++SrcIdx;
6467       continue;
6468     }
6469 
6470     if (Op.isOff()) {
6471       assert(SrcIdx < 4);
6472       OperandIdx[SrcIdx] = Inst.size();
6473       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6474       ++SrcIdx;
6475       continue;
6476     }
6477 
6478     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6479       Op.addImmOperands(Inst, 1);
6480       continue;
6481     }
6482 
6483     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6484       continue;
6485 
6486     // Handle optional arguments
6487     OptionalIdx[Op.getImmTy()] = i;
6488   }
6489 
6490   assert(SrcIdx == 4);
6491 
6492   bool Compr = false;
6493   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6494     Compr = true;
6495     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6496     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6497     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6498   }
6499 
6500   for (auto i = 0; i < SrcIdx; ++i) {
6501     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6502       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6503     }
6504   }
6505 
6506   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6507   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6508 
6509   Inst.addOperand(MCOperand::createImm(EnMask));
6510 }
6511 
6512 //===----------------------------------------------------------------------===//
6513 // s_waitcnt
6514 //===----------------------------------------------------------------------===//
6515 
6516 static bool
6517 encodeCnt(
6518   const AMDGPU::IsaVersion ISA,
6519   int64_t &IntVal,
6520   int64_t CntVal,
6521   bool Saturate,
6522   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6523   unsigned (*decode)(const IsaVersion &Version, unsigned))
6524 {
6525   bool Failed = false;
6526 
6527   IntVal = encode(ISA, IntVal, CntVal);
6528   if (CntVal != decode(ISA, IntVal)) {
6529     if (Saturate) {
6530       IntVal = encode(ISA, IntVal, -1);
6531     } else {
6532       Failed = true;
6533     }
6534   }
6535   return Failed;
6536 }
6537 
6538 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6539 
6540   SMLoc CntLoc = getLoc();
6541   StringRef CntName = getTokenStr();
6542 
6543   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6544       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6545     return false;
6546 
6547   int64_t CntVal;
6548   SMLoc ValLoc = getLoc();
6549   if (!parseExpr(CntVal))
6550     return false;
6551 
6552   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6553 
6554   bool Failed = true;
6555   bool Sat = CntName.endswith("_sat");
6556 
6557   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6558     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6559   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6560     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6561   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6562     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6563   } else {
6564     Error(CntLoc, "invalid counter name " + CntName);
6565     return false;
6566   }
6567 
6568   if (Failed) {
6569     Error(ValLoc, "too large value for " + CntName);
6570     return false;
6571   }
6572 
6573   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6574     return false;
6575 
6576   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6577     if (isToken(AsmToken::EndOfStatement)) {
6578       Error(getLoc(), "expected a counter name");
6579       return false;
6580     }
6581   }
6582 
6583   return true;
6584 }
6585 
6586 OperandMatchResultTy
6587 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6588   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6589   int64_t Waitcnt = getWaitcntBitMask(ISA);
6590   SMLoc S = getLoc();
6591 
6592   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6593     while (!isToken(AsmToken::EndOfStatement)) {
6594       if (!parseCnt(Waitcnt))
6595         return MatchOperand_ParseFail;
6596     }
6597   } else {
6598     if (!parseExpr(Waitcnt))
6599       return MatchOperand_ParseFail;
6600   }
6601 
6602   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6603   return MatchOperand_Success;
6604 }
6605 
6606 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6607   SMLoc FieldLoc = getLoc();
6608   StringRef FieldName = getTokenStr();
6609   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6610       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6611     return false;
6612 
6613   SMLoc ValueLoc = getLoc();
6614   StringRef ValueName = getTokenStr();
6615   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6616       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6617     return false;
6618 
6619   unsigned Shift;
6620   if (FieldName == "instid0") {
6621     Shift = 0;
6622   } else if (FieldName == "instskip") {
6623     Shift = 4;
6624   } else if (FieldName == "instid1") {
6625     Shift = 7;
6626   } else {
6627     Error(FieldLoc, "invalid field name " + FieldName);
6628     return false;
6629   }
6630 
6631   int Value;
6632   if (Shift == 4) {
6633     // Parse values for instskip.
6634     Value = StringSwitch<int>(ValueName)
6635                 .Case("SAME", 0)
6636                 .Case("NEXT", 1)
6637                 .Case("SKIP_1", 2)
6638                 .Case("SKIP_2", 3)
6639                 .Case("SKIP_3", 4)
6640                 .Case("SKIP_4", 5)
6641                 .Default(-1);
6642   } else {
6643     // Parse values for instid0 and instid1.
6644     Value = StringSwitch<int>(ValueName)
6645                 .Case("NO_DEP", 0)
6646                 .Case("VALU_DEP_1", 1)
6647                 .Case("VALU_DEP_2", 2)
6648                 .Case("VALU_DEP_3", 3)
6649                 .Case("VALU_DEP_4", 4)
6650                 .Case("TRANS32_DEP_1", 5)
6651                 .Case("TRANS32_DEP_2", 6)
6652                 .Case("TRANS32_DEP_3", 7)
6653                 .Case("FMA_ACCUM_CYCLE_1", 8)
6654                 .Case("SALU_CYCLE_1", 9)
6655                 .Case("SALU_CYCLE_2", 10)
6656                 .Case("SALU_CYCLE_3", 11)
6657                 .Default(-1);
6658   }
6659   if (Value < 0) {
6660     Error(ValueLoc, "invalid value name " + ValueName);
6661     return false;
6662   }
6663 
6664   Delay |= Value << Shift;
6665   return true;
6666 }
6667 
6668 OperandMatchResultTy
6669 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6670   int64_t Delay = 0;
6671   SMLoc S = getLoc();
6672 
6673   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6674     do {
6675       if (!parseDelay(Delay))
6676         return MatchOperand_ParseFail;
6677     } while (trySkipToken(AsmToken::Pipe));
6678   } else {
6679     if (!parseExpr(Delay))
6680       return MatchOperand_ParseFail;
6681   }
6682 
6683   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6684   return MatchOperand_Success;
6685 }
6686 
6687 bool
6688 AMDGPUOperand::isSWaitCnt() const {
6689   return isImm();
6690 }
6691 
6692 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6693 
6694 //===----------------------------------------------------------------------===//
6695 // DepCtr
6696 //===----------------------------------------------------------------------===//
6697 
6698 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6699                                   StringRef DepCtrName) {
6700   switch (ErrorId) {
6701   case OPR_ID_UNKNOWN:
6702     Error(Loc, Twine("invalid counter name ", DepCtrName));
6703     return;
6704   case OPR_ID_UNSUPPORTED:
6705     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6706     return;
6707   case OPR_ID_DUPLICATE:
6708     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6709     return;
6710   case OPR_VAL_INVALID:
6711     Error(Loc, Twine("invalid value for ", DepCtrName));
6712     return;
6713   default:
6714     assert(false);
6715   }
6716 }
6717 
6718 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6719 
6720   using namespace llvm::AMDGPU::DepCtr;
6721 
6722   SMLoc DepCtrLoc = getLoc();
6723   StringRef DepCtrName = getTokenStr();
6724 
6725   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6726       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6727     return false;
6728 
6729   int64_t ExprVal;
6730   if (!parseExpr(ExprVal))
6731     return false;
6732 
6733   unsigned PrevOprMask = UsedOprMask;
6734   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6735 
6736   if (CntVal < 0) {
6737     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6738     return false;
6739   }
6740 
6741   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6742     return false;
6743 
6744   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6745     if (isToken(AsmToken::EndOfStatement)) {
6746       Error(getLoc(), "expected a counter name");
6747       return false;
6748     }
6749   }
6750 
6751   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6752   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6753   return true;
6754 }
6755 
6756 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6757   using namespace llvm::AMDGPU::DepCtr;
6758 
6759   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6760   SMLoc Loc = getLoc();
6761 
6762   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6763     unsigned UsedOprMask = 0;
6764     while (!isToken(AsmToken::EndOfStatement)) {
6765       if (!parseDepCtr(DepCtr, UsedOprMask))
6766         return MatchOperand_ParseFail;
6767     }
6768   } else {
6769     if (!parseExpr(DepCtr))
6770       return MatchOperand_ParseFail;
6771   }
6772 
6773   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6774   return MatchOperand_Success;
6775 }
6776 
6777 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6778 
6779 //===----------------------------------------------------------------------===//
6780 // hwreg
6781 //===----------------------------------------------------------------------===//
6782 
6783 bool
6784 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6785                                 OperandInfoTy &Offset,
6786                                 OperandInfoTy &Width) {
6787   using namespace llvm::AMDGPU::Hwreg;
6788 
6789   // The register may be specified by name or using a numeric code
6790   HwReg.Loc = getLoc();
6791   if (isToken(AsmToken::Identifier) &&
6792       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6793     HwReg.IsSymbolic = true;
6794     lex(); // skip register name
6795   } else if (!parseExpr(HwReg.Id, "a register name")) {
6796     return false;
6797   }
6798 
6799   if (trySkipToken(AsmToken::RParen))
6800     return true;
6801 
6802   // parse optional params
6803   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6804     return false;
6805 
6806   Offset.Loc = getLoc();
6807   if (!parseExpr(Offset.Id))
6808     return false;
6809 
6810   if (!skipToken(AsmToken::Comma, "expected a comma"))
6811     return false;
6812 
6813   Width.Loc = getLoc();
6814   return parseExpr(Width.Id) &&
6815          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6816 }
6817 
6818 bool
6819 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6820                                const OperandInfoTy &Offset,
6821                                const OperandInfoTy &Width) {
6822 
6823   using namespace llvm::AMDGPU::Hwreg;
6824 
6825   if (HwReg.IsSymbolic) {
6826     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6827       Error(HwReg.Loc,
6828             "specified hardware register is not supported on this GPU");
6829       return false;
6830     }
6831   } else {
6832     if (!isValidHwreg(HwReg.Id)) {
6833       Error(HwReg.Loc,
6834             "invalid code of hardware register: only 6-bit values are legal");
6835       return false;
6836     }
6837   }
6838   if (!isValidHwregOffset(Offset.Id)) {
6839     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6840     return false;
6841   }
6842   if (!isValidHwregWidth(Width.Id)) {
6843     Error(Width.Loc,
6844           "invalid bitfield width: only values from 1 to 32 are legal");
6845     return false;
6846   }
6847   return true;
6848 }
6849 
6850 OperandMatchResultTy
6851 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6852   using namespace llvm::AMDGPU::Hwreg;
6853 
6854   int64_t ImmVal = 0;
6855   SMLoc Loc = getLoc();
6856 
6857   if (trySkipId("hwreg", AsmToken::LParen)) {
6858     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6859     OperandInfoTy Offset(OFFSET_DEFAULT_);
6860     OperandInfoTy Width(WIDTH_DEFAULT_);
6861     if (parseHwregBody(HwReg, Offset, Width) &&
6862         validateHwreg(HwReg, Offset, Width)) {
6863       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6864     } else {
6865       return MatchOperand_ParseFail;
6866     }
6867   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6868     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6869       Error(Loc, "invalid immediate: only 16-bit values are legal");
6870       return MatchOperand_ParseFail;
6871     }
6872   } else {
6873     return MatchOperand_ParseFail;
6874   }
6875 
6876   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6877   return MatchOperand_Success;
6878 }
6879 
6880 bool AMDGPUOperand::isHwreg() const {
6881   return isImmTy(ImmTyHwreg);
6882 }
6883 
6884 //===----------------------------------------------------------------------===//
6885 // sendmsg
6886 //===----------------------------------------------------------------------===//
6887 
6888 bool
6889 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6890                                   OperandInfoTy &Op,
6891                                   OperandInfoTy &Stream) {
6892   using namespace llvm::AMDGPU::SendMsg;
6893 
6894   Msg.Loc = getLoc();
6895   if (isToken(AsmToken::Identifier) &&
6896       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6897     Msg.IsSymbolic = true;
6898     lex(); // skip message name
6899   } else if (!parseExpr(Msg.Id, "a message name")) {
6900     return false;
6901   }
6902 
6903   if (trySkipToken(AsmToken::Comma)) {
6904     Op.IsDefined = true;
6905     Op.Loc = getLoc();
6906     if (isToken(AsmToken::Identifier) &&
6907         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6908       lex(); // skip operation name
6909     } else if (!parseExpr(Op.Id, "an operation name")) {
6910       return false;
6911     }
6912 
6913     if (trySkipToken(AsmToken::Comma)) {
6914       Stream.IsDefined = true;
6915       Stream.Loc = getLoc();
6916       if (!parseExpr(Stream.Id))
6917         return false;
6918     }
6919   }
6920 
6921   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6922 }
6923 
6924 bool
6925 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6926                                  const OperandInfoTy &Op,
6927                                  const OperandInfoTy &Stream) {
6928   using namespace llvm::AMDGPU::SendMsg;
6929 
6930   // Validation strictness depends on whether message is specified
6931   // in a symbolic or in a numeric form. In the latter case
6932   // only encoding possibility is checked.
6933   bool Strict = Msg.IsSymbolic;
6934 
6935   if (Strict) {
6936     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6937       Error(Msg.Loc, "specified message id is not supported on this GPU");
6938       return false;
6939     }
6940   } else {
6941     if (!isValidMsgId(Msg.Id, getSTI())) {
6942       Error(Msg.Loc, "invalid message id");
6943       return false;
6944     }
6945   }
6946   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6947     if (Op.IsDefined) {
6948       Error(Op.Loc, "message does not support operations");
6949     } else {
6950       Error(Msg.Loc, "missing message operation");
6951     }
6952     return false;
6953   }
6954   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6955     Error(Op.Loc, "invalid operation id");
6956     return false;
6957   }
6958   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6959       Stream.IsDefined) {
6960     Error(Stream.Loc, "message operation does not support streams");
6961     return false;
6962   }
6963   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6964     Error(Stream.Loc, "invalid message stream id");
6965     return false;
6966   }
6967   return true;
6968 }
6969 
6970 OperandMatchResultTy
6971 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6972   using namespace llvm::AMDGPU::SendMsg;
6973 
6974   int64_t ImmVal = 0;
6975   SMLoc Loc = getLoc();
6976 
6977   if (trySkipId("sendmsg", AsmToken::LParen)) {
6978     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6979     OperandInfoTy Op(OP_NONE_);
6980     OperandInfoTy Stream(STREAM_ID_NONE_);
6981     if (parseSendMsgBody(Msg, Op, Stream) &&
6982         validateSendMsg(Msg, Op, Stream)) {
6983       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6984     } else {
6985       return MatchOperand_ParseFail;
6986     }
6987   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6988     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6989       Error(Loc, "invalid immediate: only 16-bit values are legal");
6990       return MatchOperand_ParseFail;
6991     }
6992   } else {
6993     return MatchOperand_ParseFail;
6994   }
6995 
6996   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6997   return MatchOperand_Success;
6998 }
6999 
7000 bool AMDGPUOperand::isSendMsg() const {
7001   return isImmTy(ImmTySendMsg);
7002 }
7003 
7004 //===----------------------------------------------------------------------===//
7005 // v_interp
7006 //===----------------------------------------------------------------------===//
7007 
7008 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7009   StringRef Str;
7010   SMLoc S = getLoc();
7011 
7012   if (!parseId(Str))
7013     return MatchOperand_NoMatch;
7014 
7015   int Slot = StringSwitch<int>(Str)
7016     .Case("p10", 0)
7017     .Case("p20", 1)
7018     .Case("p0", 2)
7019     .Default(-1);
7020 
7021   if (Slot == -1) {
7022     Error(S, "invalid interpolation slot");
7023     return MatchOperand_ParseFail;
7024   }
7025 
7026   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7027                                               AMDGPUOperand::ImmTyInterpSlot));
7028   return MatchOperand_Success;
7029 }
7030 
7031 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7032   StringRef Str;
7033   SMLoc S = getLoc();
7034 
7035   if (!parseId(Str))
7036     return MatchOperand_NoMatch;
7037 
7038   if (!Str.startswith("attr")) {
7039     Error(S, "invalid interpolation attribute");
7040     return MatchOperand_ParseFail;
7041   }
7042 
7043   StringRef Chan = Str.take_back(2);
7044   int AttrChan = StringSwitch<int>(Chan)
7045     .Case(".x", 0)
7046     .Case(".y", 1)
7047     .Case(".z", 2)
7048     .Case(".w", 3)
7049     .Default(-1);
7050   if (AttrChan == -1) {
7051     Error(S, "invalid or missing interpolation attribute channel");
7052     return MatchOperand_ParseFail;
7053   }
7054 
7055   Str = Str.drop_back(2).drop_front(4);
7056 
7057   uint8_t Attr;
7058   if (Str.getAsInteger(10, Attr)) {
7059     Error(S, "invalid or missing interpolation attribute number");
7060     return MatchOperand_ParseFail;
7061   }
7062 
7063   if (Attr > 63) {
7064     Error(S, "out of bounds interpolation attribute number");
7065     return MatchOperand_ParseFail;
7066   }
7067 
7068   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7069 
7070   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7071                                               AMDGPUOperand::ImmTyInterpAttr));
7072   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
7073                                               AMDGPUOperand::ImmTyAttrChan));
7074   return MatchOperand_Success;
7075 }
7076 
7077 //===----------------------------------------------------------------------===//
7078 // exp
7079 //===----------------------------------------------------------------------===//
7080 
7081 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7082   using namespace llvm::AMDGPU::Exp;
7083 
7084   StringRef Str;
7085   SMLoc S = getLoc();
7086 
7087   if (!parseId(Str))
7088     return MatchOperand_NoMatch;
7089 
7090   unsigned Id = getTgtId(Str);
7091   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
7092     Error(S, (Id == ET_INVALID) ?
7093                 "invalid exp target" :
7094                 "exp target is not supported on this GPU");
7095     return MatchOperand_ParseFail;
7096   }
7097 
7098   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7099                                               AMDGPUOperand::ImmTyExpTgt));
7100   return MatchOperand_Success;
7101 }
7102 
7103 //===----------------------------------------------------------------------===//
7104 // parser helpers
7105 //===----------------------------------------------------------------------===//
7106 
7107 bool
7108 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7109   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7110 }
7111 
7112 bool
7113 AMDGPUAsmParser::isId(const StringRef Id) const {
7114   return isId(getToken(), Id);
7115 }
7116 
7117 bool
7118 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7119   return getTokenKind() == Kind;
7120 }
7121 
7122 StringRef AMDGPUAsmParser::getId() const {
7123   return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7124 }
7125 
7126 bool
7127 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7128   if (isId(Id)) {
7129     lex();
7130     return true;
7131   }
7132   return false;
7133 }
7134 
7135 bool
7136 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7137   if (isToken(AsmToken::Identifier)) {
7138     StringRef Tok = getTokenStr();
7139     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
7140       lex();
7141       return true;
7142     }
7143   }
7144   return false;
7145 }
7146 
7147 bool
7148 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7149   if (isId(Id) && peekToken().is(Kind)) {
7150     lex();
7151     lex();
7152     return true;
7153   }
7154   return false;
7155 }
7156 
7157 bool
7158 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7159   if (isToken(Kind)) {
7160     lex();
7161     return true;
7162   }
7163   return false;
7164 }
7165 
7166 bool
7167 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7168                            const StringRef ErrMsg) {
7169   if (!trySkipToken(Kind)) {
7170     Error(getLoc(), ErrMsg);
7171     return false;
7172   }
7173   return true;
7174 }
7175 
7176 bool
7177 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7178   SMLoc S = getLoc();
7179 
7180   const MCExpr *Expr;
7181   if (Parser.parseExpression(Expr))
7182     return false;
7183 
7184   if (Expr->evaluateAsAbsolute(Imm))
7185     return true;
7186 
7187   if (Expected.empty()) {
7188     Error(S, "expected absolute expression");
7189   } else {
7190     Error(S, Twine("expected ", Expected) +
7191              Twine(" or an absolute expression"));
7192   }
7193   return false;
7194 }
7195 
7196 bool
7197 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7198   SMLoc S = getLoc();
7199 
7200   const MCExpr *Expr;
7201   if (Parser.parseExpression(Expr))
7202     return false;
7203 
7204   int64_t IntVal;
7205   if (Expr->evaluateAsAbsolute(IntVal)) {
7206     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7207   } else {
7208     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7209   }
7210   return true;
7211 }
7212 
7213 bool
7214 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7215   if (isToken(AsmToken::String)) {
7216     Val = getToken().getStringContents();
7217     lex();
7218     return true;
7219   } else {
7220     Error(getLoc(), ErrMsg);
7221     return false;
7222   }
7223 }
7224 
7225 bool
7226 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7227   if (isToken(AsmToken::Identifier)) {
7228     Val = getTokenStr();
7229     lex();
7230     return true;
7231   } else {
7232     if (!ErrMsg.empty())
7233       Error(getLoc(), ErrMsg);
7234     return false;
7235   }
7236 }
7237 
7238 AsmToken
7239 AMDGPUAsmParser::getToken() const {
7240   return Parser.getTok();
7241 }
7242 
7243 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7244   return isToken(AsmToken::EndOfStatement)
7245              ? getToken()
7246              : getLexer().peekTok(ShouldSkipSpace);
7247 }
7248 
7249 void
7250 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7251   auto TokCount = getLexer().peekTokens(Tokens);
7252 
7253   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7254     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7255 }
7256 
7257 AsmToken::TokenKind
7258 AMDGPUAsmParser::getTokenKind() const {
7259   return getLexer().getKind();
7260 }
7261 
7262 SMLoc
7263 AMDGPUAsmParser::getLoc() const {
7264   return getToken().getLoc();
7265 }
7266 
7267 StringRef
7268 AMDGPUAsmParser::getTokenStr() const {
7269   return getToken().getString();
7270 }
7271 
7272 void
7273 AMDGPUAsmParser::lex() {
7274   Parser.Lex();
7275 }
7276 
7277 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7278   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7279 }
7280 
7281 SMLoc
7282 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7283                                const OperandVector &Operands) const {
7284   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7285     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7286     if (Test(Op))
7287       return Op.getStartLoc();
7288   }
7289   return getInstLoc(Operands);
7290 }
7291 
7292 SMLoc
7293 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7294                            const OperandVector &Operands) const {
7295   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7296   return getOperandLoc(Test, Operands);
7297 }
7298 
7299 SMLoc
7300 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7301                            const OperandVector &Operands) const {
7302   auto Test = [=](const AMDGPUOperand& Op) {
7303     return Op.isRegKind() && Op.getReg() == Reg;
7304   };
7305   return getOperandLoc(Test, Operands);
7306 }
7307 
7308 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7309                                  bool SearchMandatoryLiterals) const {
7310   auto Test = [](const AMDGPUOperand& Op) {
7311     return Op.IsImmKindLiteral() || Op.isExpr();
7312   };
7313   SMLoc Loc = getOperandLoc(Test, Operands);
7314   if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7315     Loc = getMandatoryLitLoc(Operands);
7316   return Loc;
7317 }
7318 
7319 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7320   auto Test = [](const AMDGPUOperand &Op) {
7321     return Op.IsImmKindMandatoryLiteral();
7322   };
7323   return getOperandLoc(Test, Operands);
7324 }
7325 
7326 SMLoc
7327 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7328   auto Test = [](const AMDGPUOperand& Op) {
7329     return Op.isImmKindConst();
7330   };
7331   return getOperandLoc(Test, Operands);
7332 }
7333 
7334 //===----------------------------------------------------------------------===//
7335 // swizzle
7336 //===----------------------------------------------------------------------===//
7337 
7338 LLVM_READNONE
7339 static unsigned
7340 encodeBitmaskPerm(const unsigned AndMask,
7341                   const unsigned OrMask,
7342                   const unsigned XorMask) {
7343   using namespace llvm::AMDGPU::Swizzle;
7344 
7345   return BITMASK_PERM_ENC |
7346          (AndMask << BITMASK_AND_SHIFT) |
7347          (OrMask  << BITMASK_OR_SHIFT)  |
7348          (XorMask << BITMASK_XOR_SHIFT);
7349 }
7350 
7351 bool
7352 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7353                                      const unsigned MinVal,
7354                                      const unsigned MaxVal,
7355                                      const StringRef ErrMsg,
7356                                      SMLoc &Loc) {
7357   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7358     return false;
7359   }
7360   Loc = getLoc();
7361   if (!parseExpr(Op)) {
7362     return false;
7363   }
7364   if (Op < MinVal || Op > MaxVal) {
7365     Error(Loc, ErrMsg);
7366     return false;
7367   }
7368 
7369   return true;
7370 }
7371 
7372 bool
7373 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7374                                       const unsigned MinVal,
7375                                       const unsigned MaxVal,
7376                                       const StringRef ErrMsg) {
7377   SMLoc Loc;
7378   for (unsigned i = 0; i < OpNum; ++i) {
7379     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7380       return false;
7381   }
7382 
7383   return true;
7384 }
7385 
7386 bool
7387 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7388   using namespace llvm::AMDGPU::Swizzle;
7389 
7390   int64_t Lane[LANE_NUM];
7391   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7392                            "expected a 2-bit lane id")) {
7393     Imm = QUAD_PERM_ENC;
7394     for (unsigned I = 0; I < LANE_NUM; ++I) {
7395       Imm |= Lane[I] << (LANE_SHIFT * I);
7396     }
7397     return true;
7398   }
7399   return false;
7400 }
7401 
7402 bool
7403 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7404   using namespace llvm::AMDGPU::Swizzle;
7405 
7406   SMLoc Loc;
7407   int64_t GroupSize;
7408   int64_t LaneIdx;
7409 
7410   if (!parseSwizzleOperand(GroupSize,
7411                            2, 32,
7412                            "group size must be in the interval [2,32]",
7413                            Loc)) {
7414     return false;
7415   }
7416   if (!isPowerOf2_64(GroupSize)) {
7417     Error(Loc, "group size must be a power of two");
7418     return false;
7419   }
7420   if (parseSwizzleOperand(LaneIdx,
7421                           0, GroupSize - 1,
7422                           "lane id must be in the interval [0,group size - 1]",
7423                           Loc)) {
7424     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7425     return true;
7426   }
7427   return false;
7428 }
7429 
7430 bool
7431 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7432   using namespace llvm::AMDGPU::Swizzle;
7433 
7434   SMLoc Loc;
7435   int64_t GroupSize;
7436 
7437   if (!parseSwizzleOperand(GroupSize,
7438                            2, 32,
7439                            "group size must be in the interval [2,32]",
7440                            Loc)) {
7441     return false;
7442   }
7443   if (!isPowerOf2_64(GroupSize)) {
7444     Error(Loc, "group size must be a power of two");
7445     return false;
7446   }
7447 
7448   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7449   return true;
7450 }
7451 
7452 bool
7453 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7454   using namespace llvm::AMDGPU::Swizzle;
7455 
7456   SMLoc Loc;
7457   int64_t GroupSize;
7458 
7459   if (!parseSwizzleOperand(GroupSize,
7460                            1, 16,
7461                            "group size must be in the interval [1,16]",
7462                            Loc)) {
7463     return false;
7464   }
7465   if (!isPowerOf2_64(GroupSize)) {
7466     Error(Loc, "group size must be a power of two");
7467     return false;
7468   }
7469 
7470   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7471   return true;
7472 }
7473 
7474 bool
7475 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7476   using namespace llvm::AMDGPU::Swizzle;
7477 
7478   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7479     return false;
7480   }
7481 
7482   StringRef Ctl;
7483   SMLoc StrLoc = getLoc();
7484   if (!parseString(Ctl)) {
7485     return false;
7486   }
7487   if (Ctl.size() != BITMASK_WIDTH) {
7488     Error(StrLoc, "expected a 5-character mask");
7489     return false;
7490   }
7491 
7492   unsigned AndMask = 0;
7493   unsigned OrMask = 0;
7494   unsigned XorMask = 0;
7495 
7496   for (size_t i = 0; i < Ctl.size(); ++i) {
7497     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7498     switch(Ctl[i]) {
7499     default:
7500       Error(StrLoc, "invalid mask");
7501       return false;
7502     case '0':
7503       break;
7504     case '1':
7505       OrMask |= Mask;
7506       break;
7507     case 'p':
7508       AndMask |= Mask;
7509       break;
7510     case 'i':
7511       AndMask |= Mask;
7512       XorMask |= Mask;
7513       break;
7514     }
7515   }
7516 
7517   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7518   return true;
7519 }
7520 
7521 bool
7522 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7523 
7524   SMLoc OffsetLoc = getLoc();
7525 
7526   if (!parseExpr(Imm, "a swizzle macro")) {
7527     return false;
7528   }
7529   if (!isUInt<16>(Imm)) {
7530     Error(OffsetLoc, "expected a 16-bit offset");
7531     return false;
7532   }
7533   return true;
7534 }
7535 
7536 bool
7537 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7538   using namespace llvm::AMDGPU::Swizzle;
7539 
7540   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
7541 
7542     SMLoc ModeLoc = getLoc();
7543     bool Ok = false;
7544 
7545     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7546       Ok = parseSwizzleQuadPerm(Imm);
7547     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7548       Ok = parseSwizzleBitmaskPerm(Imm);
7549     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7550       Ok = parseSwizzleBroadcast(Imm);
7551     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7552       Ok = parseSwizzleSwap(Imm);
7553     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7554       Ok = parseSwizzleReverse(Imm);
7555     } else {
7556       Error(ModeLoc, "expected a swizzle mode");
7557     }
7558 
7559     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
7560   }
7561 
7562   return false;
7563 }
7564 
7565 OperandMatchResultTy
7566 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7567   SMLoc S = getLoc();
7568   int64_t Imm = 0;
7569 
7570   if (trySkipId("offset")) {
7571 
7572     bool Ok = false;
7573     if (skipToken(AsmToken::Colon, "expected a colon")) {
7574       if (trySkipId("swizzle")) {
7575         Ok = parseSwizzleMacro(Imm);
7576       } else {
7577         Ok = parseSwizzleOffset(Imm);
7578       }
7579     }
7580 
7581     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7582 
7583     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
7584   }
7585   return MatchOperand_NoMatch;
7586 }
7587 
7588 bool
7589 AMDGPUOperand::isSwizzle() const {
7590   return isImmTy(ImmTySwizzle);
7591 }
7592 
7593 //===----------------------------------------------------------------------===//
7594 // VGPR Index Mode
7595 //===----------------------------------------------------------------------===//
7596 
7597 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7598 
7599   using namespace llvm::AMDGPU::VGPRIndexMode;
7600 
7601   if (trySkipToken(AsmToken::RParen)) {
7602     return OFF;
7603   }
7604 
7605   int64_t Imm = 0;
7606 
7607   while (true) {
7608     unsigned Mode = 0;
7609     SMLoc S = getLoc();
7610 
7611     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7612       if (trySkipId(IdSymbolic[ModeId])) {
7613         Mode = 1 << ModeId;
7614         break;
7615       }
7616     }
7617 
7618     if (Mode == 0) {
7619       Error(S, (Imm == 0)?
7620                "expected a VGPR index mode or a closing parenthesis" :
7621                "expected a VGPR index mode");
7622       return UNDEF;
7623     }
7624 
7625     if (Imm & Mode) {
7626       Error(S, "duplicate VGPR index mode");
7627       return UNDEF;
7628     }
7629     Imm |= Mode;
7630 
7631     if (trySkipToken(AsmToken::RParen))
7632       break;
7633     if (!skipToken(AsmToken::Comma,
7634                    "expected a comma or a closing parenthesis"))
7635       return UNDEF;
7636   }
7637 
7638   return Imm;
7639 }
7640 
7641 OperandMatchResultTy
7642 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7643 
7644   using namespace llvm::AMDGPU::VGPRIndexMode;
7645 
7646   int64_t Imm = 0;
7647   SMLoc S = getLoc();
7648 
7649   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7650     Imm = parseGPRIdxMacro();
7651     if (Imm == UNDEF)
7652       return MatchOperand_ParseFail;
7653   } else {
7654     if (getParser().parseAbsoluteExpression(Imm))
7655       return MatchOperand_ParseFail;
7656     if (Imm < 0 || !isUInt<4>(Imm)) {
7657       Error(S, "invalid immediate: only 4-bit values are legal");
7658       return MatchOperand_ParseFail;
7659     }
7660   }
7661 
7662   Operands.push_back(
7663       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7664   return MatchOperand_Success;
7665 }
7666 
7667 bool AMDGPUOperand::isGPRIdxMode() const {
7668   return isImmTy(ImmTyGprIdxMode);
7669 }
7670 
7671 //===----------------------------------------------------------------------===//
7672 // sopp branch targets
7673 //===----------------------------------------------------------------------===//
7674 
7675 OperandMatchResultTy
7676 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7677 
7678   // Make sure we are not parsing something
7679   // that looks like a label or an expression but is not.
7680   // This will improve error messages.
7681   if (isRegister() || isModifier())
7682     return MatchOperand_NoMatch;
7683 
7684   if (!parseExpr(Operands))
7685     return MatchOperand_ParseFail;
7686 
7687   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7688   assert(Opr.isImm() || Opr.isExpr());
7689   SMLoc Loc = Opr.getStartLoc();
7690 
7691   // Currently we do not support arbitrary expressions as branch targets.
7692   // Only labels and absolute expressions are accepted.
7693   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7694     Error(Loc, "expected an absolute expression or a label");
7695   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7696     Error(Loc, "expected a 16-bit signed jump offset");
7697   }
7698 
7699   return MatchOperand_Success;
7700 }
7701 
7702 //===----------------------------------------------------------------------===//
7703 // Boolean holding registers
7704 //===----------------------------------------------------------------------===//
7705 
7706 OperandMatchResultTy
7707 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7708   return parseReg(Operands);
7709 }
7710 
7711 //===----------------------------------------------------------------------===//
7712 // mubuf
7713 //===----------------------------------------------------------------------===//
7714 
7715 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7716   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7717 }
7718 
7719 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7720                                    const OperandVector &Operands,
7721                                    bool IsAtomic) {
7722   OptionalImmIndexMap OptionalIdx;
7723   unsigned FirstOperandIdx = 1;
7724   bool IsAtomicReturn = false;
7725 
7726   if (IsAtomic) {
7727     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7728       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7729       if (!Op.isCPol())
7730         continue;
7731       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7732       break;
7733     }
7734 
7735     if (!IsAtomicReturn) {
7736       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7737       if (NewOpc != -1)
7738         Inst.setOpcode(NewOpc);
7739     }
7740 
7741     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7742                       SIInstrFlags::IsAtomicRet;
7743   }
7744 
7745   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7746     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7747 
7748     // Add the register arguments
7749     if (Op.isReg()) {
7750       Op.addRegOperands(Inst, 1);
7751       // Insert a tied src for atomic return dst.
7752       // This cannot be postponed as subsequent calls to
7753       // addImmOperands rely on correct number of MC operands.
7754       if (IsAtomicReturn && i == FirstOperandIdx)
7755         Op.addRegOperands(Inst, 1);
7756       continue;
7757     }
7758 
7759     // Handle the case where soffset is an immediate
7760     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7761       Op.addImmOperands(Inst, 1);
7762       continue;
7763     }
7764 
7765     // Handle tokens like 'offen' which are sometimes hard-coded into the
7766     // asm string.  There are no MCInst operands for these.
7767     if (Op.isToken()) {
7768       continue;
7769     }
7770     assert(Op.isImm());
7771 
7772     // Handle optional arguments
7773     OptionalIdx[Op.getImmTy()] = i;
7774   }
7775 
7776   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7777   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7778   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7779 }
7780 
7781 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7782   OptionalImmIndexMap OptionalIdx;
7783 
7784   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7785     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7786 
7787     // Add the register arguments
7788     if (Op.isReg()) {
7789       Op.addRegOperands(Inst, 1);
7790       continue;
7791     }
7792 
7793     // Handle the case where soffset is an immediate
7794     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7795       Op.addImmOperands(Inst, 1);
7796       continue;
7797     }
7798 
7799     // Handle tokens like 'offen' which are sometimes hard-coded into the
7800     // asm string.  There are no MCInst operands for these.
7801     if (Op.isToken()) {
7802       continue;
7803     }
7804     assert(Op.isImm());
7805 
7806     // Handle optional arguments
7807     OptionalIdx[Op.getImmTy()] = i;
7808   }
7809 
7810   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7811                         AMDGPUOperand::ImmTyOffset);
7812   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7813   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7814   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7815 }
7816 
7817 //===----------------------------------------------------------------------===//
7818 // mimg
7819 //===----------------------------------------------------------------------===//
7820 
7821 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7822                               bool IsAtomic) {
7823   unsigned I = 1;
7824   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7825   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7826     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7827   }
7828 
7829   if (IsAtomic) {
7830     // Add src, same as dst
7831     assert(Desc.getNumDefs() == 1);
7832     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7833   }
7834 
7835   OptionalImmIndexMap OptionalIdx;
7836 
7837   for (unsigned E = Operands.size(); I != E; ++I) {
7838     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7839 
7840     // Add the register arguments
7841     if (Op.isReg()) {
7842       Op.addRegOperands(Inst, 1);
7843     } else if (Op.isImmModifier()) {
7844       OptionalIdx[Op.getImmTy()] = I;
7845     } else if (!Op.isToken()) {
7846       llvm_unreachable("unexpected operand type");
7847     }
7848   }
7849 
7850   bool IsGFX10Plus = isGFX10Plus();
7851 
7852   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7853   if (IsGFX10Plus)
7854     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7855   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7856   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7857   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7858   if (IsGFX10Plus)
7859     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7860   if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::tfe))
7861     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7862   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7863   if (!IsGFX10Plus)
7864     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7865   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7866 }
7867 
7868 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7869   cvtMIMG(Inst, Operands, true);
7870 }
7871 
7872 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7873   OptionalImmIndexMap OptionalIdx;
7874   bool IsAtomicReturn = false;
7875 
7876   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7877     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7878     if (!Op.isCPol())
7879       continue;
7880     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7881     break;
7882   }
7883 
7884   if (!IsAtomicReturn) {
7885     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7886     if (NewOpc != -1)
7887       Inst.setOpcode(NewOpc);
7888   }
7889 
7890   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7891                     SIInstrFlags::IsAtomicRet;
7892 
7893   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7894     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7895 
7896     // Add the register arguments
7897     if (Op.isReg()) {
7898       Op.addRegOperands(Inst, 1);
7899       if (IsAtomicReturn && i == 1)
7900         Op.addRegOperands(Inst, 1);
7901       continue;
7902     }
7903 
7904     // Handle the case where soffset is an immediate
7905     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7906       Op.addImmOperands(Inst, 1);
7907       continue;
7908     }
7909 
7910     // Handle tokens like 'offen' which are sometimes hard-coded into the
7911     // asm string.  There are no MCInst operands for these.
7912     if (Op.isToken()) {
7913       continue;
7914     }
7915     assert(Op.isImm());
7916 
7917     // Handle optional arguments
7918     OptionalIdx[Op.getImmTy()] = i;
7919   }
7920 
7921   if ((int)Inst.getNumOperands() <=
7922       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7923     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7924   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7925 }
7926 
7927 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7928                                       const OperandVector &Operands) {
7929   for (unsigned I = 1; I < Operands.size(); ++I) {
7930     auto &Operand = (AMDGPUOperand &)*Operands[I];
7931     if (Operand.isReg())
7932       Operand.addRegOperands(Inst, 1);
7933   }
7934 
7935   Inst.addOperand(MCOperand::createImm(1)); // a16
7936 }
7937 
7938 //===----------------------------------------------------------------------===//
7939 // smrd
7940 //===----------------------------------------------------------------------===//
7941 
7942 bool AMDGPUOperand::isSMRDOffset8() const {
7943   return isImm() && isUInt<8>(getImm());
7944 }
7945 
7946 bool AMDGPUOperand::isSMEMOffset() const {
7947   return isImmTy(ImmTyNone) ||
7948          isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7949 }
7950 
7951 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7952   // 32-bit literals are only supported on CI and we only want to use them
7953   // when the offset is > 8-bits.
7954   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7955 }
7956 
7957 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7958   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7959 }
7960 
7961 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7962   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7963 }
7964 
7965 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7966   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7967 }
7968 
7969 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7970   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7971 }
7972 
7973 //===----------------------------------------------------------------------===//
7974 // vop3
7975 //===----------------------------------------------------------------------===//
7976 
// Rewrites a "mul:N" factor in place: 1 -> 0, 2 -> 1, 4 -> 2.
// Any other value is rejected (returns false) and left untouched.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
    Mul = 0;
    return true;
  case 2:
    Mul = 1;
    return true;
  case 4:
    Mul = 2;
    return true;
  default:
    return false;
  }
}
7984 
// Rewrites a "div:N" divisor in place: 1 -> 0, 2 -> 3.
// Any other value is rejected (returns false) and left untouched.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
7998 
// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
//
// Returns false, leaving the value untouched, for anything other than 0 or 1.
static bool ConvertDppBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl != 0 && BoundCtrl != 1)
    return false;

  BoundCtrl = 1;
  return true;
}
8009 
8010 void AMDGPUAsmParser::onBeginOfFile() {
8011   if (!getParser().getStreamer().getTargetStreamer() ||
8012       getSTI().getTargetTriple().getArch() == Triple::r600)
8013     return;
8014 
8015   if (!getTargetStreamer().getTargetID())
8016     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
8017 
8018   if (isHsaAbiVersion3AndAbove(&getSTI()))
8019     getTargetStreamer().EmitDirectiveAMDGCNTarget();
8020 }
8021 
8022 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
8023   StringRef Name = getTokenStr();
8024   if (Name == "mul") {
8025     return parseIntWithPrefix("mul", Operands,
8026                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8027   }
8028 
8029   if (Name == "div") {
8030     return parseIntWithPrefix("div", Operands,
8031                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8032   }
8033 
8034   return MatchOperand_NoMatch;
8035 }
8036 
8037 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8038 // the number of src operands present, then copies that bit into src0_modifiers.
8039 void cvtVOP3DstOpSelOnly(MCInst &Inst) {
8040   int Opc = Inst.getOpcode();
8041   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8042   if (OpSelIdx == -1)
8043     return;
8044 
8045   int SrcNum;
8046   const int Ops[] = { AMDGPU::OpName::src0,
8047                       AMDGPU::OpName::src1,
8048                       AMDGPU::OpName::src2 };
8049   for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8050        ++SrcNum)
8051     ;
8052   assert(SrcNum > 0);
8053 
8054   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8055 
8056   if ((OpSel & (1 << SrcNum)) != 0) {
8057     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8058     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8059     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8060   }
8061 }
8062 
8063 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8064                                    const OperandVector &Operands) {
8065   cvtVOP3P(Inst, Operands);
8066   cvtVOP3DstOpSelOnly(Inst);
8067 }
8068 
8069 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8070                                    OptionalImmIndexMap &OptionalIdx) {
8071   cvtVOP3P(Inst, Operands, OptionalIdx);
8072   cvtVOP3DstOpSelOnly(Inst);
8073 }
8074 
8075 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8076   return
8077       // 1. This operand is input modifiers
8078       Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8079       // 2. This is not last operand
8080       && Desc.NumOperands > (OpNum + 1)
8081       // 3. Next operand is register class
8082       && Desc.operands()[OpNum + 1].RegClass != -1
8083       // 4. Next register is not tied to any other operand
8084       && Desc.getOperandConstraint(OpNum + 1,
8085                                    MCOI::OperandConstraint::TIED_TO) == -1;
8086 }
8087 
8088 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8089 {
8090   OptionalImmIndexMap OptionalIdx;
8091   unsigned Opc = Inst.getOpcode();
8092 
8093   unsigned I = 1;
8094   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8095   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8096     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8097   }
8098 
8099   for (unsigned E = Operands.size(); I != E; ++I) {
8100     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8101     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8102       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8103     } else if (Op.isInterpSlot() ||
8104                Op.isInterpAttr() ||
8105                Op.isAttrChan()) {
8106       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8107     } else if (Op.isImmModifier()) {
8108       OptionalIdx[Op.getImmTy()] = I;
8109     } else {
8110       llvm_unreachable("unhandled operand type");
8111     }
8112   }
8113 
8114   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8115     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8116                           AMDGPUOperand::ImmTyHigh);
8117 
8118   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8119     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8120                           AMDGPUOperand::ImmTyClampSI);
8121 
8122   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8123     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8124                           AMDGPUOperand::ImmTyOModSI);
8125 }
8126 
8127 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8128 {
8129   OptionalImmIndexMap OptionalIdx;
8130   unsigned Opc = Inst.getOpcode();
8131 
8132   unsigned I = 1;
8133   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8134   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8135     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8136   }
8137 
8138   for (unsigned E = Operands.size(); I != E; ++I) {
8139     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8140     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8141       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8142     } else if (Op.isImmModifier()) {
8143       OptionalIdx[Op.getImmTy()] = I;
8144     } else {
8145       llvm_unreachable("unhandled operand type");
8146     }
8147   }
8148 
8149   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8150 
8151   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8152   if (OpSelIdx != -1)
8153     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8154 
8155   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8156 
8157   if (OpSelIdx == -1)
8158     return;
8159 
8160   const int Ops[] = { AMDGPU::OpName::src0,
8161                       AMDGPU::OpName::src1,
8162                       AMDGPU::OpName::src2 };
8163   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8164                          AMDGPU::OpName::src1_modifiers,
8165                          AMDGPU::OpName::src2_modifiers };
8166 
8167   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8168 
8169   for (int J = 0; J < 3; ++J) {
8170     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8171     if (OpIdx == -1)
8172       break;
8173 
8174     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8175     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8176 
8177     if ((OpSel & (1 << J)) != 0)
8178       ModVal |= SISrcMods::OP_SEL_0;
8179     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8180         (OpSel & (1 << 3)) != 0)
8181       ModVal |= SISrcMods::DST_OP_SEL;
8182 
8183     Inst.getOperand(ModIdx).setImm(ModVal);
8184   }
8185 }
8186 
8187 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8188                               OptionalImmIndexMap &OptionalIdx) {
8189   unsigned Opc = Inst.getOpcode();
8190 
8191   unsigned I = 1;
8192   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8193   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8194     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8195   }
8196 
8197   for (unsigned E = Operands.size(); I != E; ++I) {
8198     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8199     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8200       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8201     } else if (Op.isImmModifier()) {
8202       OptionalIdx[Op.getImmTy()] = I;
8203     } else if (Op.isRegOrImm()) {
8204       Op.addRegOrImmOperands(Inst, 1);
8205     } else {
8206       llvm_unreachable("unhandled operand type");
8207     }
8208   }
8209 
8210   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8211     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8212                           AMDGPUOperand::ImmTyClampSI);
8213 
8214   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8215     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8216                           AMDGPUOperand::ImmTyOModSI);
8217 
8218   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8219   // it has src2 register operand that is tied to dst operand
8220   // we don't allow modifiers for this operand in assembler so src2_modifiers
8221   // should be 0.
8222   if (isMAC(Opc)) {
8223     auto it = Inst.begin();
8224     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8225     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8226     ++it;
8227     // Copy the operand to ensure it's not invalidated when Inst grows.
8228     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8229   }
8230 }
8231 
8232 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8233   OptionalImmIndexMap OptionalIdx;
8234   cvtVOP3(Inst, Operands, OptionalIdx);
8235 }
8236 
8237 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8238                                OptionalImmIndexMap &OptIdx) {
8239   const int Opc = Inst.getOpcode();
8240   const MCInstrDesc &Desc = MII.get(Opc);
8241 
8242   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8243 
8244   if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8245       Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
8246     Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8247     Inst.addOperand(Inst.getOperand(0));
8248   }
8249 
8250   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) {
8251     assert(!IsPacked);
8252     Inst.addOperand(Inst.getOperand(0));
8253   }
8254 
8255   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8256   // instruction, and then figure out where to actually put the modifiers
8257 
8258   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8259   if (OpSelIdx != -1) {
8260     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8261   }
8262 
8263   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8264   if (OpSelHiIdx != -1) {
8265     int DefaultVal = IsPacked ? -1 : 0;
8266     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8267                           DefaultVal);
8268   }
8269 
8270   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8271   if (NegLoIdx != -1) {
8272     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8273     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8274   }
8275 
8276   const int Ops[] = { AMDGPU::OpName::src0,
8277                       AMDGPU::OpName::src1,
8278                       AMDGPU::OpName::src2 };
8279   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8280                          AMDGPU::OpName::src1_modifiers,
8281                          AMDGPU::OpName::src2_modifiers };
8282 
8283   unsigned OpSel = 0;
8284   unsigned OpSelHi = 0;
8285   unsigned NegLo = 0;
8286   unsigned NegHi = 0;
8287 
8288   if (OpSelIdx != -1)
8289     OpSel = Inst.getOperand(OpSelIdx).getImm();
8290 
8291   if (OpSelHiIdx != -1)
8292     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8293 
8294   if (NegLoIdx != -1) {
8295     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8296     NegLo = Inst.getOperand(NegLoIdx).getImm();
8297     NegHi = Inst.getOperand(NegHiIdx).getImm();
8298   }
8299 
8300   for (int J = 0; J < 3; ++J) {
8301     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8302     if (OpIdx == -1)
8303       break;
8304 
8305     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8306 
8307     if (ModIdx == -1)
8308       continue;
8309 
8310     uint32_t ModVal = 0;
8311 
8312     if ((OpSel & (1 << J)) != 0)
8313       ModVal |= SISrcMods::OP_SEL_0;
8314 
8315     if ((OpSelHi & (1 << J)) != 0)
8316       ModVal |= SISrcMods::OP_SEL_1;
8317 
8318     if ((NegLo & (1 << J)) != 0)
8319       ModVal |= SISrcMods::NEG;
8320 
8321     if ((NegHi & (1 << J)) != 0)
8322       ModVal |= SISrcMods::NEG_HI;
8323 
8324     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8325   }
8326 }
8327 
8328 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8329   OptionalImmIndexMap OptIdx;
8330   cvtVOP3(Inst, Operands, OptIdx);
8331   cvtVOP3P(Inst, Operands, OptIdx);
8332 }
8333 
8334 //===----------------------------------------------------------------------===//
8335 // VOPD
8336 //===----------------------------------------------------------------------===//
8337 
8338 OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8339   if (!hasVOPD(getSTI()))
8340     return MatchOperand_NoMatch;
8341 
8342   if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8343     SMLoc S = getLoc();
8344     lex();
8345     lex();
8346     Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8347     SMLoc OpYLoc = getLoc();
8348     StringRef OpYName;
8349     if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8350       Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8351       return MatchOperand_Success;
8352     }
8353     Error(OpYLoc, "expected a VOPDY instruction after ::");
8354     return MatchOperand_ParseFail;
8355   }
8356   return MatchOperand_NoMatch;
8357 }
8358 
8359 // Create VOPD MCInst operands using parsed assembler operands.
8360 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8361   auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8362     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8363     if (Op.isReg()) {
8364       Op.addRegOperands(Inst, 1);
8365       return;
8366     }
8367     if (Op.isImm()) {
8368       Op.addImmOperands(Inst, 1);
8369       return;
8370     }
8371     llvm_unreachable("Unhandled operand type in cvtVOPD");
8372   };
8373 
8374   const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8375 
8376   // MCInst operands are ordered as follows:
8377   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8378 
8379   for (auto CompIdx : VOPD::COMPONENTS) {
8380     addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8381   }
8382 
8383   for (auto CompIdx : VOPD::COMPONENTS) {
8384     const auto &CInfo = InstInfo[CompIdx];
8385     auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8386     for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8387       addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8388     if (CInfo.hasSrc2Acc())
8389       addOp(CInfo.getIndexOfDstInParsedOperands());
8390   }
8391 }
8392 
8393 //===----------------------------------------------------------------------===//
8394 // dpp
8395 //===----------------------------------------------------------------------===//
8396 
8397 bool AMDGPUOperand::isDPP8() const {
8398   return isImmTy(ImmTyDPP8);
8399 }
8400 
8401 bool AMDGPUOperand::isDPPCtrl() const {
8402   using namespace AMDGPU::DPP;
8403 
8404   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8405   if (result) {
8406     int64_t Imm = getImm();
8407     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8408            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8409            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8410            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8411            (Imm == DppCtrl::WAVE_SHL1) ||
8412            (Imm == DppCtrl::WAVE_ROL1) ||
8413            (Imm == DppCtrl::WAVE_SHR1) ||
8414            (Imm == DppCtrl::WAVE_ROR1) ||
8415            (Imm == DppCtrl::ROW_MIRROR) ||
8416            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8417            (Imm == DppCtrl::BCAST15) ||
8418            (Imm == DppCtrl::BCAST31) ||
8419            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8420            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8421   }
8422   return false;
8423 }
8424 
8425 //===----------------------------------------------------------------------===//
8426 // mAI
8427 //===----------------------------------------------------------------------===//
8428 
8429 bool AMDGPUOperand::isBLGP() const {
8430   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8431 }
8432 
8433 bool AMDGPUOperand::isCBSZ() const {
8434   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8435 }
8436 
8437 bool AMDGPUOperand::isABID() const {
8438   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8439 }
8440 
8441 bool AMDGPUOperand::isS16Imm() const {
8442   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8443 }
8444 
8445 bool AMDGPUOperand::isU16Imm() const {
8446   return isImm() && isUInt<16>(getImm());
8447 }
8448 
8449 //===----------------------------------------------------------------------===//
8450 // dim
8451 //===----------------------------------------------------------------------===//
8452 
8453 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8454   // We want to allow "dim:1D" etc.,
8455   // but the initial 1 is tokenized as an integer.
8456   std::string Token;
8457   if (isToken(AsmToken::Integer)) {
8458     SMLoc Loc = getToken().getEndLoc();
8459     Token = std::string(getTokenStr());
8460     lex();
8461     if (getLoc() != Loc)
8462       return false;
8463   }
8464 
8465   StringRef Suffix;
8466   if (!parseId(Suffix))
8467     return false;
8468   Token += Suffix;
8469 
8470   StringRef DimId = Token;
8471   if (DimId.startswith("SQ_RSRC_IMG_"))
8472     DimId = DimId.drop_front(12);
8473 
8474   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8475   if (!DimInfo)
8476     return false;
8477 
8478   Encoding = DimInfo->Encoding;
8479   return true;
8480 }
8481 
8482 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8483   if (!isGFX10Plus())
8484     return MatchOperand_NoMatch;
8485 
8486   SMLoc S = getLoc();
8487 
8488   if (!trySkipId("dim", AsmToken::Colon))
8489     return MatchOperand_NoMatch;
8490 
8491   unsigned Encoding;
8492   SMLoc Loc = getLoc();
8493   if (!parseDimId(Encoding)) {
8494     Error(Loc, "invalid dim value");
8495     return MatchOperand_ParseFail;
8496   }
8497 
8498   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8499                                               AMDGPUOperand::ImmTyDim));
8500   return MatchOperand_Success;
8501 }
8502 
8503 //===----------------------------------------------------------------------===//
8504 // dpp
8505 //===----------------------------------------------------------------------===//
8506 
8507 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8508   SMLoc S = getLoc();
8509 
8510   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8511     return MatchOperand_NoMatch;
8512 
8513   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8514 
8515   int64_t Sels[8];
8516 
8517   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8518     return MatchOperand_ParseFail;
8519 
8520   for (size_t i = 0; i < 8; ++i) {
8521     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8522       return MatchOperand_ParseFail;
8523 
8524     SMLoc Loc = getLoc();
8525     if (getParser().parseAbsoluteExpression(Sels[i]))
8526       return MatchOperand_ParseFail;
8527     if (0 > Sels[i] || 7 < Sels[i]) {
8528       Error(Loc, "expected a 3-bit value");
8529       return MatchOperand_ParseFail;
8530     }
8531   }
8532 
8533   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8534     return MatchOperand_ParseFail;
8535 
8536   unsigned DPP8 = 0;
8537   for (size_t i = 0; i < 8; ++i)
8538     DPP8 |= (Sels[i] << (i * 3));
8539 
8540   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8541   return MatchOperand_Success;
8542 }
8543 
8544 bool
8545 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8546                                     const OperandVector &Operands) {
8547   if (Ctrl == "row_newbcast")
8548     return isGFX90A();
8549 
8550   if (Ctrl == "row_share" ||
8551       Ctrl == "row_xmask")
8552     return isGFX10Plus();
8553 
8554   if (Ctrl == "wave_shl" ||
8555       Ctrl == "wave_shr" ||
8556       Ctrl == "wave_rol" ||
8557       Ctrl == "wave_ror" ||
8558       Ctrl == "row_bcast")
8559     return isVI() || isGFX9();
8560 
8561   return Ctrl == "row_mirror" ||
8562          Ctrl == "row_half_mirror" ||
8563          Ctrl == "quad_perm" ||
8564          Ctrl == "row_shl" ||
8565          Ctrl == "row_shr" ||
8566          Ctrl == "row_ror";
8567 }
8568 
8569 int64_t
8570 AMDGPUAsmParser::parseDPPCtrlPerm() {
8571   // quad_perm:[%d,%d,%d,%d]
8572 
8573   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8574     return -1;
8575 
8576   int64_t Val = 0;
8577   for (int i = 0; i < 4; ++i) {
8578     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8579       return -1;
8580 
8581     int64_t Temp;
8582     SMLoc Loc = getLoc();
8583     if (getParser().parseAbsoluteExpression(Temp))
8584       return -1;
8585     if (Temp < 0 || Temp > 3) {
8586       Error(Loc, "expected a 2-bit value");
8587       return -1;
8588     }
8589 
8590     Val += (Temp << i * 2);
8591   }
8592 
8593   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8594     return -1;
8595 
8596   return Val;
8597 }
8598 
8599 int64_t
8600 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8601   using namespace AMDGPU::DPP;
8602 
8603   // sel:%d
8604 
8605   int64_t Val;
8606   SMLoc Loc = getLoc();
8607 
8608   if (getParser().parseAbsoluteExpression(Val))
8609     return -1;
8610 
8611   struct DppCtrlCheck {
8612     int64_t Ctrl;
8613     int Lo;
8614     int Hi;
8615   };
8616 
8617   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8618     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8619     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8620     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8621     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8622     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8623     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8624     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8625     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8626     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8627     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8628     .Default({-1, 0, 0});
8629 
8630   bool Valid;
8631   if (Check.Ctrl == -1) {
8632     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8633     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8634   } else {
8635     Valid = Check.Lo <= Val && Val <= Check.Hi;
8636     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8637   }
8638 
8639   if (!Valid) {
8640     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8641     return -1;
8642   }
8643 
8644   return Val;
8645 }
8646 
8647 OperandMatchResultTy
8648 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8649   using namespace AMDGPU::DPP;
8650 
8651   if (!isToken(AsmToken::Identifier) ||
8652       !isSupportedDPPCtrl(getTokenStr(), Operands))
8653     return MatchOperand_NoMatch;
8654 
8655   SMLoc S = getLoc();
8656   int64_t Val = -1;
8657   StringRef Ctrl;
8658 
8659   parseId(Ctrl);
8660 
8661   if (Ctrl == "row_mirror") {
8662     Val = DppCtrl::ROW_MIRROR;
8663   } else if (Ctrl == "row_half_mirror") {
8664     Val = DppCtrl::ROW_HALF_MIRROR;
8665   } else {
8666     if (skipToken(AsmToken::Colon, "expected a colon")) {
8667       if (Ctrl == "quad_perm") {
8668         Val = parseDPPCtrlPerm();
8669       } else {
8670         Val = parseDPPCtrlSel(Ctrl);
8671       }
8672     }
8673   }
8674 
8675   if (Val == -1)
8676     return MatchOperand_ParseFail;
8677 
8678   Operands.push_back(
8679     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8680   return MatchOperand_Success;
8681 }
8682 
8683 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8684   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8685 }
8686 
8687 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8688   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8689 }
8690 
8691 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8692   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8693 }
8694 
8695 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDppBoundCtrl() const {
8696   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8697 }
8698 
8699 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8700   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8701 }
8702 
8703 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
8704                                  bool IsDPP8) {
8705   OptionalImmIndexMap OptionalIdx;
8706   unsigned Opc = Inst.getOpcode();
8707   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8708 
8709   // MAC instructions are special because they have 'old'
8710   // operand which is not tied to dst (but assumed to be).
8711   // They also have dummy unused src2_modifiers.
8712   int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
8713   int Src2ModIdx =
8714       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
8715   bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
8716                Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
8717 
8718   unsigned I = 1;
8719   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8720     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8721   }
8722 
8723   int Fi = 0;
8724   for (unsigned E = Operands.size(); I != E; ++I) {
8725 
8726     if (IsMAC) {
8727       int NumOperands = Inst.getNumOperands();
8728       if (OldIdx == NumOperands) {
8729         // Handle old operand
8730         constexpr int DST_IDX = 0;
8731         Inst.addOperand(Inst.getOperand(DST_IDX));
8732       } else if (Src2ModIdx == NumOperands) {
8733         // Add unused dummy src2_modifiers
8734         Inst.addOperand(MCOperand::createImm(0));
8735       }
8736     }
8737 
8738     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8739                                             MCOI::TIED_TO);
8740     if (TiedTo != -1) {
8741       assert((unsigned)TiedTo < Inst.getNumOperands());
8742       // handle tied old or src2 for MAC instructions
8743       Inst.addOperand(Inst.getOperand(TiedTo));
8744     }
8745     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8746     // Add the register arguments
8747     if (IsDPP8 && Op.isFI()) {
8748       Fi = Op.getImm();
8749     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8750       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8751     } else if (Op.isReg()) {
8752       Op.addRegOperands(Inst, 1);
8753     } else if (Op.isImm() &&
8754                Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
8755       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8756       Op.addImmOperands(Inst, 1);
8757     } else if (Op.isImm()) {
8758       OptionalIdx[Op.getImmTy()] = I;
8759     } else {
8760       llvm_unreachable("unhandled operand type");
8761     }
8762   }
8763   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8764     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8765 
8766   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8767     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8768 
8769   if (Desc.TSFlags & SIInstrFlags::VOP3P)
8770     cvtVOP3P(Inst, Operands, OptionalIdx);
8771   else if (Desc.TSFlags & SIInstrFlags::VOP3)
8772     cvtVOP3OpSel(Inst, Operands, OptionalIdx);
8773   else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
8774     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8775   }
8776 
8777   if (IsDPP8) {
8778     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8779     using namespace llvm::AMDGPU::DPP;
8780     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8781   } else {
8782     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8783     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8784     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8785     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8786 
8787     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
8788       addOptionalImmOperand(Inst, Operands, OptionalIdx,
8789                             AMDGPUOperand::ImmTyDppFi);
8790   }
8791 }
8792 
8793 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8794   OptionalImmIndexMap OptionalIdx;
8795 
8796   unsigned I = 1;
8797   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8798   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8799     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8800   }
8801 
8802   int Fi = 0;
8803   for (unsigned E = Operands.size(); I != E; ++I) {
8804     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8805                                             MCOI::TIED_TO);
8806     if (TiedTo != -1) {
8807       assert((unsigned)TiedTo < Inst.getNumOperands());
8808       // handle tied old or src2 for MAC instructions
8809       Inst.addOperand(Inst.getOperand(TiedTo));
8810     }
8811     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8812     // Add the register arguments
8813     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8814       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8815       // Skip it.
8816       continue;
8817     }
8818 
8819     if (IsDPP8) {
8820       if (Op.isDPP8()) {
8821         Op.addImmOperands(Inst, 1);
8822       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8823         Op.addRegWithFPInputModsOperands(Inst, 2);
8824       } else if (Op.isFI()) {
8825         Fi = Op.getImm();
8826       } else if (Op.isReg()) {
8827         Op.addRegOperands(Inst, 1);
8828       } else {
8829         llvm_unreachable("Invalid operand type");
8830       }
8831     } else {
8832       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8833         Op.addRegWithFPInputModsOperands(Inst, 2);
8834       } else if (Op.isReg()) {
8835         Op.addRegOperands(Inst, 1);
8836       } else if (Op.isDPPCtrl()) {
8837         Op.addImmOperands(Inst, 1);
8838       } else if (Op.isImm()) {
8839         // Handle optional arguments
8840         OptionalIdx[Op.getImmTy()] = I;
8841       } else {
8842         llvm_unreachable("Invalid operand type");
8843       }
8844     }
8845   }
8846 
8847   if (IsDPP8) {
8848     using namespace llvm::AMDGPU::DPP;
8849     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8850   } else {
8851     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8852     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8853     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8854     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
8855       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8856     }
8857   }
8858 }
8859 
8860 //===----------------------------------------------------------------------===//
8861 // sdwa
8862 //===----------------------------------------------------------------------===//
8863 
8864 OperandMatchResultTy
8865 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8866                               AMDGPUOperand::ImmTy Type) {
8867   using namespace llvm::AMDGPU::SDWA;
8868 
8869   SMLoc S = getLoc();
8870   StringRef Value;
8871   OperandMatchResultTy res;
8872 
8873   SMLoc StringLoc;
8874   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8875   if (res != MatchOperand_Success) {
8876     return res;
8877   }
8878 
8879   int64_t Int;
8880   Int = StringSwitch<int64_t>(Value)
8881         .Case("BYTE_0", SdwaSel::BYTE_0)
8882         .Case("BYTE_1", SdwaSel::BYTE_1)
8883         .Case("BYTE_2", SdwaSel::BYTE_2)
8884         .Case("BYTE_3", SdwaSel::BYTE_3)
8885         .Case("WORD_0", SdwaSel::WORD_0)
8886         .Case("WORD_1", SdwaSel::WORD_1)
8887         .Case("DWORD", SdwaSel::DWORD)
8888         .Default(0xffffffff);
8889 
8890   if (Int == 0xffffffff) {
8891     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8892     return MatchOperand_ParseFail;
8893   }
8894 
8895   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8896   return MatchOperand_Success;
8897 }
8898 
8899 OperandMatchResultTy
8900 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8901   using namespace llvm::AMDGPU::SDWA;
8902 
8903   SMLoc S = getLoc();
8904   StringRef Value;
8905   OperandMatchResultTy res;
8906 
8907   SMLoc StringLoc;
8908   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8909   if (res != MatchOperand_Success) {
8910     return res;
8911   }
8912 
8913   int64_t Int;
8914   Int = StringSwitch<int64_t>(Value)
8915         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8916         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8917         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8918         .Default(0xffffffff);
8919 
8920   if (Int == 0xffffffff) {
8921     Error(StringLoc, "invalid dst_unused value");
8922     return MatchOperand_ParseFail;
8923   }
8924 
8925   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8926   return MatchOperand_Success;
8927 }
8928 
8929 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8930   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8931 }
8932 
8933 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8934   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8935 }
8936 
8937 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8938   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8939 }
8940 
8941 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8942   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8943 }
8944 
8945 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8946   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8947 }
8948 
8949 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8950                               uint64_t BasicInstType,
8951                               bool SkipDstVcc,
8952                               bool SkipSrcVcc) {
8953   using namespace llvm::AMDGPU::SDWA;
8954 
8955   OptionalImmIndexMap OptionalIdx;
8956   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8957   bool SkippedVcc = false;
8958 
8959   unsigned I = 1;
8960   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8961   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8962     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8963   }
8964 
8965   for (unsigned E = Operands.size(); I != E; ++I) {
8966     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8967     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8968         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8969       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8970       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8971       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8972       // Skip VCC only if we didn't skip it on previous iteration.
8973       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8974       if (BasicInstType == SIInstrFlags::VOP2 &&
8975           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8976            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8977         SkippedVcc = true;
8978         continue;
8979       } else if (BasicInstType == SIInstrFlags::VOPC &&
8980                  Inst.getNumOperands() == 0) {
8981         SkippedVcc = true;
8982         continue;
8983       }
8984     }
8985     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8986       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8987     } else if (Op.isImm()) {
8988       // Handle optional arguments
8989       OptionalIdx[Op.getImmTy()] = I;
8990     } else {
8991       llvm_unreachable("Invalid operand type");
8992     }
8993     SkippedVcc = false;
8994   }
8995 
8996   const unsigned Opc = Inst.getOpcode();
8997   if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
8998       Opc != AMDGPU::V_NOP_sdwa_vi) {
8999     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
9000     switch (BasicInstType) {
9001     case SIInstrFlags::VOP1:
9002       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9003         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9004                               AMDGPUOperand::ImmTyClampSI, 0);
9005 
9006       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9007         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9008                               AMDGPUOperand::ImmTyOModSI, 0);
9009 
9010       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9011         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9012                               AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
9013 
9014       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9015         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9016                               AMDGPUOperand::ImmTySdwaDstUnused,
9017                               DstUnused::UNUSED_PRESERVE);
9018 
9019       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9020       break;
9021 
9022     case SIInstrFlags::VOP2:
9023       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9024 
9025       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9026         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9027 
9028       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
9029       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
9030       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9031       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
9032       break;
9033 
9034     case SIInstrFlags::VOPC:
9035       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9036         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9037       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9038       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
9039       break;
9040 
9041     default:
9042       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9043     }
9044   }
9045 
9046   // special case v_mac_{f16, f32}:
9047   // it has src2 register operand that is tied to dst operand
9048   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9049       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
9050     auto it = Inst.begin();
9051     std::advance(
9052       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9053     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9054   }
9055 }
9056 
9057 //===----------------------------------------------------------------------===//
9058 // mAI
9059 //===----------------------------------------------------------------------===//
9060 
9061 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
9062   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
9063 }
9064 
9065 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
9066   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
9067 }
9068 
9069 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
9070   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
9071 }
9072 
9073 /// Force static initialization.
9074 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9075   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
9076   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9077 }
9078 
9079 #define GET_REGISTER_MATCHER
9080 #define GET_MATCHER_IMPLEMENTATION
9081 #define GET_MNEMONIC_SPELL_CHECKER
9082 #define GET_MNEMONIC_CHECKER
9083 #include "AMDGPUGenAsmMatcher.inc"
9084 
9085 OperandMatchResultTy
9086 AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands, unsigned MCK) {
9087   switch (MCK) {
9088   case MCK_addr64:
9089     return parseTokenOp("addr64", Operands);
9090   case MCK_done:
9091     return parseTokenOp("done", Operands);
9092   case MCK_idxen:
9093     return parseTokenOp("idxen", Operands);
9094   case MCK_lds:
9095     return parseTokenOp("lds", Operands);
9096   case MCK_offen:
9097     return parseTokenOp("offen", Operands);
9098   case MCK_off:
9099     return parseTokenOp("off", Operands);
9100   case MCK_row_95_en:
9101     return parseTokenOp("row_en", Operands);
9102   case MCK_ImmABID:
9103     return parseIntWithPrefix("abid", Operands, AMDGPUOperand::ImmTyABID);
9104   case MCK_ImmBankMask:
9105     return parseIntWithPrefix("bank_mask", Operands,
9106                               AMDGPUOperand::ImmTyDppBankMask);
9107   case MCK_ImmBLGP: {
9108     OperandMatchResultTy Res =
9109         parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
9110     if (Res == MatchOperand_NoMatch) {
9111       Res = parseOperandArrayWithPrefix("neg", Operands,
9112                                         AMDGPUOperand::ImmTyBLGP);
9113     }
9114     return Res;
9115   }
9116   case MCK_ImmCBSZ:
9117     return parseIntWithPrefix("cbsz", Operands, AMDGPUOperand::ImmTyCBSZ);
9118   case MCK_ImmCPol:
9119     return parseCPol(Operands);
9120   case MCK_ImmFI:
9121     return parseIntWithPrefix("fi", Operands, AMDGPUOperand::ImmTyDppFi);
9122   case MCK_gds:
9123     return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9124   case MCK_ImmNegHi:
9125     return parseOperandArrayWithPrefix("neg_hi", Operands,
9126                                        AMDGPUOperand::ImmTyNegHi);
9127   case MCK_ImmNegLo:
9128     return parseOperandArrayWithPrefix("neg_lo", Operands,
9129                                        AMDGPUOperand::ImmTyNegLo);
9130   case MCK_ImmSMEMOffset:
9131     return parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
9132   case MCK_ImmOModSI:
9133     return parseOModOperand(Operands);
9134   case MCK_ImmOpSel:
9135     return parseOperandArrayWithPrefix("op_sel", Operands,
9136                                        AMDGPUOperand::ImmTyOpSel);
9137   case MCK_ImmOpSelHi:
9138     return parseOperandArrayWithPrefix("op_sel_hi", Operands,
9139                                        AMDGPUOperand::ImmTyOpSelHi);
9140   case MCK_ImmRowMask:
9141     return parseIntWithPrefix("row_mask", Operands,
9142                               AMDGPUOperand::ImmTyDppRowMask);
9143   case MCK_ImmSDWADstSel:
9144     return parseSDWASel(Operands, "dst_sel", AMDGPUOperand::ImmTySdwaDstSel);
9145   case MCK_ImmSDWADstUnused:
9146     return parseSDWADstUnused(Operands);
9147   case MCK_ImmSDWASrc0Sel:
9148     return parseSDWASel(Operands, "src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel);
9149   case MCK_ImmSDWASrc1Sel:
9150     return parseSDWASel(Operands, "src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel);
9151   case MCK_tfe:
9152     return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9153   }
9154   return tryCustomParseOperand(Operands, MCK);
9155 }
9156 
9157 // This function should be defined after auto-generated include so that we have
9158 // MatchClassKind enum defined
9159 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9160                                                      unsigned Kind) {
9161   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9162   // But MatchInstructionImpl() expects to meet token and fails to validate
9163   // operand. This method checks if we are given immediate operand but expect to
9164   // get corresponding token.
9165   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9166   switch (Kind) {
9167   case MCK_addr64:
9168     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9169   case MCK_gds:
9170     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9171   case MCK_lds:
9172     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9173   case MCK_idxen:
9174     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9175   case MCK_offen:
9176     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9177   case MCK_tfe:
9178     return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9179   case MCK_SSrcB32:
9180     // When operands have expression values, they will return true for isToken,
9181     // because it is not possible to distinguish between a token and an
9182     // expression at parse time. MatchInstructionImpl() will always try to
9183     // match an operand as a token, when isToken returns true, and when the
9184     // name of the expression is not a valid token, the match will fail,
9185     // so we need to handle it here.
9186     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9187   case MCK_SSrcF32:
9188     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9189   case MCK_SoppBrTarget:
9190     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
9191   case MCK_VReg32OrOff:
9192     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9193   case MCK_InterpSlot:
9194     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9195   case MCK_Attr:
9196     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9197   case MCK_AttrChan:
9198     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
9199   case MCK_ImmSMEMOffset:
9200     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
9201   case MCK_SReg_64:
9202   case MCK_SReg_64_XEXEC:
9203     // Null is defined as a 32-bit register but
9204     // it should also be enabled with 64-bit operands.
9205     // The following code enables it for SReg_64 operands
9206     // used as source and destination. Remaining source
9207     // operands are handled in isInlinableImm.
9208     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9209   default:
9210     return Match_InvalidOperand;
9211   }
9212 }
9213 
9214 //===----------------------------------------------------------------------===//
9215 // endpgm
9216 //===----------------------------------------------------------------------===//
9217 
9218 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
9219   SMLoc S = getLoc();
9220   int64_t Imm = 0;
9221 
9222   if (!parseExpr(Imm)) {
9223     // The operand is optional, if not present default to 0
9224     Imm = 0;
9225   }
9226 
9227   if (!isUInt<16>(Imm)) {
9228     Error(S, "expected a 16-bit value");
9229     return MatchOperand_ParseFail;
9230   }
9231 
9232   Operands.push_back(
9233       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9234   return MatchOperand_Success;
9235 }
9236 
9237 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9238 
9239 //===----------------------------------------------------------------------===//
9240 // LDSDIR
9241 //===----------------------------------------------------------------------===//
9242 
9243 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
9244   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
9245 }
9246 
9247 bool AMDGPUOperand::isWaitVDST() const {
9248   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9249 }
9250 
9251 //===----------------------------------------------------------------------===//
9252 // VINTERP
9253 //===----------------------------------------------------------------------===//
9254 
9255 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
9256   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
9257 }
9258 
9259 bool AMDGPUOperand::isWaitEXP() const {
9260   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9261 }
9262