xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCExpr.h"
11 #include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
14 #include "SIDefines.h"
15 #include "SIInstrInfo.h"
16 #include "SIRegisterInfo.h"
17 #include "TargetInfo/AMDGPUTargetInfo.h"
18 #include "Utils/AMDGPUAsmUtils.h"
19 #include "Utils/AMDGPUBaseInfo.h"
20 #include "Utils/AMDKernelCodeTUtils.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/StringSet.h"
24 #include "llvm/ADT/Twine.h"
25 #include "llvm/BinaryFormat/ELF.h"
26 #include "llvm/CodeGenTypes/MachineValueType.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCContext.h"
29 #include "llvm/MC/MCExpr.h"
30 #include "llvm/MC/MCInst.h"
31 #include "llvm/MC/MCInstrDesc.h"
32 #include "llvm/MC/MCParser/MCAsmLexer.h"
33 #include "llvm/MC/MCParser/MCAsmParser.h"
34 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
35 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
36 #include "llvm/MC/MCSymbol.h"
37 #include "llvm/MC/TargetRegistry.h"
38 #include "llvm/Support/AMDGPUMetadata.h"
39 #include "llvm/Support/AMDHSAKernelDescriptor.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/MathExtras.h"
42 #include "llvm/TargetParser/TargetParser.h"
43 #include <optional>
44 
45 using namespace llvm;
46 using namespace llvm::AMDGPU;
47 using namespace llvm::amdhsa;
48 
49 namespace {
50 
51 class AMDGPUAsmParser;
52 
/// Register categories distinguished by the parser.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
54 
55 //===----------------------------------------------------------------------===//
56 // Operand
57 //===----------------------------------------------------------------------===//
58 
59 class AMDGPUOperand : public MCParsedAsmOperand {
60   enum KindTy {
61     Token,
62     Immediate,
63     Register,
64     Expression
65   } Kind;
66 
67   SMLoc StartLoc, EndLoc;
68   const AMDGPUAsmParser *AsmParser;
69 
70 public:
71   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
72       : Kind(Kind_), AsmParser(AsmParser_) {}
73 
74   using Ptr = std::unique_ptr<AMDGPUOperand>;
75 
76   struct Modifiers {
77     bool Abs = false;
78     bool Neg = false;
79     bool Sext = false;
80     bool Lit = false;
81 
82     bool hasFPModifiers() const { return Abs || Neg; }
83     bool hasIntModifiers() const { return Sext; }
84     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
85 
86     int64_t getFPModifiersOperand() const {
87       int64_t Operand = 0;
88       Operand |= Abs ? SISrcMods::ABS : 0u;
89       Operand |= Neg ? SISrcMods::NEG : 0u;
90       return Operand;
91     }
92 
93     int64_t getIntModifiersOperand() const {
94       int64_t Operand = 0;
95       Operand |= Sext ? SISrcMods::SEXT : 0u;
96       return Operand;
97     }
98 
99     int64_t getModifiersOperand() const {
100       assert(!(hasFPModifiers() && hasIntModifiers())
101            && "fp and int modifiers should not be used simultaneously");
102       if (hasFPModifiers())
103         return getFPModifiersOperand();
104       if (hasIntModifiers())
105         return getIntModifiersOperand();
106       return 0;
107     }
108 
109     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
110   };
111 
112   enum ImmTy {
113     ImmTyNone,
114     ImmTyGDS,
115     ImmTyLDS,
116     ImmTyOffen,
117     ImmTyIdxen,
118     ImmTyAddr64,
119     ImmTyOffset,
120     ImmTyInstOffset,
121     ImmTyOffset0,
122     ImmTyOffset1,
123     ImmTySMEMOffsetMod,
124     ImmTyCPol,
125     ImmTyTFE,
126     ImmTyD16,
127     ImmTyClamp,
128     ImmTyOModSI,
129     ImmTySDWADstSel,
130     ImmTySDWASrc0Sel,
131     ImmTySDWASrc1Sel,
132     ImmTySDWADstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyInterpAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTyIndexKey8bit,
155     ImmTyIndexKey16bit,
156     ImmTyDPP8,
157     ImmTyDppCtrl,
158     ImmTyDppRowMask,
159     ImmTyDppBankMask,
160     ImmTyDppBoundCtrl,
161     ImmTyDppFI,
162     ImmTySwizzle,
163     ImmTyGprIdxMode,
164     ImmTyHigh,
165     ImmTyBLGP,
166     ImmTyCBSZ,
167     ImmTyABID,
168     ImmTyEndpgm,
169     ImmTyWaitVDST,
170     ImmTyWaitEXP,
171     ImmTyWaitVAVDst,
172     ImmTyWaitVMVSrc,
173     ImmTyByteSel,
174   };
175 
176   // Immediate operand kind.
177   // It helps to identify the location of an offending operand after an error.
178   // Note that regular literals and mandatory literals (KImm) must be handled
179   // differently. When looking for an offending operand, we should usually
180   // ignore mandatory literals because they are part of the instruction and
181   // cannot be changed. Report location of mandatory operands only for VOPD,
182   // when both OpX and OpY have a KImm and there are no other literals.
183   enum ImmKindTy {
184     ImmKindTyNone,
185     ImmKindTyLiteral,
186     ImmKindTyMandatoryLiteral,
187     ImmKindTyConst,
188   };
189 
190 private:
191   struct TokOp {
192     const char *Data;
193     unsigned Length;
194   };
195 
196   struct ImmOp {
197     int64_t Val;
198     ImmTy Type;
199     bool IsFPImm;
200     mutable ImmKindTy Kind;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override { return Kind == Token; }
218 
219   bool isSymbolRefExpr() const {
220     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
221   }
222 
223   bool isImm() const override {
224     return Kind == Immediate;
225   }
226 
227   void setImmKindNone() const {
228     assert(isImm());
229     Imm.Kind = ImmKindTyNone;
230   }
231 
232   void setImmKindLiteral() const {
233     assert(isImm());
234     Imm.Kind = ImmKindTyLiteral;
235   }
236 
237   void setImmKindMandatoryLiteral() const {
238     assert(isImm());
239     Imm.Kind = ImmKindTyMandatoryLiteral;
240   }
241 
242   void setImmKindConst() const {
243     assert(isImm());
244     Imm.Kind = ImmKindTyConst;
245   }
246 
247   bool IsImmKindLiteral() const {
248     return isImm() && Imm.Kind == ImmKindTyLiteral;
249   }
250 
251   bool IsImmKindMandatoryLiteral() const {
252     return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
253   }
254 
255   bool isImmKindConst() const {
256     return isImm() && Imm.Kind == ImmKindTyConst;
257   }
258 
259   bool isInlinableImm(MVT type) const;
260   bool isLiteralImm(MVT type) const;
261 
262   bool isRegKind() const {
263     return Kind == Register;
264   }
265 
266   bool isReg() const override {
267     return isRegKind() && !hasModifiers();
268   }
269 
270   bool isRegOrInline(unsigned RCID, MVT type) const {
271     return isRegClass(RCID) || isInlinableImm(type);
272   }
273 
274   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
275     return isRegOrInline(RCID, type) || isLiteralImm(type);
276   }
277 
278   bool isRegOrImmWithInt16InputMods() const {
279     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
280   }
281 
282   bool isRegOrImmWithIntT16InputMods() const {
283     return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
284   }
285 
286   bool isRegOrImmWithInt32InputMods() const {
287     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
288   }
289 
290   bool isRegOrInlineImmWithInt16InputMods() const {
291     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
292   }
293 
294   bool isRegOrInlineImmWithInt32InputMods() const {
295     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
296   }
297 
298   bool isRegOrImmWithInt64InputMods() const {
299     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
300   }
301 
302   bool isRegOrImmWithFP16InputMods() const {
303     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
304   }
305 
306   bool isRegOrImmWithFPT16InputMods() const {
307     return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
308   }
309 
310   bool isRegOrImmWithFP32InputMods() const {
311     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
312   }
313 
314   bool isRegOrImmWithFP64InputMods() const {
315     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
316   }
317 
318   template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
319     return isRegOrInline(
320         IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
321   }
322 
323   bool isRegOrInlineImmWithFP32InputMods() const {
324     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
325   }
326 
327   bool isPackedFP16InputMods() const {
328     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
329   }
330 
331   bool isVReg() const {
332     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
333            isRegClass(AMDGPU::VReg_64RegClassID) ||
334            isRegClass(AMDGPU::VReg_96RegClassID) ||
335            isRegClass(AMDGPU::VReg_128RegClassID) ||
336            isRegClass(AMDGPU::VReg_160RegClassID) ||
337            isRegClass(AMDGPU::VReg_192RegClassID) ||
338            isRegClass(AMDGPU::VReg_256RegClassID) ||
339            isRegClass(AMDGPU::VReg_512RegClassID) ||
340            isRegClass(AMDGPU::VReg_1024RegClassID);
341   }
342 
343   bool isVReg32() const {
344     return isRegClass(AMDGPU::VGPR_32RegClassID);
345   }
346 
347   bool isVReg32OrOff() const {
348     return isOff() || isVReg32();
349   }
350 
351   bool isNull() const {
352     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
353   }
354 
355   bool isVRegWithInputMods() const;
356   template <bool IsFake16> bool isT16VRegWithInputMods() const;
357 
358   bool isSDWAOperand(MVT type) const;
359   bool isSDWAFP16Operand() const;
360   bool isSDWAFP32Operand() const;
361   bool isSDWAInt16Operand() const;
362   bool isSDWAInt32Operand() const;
363 
364   bool isImmTy(ImmTy ImmT) const {
365     return isImm() && Imm.Type == ImmT;
366   }
367 
368   template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
369 
370   bool isImmLiteral() const { return isImmTy(ImmTyNone); }
371 
372   bool isImmModifier() const {
373     return isImm() && Imm.Type != ImmTyNone;
374   }
375 
376   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
377   bool isDim() const { return isImmTy(ImmTyDim); }
378   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
379   bool isOff() const { return isImmTy(ImmTyOff); }
380   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
381   bool isOffen() const { return isImmTy(ImmTyOffen); }
382   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
383   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
384   bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
385   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
386   bool isGDS() const { return isImmTy(ImmTyGDS); }
387   bool isLDS() const { return isImmTy(ImmTyLDS); }
388   bool isCPol() const { return isImmTy(ImmTyCPol); }
389   bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
390   bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
391   bool isTFE() const { return isImmTy(ImmTyTFE); }
392   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
393   bool isDppFI() const { return isImmTy(ImmTyDppFI); }
394   bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
395   bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
396   bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
397   bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
398   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
399   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
400   bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
401   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
402   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
403   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
404   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
405 
406   bool isRegOrImm() const {
407     return isReg() || isImm();
408   }
409 
410   bool isRegClass(unsigned RCID) const;
411 
412   bool isInlineValue() const;
413 
414   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
415     return isRegOrInline(RCID, type) && !hasModifiers();
416   }
417 
418   bool isSCSrcB16() const {
419     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
420   }
421 
422   bool isSCSrcV2B16() const {
423     return isSCSrcB16();
424   }
425 
426   bool isSCSrc_b32() const {
427     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
428   }
429 
430   bool isSCSrc_b64() const {
431     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
432   }
433 
434   bool isBoolReg() const;
435 
436   bool isSCSrcF16() const {
437     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
438   }
439 
440   bool isSCSrcV2F16() const {
441     return isSCSrcF16();
442   }
443 
444   bool isSCSrcF32() const {
445     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
446   }
447 
448   bool isSCSrcF64() const {
449     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
450   }
451 
452   bool isSSrc_b32() const {
453     return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
454   }
455 
456   bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
457 
458   bool isSSrcV2B16() const {
459     llvm_unreachable("cannot happen");
460     return isSSrc_b16();
461   }
462 
463   bool isSSrc_b64() const {
464     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
465     // See isVSrc64().
466     return isSCSrc_b64() || isLiteralImm(MVT::i64);
467   }
468 
469   bool isSSrc_f32() const {
470     return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
471   }
472 
473   bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
474 
475   bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
476 
477   bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
478 
479   bool isSSrcV2F16() const {
480     llvm_unreachable("cannot happen");
481     return isSSrc_f16();
482   }
483 
484   bool isSSrcV2FP32() const {
485     llvm_unreachable("cannot happen");
486     return isSSrc_f32();
487   }
488 
489   bool isSCSrcV2FP32() const {
490     llvm_unreachable("cannot happen");
491     return isSCSrcF32();
492   }
493 
494   bool isSSrcV2INT32() const {
495     llvm_unreachable("cannot happen");
496     return isSSrc_b32();
497   }
498 
499   bool isSCSrcV2INT32() const {
500     llvm_unreachable("cannot happen");
501     return isSCSrc_b32();
502   }
503 
504   bool isSSrcOrLds_b32() const {
505     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
506            isLiteralImm(MVT::i32) || isExpr();
507   }
508 
509   bool isVCSrc_b32() const {
510     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
511   }
512 
513   bool isVCSrcB64() const {
514     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
515   }
516 
517   bool isVCSrcTB16() const {
518     return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
519   }
520 
521   bool isVCSrcTB16_Lo128() const {
522     return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
523   }
524 
525   bool isVCSrcFake16B16_Lo128() const {
526     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
527   }
528 
529   bool isVCSrc_b16() const {
530     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
531   }
532 
533   bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
534 
535   bool isVCSrc_f32() const {
536     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
537   }
538 
539   bool isVCSrcF64() const {
540     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
541   }
542 
543   bool isVCSrcTBF16() const {
544     return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
545   }
546 
547   bool isVCSrcTF16() const {
548     return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
549   }
550 
551   bool isVCSrcTBF16_Lo128() const {
552     return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
553   }
554 
555   bool isVCSrcTF16_Lo128() const {
556     return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
557   }
558 
559   bool isVCSrcFake16BF16_Lo128() const {
560     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
561   }
562 
563   bool isVCSrcFake16F16_Lo128() const {
564     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
565   }
566 
567   bool isVCSrc_bf16() const {
568     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
569   }
570 
571   bool isVCSrc_f16() const {
572     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
573   }
574 
575   bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
576 
577   bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
578 
579   bool isVSrc_b32() const {
580     return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
581   }
582 
583   bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
584 
585   bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
586 
587   bool isVSrcT_b16_Lo128() const {
588     return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
589   }
590 
591   bool isVSrcFake16_b16_Lo128() const {
592     return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
593   }
594 
595   bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
596 
597   bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
598 
599   bool isVCSrcV2FP32() const {
600     return isVCSrcF64();
601   }
602 
603   bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
604 
605   bool isVCSrcV2INT32() const {
606     return isVCSrcB64();
607   }
608 
609   bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
610 
611   bool isVSrc_f32() const {
612     return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
613   }
614 
615   bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
616 
617   bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
618 
619   bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
620 
621   bool isVSrcT_bf16_Lo128() const {
622     return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
623   }
624 
625   bool isVSrcT_f16_Lo128() const {
626     return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
627   }
628 
629   bool isVSrcFake16_bf16_Lo128() const {
630     return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
631   }
632 
633   bool isVSrcFake16_f16_Lo128() const {
634     return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
635   }
636 
637   bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
638 
639   bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
640 
641   bool isVSrc_v2bf16() const {
642     return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
643   }
644 
645   bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
646 
647   bool isVISrcB32() const {
648     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
649   }
650 
651   bool isVISrcB16() const {
652     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
653   }
654 
655   bool isVISrcV2B16() const {
656     return isVISrcB16();
657   }
658 
659   bool isVISrcF32() const {
660     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
661   }
662 
663   bool isVISrcF16() const {
664     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
665   }
666 
667   bool isVISrcV2F16() const {
668     return isVISrcF16() || isVISrcB32();
669   }
670 
671   bool isVISrc_64_bf16() const {
672     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
673   }
674 
675   bool isVISrc_64_f16() const {
676     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
677   }
678 
679   bool isVISrc_64_b32() const {
680     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
681   }
682 
683   bool isVISrc_64B64() const {
684     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
685   }
686 
687   bool isVISrc_64_f64() const {
688     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
689   }
690 
691   bool isVISrc_64V2FP32() const {
692     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
693   }
694 
695   bool isVISrc_64V2INT32() const {
696     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
697   }
698 
699   bool isVISrc_256_b32() const {
700     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
701   }
702 
703   bool isVISrc_256_f32() const {
704     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
705   }
706 
707   bool isVISrc_256B64() const {
708     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
709   }
710 
711   bool isVISrc_256_f64() const {
712     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
713   }
714 
715   bool isVISrc_128B16() const {
716     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
717   }
718 
719   bool isVISrc_128V2B16() const {
720     return isVISrc_128B16();
721   }
722 
723   bool isVISrc_128_b32() const {
724     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
725   }
726 
727   bool isVISrc_128_f32() const {
728     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
729   }
730 
731   bool isVISrc_256V2FP32() const {
732     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
733   }
734 
735   bool isVISrc_256V2INT32() const {
736     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
737   }
738 
739   bool isVISrc_512_b32() const {
740     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
741   }
742 
743   bool isVISrc_512B16() const {
744     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
745   }
746 
747   bool isVISrc_512V2B16() const {
748     return isVISrc_512B16();
749   }
750 
751   bool isVISrc_512_f32() const {
752     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
753   }
754 
755   bool isVISrc_512F16() const {
756     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
757   }
758 
759   bool isVISrc_512V2F16() const {
760     return isVISrc_512F16() || isVISrc_512_b32();
761   }
762 
763   bool isVISrc_1024_b32() const {
764     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
765   }
766 
767   bool isVISrc_1024B16() const {
768     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
769   }
770 
771   bool isVISrc_1024V2B16() const {
772     return isVISrc_1024B16();
773   }
774 
775   bool isVISrc_1024_f32() const {
776     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
777   }
778 
779   bool isVISrc_1024F16() const {
780     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
781   }
782 
783   bool isVISrc_1024V2F16() const {
784     return isVISrc_1024F16() || isVISrc_1024_b32();
785   }
786 
787   bool isAISrcB32() const {
788     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
789   }
790 
791   bool isAISrcB16() const {
792     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
793   }
794 
795   bool isAISrcV2B16() const {
796     return isAISrcB16();
797   }
798 
799   bool isAISrcF32() const {
800     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
801   }
802 
803   bool isAISrcF16() const {
804     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
805   }
806 
807   bool isAISrcV2F16() const {
808     return isAISrcF16() || isAISrcB32();
809   }
810 
811   bool isAISrc_64B64() const {
812     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
813   }
814 
815   bool isAISrc_64_f64() const {
816     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
817   }
818 
819   bool isAISrc_128_b32() const {
820     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
821   }
822 
823   bool isAISrc_128B16() const {
824     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
825   }
826 
827   bool isAISrc_128V2B16() const {
828     return isAISrc_128B16();
829   }
830 
831   bool isAISrc_128_f32() const {
832     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
833   }
834 
835   bool isAISrc_128F16() const {
836     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
837   }
838 
839   bool isAISrc_128V2F16() const {
840     return isAISrc_128F16() || isAISrc_128_b32();
841   }
842 
843   bool isVISrc_128_bf16() const {
844     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
845   }
846 
847   bool isVISrc_128_f16() const {
848     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
849   }
850 
851   bool isVISrc_128V2F16() const {
852     return isVISrc_128_f16() || isVISrc_128_b32();
853   }
854 
855   bool isAISrc_256B64() const {
856     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
857   }
858 
859   bool isAISrc_256_f64() const {
860     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
861   }
862 
863   bool isAISrc_512_b32() const {
864     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
865   }
866 
867   bool isAISrc_512B16() const {
868     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
869   }
870 
871   bool isAISrc_512V2B16() const {
872     return isAISrc_512B16();
873   }
874 
875   bool isAISrc_512_f32() const {
876     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
877   }
878 
879   bool isAISrc_512F16() const {
880     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
881   }
882 
883   bool isAISrc_512V2F16() const {
884     return isAISrc_512F16() || isAISrc_512_b32();
885   }
886 
887   bool isAISrc_1024_b32() const {
888     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
889   }
890 
891   bool isAISrc_1024B16() const {
892     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
893   }
894 
895   bool isAISrc_1024V2B16() const {
896     return isAISrc_1024B16();
897   }
898 
899   bool isAISrc_1024_f32() const {
900     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
901   }
902 
903   bool isAISrc_1024F16() const {
904     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
905   }
906 
907   bool isAISrc_1024V2F16() const {
908     return isAISrc_1024F16() || isAISrc_1024_b32();
909   }
910 
911   bool isKImmFP32() const {
912     return isLiteralImm(MVT::f32);
913   }
914 
915   bool isKImmFP16() const {
916     return isLiteralImm(MVT::f16);
917   }
918 
919   bool isMem() const override {
920     return false;
921   }
922 
923   bool isExpr() const {
924     return Kind == Expression;
925   }
926 
927   bool isSOPPBrTarget() const { return isExpr() || isImm(); }
928 
929   bool isSWaitCnt() const;
930   bool isDepCtr() const;
931   bool isSDelayALU() const;
932   bool isHwreg() const;
933   bool isSendMsg() const;
934   bool isSplitBarrier() const;
935   bool isSwizzle() const;
936   bool isSMRDOffset8() const;
937   bool isSMEMOffset() const;
938   bool isSMRDLiteralOffset() const;
939   bool isDPP8() const;
940   bool isDPPCtrl() const;
941   bool isBLGP() const;
942   bool isGPRIdxMode() const;
943   bool isS16Imm() const;
944   bool isU16Imm() const;
945   bool isEndpgm() const;
946 
947   auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
948     return [=](){ return P(*this); };
949   }
950 
951   StringRef getToken() const {
952     assert(isToken());
953     return StringRef(Tok.Data, Tok.Length);
954   }
955 
956   int64_t getImm() const {
957     assert(isImm());
958     return Imm.Val;
959   }
960 
961   void setImm(int64_t Val) {
962     assert(isImm());
963     Imm.Val = Val;
964   }
965 
966   ImmTy getImmTy() const {
967     assert(isImm());
968     return Imm.Type;
969   }
970 
971   MCRegister getReg() const override {
972     assert(isRegKind());
973     return Reg.RegNo;
974   }
975 
976   SMLoc getStartLoc() const override {
977     return StartLoc;
978   }
979 
980   SMLoc getEndLoc() const override {
981     return EndLoc;
982   }
983 
984   SMRange getLocRange() const {
985     return SMRange(StartLoc, EndLoc);
986   }
987 
988   Modifiers getModifiers() const {
989     assert(isRegKind() || isImmTy(ImmTyNone));
990     return isRegKind() ? Reg.Mods : Imm.Mods;
991   }
992 
993   void setModifiers(Modifiers Mods) {
994     assert(isRegKind() || isImmTy(ImmTyNone));
995     if (isRegKind())
996       Reg.Mods = Mods;
997     else
998       Imm.Mods = Mods;
999   }
1000 
1001   bool hasModifiers() const {
1002     return getModifiers().hasModifiers();
1003   }
1004 
1005   bool hasFPModifiers() const {
1006     return getModifiers().hasFPModifiers();
1007   }
1008 
1009   bool hasIntModifiers() const {
1010     return getModifiers().hasIntModifiers();
1011   }
1012 
1013   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1014 
1015   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1016 
1017   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1018 
1019   void addRegOperands(MCInst &Inst, unsigned N) const;
1020 
1021   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1022     if (isRegKind())
1023       addRegOperands(Inst, N);
1024     else
1025       addImmOperands(Inst, N);
1026   }
1027 
1028   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1029     Modifiers Mods = getModifiers();
1030     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1031     if (isRegKind()) {
1032       addRegOperands(Inst, N);
1033     } else {
1034       addImmOperands(Inst, N, false);
1035     }
1036   }
1037 
1038   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1039     assert(!hasIntModifiers());
1040     addRegOrImmWithInputModsOperands(Inst, N);
1041   }
1042 
1043   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1044     assert(!hasFPModifiers());
1045     addRegOrImmWithInputModsOperands(Inst, N);
1046   }
1047 
1048   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1049     Modifiers Mods = getModifiers();
1050     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1051     assert(isRegKind());
1052     addRegOperands(Inst, N);
1053   }
1054 
1055   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1056     assert(!hasIntModifiers());
1057     addRegWithInputModsOperands(Inst, N);
1058   }
1059 
1060   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1061     assert(!hasFPModifiers());
1062     addRegWithInputModsOperands(Inst, N);
1063   }
1064 
1065   static void printImmTy(raw_ostream& OS, ImmTy Type) {
1066     // clang-format off
1067     switch (Type) {
1068     case ImmTyNone: OS << "None"; break;
1069     case ImmTyGDS: OS << "GDS"; break;
1070     case ImmTyLDS: OS << "LDS"; break;
1071     case ImmTyOffen: OS << "Offen"; break;
1072     case ImmTyIdxen: OS << "Idxen"; break;
1073     case ImmTyAddr64: OS << "Addr64"; break;
1074     case ImmTyOffset: OS << "Offset"; break;
1075     case ImmTyInstOffset: OS << "InstOffset"; break;
1076     case ImmTyOffset0: OS << "Offset0"; break;
1077     case ImmTyOffset1: OS << "Offset1"; break;
1078     case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1079     case ImmTyCPol: OS << "CPol"; break;
1080     case ImmTyIndexKey8bit: OS << "index_key"; break;
1081     case ImmTyIndexKey16bit: OS << "index_key"; break;
1082     case ImmTyTFE: OS << "TFE"; break;
1083     case ImmTyD16: OS << "D16"; break;
1084     case ImmTyFORMAT: OS << "FORMAT"; break;
1085     case ImmTyClamp: OS << "Clamp"; break;
1086     case ImmTyOModSI: OS << "OModSI"; break;
1087     case ImmTyDPP8: OS << "DPP8"; break;
1088     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1089     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1090     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1091     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1092     case ImmTyDppFI: OS << "DppFI"; break;
1093     case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1094     case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1095     case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1096     case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1097     case ImmTyDMask: OS << "DMask"; break;
1098     case ImmTyDim: OS << "Dim"; break;
1099     case ImmTyUNorm: OS << "UNorm"; break;
1100     case ImmTyDA: OS << "DA"; break;
1101     case ImmTyR128A16: OS << "R128A16"; break;
1102     case ImmTyA16: OS << "A16"; break;
1103     case ImmTyLWE: OS << "LWE"; break;
1104     case ImmTyOff: OS << "Off"; break;
1105     case ImmTyExpTgt: OS << "ExpTgt"; break;
1106     case ImmTyExpCompr: OS << "ExpCompr"; break;
1107     case ImmTyExpVM: OS << "ExpVM"; break;
1108     case ImmTyHwreg: OS << "Hwreg"; break;
1109     case ImmTySendMsg: OS << "SendMsg"; break;
1110     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1111     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1112     case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1113     case ImmTyOpSel: OS << "OpSel"; break;
1114     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1115     case ImmTyNegLo: OS << "NegLo"; break;
1116     case ImmTyNegHi: OS << "NegHi"; break;
1117     case ImmTySwizzle: OS << "Swizzle"; break;
1118     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1119     case ImmTyHigh: OS << "High"; break;
1120     case ImmTyBLGP: OS << "BLGP"; break;
1121     case ImmTyCBSZ: OS << "CBSZ"; break;
1122     case ImmTyABID: OS << "ABID"; break;
1123     case ImmTyEndpgm: OS << "Endpgm"; break;
1124     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1125     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1126     case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1127     case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1128     case ImmTyByteSel: OS << "ByteSel" ; break;
1129     }
1130     // clang-format on
1131   }
1132 
1133   void print(raw_ostream &OS) const override {
1134     switch (Kind) {
1135     case Register:
1136       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1137       break;
1138     case Immediate:
1139       OS << '<' << getImm();
1140       if (getImmTy() != ImmTyNone) {
1141         OS << " type: "; printImmTy(OS, getImmTy());
1142       }
1143       OS << " mods: " << Imm.Mods << '>';
1144       break;
1145     case Token:
1146       OS << '\'' << getToken() << '\'';
1147       break;
1148     case Expression:
1149       OS << "<expr " << *Expr << '>';
1150       break;
1151     }
1152   }
1153 
1154   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1155                                       int64_t Val, SMLoc Loc,
1156                                       ImmTy Type = ImmTyNone,
1157                                       bool IsFPImm = false) {
1158     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1159     Op->Imm.Val = Val;
1160     Op->Imm.IsFPImm = IsFPImm;
1161     Op->Imm.Kind = ImmKindTyNone;
1162     Op->Imm.Type = Type;
1163     Op->Imm.Mods = Modifiers();
1164     Op->StartLoc = Loc;
1165     Op->EndLoc = Loc;
1166     return Op;
1167   }
1168 
1169   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1170                                         StringRef Str, SMLoc Loc,
1171                                         bool HasExplicitEncodingSize = true) {
1172     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1173     Res->Tok.Data = Str.data();
1174     Res->Tok.Length = Str.size();
1175     Res->StartLoc = Loc;
1176     Res->EndLoc = Loc;
1177     return Res;
1178   }
1179 
1180   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1181                                       unsigned RegNo, SMLoc S,
1182                                       SMLoc E) {
1183     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1184     Op->Reg.RegNo = RegNo;
1185     Op->Reg.Mods = Modifiers();
1186     Op->StartLoc = S;
1187     Op->EndLoc = E;
1188     return Op;
1189   }
1190 
1191   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1192                                        const class MCExpr *Expr, SMLoc S) {
1193     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1194     Op->Expr = Expr;
1195     Op->StartLoc = S;
1196     Op->EndLoc = S;
1197     return Op;
1198   }
1199 };
1200 
1201 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1202   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1203   return OS;
1204 }
1205 
1206 //===----------------------------------------------------------------------===//
1207 // AsmParser
1208 //===----------------------------------------------------------------------===//
1209 
1210 // Holds info related to the current kernel, e.g. count of SGPRs used.
1211 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1212 // .amdgpu_hsa_kernel or at EOF.
1213 class KernelScopeInfo {
1214   int SgprIndexUnusedMin = -1;
1215   int VgprIndexUnusedMin = -1;
1216   int AgprIndexUnusedMin = -1;
1217   MCContext *Ctx = nullptr;
1218   MCSubtargetInfo const *MSTI = nullptr;
1219 
1220   void usesSgprAt(int i) {
1221     if (i >= SgprIndexUnusedMin) {
1222       SgprIndexUnusedMin = ++i;
1223       if (Ctx) {
1224         MCSymbol* const Sym =
1225           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1226         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1227       }
1228     }
1229   }
1230 
1231   void usesVgprAt(int i) {
1232     if (i >= VgprIndexUnusedMin) {
1233       VgprIndexUnusedMin = ++i;
1234       if (Ctx) {
1235         MCSymbol* const Sym =
1236           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1237         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1238                                          VgprIndexUnusedMin);
1239         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1240       }
1241     }
1242   }
1243 
1244   void usesAgprAt(int i) {
1245     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1246     if (!hasMAIInsts(*MSTI))
1247       return;
1248 
1249     if (i >= AgprIndexUnusedMin) {
1250       AgprIndexUnusedMin = ++i;
1251       if (Ctx) {
1252         MCSymbol* const Sym =
1253           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1254         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1255 
1256         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1257         MCSymbol* const vSym =
1258           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1259         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1260                                          VgprIndexUnusedMin);
1261         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1262       }
1263     }
1264   }
1265 
1266 public:
1267   KernelScopeInfo() = default;
1268 
1269   void initialize(MCContext &Context) {
1270     Ctx = &Context;
1271     MSTI = Ctx->getSubtargetInfo();
1272 
1273     usesSgprAt(SgprIndexUnusedMin = -1);
1274     usesVgprAt(VgprIndexUnusedMin = -1);
1275     if (hasMAIInsts(*MSTI)) {
1276       usesAgprAt(AgprIndexUnusedMin = -1);
1277     }
1278   }
1279 
1280   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1281                     unsigned RegWidth) {
1282     switch (RegKind) {
1283     case IS_SGPR:
1284       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1285       break;
1286     case IS_AGPR:
1287       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1288       break;
1289     case IS_VGPR:
1290       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1291       break;
1292     default:
1293       break;
1294     }
1295   }
1296 };
1297 
1298 class AMDGPUAsmParser : public MCTargetAsmParser {
1299   MCAsmParser &Parser;
1300 
1301   unsigned ForcedEncodingSize = 0;
1302   bool ForcedDPP = false;
1303   bool ForcedSDWA = false;
1304   KernelScopeInfo KernelScope;
1305 
1306   /// @name Auto-generated Match Functions
1307   /// {
1308 
1309 #define GET_ASSEMBLER_HEADER
1310 #include "AMDGPUGenAsmMatcher.inc"
1311 
1312   /// }
1313 
1314 private:
1315   void createConstantSymbol(StringRef Id, int64_t Val);
1316 
1317   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1318   bool OutOfRangeError(SMRange Range);
1319   /// Calculate VGPR/SGPR blocks required for given target, reserved
1320   /// registers, and user-specified NextFreeXGPR values.
1321   ///
1322   /// \param Features [in] Target features, used for bug corrections.
1323   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1324   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1325   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1326   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1327   /// descriptor field, if valid.
1328   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1329   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1330   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1331   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1332   /// \param VGPRBlocks [out] Result VGPR block count.
1333   /// \param SGPRBlocks [out] Result SGPR block count.
1334   bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1335                           const MCExpr *FlatScrUsed, bool XNACKUsed,
1336                           std::optional<bool> EnableWavefrontSize32,
1337                           const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1338                           const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1339                           const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1340   bool ParseDirectiveAMDGCNTarget();
1341   bool ParseDirectiveAMDHSACodeObjectVersion();
1342   bool ParseDirectiveAMDHSAKernel();
1343   bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1344   bool ParseDirectiveAMDKernelCodeT();
1345   // TODO: Possibly make subtargetHasRegister const.
1346   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1347   bool ParseDirectiveAMDGPUHsaKernel();
1348 
1349   bool ParseDirectiveISAVersion();
1350   bool ParseDirectiveHSAMetadata();
1351   bool ParseDirectivePALMetadataBegin();
1352   bool ParseDirectivePALMetadata();
1353   bool ParseDirectiveAMDGPULDS();
1354 
1355   /// Common code to parse out a block of text (typically YAML) between start and
1356   /// end directives.
1357   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1358                            const char *AssemblerDirectiveEnd,
1359                            std::string &CollectString);
1360 
1361   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1362                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1363   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1364                            unsigned &RegNum, unsigned &RegWidth,
1365                            bool RestoreOnFailure = false);
1366   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1367                            unsigned &RegNum, unsigned &RegWidth,
1368                            SmallVectorImpl<AsmToken> &Tokens);
1369   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1370                            unsigned &RegWidth,
1371                            SmallVectorImpl<AsmToken> &Tokens);
1372   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1373                            unsigned &RegWidth,
1374                            SmallVectorImpl<AsmToken> &Tokens);
1375   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1376                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1377   bool ParseRegRange(unsigned& Num, unsigned& Width);
1378   unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1379                          unsigned RegWidth, SMLoc Loc);
1380 
1381   bool isRegister();
1382   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1383   std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1384   void initializeGprCountSymbol(RegisterKind RegKind);
1385   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1386                              unsigned RegWidth);
1387   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1388                     bool IsAtomic);
1389 
1390 public:
1391   enum OperandMode {
1392     OperandMode_Default,
1393     OperandMode_NSA,
1394   };
1395 
1396   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1397 
1398   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1399                const MCInstrInfo &MII,
1400                const MCTargetOptions &Options)
1401       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1402     MCAsmParserExtension::Initialize(Parser);
1403 
1404     if (getFeatureBits().none()) {
1405       // Set default features.
1406       copySTI().ToggleFeature("southern-islands");
1407     }
1408 
1409     FeatureBitset FB = getFeatureBits();
1410     if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1411         !FB[AMDGPU::FeatureWavefrontSize32]) {
1412       // If there is no default wave size it must be a generation before gfx10,
1413       // these have FeatureWavefrontSize64 in their definition already. For
1414       // gfx10+ set wave32 as a default.
1415       copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
1416     }
1417 
1418     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1419 
1420     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1421     if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1422       createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1423       createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1424       createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1425     } else {
1426       createConstantSymbol(".option.machine_version_major", ISA.Major);
1427       createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1428       createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1429     }
1430     if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1431       initializeGprCountSymbol(IS_VGPR);
1432       initializeGprCountSymbol(IS_SGPR);
1433     } else
1434       KernelScope.initialize(getContext());
1435 
1436     for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1437       createConstantSymbol(Symbol, Code);
1438 
1439     createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1440     createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1441     createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1442   }
1443 
1444   bool hasMIMG_R128() const {
1445     return AMDGPU::hasMIMG_R128(getSTI());
1446   }
1447 
1448   bool hasPackedD16() const {
1449     return AMDGPU::hasPackedD16(getSTI());
1450   }
1451 
1452   bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1453 
1454   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1455 
1456   bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1457 
1458   bool isSI() const {
1459     return AMDGPU::isSI(getSTI());
1460   }
1461 
1462   bool isCI() const {
1463     return AMDGPU::isCI(getSTI());
1464   }
1465 
1466   bool isVI() const {
1467     return AMDGPU::isVI(getSTI());
1468   }
1469 
1470   bool isGFX9() const {
1471     return AMDGPU::isGFX9(getSTI());
1472   }
1473 
1474   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1475   bool isGFX90A() const {
1476     return AMDGPU::isGFX90A(getSTI());
1477   }
1478 
1479   bool isGFX940() const {
1480     return AMDGPU::isGFX940(getSTI());
1481   }
1482 
1483   bool isGFX9Plus() const {
1484     return AMDGPU::isGFX9Plus(getSTI());
1485   }
1486 
1487   bool isGFX10() const {
1488     return AMDGPU::isGFX10(getSTI());
1489   }
1490 
1491   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1492 
1493   bool isGFX11() const {
1494     return AMDGPU::isGFX11(getSTI());
1495   }
1496 
1497   bool isGFX11Plus() const {
1498     return AMDGPU::isGFX11Plus(getSTI());
1499   }
1500 
1501   bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1502 
1503   bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1504 
1505   bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1506 
1507   bool isGFX10_BEncoding() const {
1508     return AMDGPU::isGFX10_BEncoding(getSTI());
1509   }
1510 
1511   bool hasInv2PiInlineImm() const {
1512     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1513   }
1514 
1515   bool hasFlatOffsets() const {
1516     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1517   }
1518 
1519   bool hasArchitectedFlatScratch() const {
1520     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1521   }
1522 
1523   bool hasSGPR102_SGPR103() const {
1524     return !isVI() && !isGFX9();
1525   }
1526 
1527   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1528 
1529   bool hasIntClamp() const {
1530     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1531   }
1532 
1533   bool hasPartialNSAEncoding() const {
1534     return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1535   }
1536 
1537   unsigned getNSAMaxSize(bool HasSampler = false) const {
1538     return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1539   }
1540 
1541   unsigned getMaxNumUserSGPRs() const {
1542     return AMDGPU::getMaxNumUserSGPRs(getSTI());
1543   }
1544 
1545   bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1546 
1547   AMDGPUTargetStreamer &getTargetStreamer() {
1548     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1549     return static_cast<AMDGPUTargetStreamer &>(TS);
1550   }
1551 
1552   const MCRegisterInfo *getMRI() const {
1553     // We need this const_cast because for some reason getContext() is not const
1554     // in MCAsmParser.
1555     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1556   }
1557 
1558   const MCInstrInfo *getMII() const {
1559     return &MII;
1560   }
1561 
1562   const FeatureBitset &getFeatureBits() const {
1563     return getSTI().getFeatureBits();
1564   }
1565 
1566   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1567   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1568   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1569 
1570   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1571   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1572   bool isForcedDPP() const { return ForcedDPP; }
1573   bool isForcedSDWA() const { return ForcedSDWA; }
1574   ArrayRef<unsigned> getMatchedVariants() const;
1575   StringRef getMatchedVariantName() const;
1576 
1577   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1578   bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1579                      bool RestoreOnFailure);
1580   bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1581   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1582                                SMLoc &EndLoc) override;
1583   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1584   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1585                                       unsigned Kind) override;
1586   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1587                                OperandVector &Operands, MCStreamer &Out,
1588                                uint64_t &ErrorInfo,
1589                                bool MatchingInlineAsm) override;
1590   bool ParseDirective(AsmToken DirectiveID) override;
1591   ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1592                            OperandMode Mode = OperandMode_Default);
1593   StringRef parseMnemonicSuffix(StringRef Name);
1594   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1595                         SMLoc NameLoc, OperandVector &Operands) override;
1596   //bool ProcessInstruction(MCInst &Inst);
1597 
1598   ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1599 
1600   ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1601 
1602   ParseStatus
1603   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1604                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1605                      std::function<bool(int64_t &)> ConvertResult = nullptr);
1606 
1607   ParseStatus parseOperandArrayWithPrefix(
1608       const char *Prefix, OperandVector &Operands,
1609       AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1610       bool (*ConvertResult)(int64_t &) = nullptr);
1611 
1612   ParseStatus
1613   parseNamedBit(StringRef Name, OperandVector &Operands,
1614                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1615   unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1616   ParseStatus parseCPol(OperandVector &Operands);
1617   ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1618   ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1619   ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1620                                     SMLoc &StringLoc);
1621 
1622   bool isModifier();
1623   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1624   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1625   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1626   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1627   bool parseSP3NegModifier();
1628   ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1629                        bool HasLit = false);
1630   ParseStatus parseReg(OperandVector &Operands);
1631   ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1632                             bool HasLit = false);
1633   ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1634                                            bool AllowImm = true);
1635   ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1636                                             bool AllowImm = true);
1637   ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1638   ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1639   ParseStatus parseVReg32OrOff(OperandVector &Operands);
1640   ParseStatus tryParseIndexKey(OperandVector &Operands,
1641                                AMDGPUOperand::ImmTy ImmTy);
1642   ParseStatus parseIndexKey8bit(OperandVector &Operands);
1643   ParseStatus parseIndexKey16bit(OperandVector &Operands);
1644 
1645   ParseStatus parseDfmtNfmt(int64_t &Format);
1646   ParseStatus parseUfmt(int64_t &Format);
1647   ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1648                                        int64_t &Format);
1649   ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1650                                          int64_t &Format);
1651   ParseStatus parseFORMAT(OperandVector &Operands);
1652   ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1653   ParseStatus parseNumericFormat(int64_t &Format);
1654   ParseStatus parseFlatOffset(OperandVector &Operands);
1655   ParseStatus parseR128A16(OperandVector &Operands);
1656   ParseStatus parseBLGP(OperandVector &Operands);
1657   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1658   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1659 
1660   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1661 
1662   bool parseCnt(int64_t &IntVal);
1663   ParseStatus parseSWaitCnt(OperandVector &Operands);
1664 
1665   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1666   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1667   ParseStatus parseDepCtr(OperandVector &Operands);
1668 
1669   bool parseDelay(int64_t &Delay);
1670   ParseStatus parseSDelayALU(OperandVector &Operands);
1671 
1672   ParseStatus parseHwreg(OperandVector &Operands);
1673 
1674 private:
1675   struct OperandInfoTy {
1676     SMLoc Loc;
1677     int64_t Val;
1678     bool IsSymbolic = false;
1679     bool IsDefined = false;
1680 
1681     OperandInfoTy(int64_t Val) : Val(Val) {}
1682   };
1683 
1684   struct StructuredOpField : OperandInfoTy {
1685     StringLiteral Id;
1686     StringLiteral Desc;
1687     unsigned Width;
1688     bool IsDefined = false;
1689 
1690     StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1691                       int64_t Default)
1692         : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1693     virtual ~StructuredOpField() = default;
1694 
1695     bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1696       Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1697       return false;
1698     }
1699 
1700     virtual bool validate(AMDGPUAsmParser &Parser) const {
1701       if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1702         return Error(Parser, "not supported on this GPU");
1703       if (!isUIntN(Width, Val))
1704         return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1705       return true;
1706     }
1707   };
1708 
1709   ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1710   bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1711 
1712   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1713   bool validateSendMsg(const OperandInfoTy &Msg,
1714                        const OperandInfoTy &Op,
1715                        const OperandInfoTy &Stream);
1716 
1717   ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1718                              OperandInfoTy &Width);
1719 
1720   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1721   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1722   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1723 
1724   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1725                       const OperandVector &Operands) const;
1726   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1727   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1728   SMLoc getLitLoc(const OperandVector &Operands,
1729                   bool SearchMandatoryLiterals = false) const;
1730   SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1731   SMLoc getConstLoc(const OperandVector &Operands) const;
1732   SMLoc getInstLoc(const OperandVector &Operands) const;
1733 
1734   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1735   bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1736   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1737   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1738   bool validateSOPLiteral(const MCInst &Inst) const;
1739   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1740   bool validateVOPDRegBankConstraints(const MCInst &Inst,
1741                                       const OperandVector &Operands);
1742   bool validateIntClampSupported(const MCInst &Inst);
1743   bool validateMIMGAtomicDMask(const MCInst &Inst);
1744   bool validateMIMGGatherDMask(const MCInst &Inst);
1745   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1746   bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1747   bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1748   bool validateMIMGD16(const MCInst &Inst);
1749   bool validateMIMGMSAA(const MCInst &Inst);
1750   bool validateOpSel(const MCInst &Inst);
1751   bool validateNeg(const MCInst &Inst, int OpName);
1752   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1753   bool validateVccOperand(unsigned Reg) const;
1754   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1755   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1756   bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1757   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1758   bool validateAGPRLdSt(const MCInst &Inst) const;
1759   bool validateVGPRAlign(const MCInst &Inst) const;
1760   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1761   bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1762   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1763   bool validateDivScale(const MCInst &Inst);
1764   bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1765   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1766                              const SMLoc &IDLoc);
1767   bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1768                               const unsigned CPol);
1769   bool validateExeczVcczOperands(const OperandVector &Operands);
1770   bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1771   std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1772   unsigned getConstantBusLimit(unsigned Opcode) const;
1773   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1774   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1775   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1776 
1777   bool isSupportedMnemo(StringRef Mnemo,
1778                         const FeatureBitset &FBS);
1779   bool isSupportedMnemo(StringRef Mnemo,
1780                         const FeatureBitset &FBS,
1781                         ArrayRef<unsigned> Variants);
1782   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1783 
1784   bool isId(const StringRef Id) const;
1785   bool isId(const AsmToken &Token, const StringRef Id) const;
1786   bool isToken(const AsmToken::TokenKind Kind) const;
1787   StringRef getId() const;
1788   bool trySkipId(const StringRef Id);
1789   bool trySkipId(const StringRef Pref, const StringRef Id);
1790   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1791   bool trySkipToken(const AsmToken::TokenKind Kind);
1792   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1793   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1794   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1795 
1796   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1797   AsmToken::TokenKind getTokenKind() const;
1798   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1799   bool parseExpr(OperandVector &Operands);
1800   StringRef getTokenStr() const;
1801   AsmToken peekToken(bool ShouldSkipSpace = true);
1802   AsmToken getToken() const;
1803   SMLoc getLoc() const;
1804   void lex();
1805 
1806 public:
1807   void onBeginOfFile() override;
1808   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1809 
1810   ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1811 
1812   ParseStatus parseExpTgt(OperandVector &Operands);
1813   ParseStatus parseSendMsg(OperandVector &Operands);
1814   ParseStatus parseInterpSlot(OperandVector &Operands);
1815   ParseStatus parseInterpAttr(OperandVector &Operands);
1816   ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1817   ParseStatus parseBoolReg(OperandVector &Operands);
1818 
1819   bool parseSwizzleOperand(int64_t &Op,
1820                            const unsigned MinVal,
1821                            const unsigned MaxVal,
1822                            const StringRef ErrMsg,
1823                            SMLoc &Loc);
1824   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1825                             const unsigned MinVal,
1826                             const unsigned MaxVal,
1827                             const StringRef ErrMsg);
1828   ParseStatus parseSwizzle(OperandVector &Operands);
1829   bool parseSwizzleOffset(int64_t &Imm);
1830   bool parseSwizzleMacro(int64_t &Imm);
1831   bool parseSwizzleQuadPerm(int64_t &Imm);
1832   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1833   bool parseSwizzleBroadcast(int64_t &Imm);
1834   bool parseSwizzleSwap(int64_t &Imm);
1835   bool parseSwizzleReverse(int64_t &Imm);
1836 
1837   ParseStatus parseGPRIdxMode(OperandVector &Operands);
1838   int64_t parseGPRIdxMacro();
1839 
1840   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1841   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1842 
1843   ParseStatus parseOModSI(OperandVector &Operands);
1844 
1845   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1846                OptionalImmIndexMap &OptionalIdx);
1847   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1848   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1849   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1850   void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1851 
1852   void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1853   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1854                     OptionalImmIndexMap &OptionalIdx);
1855   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1856                 OptionalImmIndexMap &OptionalIdx);
1857 
1858   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1859   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1860 
1861   bool parseDimId(unsigned &Encoding);
1862   ParseStatus parseDim(OperandVector &Operands);
1863   bool convertDppBoundCtrl(int64_t &BoundCtrl);
1864   ParseStatus parseDPP8(OperandVector &Operands);
1865   ParseStatus parseDPPCtrl(OperandVector &Operands);
1866   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1867   int64_t parseDPPCtrlSel(StringRef Ctrl);
1868   int64_t parseDPPCtrlPerm();
1869   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1870   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1871     cvtDPP(Inst, Operands, true);
1872   }
1873   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1874                   bool IsDPP8 = false);
1875   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1876     cvtVOP3DPP(Inst, Operands, true);
1877   }
1878 
1879   ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1880                            AMDGPUOperand::ImmTy Type);
1881   ParseStatus parseSDWADstUnused(OperandVector &Operands);
1882   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1883   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1884   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1885   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1886   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1887   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1888                uint64_t BasicInstType,
1889                bool SkipDstVcc = false,
1890                bool SkipSrcVcc = false);
1891 
1892   ParseStatus parseEndpgm(OperandVector &Operands);
1893 
1894   ParseStatus parseVOPD(OperandVector &Operands);
1895 };
1896 
1897 } // end anonymous namespace
1898 
1899 // May be called with integer type with equivalent bitwidth.
1900 static const fltSemantics *getFltSemantics(unsigned Size) {
1901   switch (Size) {
1902   case 4:
1903     return &APFloat::IEEEsingle();
1904   case 8:
1905     return &APFloat::IEEEdouble();
1906   case 2:
1907     return &APFloat::IEEEhalf();
1908   default:
1909     llvm_unreachable("unsupported fp type");
1910   }
1911 }
1912 
1913 static const fltSemantics *getFltSemantics(MVT VT) {
1914   return getFltSemantics(VT.getSizeInBits() / 8);
1915 }
1916 
1917 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1918   switch (OperandType) {
1919   // When floating-point immediate is used as operand of type i16, the 32-bit
1920    // representation of the constant truncated to the 16 LSBs should be used.
1921   case AMDGPU::OPERAND_REG_IMM_INT16:
1922   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1923   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1924   case AMDGPU::OPERAND_REG_IMM_INT32:
1925   case AMDGPU::OPERAND_REG_IMM_FP32:
1926   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1927   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1928   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1929   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1930   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1931   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1932   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1933   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1934   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1935   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1936   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1937   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1938   case AMDGPU::OPERAND_KIMM32:
1939   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1940     return &APFloat::IEEEsingle();
1941   case AMDGPU::OPERAND_REG_IMM_INT64:
1942   case AMDGPU::OPERAND_REG_IMM_FP64:
1943   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1944   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1945   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1946     return &APFloat::IEEEdouble();
1947   case AMDGPU::OPERAND_REG_IMM_FP16:
1948   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1949   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1950   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1951   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1952   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1953   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1954   case AMDGPU::OPERAND_KIMM16:
1955     return &APFloat::IEEEhalf();
1956   case AMDGPU::OPERAND_REG_IMM_BF16:
1957   case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
1958   case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1959   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1960   case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
1961   case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
1962   case AMDGPU::OPERAND_REG_IMM_V2BF16:
1963     return &APFloat::BFloat();
1964   default:
1965     llvm_unreachable("unsupported fp type");
1966   }
1967 }
1968 
1969 //===----------------------------------------------------------------------===//
1970 // Operand
1971 //===----------------------------------------------------------------------===//
1972 
1973 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1974   bool Lost;
1975 
1976   // Convert literal to single precision
1977   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1978                                                APFloat::rmNearestTiesToEven,
1979                                                &Lost);
1980   // We allow precision lost but not overflow or underflow
1981   if (Status != APFloat::opOK &&
1982       Lost &&
1983       ((Status & APFloat::opOverflow)  != 0 ||
1984        (Status & APFloat::opUnderflow) != 0)) {
1985     return false;
1986   }
1987 
1988   return true;
1989 }
1990 
1991 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1992   return isUIntN(Size, Val) || isIntN(Size, Val);
1993 }
1994 
1995 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1996   if (VT.getScalarType() == MVT::i16)
1997     return isInlinableLiteral32(Val, HasInv2Pi);
1998 
1999   if (VT.getScalarType() == MVT::f16)
2000     return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2001 
2002   assert(VT.getScalarType() == MVT::bf16);
2003 
2004   return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2005 }
2006 
2007 bool AMDGPUOperand::isInlinableImm(MVT type) const {
2008 
2009   // This is a hack to enable named inline values like
2010   // shared_base with both 32-bit and 64-bit operands.
2011   // Note that these values are defined as
2012   // 32-bit operands only.
2013   if (isInlineValue()) {
2014     return true;
2015   }
2016 
2017   if (!isImmTy(ImmTyNone)) {
2018     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2019     return false;
2020   }
2021   // TODO: We should avoid using host float here. It would be better to
2022   // check the float bit values which is what a few other places do.
2023   // We've had bot failures before due to weird NaN support on mips hosts.
2024 
2025   APInt Literal(64, Imm.Val);
2026 
2027   if (Imm.IsFPImm) { // We got fp literal token
2028     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2029       return AMDGPU::isInlinableLiteral64(Imm.Val,
2030                                           AsmParser->hasInv2PiInlineImm());
2031     }
2032 
2033     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2034     if (!canLosslesslyConvertToFPType(FPLiteral, type))
2035       return false;
2036 
2037     if (type.getScalarSizeInBits() == 16) {
2038       bool Lost = false;
2039       switch (type.getScalarType().SimpleTy) {
2040       default:
2041         llvm_unreachable("unknown 16-bit type");
2042       case MVT::bf16:
2043         FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2044                           &Lost);
2045         break;
2046       case MVT::f16:
2047         FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2048                           &Lost);
2049         break;
2050       case MVT::i16:
2051         FPLiteral.convert(APFloatBase::IEEEsingle(),
2052                           APFloat::rmNearestTiesToEven, &Lost);
2053         break;
2054       }
2055       // We need to use 32-bit representation here because when a floating-point
2056       // inline constant is used as an i16 operand, its 32-bit representation
2057       // representation will be used. We will need the 32-bit value to check if
2058       // it is FP inline constant.
2059       uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2060       return isInlineableLiteralOp16(ImmVal, type,
2061                                      AsmParser->hasInv2PiInlineImm());
2062     }
2063 
2064     // Check if single precision literal is inlinable
2065     return AMDGPU::isInlinableLiteral32(
2066       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2067       AsmParser->hasInv2PiInlineImm());
2068   }
2069 
2070   // We got int literal token.
2071   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2072     return AMDGPU::isInlinableLiteral64(Imm.Val,
2073                                         AsmParser->hasInv2PiInlineImm());
2074   }
2075 
2076   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2077     return false;
2078   }
2079 
2080   if (type.getScalarSizeInBits() == 16) {
2081     return isInlineableLiteralOp16(
2082       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2083       type, AsmParser->hasInv2PiInlineImm());
2084   }
2085 
2086   return AMDGPU::isInlinableLiteral32(
2087     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2088     AsmParser->hasInv2PiInlineImm());
2089 }
2090 
2091 bool AMDGPUOperand::isLiteralImm(MVT type) const {
2092   // Check that this immediate can be added as literal
2093   if (!isImmTy(ImmTyNone)) {
2094     return false;
2095   }
2096 
2097   if (!Imm.IsFPImm) {
2098     // We got int literal token.
2099 
2100     if (type == MVT::f64 && hasFPModifiers()) {
2101       // Cannot apply fp modifiers to int literals preserving the same semantics
2102       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2103       // disable these cases.
2104       return false;
2105     }
2106 
2107     unsigned Size = type.getSizeInBits();
2108     if (Size == 64)
2109       Size = 32;
2110 
2111     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2112     // types.
2113     return isSafeTruncation(Imm.Val, Size);
2114   }
2115 
2116   // We got fp literal token
2117   if (type == MVT::f64) { // Expected 64-bit fp operand
2118     // We would set low 64-bits of literal to zeroes but we accept this literals
2119     return true;
2120   }
2121 
2122   if (type == MVT::i64) { // Expected 64-bit int operand
2123     // We don't allow fp literals in 64-bit integer instructions. It is
2124     // unclear how we should encode them.
2125     return false;
2126   }
2127 
2128   // We allow fp literals with f16x2 operands assuming that the specified
2129   // literal goes into the lower half and the upper half is zero. We also
2130   // require that the literal may be losslessly converted to f16.
2131   //
2132   // For i16x2 operands, we assume that the specified literal is encoded as a
2133   // single-precision float. This is pretty odd, but it matches SP3 and what
2134   // happens in hardware.
2135   MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
2136                      : (type == MVT::v2i16) ? MVT::f32
2137                      : (type == MVT::v2f32) ? MVT::f32
2138                                             : type;
2139 
2140   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2141   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2142 }
2143 
2144 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2145   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2146 }
2147 
2148 bool AMDGPUOperand::isVRegWithInputMods() const {
2149   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2150          // GFX90A allows DPP on 64-bit operands.
2151          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2152           AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2153 }
2154 
2155 template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2156   return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2157                              : AMDGPU::VGPR_16_Lo128RegClassID);
2158 }
2159 
2160 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2161   if (AsmParser->isVI())
2162     return isVReg32();
2163   if (AsmParser->isGFX9Plus())
2164     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2165   return false;
2166 }
2167 
2168 bool AMDGPUOperand::isSDWAFP16Operand() const {
2169   return isSDWAOperand(MVT::f16);
2170 }
2171 
2172 bool AMDGPUOperand::isSDWAFP32Operand() const {
2173   return isSDWAOperand(MVT::f32);
2174 }
2175 
2176 bool AMDGPUOperand::isSDWAInt16Operand() const {
2177   return isSDWAOperand(MVT::i16);
2178 }
2179 
2180 bool AMDGPUOperand::isSDWAInt32Operand() const {
2181   return isSDWAOperand(MVT::i32);
2182 }
2183 
2184 bool AMDGPUOperand::isBoolReg() const {
2185   auto FB = AsmParser->getFeatureBits();
2186   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2187                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2188 }
2189 
2190 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2191 {
2192   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2193   assert(Size == 2 || Size == 4 || Size == 8);
2194 
2195   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2196 
2197   if (Imm.Mods.Abs) {
2198     Val &= ~FpSignMask;
2199   }
2200   if (Imm.Mods.Neg) {
2201     Val ^= FpSignMask;
2202   }
2203 
2204   return Val;
2205 }
2206 
2207 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2208   if (isExpr()) {
2209     Inst.addOperand(MCOperand::createExpr(Expr));
2210     return;
2211   }
2212 
2213   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2214                              Inst.getNumOperands())) {
2215     addLiteralImmOperand(Inst, Imm.Val,
2216                          ApplyModifiers &
2217                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2218   } else {
2219     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2220     Inst.addOperand(MCOperand::createImm(Imm.Val));
2221     setImmKindNone();
2222   }
2223 }
2224 
2225 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2226   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2227   auto OpNum = Inst.getNumOperands();
2228   // Check that this operand accepts literals
2229   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2230 
2231   if (ApplyModifiers) {
2232     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2233     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2234     Val = applyInputFPModifiers(Val, Size);
2235   }
2236 
2237   APInt Literal(64, Val);
2238   uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2239 
2240   if (Imm.IsFPImm) { // We got fp literal token
2241     switch (OpTy) {
2242     case AMDGPU::OPERAND_REG_IMM_INT64:
2243     case AMDGPU::OPERAND_REG_IMM_FP64:
2244     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2245     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2246     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2247       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2248                                        AsmParser->hasInv2PiInlineImm())) {
2249         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2250         setImmKindConst();
2251         return;
2252       }
2253 
2254       // Non-inlineable
2255       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2256         // For fp operands we check if low 32 bits are zeros
2257         if (Literal.getLoBits(32) != 0) {
2258           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2259           "Can't encode literal as exact 64-bit floating-point operand. "
2260           "Low 32-bits will be set to zero");
2261           Val &= 0xffffffff00000000u;
2262         }
2263 
2264         Inst.addOperand(MCOperand::createImm(Val));
2265         setImmKindLiteral();
2266         return;
2267       }
2268 
2269       // We don't allow fp literals in 64-bit integer instructions. It is
2270       // unclear how we should encode them. This case should be checked earlier
2271       // in predicate methods (isLiteralImm())
2272       llvm_unreachable("fp literal in 64-bit integer instruction.");
2273 
2274     case AMDGPU::OPERAND_REG_IMM_BF16:
2275     case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
2276     case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2277     case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2278     case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
2279     case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
2280     case AMDGPU::OPERAND_REG_IMM_V2BF16:
2281       if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2282         // This is the 1/(2*pi) which is going to be truncated to bf16 with the
2283         // loss of precision. The constant represents ideomatic fp32 value of
2284         // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
2285         // bits. Prevent rounding below.
2286         Inst.addOperand(MCOperand::createImm(0x3e22));
2287         setImmKindLiteral();
2288         return;
2289       }
2290       [[fallthrough]];
2291 
2292     case AMDGPU::OPERAND_REG_IMM_INT32:
2293     case AMDGPU::OPERAND_REG_IMM_FP32:
2294     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2295     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2296     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2297     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2298     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2299     case AMDGPU::OPERAND_REG_IMM_INT16:
2300     case AMDGPU::OPERAND_REG_IMM_FP16:
2301     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2302     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2303     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2304     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2305     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2306     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2307     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2308     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2309     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2310     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2311     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2312     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2313     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2314     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2315     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2316     case AMDGPU::OPERAND_KIMM32:
2317     case AMDGPU::OPERAND_KIMM16:
2318     case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
2319       bool lost;
2320       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2321       // Convert literal to single precision
2322       FPLiteral.convert(*getOpFltSemantics(OpTy),
2323                         APFloat::rmNearestTiesToEven, &lost);
2324       // We allow precision lost but not overflow or underflow. This should be
2325       // checked earlier in isLiteralImm()
2326 
2327       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2328       Inst.addOperand(MCOperand::createImm(ImmVal));
2329       if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2330         setImmKindMandatoryLiteral();
2331       } else {
2332         setImmKindLiteral();
2333       }
2334       return;
2335     }
2336     default:
2337       llvm_unreachable("invalid operand size");
2338     }
2339 
2340     return;
2341   }
2342 
2343   // We got int literal token.
2344   // Only sign extend inline immediates.
2345   switch (OpTy) {
2346   case AMDGPU::OPERAND_REG_IMM_INT32:
2347   case AMDGPU::OPERAND_REG_IMM_FP32:
2348   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2349   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2350   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2351   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2352   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2353   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2354   case AMDGPU::OPERAND_REG_IMM_V2BF16:
2355   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2356   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2357   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2358   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2359   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2360   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
2361     if (isSafeTruncation(Val, 32) &&
2362         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2363                                      AsmParser->hasInv2PiInlineImm())) {
2364       Inst.addOperand(MCOperand::createImm(Val));
2365       setImmKindConst();
2366       return;
2367     }
2368 
2369     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2370     setImmKindLiteral();
2371     return;
2372 
2373   case AMDGPU::OPERAND_REG_IMM_INT64:
2374   case AMDGPU::OPERAND_REG_IMM_FP64:
2375   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2376   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2377   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2378     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2379       Inst.addOperand(MCOperand::createImm(Val));
2380       setImmKindConst();
2381       return;
2382     }
2383 
2384     Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2385                                                     : Lo_32(Val);
2386 
2387     Inst.addOperand(MCOperand::createImm(Val));
2388     setImmKindLiteral();
2389     return;
2390 
2391   case AMDGPU::OPERAND_REG_IMM_INT16:
2392   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2393   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2394     if (isSafeTruncation(Val, 16) &&
2395         AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2396       Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2397       setImmKindConst();
2398       return;
2399     }
2400 
2401     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2402     setImmKindLiteral();
2403     return;
2404 
2405   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2406   case AMDGPU::OPERAND_REG_IMM_FP16:
2407   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2408   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2409     if (isSafeTruncation(Val, 16) &&
2410         AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2411                                        AsmParser->hasInv2PiInlineImm())) {
2412       Inst.addOperand(MCOperand::createImm(Val));
2413       setImmKindConst();
2414       return;
2415     }
2416 
2417     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2418     setImmKindLiteral();
2419     return;
2420 
2421   case AMDGPU::OPERAND_REG_IMM_BF16:
2422   case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
2423   case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2424   case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
2425     if (isSafeTruncation(Val, 16) &&
2426         AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2427                                      AsmParser->hasInv2PiInlineImm())) {
2428       Inst.addOperand(MCOperand::createImm(Val));
2429       setImmKindConst();
2430       return;
2431     }
2432 
2433     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2434     setImmKindLiteral();
2435     return;
2436 
2437   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2438   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
2439     assert(isSafeTruncation(Val, 16));
2440     assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2441     Inst.addOperand(MCOperand::createImm(Val));
2442     return;
2443   }
2444   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2445   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2446     assert(isSafeTruncation(Val, 16));
2447     assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2448                                           AsmParser->hasInv2PiInlineImm()));
2449 
2450     Inst.addOperand(MCOperand::createImm(Val));
2451     return;
2452   }
2453 
2454   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2455   case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: {
2456     assert(isSafeTruncation(Val, 16));
2457     assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2458                                           AsmParser->hasInv2PiInlineImm()));
2459 
2460     Inst.addOperand(MCOperand::createImm(Val));
2461     return;
2462   }
2463 
2464   case AMDGPU::OPERAND_KIMM32:
2465     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2466     setImmKindMandatoryLiteral();
2467     return;
2468   case AMDGPU::OPERAND_KIMM16:
2469     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2470     setImmKindMandatoryLiteral();
2471     return;
2472   default:
2473     llvm_unreachable("invalid operand size");
2474   }
2475 }
2476 
2477 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2478   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2479 }
2480 
2481 bool AMDGPUOperand::isInlineValue() const {
2482   return isRegKind() && ::isInlineValue(getReg());
2483 }
2484 
2485 //===----------------------------------------------------------------------===//
2486 // AsmParser
2487 //===----------------------------------------------------------------------===//
2488 
2489 void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2490   // TODO: make those pre-defined variables read-only.
2491   // Currently there is none suitable machinery in the core llvm-mc for this.
2492   // MCSymbol::isRedefinable is intended for another purpose, and
2493   // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
2494   MCContext &Ctx = getContext();
2495   MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2496   Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2497 }
2498 
2499 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2500   if (Is == IS_VGPR) {
2501     switch (RegWidth) {
2502       default: return -1;
2503       case 32:
2504         return AMDGPU::VGPR_32RegClassID;
2505       case 64:
2506         return AMDGPU::VReg_64RegClassID;
2507       case 96:
2508         return AMDGPU::VReg_96RegClassID;
2509       case 128:
2510         return AMDGPU::VReg_128RegClassID;
2511       case 160:
2512         return AMDGPU::VReg_160RegClassID;
2513       case 192:
2514         return AMDGPU::VReg_192RegClassID;
2515       case 224:
2516         return AMDGPU::VReg_224RegClassID;
2517       case 256:
2518         return AMDGPU::VReg_256RegClassID;
2519       case 288:
2520         return AMDGPU::VReg_288RegClassID;
2521       case 320:
2522         return AMDGPU::VReg_320RegClassID;
2523       case 352:
2524         return AMDGPU::VReg_352RegClassID;
2525       case 384:
2526         return AMDGPU::VReg_384RegClassID;
2527       case 512:
2528         return AMDGPU::VReg_512RegClassID;
2529       case 1024:
2530         return AMDGPU::VReg_1024RegClassID;
2531     }
2532   } else if (Is == IS_TTMP) {
2533     switch (RegWidth) {
2534       default: return -1;
2535       case 32:
2536         return AMDGPU::TTMP_32RegClassID;
2537       case 64:
2538         return AMDGPU::TTMP_64RegClassID;
2539       case 128:
2540         return AMDGPU::TTMP_128RegClassID;
2541       case 256:
2542         return AMDGPU::TTMP_256RegClassID;
2543       case 512:
2544         return AMDGPU::TTMP_512RegClassID;
2545     }
2546   } else if (Is == IS_SGPR) {
2547     switch (RegWidth) {
2548       default: return -1;
2549       case 32:
2550         return AMDGPU::SGPR_32RegClassID;
2551       case 64:
2552         return AMDGPU::SGPR_64RegClassID;
2553       case 96:
2554         return AMDGPU::SGPR_96RegClassID;
2555       case 128:
2556         return AMDGPU::SGPR_128RegClassID;
2557       case 160:
2558         return AMDGPU::SGPR_160RegClassID;
2559       case 192:
2560         return AMDGPU::SGPR_192RegClassID;
2561       case 224:
2562         return AMDGPU::SGPR_224RegClassID;
2563       case 256:
2564         return AMDGPU::SGPR_256RegClassID;
2565       case 288:
2566         return AMDGPU::SGPR_288RegClassID;
2567       case 320:
2568         return AMDGPU::SGPR_320RegClassID;
2569       case 352:
2570         return AMDGPU::SGPR_352RegClassID;
2571       case 384:
2572         return AMDGPU::SGPR_384RegClassID;
2573       case 512:
2574         return AMDGPU::SGPR_512RegClassID;
2575     }
2576   } else if (Is == IS_AGPR) {
2577     switch (RegWidth) {
2578       default: return -1;
2579       case 32:
2580         return AMDGPU::AGPR_32RegClassID;
2581       case 64:
2582         return AMDGPU::AReg_64RegClassID;
2583       case 96:
2584         return AMDGPU::AReg_96RegClassID;
2585       case 128:
2586         return AMDGPU::AReg_128RegClassID;
2587       case 160:
2588         return AMDGPU::AReg_160RegClassID;
2589       case 192:
2590         return AMDGPU::AReg_192RegClassID;
2591       case 224:
2592         return AMDGPU::AReg_224RegClassID;
2593       case 256:
2594         return AMDGPU::AReg_256RegClassID;
2595       case 288:
2596         return AMDGPU::AReg_288RegClassID;
2597       case 320:
2598         return AMDGPU::AReg_320RegClassID;
2599       case 352:
2600         return AMDGPU::AReg_352RegClassID;
2601       case 384:
2602         return AMDGPU::AReg_384RegClassID;
2603       case 512:
2604         return AMDGPU::AReg_512RegClassID;
2605       case 1024:
2606         return AMDGPU::AReg_1024RegClassID;
2607     }
2608   }
2609   return -1;
2610 }
2611 
2612 static unsigned getSpecialRegForName(StringRef RegName) {
2613   return StringSwitch<unsigned>(RegName)
2614     .Case("exec", AMDGPU::EXEC)
2615     .Case("vcc", AMDGPU::VCC)
2616     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2617     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2618     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2619     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2620     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2621     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2622     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2623     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2624     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2625     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2626     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2627     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2628     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2629     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2630     .Case("m0", AMDGPU::M0)
2631     .Case("vccz", AMDGPU::SRC_VCCZ)
2632     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2633     .Case("execz", AMDGPU::SRC_EXECZ)
2634     .Case("src_execz", AMDGPU::SRC_EXECZ)
2635     .Case("scc", AMDGPU::SRC_SCC)
2636     .Case("src_scc", AMDGPU::SRC_SCC)
2637     .Case("tba", AMDGPU::TBA)
2638     .Case("tma", AMDGPU::TMA)
2639     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2640     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2641     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2642     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2643     .Case("vcc_lo", AMDGPU::VCC_LO)
2644     .Case("vcc_hi", AMDGPU::VCC_HI)
2645     .Case("exec_lo", AMDGPU::EXEC_LO)
2646     .Case("exec_hi", AMDGPU::EXEC_HI)
2647     .Case("tma_lo", AMDGPU::TMA_LO)
2648     .Case("tma_hi", AMDGPU::TMA_HI)
2649     .Case("tba_lo", AMDGPU::TBA_LO)
2650     .Case("tba_hi", AMDGPU::TBA_HI)
2651     .Case("pc", AMDGPU::PC_REG)
2652     .Case("null", AMDGPU::SGPR_NULL)
2653     .Default(AMDGPU::NoRegister);
2654 }
2655 
2656 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2657                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2658   auto R = parseRegister();
2659   if (!R) return true;
2660   assert(R->isReg());
2661   RegNo = R->getReg();
2662   StartLoc = R->getStartLoc();
2663   EndLoc = R->getEndLoc();
2664   return false;
2665 }
2666 
2667 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2668                                     SMLoc &EndLoc) {
2669   return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2670 }
2671 
2672 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2673                                               SMLoc &EndLoc) {
2674   bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2675   bool PendingErrors = getParser().hasPendingError();
2676   getParser().clearPendingErrors();
2677   if (PendingErrors)
2678     return ParseStatus::Failure;
2679   if (Result)
2680     return ParseStatus::NoMatch;
2681   return ParseStatus::Success;
2682 }
2683 
2684 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2685                                             RegisterKind RegKind, unsigned Reg1,
2686                                             SMLoc Loc) {
2687   switch (RegKind) {
2688   case IS_SPECIAL:
2689     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2690       Reg = AMDGPU::EXEC;
2691       RegWidth = 64;
2692       return true;
2693     }
2694     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2695       Reg = AMDGPU::FLAT_SCR;
2696       RegWidth = 64;
2697       return true;
2698     }
2699     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2700       Reg = AMDGPU::XNACK_MASK;
2701       RegWidth = 64;
2702       return true;
2703     }
2704     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2705       Reg = AMDGPU::VCC;
2706       RegWidth = 64;
2707       return true;
2708     }
2709     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2710       Reg = AMDGPU::TBA;
2711       RegWidth = 64;
2712       return true;
2713     }
2714     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2715       Reg = AMDGPU::TMA;
2716       RegWidth = 64;
2717       return true;
2718     }
2719     Error(Loc, "register does not fit in the list");
2720     return false;
2721   case IS_VGPR:
2722   case IS_SGPR:
2723   case IS_AGPR:
2724   case IS_TTMP:
2725     if (Reg1 != Reg + RegWidth / 32) {
2726       Error(Loc, "registers in a list must have consecutive indices");
2727       return false;
2728     }
2729     RegWidth += 32;
2730     return true;
2731   default:
2732     llvm_unreachable("unexpected register kind");
2733   }
2734 }
2735 
2736 struct RegInfo {
2737   StringLiteral Name;
2738   RegisterKind Kind;
2739 };
2740 
2741 static constexpr RegInfo RegularRegisters[] = {
2742   {{"v"},    IS_VGPR},
2743   {{"s"},    IS_SGPR},
2744   {{"ttmp"}, IS_TTMP},
2745   {{"acc"},  IS_AGPR},
2746   {{"a"},    IS_AGPR},
2747 };
2748 
2749 static bool isRegularReg(RegisterKind Kind) {
2750   return Kind == IS_VGPR ||
2751          Kind == IS_SGPR ||
2752          Kind == IS_TTMP ||
2753          Kind == IS_AGPR;
2754 }
2755 
2756 static const RegInfo* getRegularRegInfo(StringRef Str) {
2757   for (const RegInfo &Reg : RegularRegisters)
2758     if (Str.starts_with(Reg.Name))
2759       return &Reg;
2760   return nullptr;
2761 }
2762 
2763 static bool getRegNum(StringRef Str, unsigned& Num) {
2764   return !Str.getAsInteger(10, Num);
2765 }
2766 
2767 bool
2768 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2769                             const AsmToken &NextToken) const {
2770 
2771   // A list of consecutive registers: [s0,s1,s2,s3]
2772   if (Token.is(AsmToken::LBrac))
2773     return true;
2774 
2775   if (!Token.is(AsmToken::Identifier))
2776     return false;
2777 
2778   // A single register like s0 or a range of registers like s[0:1]
2779 
2780   StringRef Str = Token.getString();
2781   const RegInfo *Reg = getRegularRegInfo(Str);
2782   if (Reg) {
2783     StringRef RegName = Reg->Name;
2784     StringRef RegSuffix = Str.substr(RegName.size());
2785     if (!RegSuffix.empty()) {
2786       RegSuffix.consume_back(".l");
2787       RegSuffix.consume_back(".h");
2788       unsigned Num;
2789       // A single register with an index: rXX
2790       if (getRegNum(RegSuffix, Num))
2791         return true;
2792     } else {
2793       // A range of registers: r[XX:YY].
2794       if (NextToken.is(AsmToken::LBrac))
2795         return true;
2796     }
2797   }
2798 
2799   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2800 }
2801 
2802 bool
2803 AMDGPUAsmParser::isRegister()
2804 {
2805   return isRegister(getToken(), peekToken());
2806 }
2807 
2808 unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2809                                         unsigned SubReg, unsigned RegWidth,
2810                                         SMLoc Loc) {
2811   assert(isRegularReg(RegKind));
2812 
2813   unsigned AlignSize = 1;
2814   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2815     // SGPR and TTMP registers must be aligned.
2816     // Max required alignment is 4 dwords.
2817     AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2818   }
2819 
2820   if (RegNum % AlignSize != 0) {
2821     Error(Loc, "invalid register alignment");
2822     return AMDGPU::NoRegister;
2823   }
2824 
2825   unsigned RegIdx = RegNum / AlignSize;
2826   int RCID = getRegClass(RegKind, RegWidth);
2827   if (RCID == -1) {
2828     Error(Loc, "invalid or unsupported register size");
2829     return AMDGPU::NoRegister;
2830   }
2831 
2832   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2833   const MCRegisterClass RC = TRI->getRegClass(RCID);
2834   if (RegIdx >= RC.getNumRegs()) {
2835     Error(Loc, "register index is out of range");
2836     return AMDGPU::NoRegister;
2837   }
2838 
2839   unsigned Reg = RC.getRegister(RegIdx);
2840 
2841   if (SubReg) {
2842     Reg = TRI->getSubReg(Reg, SubReg);
2843 
2844     // Currently all regular registers have their .l and .h subregisters, so
2845     // we should never need to generate an error here.
2846     assert(Reg && "Invalid subregister!");
2847   }
2848 
2849   return Reg;
2850 }
2851 
2852 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2853   int64_t RegLo, RegHi;
2854   if (!skipToken(AsmToken::LBrac, "missing register index"))
2855     return false;
2856 
2857   SMLoc FirstIdxLoc = getLoc();
2858   SMLoc SecondIdxLoc;
2859 
2860   if (!parseExpr(RegLo))
2861     return false;
2862 
2863   if (trySkipToken(AsmToken::Colon)) {
2864     SecondIdxLoc = getLoc();
2865     if (!parseExpr(RegHi))
2866       return false;
2867   } else {
2868     RegHi = RegLo;
2869   }
2870 
2871   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2872     return false;
2873 
2874   if (!isUInt<32>(RegLo)) {
2875     Error(FirstIdxLoc, "invalid register index");
2876     return false;
2877   }
2878 
2879   if (!isUInt<32>(RegHi)) {
2880     Error(SecondIdxLoc, "invalid register index");
2881     return false;
2882   }
2883 
2884   if (RegLo > RegHi) {
2885     Error(FirstIdxLoc, "first register index should not exceed second index");
2886     return false;
2887   }
2888 
2889   Num = static_cast<unsigned>(RegLo);
2890   RegWidth = 32 * ((RegHi - RegLo) + 1);
2891   return true;
2892 }
2893 
2894 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2895                                           unsigned &RegNum, unsigned &RegWidth,
2896                                           SmallVectorImpl<AsmToken> &Tokens) {
2897   assert(isToken(AsmToken::Identifier));
2898   unsigned Reg = getSpecialRegForName(getTokenStr());
2899   if (Reg) {
2900     RegNum = 0;
2901     RegWidth = 32;
2902     RegKind = IS_SPECIAL;
2903     Tokens.push_back(getToken());
2904     lex(); // skip register name
2905   }
2906   return Reg;
2907 }
2908 
2909 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2910                                           unsigned &RegNum, unsigned &RegWidth,
2911                                           SmallVectorImpl<AsmToken> &Tokens) {
2912   assert(isToken(AsmToken::Identifier));
2913   StringRef RegName = getTokenStr();
2914   auto Loc = getLoc();
2915 
2916   const RegInfo *RI = getRegularRegInfo(RegName);
2917   if (!RI) {
2918     Error(Loc, "invalid register name");
2919     return AMDGPU::NoRegister;
2920   }
2921 
2922   Tokens.push_back(getToken());
2923   lex(); // skip register name
2924 
2925   RegKind = RI->Kind;
2926   StringRef RegSuffix = RegName.substr(RI->Name.size());
2927   unsigned SubReg = NoSubRegister;
2928   if (!RegSuffix.empty()) {
2929     // We don't know the opcode till we are done parsing, so we don't know if
2930     // registers should be 16 or 32 bit. It is therefore mandatory to put .l or
2931     // .h to correctly specify 16 bit registers. We also can't determine class
2932     // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16.
2933     if (RegSuffix.consume_back(".l"))
2934       SubReg = AMDGPU::lo16;
2935     else if (RegSuffix.consume_back(".h"))
2936       SubReg = AMDGPU::hi16;
2937 
2938     // Single 32-bit register: vXX.
2939     if (!getRegNum(RegSuffix, RegNum)) {
2940       Error(Loc, "invalid register index");
2941       return AMDGPU::NoRegister;
2942     }
2943     RegWidth = 32;
2944   } else {
2945     // Range of registers: v[XX:YY]. ":YY" is optional.
2946     if (!ParseRegRange(RegNum, RegWidth))
2947       return AMDGPU::NoRegister;
2948   }
2949 
2950   return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2951 }
2952 
2953 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2954                                        unsigned &RegWidth,
2955                                        SmallVectorImpl<AsmToken> &Tokens) {
2956   unsigned Reg = AMDGPU::NoRegister;
2957   auto ListLoc = getLoc();
2958 
2959   if (!skipToken(AsmToken::LBrac,
2960                  "expected a register or a list of registers")) {
2961     return AMDGPU::NoRegister;
2962   }
2963 
2964   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2965 
2966   auto Loc = getLoc();
2967   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2968     return AMDGPU::NoRegister;
2969   if (RegWidth != 32) {
2970     Error(Loc, "expected a single 32-bit register");
2971     return AMDGPU::NoRegister;
2972   }
2973 
2974   for (; trySkipToken(AsmToken::Comma); ) {
2975     RegisterKind NextRegKind;
2976     unsigned NextReg, NextRegNum, NextRegWidth;
2977     Loc = getLoc();
2978 
2979     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2980                              NextRegNum, NextRegWidth,
2981                              Tokens)) {
2982       return AMDGPU::NoRegister;
2983     }
2984     if (NextRegWidth != 32) {
2985       Error(Loc, "expected a single 32-bit register");
2986       return AMDGPU::NoRegister;
2987     }
2988     if (NextRegKind != RegKind) {
2989       Error(Loc, "registers in a list must be of the same kind");
2990       return AMDGPU::NoRegister;
2991     }
2992     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2993       return AMDGPU::NoRegister;
2994   }
2995 
2996   if (!skipToken(AsmToken::RBrac,
2997                  "expected a comma or a closing square bracket")) {
2998     return AMDGPU::NoRegister;
2999   }
3000 
3001   if (isRegularReg(RegKind))
3002     Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3003 
3004   return Reg;
3005 }
3006 
3007 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3008                                           unsigned &RegNum, unsigned &RegWidth,
3009                                           SmallVectorImpl<AsmToken> &Tokens) {
3010   auto Loc = getLoc();
3011   Reg = AMDGPU::NoRegister;
3012 
3013   if (isToken(AsmToken::Identifier)) {
3014     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3015     if (Reg == AMDGPU::NoRegister)
3016       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3017   } else {
3018     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3019   }
3020 
3021   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3022   if (Reg == AMDGPU::NoRegister) {
3023     assert(Parser.hasPendingError());
3024     return false;
3025   }
3026 
3027   if (!subtargetHasRegister(*TRI, Reg)) {
3028     if (Reg == AMDGPU::SGPR_NULL) {
3029       Error(Loc, "'null' operand is not supported on this GPU");
3030     } else {
3031       Error(Loc, "register not available on this GPU");
3032     }
3033     return false;
3034   }
3035 
3036   return true;
3037 }
3038 
3039 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3040                                           unsigned &RegNum, unsigned &RegWidth,
3041                                           bool RestoreOnFailure /*=false*/) {
3042   Reg = AMDGPU::NoRegister;
3043 
3044   SmallVector<AsmToken, 1> Tokens;
3045   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3046     if (RestoreOnFailure) {
3047       while (!Tokens.empty()) {
3048         getLexer().UnLex(Tokens.pop_back_val());
3049       }
3050     }
3051     return true;
3052   }
3053   return false;
3054 }
3055 
3056 std::optional<StringRef>
3057 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3058   switch (RegKind) {
3059   case IS_VGPR:
3060     return StringRef(".amdgcn.next_free_vgpr");
3061   case IS_SGPR:
3062     return StringRef(".amdgcn.next_free_sgpr");
3063   default:
3064     return std::nullopt;
3065   }
3066 }
3067 
3068 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3069   auto SymbolName = getGprCountSymbolName(RegKind);
3070   assert(SymbolName && "initializing invalid register kind");
3071   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3072   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3073 }
3074 
3075 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3076                                             unsigned DwordRegIndex,
3077                                             unsigned RegWidth) {
3078   // Symbols are only defined for GCN targets
3079   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3080     return true;
3081 
3082   auto SymbolName = getGprCountSymbolName(RegKind);
3083   if (!SymbolName)
3084     return true;
3085   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3086 
3087   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3088   int64_t OldCount;
3089 
3090   if (!Sym->isVariable())
3091     return !Error(getLoc(),
3092                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3093   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3094     return !Error(
3095         getLoc(),
3096         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3097 
3098   if (OldCount <= NewMax)
3099     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3100 
3101   return true;
3102 }
3103 
3104 std::unique_ptr<AMDGPUOperand>
3105 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3106   const auto &Tok = getToken();
3107   SMLoc StartLoc = Tok.getLoc();
3108   SMLoc EndLoc = Tok.getEndLoc();
3109   RegisterKind RegKind;
3110   unsigned Reg, RegNum, RegWidth;
3111 
3112   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3113     return nullptr;
3114   }
3115   if (isHsaAbi(getSTI())) {
3116     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3117       return nullptr;
3118   } else
3119     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3120   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3121 }
3122 
3123 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3124                                       bool HasSP3AbsModifier, bool HasLit) {
3125   // TODO: add syntactic sugar for 1/(2*PI)
3126 
3127   if (isRegister())
3128     return ParseStatus::NoMatch;
3129   assert(!isModifier());
3130 
3131   if (!HasLit) {
3132     HasLit = trySkipId("lit");
3133     if (HasLit) {
3134       if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3135         return ParseStatus::Failure;
3136       ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3137       if (S.isSuccess() &&
3138           !skipToken(AsmToken::RParen, "expected closing parentheses"))
3139         return ParseStatus::Failure;
3140       return S;
3141     }
3142   }
3143 
3144   const auto& Tok = getToken();
3145   const auto& NextTok = peekToken();
3146   bool IsReal = Tok.is(AsmToken::Real);
3147   SMLoc S = getLoc();
3148   bool Negate = false;
3149 
3150   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3151     lex();
3152     IsReal = true;
3153     Negate = true;
3154   }
3155 
3156   AMDGPUOperand::Modifiers Mods;
3157   Mods.Lit = HasLit;
3158 
3159   if (IsReal) {
3160     // Floating-point expressions are not supported.
3161     // Can only allow floating-point literals with an
3162     // optional sign.
3163 
3164     StringRef Num = getTokenStr();
3165     lex();
3166 
3167     APFloat RealVal(APFloat::IEEEdouble());
3168     auto roundMode = APFloat::rmNearestTiesToEven;
3169     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3170       return ParseStatus::Failure;
3171     if (Negate)
3172       RealVal.changeSign();
3173 
3174     Operands.push_back(
3175       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3176                                AMDGPUOperand::ImmTyNone, true));
3177     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3178     Op.setModifiers(Mods);
3179 
3180     return ParseStatus::Success;
3181 
3182   } else {
3183     int64_t IntVal;
3184     const MCExpr *Expr;
3185     SMLoc S = getLoc();
3186 
3187     if (HasSP3AbsModifier) {
3188       // This is a workaround for handling expressions
3189       // as arguments of SP3 'abs' modifier, for example:
3190       //     |1.0|
3191       //     |-1|
3192       //     |1+x|
3193       // This syntax is not compatible with syntax of standard
3194       // MC expressions (due to the trailing '|').
3195       SMLoc EndLoc;
3196       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3197         return ParseStatus::Failure;
3198     } else {
3199       if (Parser.parseExpression(Expr))
3200         return ParseStatus::Failure;
3201     }
3202 
3203     if (Expr->evaluateAsAbsolute(IntVal)) {
3204       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3205       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3206       Op.setModifiers(Mods);
3207     } else {
3208       if (HasLit)
3209         return ParseStatus::NoMatch;
3210       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3211     }
3212 
3213     return ParseStatus::Success;
3214   }
3215 
3216   return ParseStatus::NoMatch;
3217 }
3218 
3219 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3220   if (!isRegister())
3221     return ParseStatus::NoMatch;
3222 
3223   if (auto R = parseRegister()) {
3224     assert(R->isReg());
3225     Operands.push_back(std::move(R));
3226     return ParseStatus::Success;
3227   }
3228   return ParseStatus::Failure;
3229 }
3230 
3231 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3232                                            bool HasSP3AbsMod, bool HasLit) {
3233   ParseStatus Res = parseReg(Operands);
3234   if (!Res.isNoMatch())
3235     return Res;
3236   if (isModifier())
3237     return ParseStatus::NoMatch;
3238   return parseImm(Operands, HasSP3AbsMod, HasLit);
3239 }
3240 
3241 bool
3242 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3243   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3244     const auto &str = Token.getString();
3245     return str == "abs" || str == "neg" || str == "sext";
3246   }
3247   return false;
3248 }
3249 
3250 bool
3251 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3252   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3253 }
3254 
3255 bool
3256 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3257   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3258 }
3259 
3260 bool
3261 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3262   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3263 }
3264 
3265 // Check if this is an operand modifier or an opcode modifier
3266 // which may look like an expression but it is not. We should
3267 // avoid parsing these modifiers as expressions. Currently
3268 // recognized sequences are:
3269 //   |...|
3270 //   abs(...)
3271 //   neg(...)
3272 //   sext(...)
3273 //   -reg
3274 //   -|...|
3275 //   -abs(...)
3276 //   name:...
3277 //
3278 bool
3279 AMDGPUAsmParser::isModifier() {
3280 
3281   AsmToken Tok = getToken();
3282   AsmToken NextToken[2];
3283   peekTokens(NextToken);
3284 
3285   return isOperandModifier(Tok, NextToken[0]) ||
3286          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3287          isOpcodeModifierWithVal(Tok, NextToken[0]);
3288 }
3289 
3290 // Check if the current token is an SP3 'neg' modifier.
3291 // Currently this modifier is allowed in the following context:
3292 //
3293 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3294 // 2. Before an 'abs' modifier: -abs(...)
3295 // 3. Before an SP3 'abs' modifier: -|...|
3296 //
3297 // In all other cases "-" is handled as a part
3298 // of an expression that follows the sign.
3299 //
3300 // Note: When "-" is followed by an integer literal,
3301 // this is interpreted as integer negation rather
3302 // than a floating-point NEG modifier applied to N.
3303 // Beside being contr-intuitive, such use of floating-point
3304 // NEG modifier would have resulted in different meaning
3305 // of integer literals used with VOP1/2/C and VOP3,
3306 // for example:
3307 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3308 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3309 // Negative fp literals with preceding "-" are
3310 // handled likewise for uniformity
3311 //
3312 bool
3313 AMDGPUAsmParser::parseSP3NegModifier() {
3314 
3315   AsmToken NextToken[2];
3316   peekTokens(NextToken);
3317 
3318   if (isToken(AsmToken::Minus) &&
3319       (isRegister(NextToken[0], NextToken[1]) ||
3320        NextToken[0].is(AsmToken::Pipe) ||
3321        isId(NextToken[0], "abs"))) {
3322     lex();
3323     return true;
3324   }
3325 
3326   return false;
3327 }
3328 
3329 ParseStatus
3330 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3331                                               bool AllowImm) {
3332   bool Neg, SP3Neg;
3333   bool Abs, SP3Abs;
3334   bool Lit;
3335   SMLoc Loc;
3336 
3337   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3338   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3339     return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3340 
3341   SP3Neg = parseSP3NegModifier();
3342 
3343   Loc = getLoc();
3344   Neg = trySkipId("neg");
3345   if (Neg && SP3Neg)
3346     return Error(Loc, "expected register or immediate");
3347   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3348     return ParseStatus::Failure;
3349 
3350   Abs = trySkipId("abs");
3351   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3352     return ParseStatus::Failure;
3353 
3354   Lit = trySkipId("lit");
3355   if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3356     return ParseStatus::Failure;
3357 
3358   Loc = getLoc();
3359   SP3Abs = trySkipToken(AsmToken::Pipe);
3360   if (Abs && SP3Abs)
3361     return Error(Loc, "expected register or immediate");
3362 
3363   ParseStatus Res;
3364   if (AllowImm) {
3365     Res = parseRegOrImm(Operands, SP3Abs, Lit);
3366   } else {
3367     Res = parseReg(Operands);
3368   }
3369   if (!Res.isSuccess())
3370     return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3371 
3372   if (Lit && !Operands.back()->isImm())
3373     Error(Loc, "expected immediate with lit modifier");
3374 
3375   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3376     return ParseStatus::Failure;
3377   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3378     return ParseStatus::Failure;
3379   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3380     return ParseStatus::Failure;
3381   if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3382     return ParseStatus::Failure;
3383 
3384   AMDGPUOperand::Modifiers Mods;
3385   Mods.Abs = Abs || SP3Abs;
3386   Mods.Neg = Neg || SP3Neg;
3387   Mods.Lit = Lit;
3388 
3389   if (Mods.hasFPModifiers() || Lit) {
3390     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3391     if (Op.isExpr())
3392       return Error(Op.getStartLoc(), "expected an absolute expression");
3393     Op.setModifiers(Mods);
3394   }
3395   return ParseStatus::Success;
3396 }
3397 
3398 ParseStatus
3399 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3400                                                bool AllowImm) {
3401   bool Sext = trySkipId("sext");
3402   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3403     return ParseStatus::Failure;
3404 
3405   ParseStatus Res;
3406   if (AllowImm) {
3407     Res = parseRegOrImm(Operands);
3408   } else {
3409     Res = parseReg(Operands);
3410   }
3411   if (!Res.isSuccess())
3412     return Sext ? ParseStatus::Failure : Res;
3413 
3414   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3415     return ParseStatus::Failure;
3416 
3417   AMDGPUOperand::Modifiers Mods;
3418   Mods.Sext = Sext;
3419 
3420   if (Mods.hasIntModifiers()) {
3421     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3422     if (Op.isExpr())
3423       return Error(Op.getStartLoc(), "expected an absolute expression");
3424     Op.setModifiers(Mods);
3425   }
3426 
3427   return ParseStatus::Success;
3428 }
3429 
3430 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3431   return parseRegOrImmWithFPInputMods(Operands, false);
3432 }
3433 
3434 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3435   return parseRegOrImmWithIntInputMods(Operands, false);
3436 }
3437 
3438 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3439   auto Loc = getLoc();
3440   if (trySkipId("off")) {
3441     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3442                                                 AMDGPUOperand::ImmTyOff, false));
3443     return ParseStatus::Success;
3444   }
3445 
3446   if (!isRegister())
3447     return ParseStatus::NoMatch;
3448 
3449   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3450   if (Reg) {
3451     Operands.push_back(std::move(Reg));
3452     return ParseStatus::Success;
3453   }
3454 
3455   return ParseStatus::Failure;
3456 }
3457 
3458 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3459   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3460 
3461   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3462       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3463       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3464       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3465     return Match_InvalidOperand;
3466 
3467   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3468       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3469     // v_mac_f32/16 allow only dst_sel == DWORD;
3470     auto OpNum =
3471         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3472     const auto &Op = Inst.getOperand(OpNum);
3473     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3474       return Match_InvalidOperand;
3475     }
3476   }
3477 
3478   return Match_Success;
3479 }
3480 
3481 static ArrayRef<unsigned> getAllVariants() {
3482   static const unsigned Variants[] = {
3483     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3484     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3485     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3486   };
3487 
3488   return ArrayRef(Variants);
3489 }
3490 
3491 // What asm variants we should check
3492 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3493   if (isForcedDPP() && isForcedVOP3()) {
3494     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3495     return ArrayRef(Variants);
3496   }
3497   if (getForcedEncodingSize() == 32) {
3498     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3499     return ArrayRef(Variants);
3500   }
3501 
3502   if (isForcedVOP3()) {
3503     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3504     return ArrayRef(Variants);
3505   }
3506 
3507   if (isForcedSDWA()) {
3508     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3509                                         AMDGPUAsmVariants::SDWA9};
3510     return ArrayRef(Variants);
3511   }
3512 
3513   if (isForcedDPP()) {
3514     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3515     return ArrayRef(Variants);
3516   }
3517 
3518   return getAllVariants();
3519 }
3520 
3521 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3522   if (isForcedDPP() && isForcedVOP3())
3523     return "e64_dpp";
3524 
3525   if (getForcedEncodingSize() == 32)
3526     return "e32";
3527 
3528   if (isForcedVOP3())
3529     return "e64";
3530 
3531   if (isForcedSDWA())
3532     return "sdwa";
3533 
3534   if (isForcedDPP())
3535     return "dpp";
3536 
3537   return "";
3538 }
3539 
3540 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3541   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3542   for (MCPhysReg Reg : Desc.implicit_uses()) {
3543     switch (Reg) {
3544     case AMDGPU::FLAT_SCR:
3545     case AMDGPU::VCC:
3546     case AMDGPU::VCC_LO:
3547     case AMDGPU::VCC_HI:
3548     case AMDGPU::M0:
3549       return Reg;
3550     default:
3551       break;
3552     }
3553   }
3554   return AMDGPU::NoRegister;
3555 }
3556 
3557 // NB: This code is correct only when used to check constant
3558 // bus limitations because GFX7 support no f16 inline constants.
3559 // Note that there are no cases when a GFX7 opcode violates
3560 // constant bus limitations due to the use of an f16 constant.
3561 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3562                                        unsigned OpIdx) const {
3563   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3564 
3565   if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3566       AMDGPU::isKImmOperand(Desc, OpIdx)) {
3567     return false;
3568   }
3569 
3570   const MCOperand &MO = Inst.getOperand(OpIdx);
3571 
3572   int64_t Val = MO.getImm();
3573   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3574 
3575   switch (OpSize) { // expected operand size
3576   case 8:
3577     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3578   case 4:
3579     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3580   case 2: {
3581     const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3582     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3583         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3584         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3585       return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3586 
3587     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3588         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3589         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3590       return AMDGPU::isInlinableLiteralV2I16(Val);
3591 
3592     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3593         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3594         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3595       return AMDGPU::isInlinableLiteralV2F16(Val);
3596 
3597     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
3598         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2BF16 ||
3599         OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
3600       return AMDGPU::isInlinableLiteralV2BF16(Val);
3601 
3602     if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3603         OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
3604         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
3605         OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
3606       return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3607 
3608     if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
3609         OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 ||
3610         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 ||
3611         OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED)
3612       return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3613 
3614     llvm_unreachable("invalid operand type");
3615   }
3616   default:
3617     llvm_unreachable("invalid operand size");
3618   }
3619 }
3620 
3621 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3622   if (!isGFX10Plus())
3623     return 1;
3624 
3625   switch (Opcode) {
3626   // 64-bit shift instructions can use only one scalar value input
3627   case AMDGPU::V_LSHLREV_B64_e64:
3628   case AMDGPU::V_LSHLREV_B64_gfx10:
3629   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3630   case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3631   case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3632   case AMDGPU::V_LSHRREV_B64_e64:
3633   case AMDGPU::V_LSHRREV_B64_gfx10:
3634   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3635   case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3636   case AMDGPU::V_ASHRREV_I64_e64:
3637   case AMDGPU::V_ASHRREV_I64_gfx10:
3638   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3639   case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3640   case AMDGPU::V_LSHL_B64_e64:
3641   case AMDGPU::V_LSHR_B64_e64:
3642   case AMDGPU::V_ASHR_I64_e64:
3643     return 1;
3644   default:
3645     return 2;
3646   }
3647 }
3648 
3649 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3650 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3651 
3652 // Get regular operand indices in the same order as specified
3653 // in the instruction (but append mandatory literals to the end).
3654 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3655                                            bool AddMandatoryLiterals = false) {
3656 
3657   int16_t ImmIdx =
3658       AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3659 
3660   if (isVOPD(Opcode)) {
3661     int16_t ImmDeferredIdx =
3662         AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3663                              : -1;
3664 
3665     return {getNamedOperandIdx(Opcode, OpName::src0X),
3666             getNamedOperandIdx(Opcode, OpName::vsrc1X),
3667             getNamedOperandIdx(Opcode, OpName::src0Y),
3668             getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3669             ImmDeferredIdx,
3670             ImmIdx};
3671   }
3672 
3673   return {getNamedOperandIdx(Opcode, OpName::src0),
3674           getNamedOperandIdx(Opcode, OpName::src1),
3675           getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3676 }
3677 
3678 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3679   const MCOperand &MO = Inst.getOperand(OpIdx);
3680   if (MO.isImm())
3681     return !isInlineConstant(Inst, OpIdx);
3682   if (MO.isReg()) {
3683     auto Reg = MO.getReg();
3684     if (!Reg)
3685       return false;
3686     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3687     auto PReg = mc2PseudoReg(Reg);
3688     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3689   }
3690   return true;
3691 }
3692 
3693 // Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3694 // Writelane is special in that it can use SGPR and M0 (which would normally
3695 // count as using the constant bus twice - but in this case it is allowed since
3696 // the lane selector doesn't count as a use of the constant bus). However, it is
3697 // still required to abide by the 1 SGPR rule.
3698 static bool checkWriteLane(const MCInst &Inst) {
3699   const unsigned Opcode = Inst.getOpcode();
3700   if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3701     return false;
3702   const MCOperand &LaneSelOp = Inst.getOperand(2);
3703   if (!LaneSelOp.isReg())
3704     return false;
3705   auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3706   return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3707 }
3708 
3709 bool AMDGPUAsmParser::validateConstantBusLimitations(
3710     const MCInst &Inst, const OperandVector &Operands) {
3711   const unsigned Opcode = Inst.getOpcode();
3712   const MCInstrDesc &Desc = MII.get(Opcode);
3713   unsigned LastSGPR = AMDGPU::NoRegister;
3714   unsigned ConstantBusUseCount = 0;
3715   unsigned NumLiterals = 0;
3716   unsigned LiteralSize;
3717 
3718   if (!(Desc.TSFlags &
3719         (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3720          SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3721       !isVOPD(Opcode))
3722     return true;
3723 
3724   if (checkWriteLane(Inst))
3725     return true;
3726 
3727   // Check special imm operands (used by madmk, etc)
3728   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3729     ++NumLiterals;
3730     LiteralSize = 4;
3731   }
3732 
3733   SmallDenseSet<unsigned> SGPRsUsed;
3734   unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3735   if (SGPRUsed != AMDGPU::NoRegister) {
3736     SGPRsUsed.insert(SGPRUsed);
3737     ++ConstantBusUseCount;
3738   }
3739 
3740   OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3741 
3742   for (int OpIdx : OpIndices) {
3743     if (OpIdx == -1)
3744       continue;
3745 
3746     const MCOperand &MO = Inst.getOperand(OpIdx);
3747     if (usesConstantBus(Inst, OpIdx)) {
3748       if (MO.isReg()) {
3749         LastSGPR = mc2PseudoReg(MO.getReg());
3750         // Pairs of registers with a partial intersections like these
3751         //   s0, s[0:1]
3752         //   flat_scratch_lo, flat_scratch
3753         //   flat_scratch_lo, flat_scratch_hi
3754         // are theoretically valid but they are disabled anyway.
3755         // Note that this code mimics SIInstrInfo::verifyInstruction
3756         if (SGPRsUsed.insert(LastSGPR).second) {
3757           ++ConstantBusUseCount;
3758         }
3759       } else { // Expression or a literal
3760 
3761         if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3762           continue; // special operand like VINTERP attr_chan
3763 
3764         // An instruction may use only one literal.
3765         // This has been validated on the previous step.
3766         // See validateVOPLiteral.
3767         // This literal may be used as more than one operand.
3768         // If all these operands are of the same size,
3769         // this literal counts as one scalar value.
3770         // Otherwise it counts as 2 scalar values.
3771         // See "GFX10 Shader Programming", section 3.6.2.3.
3772 
3773         unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3774         if (Size < 4)
3775           Size = 4;
3776 
3777         if (NumLiterals == 0) {
3778           NumLiterals = 1;
3779           LiteralSize = Size;
3780         } else if (LiteralSize != Size) {
3781           NumLiterals = 2;
3782         }
3783       }
3784     }
3785   }
3786   ConstantBusUseCount += NumLiterals;
3787 
3788   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3789     return true;
3790 
3791   SMLoc LitLoc = getLitLoc(Operands);
3792   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3793   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3794   Error(Loc, "invalid operand (violates constant bus restrictions)");
3795   return false;
3796 }
3797 
3798 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3799     const MCInst &Inst, const OperandVector &Operands) {
3800 
3801   const unsigned Opcode = Inst.getOpcode();
3802   if (!isVOPD(Opcode))
3803     return true;
3804 
3805   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3806 
3807   auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3808     const MCOperand &Opr = Inst.getOperand(OperandIdx);
3809     return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3810                ? Opr.getReg()
3811                : MCRegister::NoRegister;
3812   };
3813 
3814   // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
3815   bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3816 
3817   const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3818   auto InvalidCompOprIdx =
3819       InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3820   if (!InvalidCompOprIdx)
3821     return true;
3822 
3823   auto CompOprIdx = *InvalidCompOprIdx;
3824   auto ParsedIdx =
3825       std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3826                InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3827   assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3828 
3829   auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3830   if (CompOprIdx == VOPD::Component::DST) {
3831     Error(Loc, "one dst register must be even and the other odd");
3832   } else {
3833     auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3834     Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3835                    " operands must use different VGPR banks");
3836   }
3837 
3838   return false;
3839 }
3840 
3841 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3842 
3843   const unsigned Opc = Inst.getOpcode();
3844   const MCInstrDesc &Desc = MII.get(Opc);
3845 
3846   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3847     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3848     assert(ClampIdx != -1);
3849     return Inst.getOperand(ClampIdx).getImm() == 0;
3850   }
3851 
3852   return true;
3853 }
3854 
3855 constexpr uint64_t MIMGFlags =
3856     SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3857 
3858 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3859                                            const SMLoc &IDLoc) {
3860 
3861   const unsigned Opc = Inst.getOpcode();
3862   const MCInstrDesc &Desc = MII.get(Opc);
3863 
3864   if ((Desc.TSFlags & MIMGFlags) == 0)
3865     return true;
3866 
3867   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3868   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3869   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3870 
3871   if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
3872     return true;
3873 
3874   if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3875     return true;
3876 
3877   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3878   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3879   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3880   if (DMask == 0)
3881     DMask = 1;
3882 
3883   bool IsPackedD16 = false;
3884   unsigned DataSize =
3885       (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3886   if (hasPackedD16()) {
3887     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3888     IsPackedD16 = D16Idx >= 0;
3889     if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3890       DataSize = (DataSize + 1) / 2;
3891   }
3892 
3893   if ((VDataSize / 4) == DataSize + TFESize)
3894     return true;
3895 
3896   StringRef Modifiers;
3897   if (isGFX90A())
3898     Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3899   else
3900     Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3901 
3902   Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3903   return false;
3904 }
3905 
3906 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3907                                            const SMLoc &IDLoc) {
3908   const unsigned Opc = Inst.getOpcode();
3909   const MCInstrDesc &Desc = MII.get(Opc);
3910 
3911   if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3912     return true;
3913 
3914   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3915 
3916   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3917       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3918   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3919   int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
3920                                                        : AMDGPU::OpName::rsrc;
3921   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3922   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3923   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3924 
3925   assert(VAddr0Idx != -1);
3926   assert(SrsrcIdx != -1);
3927   assert(SrsrcIdx > VAddr0Idx);
3928 
3929   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3930   if (BaseOpcode->BVH) {
3931     if (IsA16 == BaseOpcode->A16)
3932       return true;
3933     Error(IDLoc, "image address size does not match a16");
3934     return false;
3935   }
3936 
3937   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3938   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3939   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3940   unsigned ActualAddrSize =
3941       IsNSA ? SrsrcIdx - VAddr0Idx
3942             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3943 
3944   unsigned ExpectedAddrSize =
3945       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3946 
3947   if (IsNSA) {
3948     if (hasPartialNSAEncoding() &&
3949         ExpectedAddrSize >
3950             getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3951       int VAddrLastIdx = SrsrcIdx - 1;
3952       unsigned VAddrLastSize =
3953           AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3954 
3955       ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3956     }
3957   } else {
3958     if (ExpectedAddrSize > 12)
3959       ExpectedAddrSize = 16;
3960 
3961     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3962     // This provides backward compatibility for assembly created
3963     // before 160b/192b/224b types were directly supported.
3964     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3965       return true;
3966   }
3967 
3968   if (ActualAddrSize == ExpectedAddrSize)
3969     return true;
3970 
3971   Error(IDLoc, "image address size does not match dim and a16");
3972   return false;
3973 }
3974 
3975 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3976 
3977   const unsigned Opc = Inst.getOpcode();
3978   const MCInstrDesc &Desc = MII.get(Opc);
3979 
3980   if ((Desc.TSFlags & MIMGFlags) == 0)
3981     return true;
3982   if (!Desc.mayLoad() || !Desc.mayStore())
3983     return true; // Not atomic
3984 
3985   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3986   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3987 
3988   // This is an incomplete check because image_atomic_cmpswap
3989   // may only use 0x3 and 0xf while other atomic operations
3990   // may use 0x1 and 0x3. However these limitations are
3991   // verified when we check that dmask matches dst size.
3992   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3993 }
3994 
3995 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3996 
3997   const unsigned Opc = Inst.getOpcode();
3998   const MCInstrDesc &Desc = MII.get(Opc);
3999 
4000   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4001     return true;
4002 
4003   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4004   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4005 
4006   // GATHER4 instructions use dmask in a different fashion compared to
4007   // other MIMG instructions. The only useful DMASK values are
4008   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4009   // (red,red,red,red) etc.) The ISA document doesn't mention
4010   // this.
4011   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4012 }
4013 
4014 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4015   const unsigned Opc = Inst.getOpcode();
4016   const MCInstrDesc &Desc = MII.get(Opc);
4017 
4018   if ((Desc.TSFlags & MIMGFlags) == 0)
4019     return true;
4020 
4021   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4022   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4023       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4024 
4025   if (!BaseOpcode->MSAA)
4026     return true;
4027 
4028   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4029   assert(DimIdx != -1);
4030 
4031   unsigned Dim = Inst.getOperand(DimIdx).getImm();
4032   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4033 
4034   return DimInfo->MSAA;
4035 }
4036 
4037 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4038 {
4039   switch (Opcode) {
4040   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4041   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4042   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4043     return true;
4044   default:
4045     return false;
4046   }
4047 }
4048 
4049 // movrels* opcodes should only allow VGPRS as src0.
4050 // This is specified in .td description for vop1/vop3,
4051 // but sdwa is handled differently. See isSDWAOperand.
4052 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4053                                       const OperandVector &Operands) {
4054 
4055   const unsigned Opc = Inst.getOpcode();
4056   const MCInstrDesc &Desc = MII.get(Opc);
4057 
4058   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4059     return true;
4060 
4061   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4062   assert(Src0Idx != -1);
4063 
4064   SMLoc ErrLoc;
4065   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4066   if (Src0.isReg()) {
4067     auto Reg = mc2PseudoReg(Src0.getReg());
4068     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4069     if (!isSGPR(Reg, TRI))
4070       return true;
4071     ErrLoc = getRegLoc(Reg, Operands);
4072   } else {
4073     ErrLoc = getConstLoc(Operands);
4074   }
4075 
4076   Error(ErrLoc, "source operand must be a VGPR");
4077   return false;
4078 }
4079 
4080 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4081                                           const OperandVector &Operands) {
4082 
4083   const unsigned Opc = Inst.getOpcode();
4084 
4085   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4086     return true;
4087 
4088   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4089   assert(Src0Idx != -1);
4090 
4091   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4092   if (!Src0.isReg())
4093     return true;
4094 
4095   auto Reg = mc2PseudoReg(Src0.getReg());
4096   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4097   if (!isGFX90A() && isSGPR(Reg, TRI)) {
4098     Error(getRegLoc(Reg, Operands),
4099           "source operand must be either a VGPR or an inline constant");
4100     return false;
4101   }
4102 
4103   return true;
4104 }
4105 
4106 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4107                                       const OperandVector &Operands) {
4108   unsigned Opcode = Inst.getOpcode();
4109   const MCInstrDesc &Desc = MII.get(Opcode);
4110 
4111   if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4112       !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4113     return true;
4114 
4115   const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4116   if (Src2Idx == -1)
4117     return true;
4118 
4119   if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4120     Error(getConstLoc(Operands),
4121           "inline constants are not allowed for this operand");
4122     return false;
4123   }
4124 
4125   return true;
4126 }
4127 
4128 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4129                                    const OperandVector &Operands) {
4130   const unsigned Opc = Inst.getOpcode();
4131   const MCInstrDesc &Desc = MII.get(Opc);
4132 
4133   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4134     return true;
4135 
4136   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4137   if (Src2Idx == -1)
4138     return true;
4139 
4140   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4141   if (!Src2.isReg())
4142     return true;
4143 
4144   MCRegister Src2Reg = Src2.getReg();
4145   MCRegister DstReg = Inst.getOperand(0).getReg();
4146   if (Src2Reg == DstReg)
4147     return true;
4148 
4149   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4150   if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4151     return true;
4152 
4153   if (TRI->regsOverlap(Src2Reg, DstReg)) {
4154     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4155           "source 2 operand must not partially overlap with dst");
4156     return false;
4157   }
4158 
4159   return true;
4160 }
4161 
4162 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4163   switch (Inst.getOpcode()) {
4164   default:
4165     return true;
4166   case V_DIV_SCALE_F32_gfx6_gfx7:
4167   case V_DIV_SCALE_F32_vi:
4168   case V_DIV_SCALE_F32_gfx10:
4169   case V_DIV_SCALE_F64_gfx6_gfx7:
4170   case V_DIV_SCALE_F64_vi:
4171   case V_DIV_SCALE_F64_gfx10:
4172     break;
4173   }
4174 
4175   // TODO: Check that src0 = src1 or src2.
4176 
4177   for (auto Name : {AMDGPU::OpName::src0_modifiers,
4178                     AMDGPU::OpName::src2_modifiers,
4179                     AMDGPU::OpName::src2_modifiers}) {
4180     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4181             .getImm() &
4182         SISrcMods::ABS) {
4183       return false;
4184     }
4185   }
4186 
4187   return true;
4188 }
4189 
4190 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4191 
4192   const unsigned Opc = Inst.getOpcode();
4193   const MCInstrDesc &Desc = MII.get(Opc);
4194 
4195   if ((Desc.TSFlags & MIMGFlags) == 0)
4196     return true;
4197 
4198   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4199   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4200     if (isCI() || isSI())
4201       return false;
4202   }
4203 
4204   return true;
4205 }
4206 
4207 static bool IsRevOpcode(const unsigned Opcode)
4208 {
4209   switch (Opcode) {
4210   case AMDGPU::V_SUBREV_F32_e32:
4211   case AMDGPU::V_SUBREV_F32_e64:
4212   case AMDGPU::V_SUBREV_F32_e32_gfx10:
4213   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4214   case AMDGPU::V_SUBREV_F32_e32_vi:
4215   case AMDGPU::V_SUBREV_F32_e64_gfx10:
4216   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4217   case AMDGPU::V_SUBREV_F32_e64_vi:
4218 
4219   case AMDGPU::V_SUBREV_CO_U32_e32:
4220   case AMDGPU::V_SUBREV_CO_U32_e64:
4221   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4222   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4223 
4224   case AMDGPU::V_SUBBREV_U32_e32:
4225   case AMDGPU::V_SUBBREV_U32_e64:
4226   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4227   case AMDGPU::V_SUBBREV_U32_e32_vi:
4228   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4229   case AMDGPU::V_SUBBREV_U32_e64_vi:
4230 
4231   case AMDGPU::V_SUBREV_U32_e32:
4232   case AMDGPU::V_SUBREV_U32_e64:
4233   case AMDGPU::V_SUBREV_U32_e32_gfx9:
4234   case AMDGPU::V_SUBREV_U32_e32_vi:
4235   case AMDGPU::V_SUBREV_U32_e64_gfx9:
4236   case AMDGPU::V_SUBREV_U32_e64_vi:
4237 
4238   case AMDGPU::V_SUBREV_F16_e32:
4239   case AMDGPU::V_SUBREV_F16_e64:
4240   case AMDGPU::V_SUBREV_F16_e32_gfx10:
4241   case AMDGPU::V_SUBREV_F16_e32_vi:
4242   case AMDGPU::V_SUBREV_F16_e64_gfx10:
4243   case AMDGPU::V_SUBREV_F16_e64_vi:
4244 
4245   case AMDGPU::V_SUBREV_U16_e32:
4246   case AMDGPU::V_SUBREV_U16_e64:
4247   case AMDGPU::V_SUBREV_U16_e32_vi:
4248   case AMDGPU::V_SUBREV_U16_e64_vi:
4249 
4250   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4251   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4252   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4253 
4254   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4255   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4256 
4257   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4258   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4259 
4260   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4261   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4262 
4263   case AMDGPU::V_LSHRREV_B32_e32:
4264   case AMDGPU::V_LSHRREV_B32_e64:
4265   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4266   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4267   case AMDGPU::V_LSHRREV_B32_e32_vi:
4268   case AMDGPU::V_LSHRREV_B32_e64_vi:
4269   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4270   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4271 
4272   case AMDGPU::V_ASHRREV_I32_e32:
4273   case AMDGPU::V_ASHRREV_I32_e64:
4274   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4275   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4276   case AMDGPU::V_ASHRREV_I32_e32_vi:
4277   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4278   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4279   case AMDGPU::V_ASHRREV_I32_e64_vi:
4280 
4281   case AMDGPU::V_LSHLREV_B32_e32:
4282   case AMDGPU::V_LSHLREV_B32_e64:
4283   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4284   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4285   case AMDGPU::V_LSHLREV_B32_e32_vi:
4286   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4287   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4288   case AMDGPU::V_LSHLREV_B32_e64_vi:
4289 
4290   case AMDGPU::V_LSHLREV_B16_e32:
4291   case AMDGPU::V_LSHLREV_B16_e64:
4292   case AMDGPU::V_LSHLREV_B16_e32_vi:
4293   case AMDGPU::V_LSHLREV_B16_e64_vi:
4294   case AMDGPU::V_LSHLREV_B16_gfx10:
4295 
4296   case AMDGPU::V_LSHRREV_B16_e32:
4297   case AMDGPU::V_LSHRREV_B16_e64:
4298   case AMDGPU::V_LSHRREV_B16_e32_vi:
4299   case AMDGPU::V_LSHRREV_B16_e64_vi:
4300   case AMDGPU::V_LSHRREV_B16_gfx10:
4301 
4302   case AMDGPU::V_ASHRREV_I16_e32:
4303   case AMDGPU::V_ASHRREV_I16_e64:
4304   case AMDGPU::V_ASHRREV_I16_e32_vi:
4305   case AMDGPU::V_ASHRREV_I16_e64_vi:
4306   case AMDGPU::V_ASHRREV_I16_gfx10:
4307 
4308   case AMDGPU::V_LSHLREV_B64_e64:
4309   case AMDGPU::V_LSHLREV_B64_gfx10:
4310   case AMDGPU::V_LSHLREV_B64_vi:
4311 
4312   case AMDGPU::V_LSHRREV_B64_e64:
4313   case AMDGPU::V_LSHRREV_B64_gfx10:
4314   case AMDGPU::V_LSHRREV_B64_vi:
4315 
4316   case AMDGPU::V_ASHRREV_I64_e64:
4317   case AMDGPU::V_ASHRREV_I64_gfx10:
4318   case AMDGPU::V_ASHRREV_I64_vi:
4319 
4320   case AMDGPU::V_PK_LSHLREV_B16:
4321   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4322   case AMDGPU::V_PK_LSHLREV_B16_vi:
4323 
4324   case AMDGPU::V_PK_LSHRREV_B16:
4325   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4326   case AMDGPU::V_PK_LSHRREV_B16_vi:
4327   case AMDGPU::V_PK_ASHRREV_I16:
4328   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4329   case AMDGPU::V_PK_ASHRREV_I16_vi:
4330     return true;
4331   default:
4332     return false;
4333   }
4334 }
4335 
4336 std::optional<StringRef>
4337 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4338 
4339   using namespace SIInstrFlags;
4340   const unsigned Opcode = Inst.getOpcode();
4341   const MCInstrDesc &Desc = MII.get(Opcode);
4342 
4343   // lds_direct register is defined so that it can be used
4344   // with 9-bit operands only. Ignore encodings which do not accept these.
4345   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4346   if ((Desc.TSFlags & Enc) == 0)
4347     return std::nullopt;
4348 
4349   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4350     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4351     if (SrcIdx == -1)
4352       break;
4353     const auto &Src = Inst.getOperand(SrcIdx);
4354     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4355 
4356       if (isGFX90A() || isGFX11Plus())
4357         return StringRef("lds_direct is not supported on this GPU");
4358 
4359       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4360         return StringRef("lds_direct cannot be used with this instruction");
4361 
4362       if (SrcName != OpName::src0)
4363         return StringRef("lds_direct may be used as src0 only");
4364     }
4365   }
4366 
4367   return std::nullopt;
4368 }
4369 
4370 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4371   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4372     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4373     if (Op.isFlatOffset())
4374       return Op.getStartLoc();
4375   }
4376   return getLoc();
4377 }
4378 
4379 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4380                                      const OperandVector &Operands) {
4381   auto Opcode = Inst.getOpcode();
4382   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4383   if (OpNum == -1)
4384     return true;
4385 
4386   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4387   if ((TSFlags & SIInstrFlags::FLAT))
4388     return validateFlatOffset(Inst, Operands);
4389 
4390   if ((TSFlags & SIInstrFlags::SMRD))
4391     return validateSMEMOffset(Inst, Operands);
4392 
4393   const auto &Op = Inst.getOperand(OpNum);
4394   if (isGFX12Plus() &&
4395       (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4396     const unsigned OffsetSize = 24;
4397     if (!isIntN(OffsetSize, Op.getImm())) {
4398       Error(getFlatOffsetLoc(Operands),
4399             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4400       return false;
4401     }
4402   } else {
4403     const unsigned OffsetSize = 16;
4404     if (!isUIntN(OffsetSize, Op.getImm())) {
4405       Error(getFlatOffsetLoc(Operands),
4406             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4407       return false;
4408     }
4409   }
4410   return true;
4411 }
4412 
4413 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4414                                          const OperandVector &Operands) {
4415   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4416   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4417     return true;
4418 
4419   auto Opcode = Inst.getOpcode();
4420   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4421   assert(OpNum != -1);
4422 
4423   const auto &Op = Inst.getOperand(OpNum);
4424   if (!hasFlatOffsets() && Op.getImm() != 0) {
4425     Error(getFlatOffsetLoc(Operands),
4426           "flat offset modifier is not supported on this GPU");
4427     return false;
4428   }
4429 
4430   // For pre-GFX12 FLAT instructions the offset must be positive;
4431   // MSB is ignored and forced to zero.
4432   unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4433   bool AllowNegative =
4434       (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4435       isGFX12Plus();
4436   if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4437     Error(getFlatOffsetLoc(Operands),
4438           Twine("expected a ") +
4439               (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4440                              : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4441     return false;
4442   }
4443 
4444   return true;
4445 }
4446 
4447 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4448   // Start with second operand because SMEM Offset cannot be dst or src0.
4449   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4450     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4451     if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4452       return Op.getStartLoc();
4453   }
4454   return getLoc();
4455 }
4456 
4457 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4458                                          const OperandVector &Operands) {
4459   if (isCI() || isSI())
4460     return true;
4461 
4462   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4463   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4464     return true;
4465 
4466   auto Opcode = Inst.getOpcode();
4467   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4468   if (OpNum == -1)
4469     return true;
4470 
4471   const auto &Op = Inst.getOperand(OpNum);
4472   if (!Op.isImm())
4473     return true;
4474 
4475   uint64_t Offset = Op.getImm();
4476   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4477   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4478       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4479     return true;
4480 
4481   Error(getSMEMOffsetLoc(Operands),
4482         isGFX12Plus()          ? "expected a 24-bit signed offset"
4483         : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4484                                : "expected a 21-bit signed offset");
4485 
4486   return false;
4487 }
4488 
4489 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4490   unsigned Opcode = Inst.getOpcode();
4491   const MCInstrDesc &Desc = MII.get(Opcode);
4492   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4493     return true;
4494 
4495   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4496   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4497 
4498   const int OpIndices[] = { Src0Idx, Src1Idx };
4499 
4500   unsigned NumExprs = 0;
4501   unsigned NumLiterals = 0;
4502   uint32_t LiteralValue;
4503 
4504   for (int OpIdx : OpIndices) {
4505     if (OpIdx == -1) break;
4506 
4507     const MCOperand &MO = Inst.getOperand(OpIdx);
4508     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4509     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4510       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4511         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4512         if (NumLiterals == 0 || LiteralValue != Value) {
4513           LiteralValue = Value;
4514           ++NumLiterals;
4515         }
4516       } else if (MO.isExpr()) {
4517         ++NumExprs;
4518       }
4519     }
4520   }
4521 
4522   return NumLiterals + NumExprs <= 1;
4523 }
4524 
4525 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4526   const unsigned Opc = Inst.getOpcode();
4527   if (isPermlane16(Opc)) {
4528     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4529     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4530 
4531     if (OpSel & ~3)
4532       return false;
4533   }
4534 
4535   uint64_t TSFlags = MII.get(Opc).TSFlags;
4536 
4537   if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4538     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4539     if (OpSelIdx != -1) {
4540       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4541         return false;
4542     }
4543     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4544     if (OpSelHiIdx != -1) {
4545       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4546         return false;
4547     }
4548   }
4549 
4550   // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4551   if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4552       (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4553     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4554     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4555     if (OpSel & 3)
4556       return false;
4557   }
4558 
4559   return true;
4560 }
4561 
4562 bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4563   assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4564 
4565   const unsigned Opc = Inst.getOpcode();
4566   uint64_t TSFlags = MII.get(Opc).TSFlags;
4567 
4568   // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4569   // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4570   // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4571   // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4572   if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4573       !(TSFlags & SIInstrFlags::IsSWMMAC))
4574     return true;
4575 
4576   int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4577   if (NegIdx == -1)
4578     return true;
4579 
4580   unsigned Neg = Inst.getOperand(NegIdx).getImm();
4581 
4582   // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
4583   // on some src operands but not allowed on other.
4584   // It is convenient that such instructions don't have src_modifiers operand
4585   // for src operands that don't allow neg because they also don't allow opsel.
4586 
4587   int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4588                     AMDGPU::OpName::src1_modifiers,
4589                     AMDGPU::OpName::src2_modifiers};
4590 
4591   for (unsigned i = 0; i < 3; ++i) {
4592     if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4593       if (Neg & (1 << i))
4594         return false;
4595     }
4596   }
4597 
4598   return true;
4599 }
4600 
4601 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4602                                   const OperandVector &Operands) {
4603   const unsigned Opc = Inst.getOpcode();
4604   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4605   if (DppCtrlIdx >= 0) {
4606     unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4607 
4608     if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4609         AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4610       // DP ALU DPP is supported for row_newbcast only on GFX9*
4611       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4612       Error(S, "DP ALU dpp only supports row_newbcast");
4613       return false;
4614     }
4615   }
4616 
4617   int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4618   bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4619 
4620   if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4621     int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4622     if (Src1Idx >= 0) {
4623       const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4624       const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4625       if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4626         auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4627         SMLoc S = getRegLoc(Reg, Operands);
4628         Error(S, "invalid operand for instruction");
4629         return false;
4630       }
4631       if (Src1.isImm()) {
4632         Error(getInstLoc(Operands),
4633               "src1 immediate operand invalid for instruction");
4634         return false;
4635       }
4636     }
4637   }
4638 
4639   return true;
4640 }
4641 
4642 // Check if VCC register matches wavefront size
4643 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4644   auto FB = getFeatureBits();
4645   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4646     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4647 }
4648 
4649 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4650 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4651                                          const OperandVector &Operands) {
4652   unsigned Opcode = Inst.getOpcode();
4653   const MCInstrDesc &Desc = MII.get(Opcode);
4654   bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4655   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4656       !HasMandatoryLiteral && !isVOPD(Opcode))
4657     return true;
4658 
4659   OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4660 
4661   unsigned NumExprs = 0;
4662   unsigned NumLiterals = 0;
4663   uint32_t LiteralValue;
4664 
4665   for (int OpIdx : OpIndices) {
4666     if (OpIdx == -1)
4667       continue;
4668 
4669     const MCOperand &MO = Inst.getOperand(OpIdx);
4670     if (!MO.isImm() && !MO.isExpr())
4671       continue;
4672     if (!isSISrcOperand(Desc, OpIdx))
4673       continue;
4674 
4675     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4676       uint64_t Value = static_cast<uint64_t>(MO.getImm());
4677       bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4678                     AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4679       bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4680 
4681       if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4682         Error(getLitLoc(Operands), "invalid operand for instruction");
4683         return false;
4684       }
4685 
4686       if (IsFP64 && IsValid32Op)
4687         Value = Hi_32(Value);
4688 
4689       if (NumLiterals == 0 || LiteralValue != Value) {
4690         LiteralValue = Value;
4691         ++NumLiterals;
4692       }
4693     } else if (MO.isExpr()) {
4694       ++NumExprs;
4695     }
4696   }
4697   NumLiterals += NumExprs;
4698 
4699   if (!NumLiterals)
4700     return true;
4701 
4702   if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4703     Error(getLitLoc(Operands), "literal operands are not supported");
4704     return false;
4705   }
4706 
4707   if (NumLiterals > 1) {
4708     Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4709     return false;
4710   }
4711 
4712   return true;
4713 }
4714 
4715 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4716 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4717                          const MCRegisterInfo *MRI) {
4718   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4719   if (OpIdx < 0)
4720     return -1;
4721 
4722   const MCOperand &Op = Inst.getOperand(OpIdx);
4723   if (!Op.isReg())
4724     return -1;
4725 
4726   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4727   auto Reg = Sub ? Sub : Op.getReg();
4728   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4729   return AGPR32.contains(Reg) ? 1 : 0;
4730 }
4731 
4732 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4733   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4734   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4735                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4736                   SIInstrFlags::DS)) == 0)
4737     return true;
4738 
4739   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4740                                                       : AMDGPU::OpName::vdata;
4741 
4742   const MCRegisterInfo *MRI = getMRI();
4743   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4744   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4745 
4746   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4747     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4748     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4749       return false;
4750   }
4751 
4752   auto FB = getFeatureBits();
4753   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4754     if (DataAreg < 0 || DstAreg < 0)
4755       return true;
4756     return DstAreg == DataAreg;
4757   }
4758 
4759   return DstAreg < 1 && DataAreg < 1;
4760 }
4761 
4762 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4763   auto FB = getFeatureBits();
4764   if (!FB[AMDGPU::FeatureGFX90AInsts])
4765     return true;
4766 
4767   const MCRegisterInfo *MRI = getMRI();
4768   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4769   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4770   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4771     const MCOperand &Op = Inst.getOperand(I);
4772     if (!Op.isReg())
4773       continue;
4774 
4775     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4776     if (!Sub)
4777       continue;
4778 
4779     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4780       return false;
4781     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4782       return false;
4783   }
4784 
4785   return true;
4786 }
4787 
4788 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4789   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4790     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4791     if (Op.isBLGP())
4792       return Op.getStartLoc();
4793   }
4794   return SMLoc();
4795 }
4796 
4797 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4798                                    const OperandVector &Operands) {
4799   unsigned Opc = Inst.getOpcode();
4800   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4801   if (BlgpIdx == -1)
4802     return true;
4803   SMLoc BLGPLoc = getBLGPLoc(Operands);
4804   if (!BLGPLoc.isValid())
4805     return true;
4806   bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4807   auto FB = getFeatureBits();
4808   bool UsesNeg = false;
4809   if (FB[AMDGPU::FeatureGFX940Insts]) {
4810     switch (Opc) {
4811     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4812     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4813     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4814     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4815       UsesNeg = true;
4816     }
4817   }
4818 
4819   if (IsNeg == UsesNeg)
4820     return true;
4821 
4822   Error(BLGPLoc,
4823         UsesNeg ? "invalid modifier: blgp is not supported"
4824                 : "invalid modifier: neg is not supported");
4825 
4826   return false;
4827 }
4828 
4829 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4830                                       const OperandVector &Operands) {
4831   if (!isGFX11Plus())
4832     return true;
4833 
4834   unsigned Opc = Inst.getOpcode();
4835   if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4836       Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4837       Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4838       Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4839     return true;
4840 
4841   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4842   assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4843   auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4844   if (Reg == AMDGPU::SGPR_NULL)
4845     return true;
4846 
4847   SMLoc RegLoc = getRegLoc(Reg, Operands);
4848   Error(RegLoc, "src0 must be null");
4849   return false;
4850 }
4851 
4852 bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4853                                  const OperandVector &Operands) {
4854   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4855   if ((TSFlags & SIInstrFlags::DS) == 0)
4856     return true;
4857   if (TSFlags & SIInstrFlags::GWS)
4858     return validateGWS(Inst, Operands);
4859   // Only validate GDS for non-GWS instructions.
4860   if (hasGDS())
4861     return true;
4862   int GDSIdx =
4863       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4864   if (GDSIdx < 0)
4865     return true;
4866   unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4867   if (GDS) {
4868     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4869     Error(S, "gds modifier is not supported on this GPU");
4870     return false;
4871   }
4872   return true;
4873 }
4874 
4875 // gfx90a has an undocumented limitation:
4876 // DS_GWS opcodes must use even aligned registers.
4877 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4878                                   const OperandVector &Operands) {
4879   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4880     return true;
4881 
4882   int Opc = Inst.getOpcode();
4883   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4884       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4885     return true;
4886 
4887   const MCRegisterInfo *MRI = getMRI();
4888   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4889   int Data0Pos =
4890       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4891   assert(Data0Pos != -1);
4892   auto Reg = Inst.getOperand(Data0Pos).getReg();
4893   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4894   if (RegIdx & 1) {
4895     SMLoc RegLoc = getRegLoc(Reg, Operands);
4896     Error(RegLoc, "vgpr must be even aligned");
4897     return false;
4898   }
4899 
4900   return true;
4901 }
4902 
4903 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4904                                             const OperandVector &Operands,
4905                                             const SMLoc &IDLoc) {
4906   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4907                                            AMDGPU::OpName::cpol);
4908   if (CPolPos == -1)
4909     return true;
4910 
4911   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4912 
4913   if (isGFX12Plus())
4914     return validateTHAndScopeBits(Inst, Operands, CPol);
4915 
4916   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4917   if (TSFlags & SIInstrFlags::SMRD) {
4918     if (CPol && (isSI() || isCI())) {
4919       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4920       Error(S, "cache policy is not supported for SMRD instructions");
4921       return false;
4922     }
4923     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4924       Error(IDLoc, "invalid cache policy for SMEM instruction");
4925       return false;
4926     }
4927   }
4928 
4929   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4930     const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4931                                       SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4932                                       SIInstrFlags::FLAT;
4933     if (!(TSFlags & AllowSCCModifier)) {
4934       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4935       StringRef CStr(S.getPointer());
4936       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4937       Error(S,
4938             "scc modifier is not supported for this instruction on this GPU");
4939       return false;
4940     }
4941   }
4942 
4943   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4944     return true;
4945 
4946   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4947     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4948       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4949                               : "instruction must use glc");
4950       return false;
4951     }
4952   } else {
4953     if (CPol & CPol::GLC) {
4954       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4955       StringRef CStr(S.getPointer());
4956       S = SMLoc::getFromPointer(
4957           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4958       Error(S, isGFX940() ? "instruction must not use sc0"
4959                           : "instruction must not use glc");
4960       return false;
4961     }
4962   }
4963 
4964   return true;
4965 }
4966 
4967 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4968                                              const OperandVector &Operands,
4969                                              const unsigned CPol) {
4970   const unsigned TH = CPol & AMDGPU::CPol::TH;
4971   const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4972 
4973   const unsigned Opcode = Inst.getOpcode();
4974   const MCInstrDesc &TID = MII.get(Opcode);
4975 
4976   auto PrintError = [&](StringRef Msg) {
4977     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4978     Error(S, Msg);
4979     return false;
4980   };
4981 
4982   if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4983       (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
4984       (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
4985     return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4986 
4987   if (TH == 0)
4988     return true;
4989 
4990   if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4991       ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4992        (TH == AMDGPU::CPol::TH_NT_HT)))
4993     return PrintError("invalid th value for SMEM instruction");
4994 
4995   if (TH == AMDGPU::CPol::TH_BYPASS) {
4996     if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
4997          CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
4998         (Scope == AMDGPU::CPol::SCOPE_SYS &&
4999          !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5000       return PrintError("scope and th combination is not valid");
5001   }
5002 
5003   bool IsStore = TID.mayStore();
5004   bool IsAtomic =
5005       TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
5006 
5007   if (IsAtomic) {
5008     if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5009       return PrintError("invalid th value for atomic instructions");
5010   } else if (IsStore) {
5011     if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5012       return PrintError("invalid th value for store instructions");
5013   } else {
5014     if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5015       return PrintError("invalid th value for load instructions");
5016   }
5017 
5018   return true;
5019 }
5020 
5021 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5022   if (!isGFX11Plus())
5023     return true;
5024   for (auto &Operand : Operands) {
5025     if (!Operand->isReg())
5026       continue;
5027     unsigned Reg = Operand->getReg();
5028     if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5029       Error(getRegLoc(Reg, Operands),
5030             "execz and vccz are not supported on this GPU");
5031       return false;
5032     }
5033   }
5034   return true;
5035 }
5036 
5037 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5038                                   const OperandVector &Operands) {
5039   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5040   if (Desc.mayStore() &&
5041       (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5042     SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5043     if (Loc != getInstLoc(Operands)) {
5044       Error(Loc, "TFE modifier has no meaning for store instructions");
5045       return false;
5046     }
5047   }
5048 
5049   return true;
5050 }
5051 
5052 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5053                                           const SMLoc &IDLoc,
5054                                           const OperandVector &Operands) {
5055   if (auto ErrMsg = validateLdsDirect(Inst)) {
5056     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5057     return false;
5058   }
5059   if (!validateSOPLiteral(Inst)) {
5060     Error(getLitLoc(Operands),
5061       "only one unique literal operand is allowed");
5062     return false;
5063   }
5064   if (!validateVOPLiteral(Inst, Operands)) {
5065     return false;
5066   }
5067   if (!validateConstantBusLimitations(Inst, Operands)) {
5068     return false;
5069   }
5070   if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5071     return false;
5072   }
5073   if (!validateIntClampSupported(Inst)) {
5074     Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5075           "integer clamping is not supported on this GPU");
5076     return false;
5077   }
5078   if (!validateOpSel(Inst)) {
5079     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5080       "invalid op_sel operand");
5081     return false;
5082   }
5083   if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5084     Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5085           "invalid neg_lo operand");
5086     return false;
5087   }
5088   if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5089     Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5090           "invalid neg_hi operand");
5091     return false;
5092   }
5093   if (!validateDPP(Inst, Operands)) {
5094     return false;
5095   }
5096   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5097   if (!validateMIMGD16(Inst)) {
5098     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5099       "d16 modifier is not supported on this GPU");
5100     return false;
5101   }
5102   if (!validateMIMGMSAA(Inst)) {
5103     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5104           "invalid dim; must be MSAA type");
5105     return false;
5106   }
5107   if (!validateMIMGDataSize(Inst, IDLoc)) {
5108     return false;
5109   }
5110   if (!validateMIMGAddrSize(Inst, IDLoc))
5111     return false;
5112   if (!validateMIMGAtomicDMask(Inst)) {
5113     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5114       "invalid atomic image dmask");
5115     return false;
5116   }
5117   if (!validateMIMGGatherDMask(Inst)) {
5118     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5119       "invalid image_gather dmask: only one bit must be set");
5120     return false;
5121   }
5122   if (!validateMovrels(Inst, Operands)) {
5123     return false;
5124   }
5125   if (!validateOffset(Inst, Operands)) {
5126     return false;
5127   }
5128   if (!validateMAIAccWrite(Inst, Operands)) {
5129     return false;
5130   }
5131   if (!validateMAISrc2(Inst, Operands)) {
5132     return false;
5133   }
5134   if (!validateMFMA(Inst, Operands)) {
5135     return false;
5136   }
5137   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5138     return false;
5139   }
5140 
5141   if (!validateAGPRLdSt(Inst)) {
5142     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5143     ? "invalid register class: data and dst should be all VGPR or AGPR"
5144     : "invalid register class: agpr loads and stores not supported on this GPU"
5145     );
5146     return false;
5147   }
5148   if (!validateVGPRAlign(Inst)) {
5149     Error(IDLoc,
5150       "invalid register class: vgpr tuples must be 64 bit aligned");
5151     return false;
5152   }
5153   if (!validateDS(Inst, Operands)) {
5154     return false;
5155   }
5156 
5157   if (!validateBLGP(Inst, Operands)) {
5158     return false;
5159   }
5160 
5161   if (!validateDivScale(Inst)) {
5162     Error(IDLoc, "ABS not allowed in VOP3B instructions");
5163     return false;
5164   }
5165   if (!validateWaitCnt(Inst, Operands)) {
5166     return false;
5167   }
5168   if (!validateExeczVcczOperands(Operands)) {
5169     return false;
5170   }
5171   if (!validateTFE(Inst, Operands)) {
5172     return false;
5173   }
5174 
5175   return true;
5176 }
5177 
// Forward declaration. The definition is not in this part of the file
// (presumably supplied by the generated asm matcher - confirm). Used below to
// obtain the text appended to the "invalid instruction" diagnostic.
static std::string AMDGPUMnemonicSpellCheck(StringRef S,
                                            const FeatureBitset &FBS,
                                            unsigned VariantID = 0);

// Forward declaration. The definition is not in this part of the file
// (presumably supplied by the generated asm matcher - confirm). Used below to
// test a mnemonic against a feature set for one assembler variant.
static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
                                const FeatureBitset &AvailableFeatures,
                                unsigned VariantID);
5185 
5186 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5187                                        const FeatureBitset &FBS) {
5188   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5189 }
5190 
5191 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5192                                        const FeatureBitset &FBS,
5193                                        ArrayRef<unsigned> Variants) {
5194   for (auto Variant : Variants) {
5195     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5196       return true;
5197   }
5198 
5199   return false;
5200 }
5201 
5202 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5203                                                   const SMLoc &IDLoc) {
5204   FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5205 
5206   // Check if requested instruction variant is supported.
5207   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5208     return false;
5209 
5210   // This instruction is not supported.
5211   // Clear any other pending errors because they are no longer relevant.
5212   getParser().clearPendingErrors();
5213 
5214   // Requested instruction variant is not supported.
5215   // Check if any other variants are supported.
5216   StringRef VariantName = getMatchedVariantName();
5217   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5218     return Error(IDLoc,
5219                  Twine(VariantName,
5220                        " variant of this instruction is not supported"));
5221   }
5222 
5223   // Check if this instruction may be used with a different wavesize.
5224   if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5225       !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5226 
5227     FeatureBitset FeaturesWS32 = getFeatureBits();
5228     FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5229         .flip(AMDGPU::FeatureWavefrontSize32);
5230     FeatureBitset AvailableFeaturesWS32 =
5231         ComputeAvailableFeatures(FeaturesWS32);
5232 
5233     if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5234       return Error(IDLoc, "instruction requires wavesize=32");
5235   }
5236 
5237   // Finally check if this instruction is supported on any other GPU.
5238   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5239     return Error(IDLoc, "instruction not supported on this GPU");
5240   }
5241 
5242   // Instruction not supported on any GPU. Probably a typo.
5243   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5244   return Error(IDLoc, "invalid instruction" + Suggestion);
5245 }
5246 
5247 static bool isInvalidVOPDY(const OperandVector &Operands,
5248                            uint64_t InvalidOprIdx) {
5249   assert(InvalidOprIdx < Operands.size());
5250   const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5251   if (Op.isToken() && InvalidOprIdx > 1) {
5252     const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5253     return PrevOp.isToken() && PrevOp.getToken() == "::";
5254   }
5255   return false;
5256 }
5257 
5258 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5259                                               OperandVector &Operands,
5260                                               MCStreamer &Out,
5261                                               uint64_t &ErrorInfo,
5262                                               bool MatchingInlineAsm) {
5263   MCInst Inst;
5264   unsigned Result = Match_Success;
5265   for (auto Variant : getMatchedVariants()) {
5266     uint64_t EI;
5267     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5268                                   Variant);
5269     // We order match statuses from least to most specific. We use most specific
5270     // status as resulting
5271     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5272     if (R == Match_Success || R == Match_MissingFeature ||
5273         (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5274         (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5275          Result != Match_MissingFeature)) {
5276       Result = R;
5277       ErrorInfo = EI;
5278     }
5279     if (R == Match_Success)
5280       break;
5281   }
5282 
5283   if (Result == Match_Success) {
5284     if (!validateInstruction(Inst, IDLoc, Operands)) {
5285       return true;
5286     }
5287     Inst.setLoc(IDLoc);
5288     Out.emitInstruction(Inst, getSTI());
5289     return false;
5290   }
5291 
5292   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5293   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5294     return true;
5295   }
5296 
5297   switch (Result) {
5298   default: break;
5299   case Match_MissingFeature:
5300     // It has been verified that the specified instruction
5301     // mnemonic is valid. A match was found but it requires
5302     // features which are not supported on this GPU.
5303     return Error(IDLoc, "operands are not valid for this GPU or mode");
5304 
5305   case Match_InvalidOperand: {
5306     SMLoc ErrorLoc = IDLoc;
5307     if (ErrorInfo != ~0ULL) {
5308       if (ErrorInfo >= Operands.size()) {
5309         return Error(IDLoc, "too few operands for instruction");
5310       }
5311       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5312       if (ErrorLoc == SMLoc())
5313         ErrorLoc = IDLoc;
5314 
5315       if (isInvalidVOPDY(Operands, ErrorInfo))
5316         return Error(ErrorLoc, "invalid VOPDY instruction");
5317     }
5318     return Error(ErrorLoc, "invalid operand for instruction");
5319   }
5320 
5321   case Match_MnemonicFail:
5322     llvm_unreachable("Invalid instructions should have been handled already");
5323   }
5324   llvm_unreachable("Implement any new match types added!");
5325 }
5326 
5327 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5328   int64_t Tmp = -1;
5329   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5330     return true;
5331   }
5332   if (getParser().parseAbsoluteExpression(Tmp)) {
5333     return true;
5334   }
5335   Ret = static_cast<uint32_t>(Tmp);
5336   return false;
5337 }
5338 
5339 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5340   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5341     return TokError("directive only supported for amdgcn architecture");
5342 
5343   std::string TargetIDDirective;
5344   SMLoc TargetStart = getTok().getLoc();
5345   if (getParser().parseEscapedString(TargetIDDirective))
5346     return true;
5347 
5348   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5349   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5350     return getParser().Error(TargetRange.Start,
5351         (Twine(".amdgcn_target directive's target id ") +
5352          Twine(TargetIDDirective) +
5353          Twine(" does not match the specified target id ") +
5354          Twine(getTargetStreamer().getTargetID()->toString())).str());
5355 
5356   return false;
5357 }
5358 
5359 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5360   return Error(Range.Start, "value out of range", Range);
5361 }
5362 
5363 bool AMDGPUAsmParser::calculateGPRBlocks(
5364     const FeatureBitset &Features, const MCExpr *VCCUsed,
5365     const MCExpr *FlatScrUsed, bool XNACKUsed,
5366     std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5367     SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5368     const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5369   // TODO(scott.linder): These calculations are duplicated from
5370   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5371   IsaVersion Version = getIsaVersion(getSTI().getCPU());
5372   MCContext &Ctx = getContext();
5373 
5374   const MCExpr *NumSGPRs = NextFreeSGPR;
5375   int64_t EvaluatedSGPRs;
5376 
5377   if (Version.Major >= 10)
5378     NumSGPRs = MCConstantExpr::create(0, Ctx);
5379   else {
5380     unsigned MaxAddressableNumSGPRs =
5381         IsaInfo::getAddressableNumSGPRs(&getSTI());
5382 
5383     if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5384         !Features.test(FeatureSGPRInitBug) &&
5385         static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5386       return OutOfRangeError(SGPRRange);
5387 
5388     const MCExpr *ExtraSGPRs =
5389         AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5390     NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5391 
5392     if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5393         (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5394         static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5395       return OutOfRangeError(SGPRRange);
5396 
5397     if (Features.test(FeatureSGPRInitBug))
5398       NumSGPRs =
5399           MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5400   }
5401 
5402   // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5403   // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5404   auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5405                                 unsigned Granule) -> const MCExpr * {
5406     const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5407     const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5408     const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5409     const MCExpr *AlignToGPR =
5410         AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5411     const MCExpr *DivGPR =
5412         MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5413     const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5414     return SubGPR;
5415   };
5416 
5417   VGPRBlocks = GetNumGPRBlocks(
5418       NextFreeVGPR,
5419       IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5420   SGPRBlocks =
5421       GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5422 
5423   return false;
5424 }
5425 
5426 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5427   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5428     return TokError("directive only supported for amdgcn architecture");
5429 
5430   if (!isHsaAbi(getSTI()))
5431     return TokError("directive only supported for amdhsa OS");
5432 
5433   StringRef KernelName;
5434   if (getParser().parseIdentifier(KernelName))
5435     return true;
5436 
5437   AMDGPU::MCKernelDescriptor KD =
5438       AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5439           &getSTI(), getContext());
5440 
5441   StringSet<> Seen;
5442 
5443   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5444 
5445   const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5446   const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5447 
5448   SMRange VGPRRange;
5449   const MCExpr *NextFreeVGPR = ZeroExpr;
5450   const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5451   uint64_t SharedVGPRCount = 0;
5452   uint64_t PreloadLength = 0;
5453   uint64_t PreloadOffset = 0;
5454   SMRange SGPRRange;
5455   const MCExpr *NextFreeSGPR = ZeroExpr;
5456 
5457   // Count the number of user SGPRs implied from the enabled feature bits.
5458   unsigned ImpliedUserSGPRCount = 0;
5459 
5460   // Track if the asm explicitly contains the directive for the user SGPR
5461   // count.
5462   std::optional<unsigned> ExplicitUserSGPRCount;
5463   const MCExpr *ReserveVCC = OneExpr;
5464   const MCExpr *ReserveFlatScr = OneExpr;
5465   std::optional<bool> EnableWavefrontSize32;
5466 
5467   while (true) {
5468     while (trySkipToken(AsmToken::EndOfStatement));
5469 
5470     StringRef ID;
5471     SMRange IDRange = getTok().getLocRange();
5472     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5473       return true;
5474 
5475     if (ID == ".end_amdhsa_kernel")
5476       break;
5477 
5478     if (!Seen.insert(ID).second)
5479       return TokError(".amdhsa_ directives cannot be repeated");
5480 
5481     SMLoc ValStart = getLoc();
5482     const MCExpr *ExprVal;
5483     if (getParser().parseExpression(ExprVal))
5484       return true;
5485     SMLoc ValEnd = getLoc();
5486     SMRange ValRange = SMRange(ValStart, ValEnd);
5487 
5488     int64_t IVal = 0;
5489     uint64_t Val = IVal;
5490     bool EvaluatableExpr;
5491     if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5492       if (IVal < 0)
5493         return OutOfRangeError(ValRange);
5494       Val = IVal;
5495     }
5496 
5497 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
5498   if (!isUInt<ENTRY##_WIDTH>(Val))                                             \
5499     return OutOfRangeError(RANGE);                                             \
5500   AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY,     \
5501                                        getContext());
5502 
5503 // Some fields use the parsed value immediately which requires the expression to
5504 // be solvable.
5505 #define EXPR_RESOLVE_OR_ERROR(RESOLVED)                                        \
5506   if (!(RESOLVED))                                                             \
5507     return Error(IDRange.Start, "directive should have resolvable expression", \
5508                  IDRange);
5509 
5510     if (ID == ".amdhsa_group_segment_fixed_size") {
5511       if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5512                   CHAR_BIT>(Val))
5513         return OutOfRangeError(ValRange);
5514       KD.group_segment_fixed_size = ExprVal;
5515     } else if (ID == ".amdhsa_private_segment_fixed_size") {
5516       if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5517                   CHAR_BIT>(Val))
5518         return OutOfRangeError(ValRange);
5519       KD.private_segment_fixed_size = ExprVal;
5520     } else if (ID == ".amdhsa_kernarg_size") {
5521       if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5522         return OutOfRangeError(ValRange);
5523       KD.kernarg_size = ExprVal;
5524     } else if (ID == ".amdhsa_user_sgpr_count") {
5525       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5526       ExplicitUserSGPRCount = Val;
5527     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5528       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5529       if (hasArchitectedFlatScratch())
5530         return Error(IDRange.Start,
5531                      "directive is not supported with architected flat scratch",
5532                      IDRange);
5533       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5534                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5535                        ExprVal, ValRange);
5536       if (Val)
5537         ImpliedUserSGPRCount += 4;
5538     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5539       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5540       if (!hasKernargPreload())
5541         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5542 
5543       if (Val > getMaxNumUserSGPRs())
5544         return OutOfRangeError(ValRange);
5545       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5546                        ValRange);
5547       if (Val) {
5548         ImpliedUserSGPRCount += Val;
5549         PreloadLength = Val;
5550       }
5551     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5552       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5553       if (!hasKernargPreload())
5554         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5555 
5556       if (Val >= 1024)
5557         return OutOfRangeError(ValRange);
5558       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5559                        ValRange);
5560       if (Val)
5561         PreloadOffset = Val;
5562     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5563       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5564       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5565                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5566                        ValRange);
5567       if (Val)
5568         ImpliedUserSGPRCount += 2;
5569     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5570       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5571       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5572                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5573                        ValRange);
5574       if (Val)
5575         ImpliedUserSGPRCount += 2;
5576     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5577       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5578       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5579                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5580                        ExprVal, ValRange);
5581       if (Val)
5582         ImpliedUserSGPRCount += 2;
5583     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5584       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5585       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5586                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5587                        ValRange);
5588       if (Val)
5589         ImpliedUserSGPRCount += 2;
5590     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5591       if (hasArchitectedFlatScratch())
5592         return Error(IDRange.Start,
5593                      "directive is not supported with architected flat scratch",
5594                      IDRange);
5595       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5596       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5597                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5598                        ExprVal, ValRange);
5599       if (Val)
5600         ImpliedUserSGPRCount += 2;
5601     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5602       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5603       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5604                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5605                        ExprVal, ValRange);
5606       if (Val)
5607         ImpliedUserSGPRCount += 1;
5608     } else if (ID == ".amdhsa_wavefront_size32") {
5609       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5610       if (IVersion.Major < 10)
5611         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5612       EnableWavefrontSize32 = Val;
5613       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5614                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5615                        ValRange);
5616     } else if (ID == ".amdhsa_uses_dynamic_stack") {
5617       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5618                        KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5619                        ValRange);
5620     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5621       if (hasArchitectedFlatScratch())
5622         return Error(IDRange.Start,
5623                      "directive is not supported with architected flat scratch",
5624                      IDRange);
5625       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5626                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5627                        ValRange);
5628     } else if (ID == ".amdhsa_enable_private_segment") {
5629       if (!hasArchitectedFlatScratch())
5630         return Error(
5631             IDRange.Start,
5632             "directive is not supported without architected flat scratch",
5633             IDRange);
5634       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5635                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5636                        ValRange);
5637     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5638       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5639                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5640                        ValRange);
5641     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5642       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5643                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5644                        ValRange);
5645     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5646       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5647                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5648                        ValRange);
5649     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5650       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5651                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5652                        ValRange);
5653     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5654       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5655                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5656                        ValRange);
5657     } else if (ID == ".amdhsa_next_free_vgpr") {
5658       VGPRRange = ValRange;
5659       NextFreeVGPR = ExprVal;
5660     } else if (ID == ".amdhsa_next_free_sgpr") {
5661       SGPRRange = ValRange;
5662       NextFreeSGPR = ExprVal;
5663     } else if (ID == ".amdhsa_accum_offset") {
5664       if (!isGFX90A())
5665         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5666       AccumOffset = ExprVal;
5667     } else if (ID == ".amdhsa_reserve_vcc") {
5668       if (EvaluatableExpr && !isUInt<1>(Val))
5669         return OutOfRangeError(ValRange);
5670       ReserveVCC = ExprVal;
5671     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5672       if (IVersion.Major < 7)
5673         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5674       if (hasArchitectedFlatScratch())
5675         return Error(IDRange.Start,
5676                      "directive is not supported with architected flat scratch",
5677                      IDRange);
5678       if (EvaluatableExpr && !isUInt<1>(Val))
5679         return OutOfRangeError(ValRange);
5680       ReserveFlatScr = ExprVal;
5681     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5682       if (IVersion.Major < 8)
5683         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5684       if (!isUInt<1>(Val))
5685         return OutOfRangeError(ValRange);
5686       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5687         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5688                                  IDRange);
5689     } else if (ID == ".amdhsa_float_round_mode_32") {
5690       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5691                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5692                        ValRange);
5693     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5694       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5695                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5696                        ValRange);
5697     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5698       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5699                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5700                        ValRange);
5701     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5702       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5703                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5704                        ValRange);
5705     } else if (ID == ".amdhsa_dx10_clamp") {
5706       if (IVersion.Major >= 12)
5707         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5708       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5709                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5710                        ValRange);
5711     } else if (ID == ".amdhsa_ieee_mode") {
5712       if (IVersion.Major >= 12)
5713         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5714       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5715                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5716                        ValRange);
5717     } else if (ID == ".amdhsa_fp16_overflow") {
5718       if (IVersion.Major < 9)
5719         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5720       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5721                        COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5722                        ValRange);
5723     } else if (ID == ".amdhsa_tg_split") {
5724       if (!isGFX90A())
5725         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5726       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5727                        ExprVal, ValRange);
5728     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5729       if (IVersion.Major < 10)
5730         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5731       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5732                        COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5733                        ValRange);
5734     } else if (ID == ".amdhsa_memory_ordered") {
5735       if (IVersion.Major < 10)
5736         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5737       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5738                        COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5739                        ValRange);
5740     } else if (ID == ".amdhsa_forward_progress") {
5741       if (IVersion.Major < 10)
5742         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5743       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5744                        COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5745                        ValRange);
5746     } else if (ID == ".amdhsa_shared_vgpr_count") {
5747       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5748       if (IVersion.Major < 10 || IVersion.Major >= 12)
5749         return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5750                      IDRange);
5751       SharedVGPRCount = Val;
5752       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5753                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5754                        ValRange);
5755     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5756       PARSE_BITS_ENTRY(
5757           KD.compute_pgm_rsrc2,
5758           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5759           ExprVal, ValRange);
5760     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5761       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5762                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5763                        ExprVal, ValRange);
5764     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5765       PARSE_BITS_ENTRY(
5766           KD.compute_pgm_rsrc2,
5767           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5768           ExprVal, ValRange);
5769     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5770       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5771                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5772                        ExprVal, ValRange);
5773     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5774       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5775                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5776                        ExprVal, ValRange);
5777     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5778       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5779                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5780                        ExprVal, ValRange);
5781     } else if (ID == ".amdhsa_exception_int_div_zero") {
5782       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5783                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5784                        ExprVal, ValRange);
5785     } else if (ID == ".amdhsa_round_robin_scheduling") {
5786       if (IVersion.Major < 12)
5787         return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5788       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5789                        COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5790                        ValRange);
5791     } else {
5792       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5793     }
5794 
5795 #undef PARSE_BITS_ENTRY
5796   }
5797 
5798   if (!Seen.contains(".amdhsa_next_free_vgpr"))
5799     return TokError(".amdhsa_next_free_vgpr directive is required");
5800 
5801   if (!Seen.contains(".amdhsa_next_free_sgpr"))
5802     return TokError(".amdhsa_next_free_sgpr directive is required");
5803 
5804   const MCExpr *VGPRBlocks;
5805   const MCExpr *SGPRBlocks;
5806   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5807                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5808                          EnableWavefrontSize32, NextFreeVGPR,
5809                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5810                          SGPRBlocks))
5811     return true;
5812 
5813   int64_t EvaluatedVGPRBlocks;
5814   bool VGPRBlocksEvaluatable =
5815       VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
5816   if (VGPRBlocksEvaluatable &&
5817       !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5818           static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
5819     return OutOfRangeError(VGPRRange);
5820   }
5821   AMDGPU::MCKernelDescriptor::bits_set(
5822       KD.compute_pgm_rsrc1, VGPRBlocks,
5823       COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5824       COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5825 
5826   int64_t EvaluatedSGPRBlocks;
5827   if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
5828       !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5829           static_cast<uint64_t>(EvaluatedSGPRBlocks)))
5830     return OutOfRangeError(SGPRRange);
5831   AMDGPU::MCKernelDescriptor::bits_set(
5832       KD.compute_pgm_rsrc1, SGPRBlocks,
5833       COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5834       COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5835 
5836   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5837     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5838                     "enabled user SGPRs");
5839 
5840   unsigned UserSGPRCount =
5841       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5842 
5843   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5844     return TokError("too many user SGPRs enabled");
5845   AMDGPU::MCKernelDescriptor::bits_set(
5846       KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5847       COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5848       COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5849 
5850   int64_t IVal = 0;
5851   if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5852     return TokError("Kernarg size should be resolvable");
5853   uint64_t kernarg_size = IVal;
5854   if (PreloadLength && kernarg_size &&
5855       (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5856     return TokError("Kernarg preload length + offset is larger than the "
5857                     "kernarg segment size");
5858 
5859   if (isGFX90A()) {
5860     if (!Seen.contains(".amdhsa_accum_offset"))
5861       return TokError(".amdhsa_accum_offset directive is required");
5862     int64_t EvaluatedAccum;
5863     bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
5864     uint64_t UEvaluatedAccum = EvaluatedAccum;
5865     if (AccumEvaluatable &&
5866         (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
5867       return TokError("accum_offset should be in range [4..256] in "
5868                       "increments of 4");
5869 
5870     int64_t EvaluatedNumVGPR;
5871     if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
5872         AccumEvaluatable &&
5873         UEvaluatedAccum >
5874             alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
5875       return TokError("accum_offset exceeds total VGPR allocation");
5876     const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
5877         MCBinaryExpr::createDiv(
5878             AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
5879         MCConstantExpr::create(1, getContext()), getContext());
5880     MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
5881                                  COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5882                                  COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5883                                  getContext());
5884   }
5885 
5886   if (IVersion.Major >= 10 && IVersion.Major < 12) {
5887     // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5888     if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5889       return TokError("shared_vgpr_count directive not valid on "
5890                       "wavefront size 32");
5891     }
5892 
5893     if (VGPRBlocksEvaluatable &&
5894         (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
5895          63)) {
5896       return TokError("shared_vgpr_count*2 + "
5897                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5898                       "exceed 63\n");
5899     }
5900   }
5901 
5902   getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5903                                                  NextFreeVGPR, NextFreeSGPR,
5904                                                  ReserveVCC, ReserveFlatScr);
5905   return false;
5906 }
5907 
5908 bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5909   uint32_t Version;
5910   if (ParseAsAbsoluteExpression(Version))
5911     return true;
5912 
5913   getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5914   return false;
5915 }
5916 
5917 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5918                                                AMDGPUMCKernelCodeT &C) {
5919   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5920   // assembly for backwards compatibility.
5921   if (ID == "max_scratch_backing_memory_byte_size") {
5922     Parser.eatToEndOfStatement();
5923     return false;
5924   }
5925 
5926   SmallString<40> ErrStr;
5927   raw_svector_ostream Err(ErrStr);
5928   if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
5929     return TokError(Err.str());
5930   }
5931   Lex();
5932 
5933   if (ID == "enable_wavefront_size32") {
5934     if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5935       if (!isGFX10Plus())
5936         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5937       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5938         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5939     } else {
5940       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5941         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5942     }
5943   }
5944 
5945   if (ID == "wavefront_size") {
5946     if (C.wavefront_size == 5) {
5947       if (!isGFX10Plus())
5948         return TokError("wavefront_size=5 is only allowed on GFX10+");
5949       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5950         return TokError("wavefront_size=5 requires +WavefrontSize32");
5951     } else if (C.wavefront_size == 6) {
5952       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5953         return TokError("wavefront_size=6 requires +WavefrontSize64");
5954     }
5955   }
5956 
5957   return false;
5958 }
5959 
5960 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5961   AMDGPUMCKernelCodeT KernelCode;
5962   KernelCode.initDefault(&getSTI(), getContext());
5963 
5964   while (true) {
5965     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5966     // will set the current token to EndOfStatement.
5967     while(trySkipToken(AsmToken::EndOfStatement));
5968 
5969     StringRef ID;
5970     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5971       return true;
5972 
5973     if (ID == ".end_amd_kernel_code_t")
5974       break;
5975 
5976     if (ParseAMDKernelCodeTValue(ID, KernelCode))
5977       return true;
5978   }
5979 
5980   KernelCode.validate(&getSTI(), getContext());
5981   getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
5982 
5983   return false;
5984 }
5985 
5986 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5987   StringRef KernelName;
5988   if (!parseId(KernelName, "expected symbol name"))
5989     return true;
5990 
5991   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5992                                            ELF::STT_AMDGPU_HSA_KERNEL);
5993 
5994   KernelScope.initialize(getContext());
5995   return false;
5996 }
5997 
5998 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5999   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
6000     return Error(getLoc(),
6001                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
6002                  "architectures");
6003   }
6004 
6005   auto TargetIDDirective = getLexer().getTok().getStringContents();
6006   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6007     return Error(getParser().getTok().getLoc(), "target id must match options");
6008 
6009   getTargetStreamer().EmitISAVersion();
6010   Lex();
6011 
6012   return false;
6013 }
6014 
6015 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6016   assert(isHsaAbi(getSTI()));
6017 
6018   std::string HSAMetadataString;
6019   if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6020                           HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6021     return true;
6022 
6023   if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6024     return Error(getLoc(), "invalid HSA metadata");
6025 
6026   return false;
6027 }
6028 
6029 /// Common code to parse out a block of text (typically YAML) between start and
6030 /// end directives.
6031 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6032                                           const char *AssemblerDirectiveEnd,
6033                                           std::string &CollectString) {
6034 
6035   raw_string_ostream CollectStream(CollectString);
6036 
6037   getLexer().setSkipSpace(false);
6038 
6039   bool FoundEnd = false;
6040   while (!isToken(AsmToken::Eof)) {
6041     while (isToken(AsmToken::Space)) {
6042       CollectStream << getTokenStr();
6043       Lex();
6044     }
6045 
6046     if (trySkipId(AssemblerDirectiveEnd)) {
6047       FoundEnd = true;
6048       break;
6049     }
6050 
6051     CollectStream << Parser.parseStringToEndOfStatement()
6052                   << getContext().getAsmInfo()->getSeparatorString();
6053 
6054     Parser.eatToEndOfStatement();
6055   }
6056 
6057   getLexer().setSkipSpace(true);
6058 
6059   if (isToken(AsmToken::Eof) && !FoundEnd) {
6060     return TokError(Twine("expected directive ") +
6061                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6062   }
6063 
6064   CollectStream.flush();
6065   return false;
6066 }
6067 
6068 /// Parse the assembler directive for new MsgPack-format PAL metadata.
6069 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6070   std::string String;
6071   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6072                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6073     return true;
6074 
6075   auto PALMetadata = getTargetStreamer().getPALMetadata();
6076   if (!PALMetadata->setFromString(String))
6077     return Error(getLoc(), "invalid PAL metadata");
6078   return false;
6079 }
6080 
6081 /// Parse the assembler directive for old linear-format PAL metadata.
6082 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6083   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6084     return Error(getLoc(),
6085                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6086                  "not available on non-amdpal OSes")).str());
6087   }
6088 
6089   auto PALMetadata = getTargetStreamer().getPALMetadata();
6090   PALMetadata->setLegacy();
6091   for (;;) {
6092     uint32_t Key, Value;
6093     if (ParseAsAbsoluteExpression(Key)) {
6094       return TokError(Twine("invalid value in ") +
6095                       Twine(PALMD::AssemblerDirective));
6096     }
6097     if (!trySkipToken(AsmToken::Comma)) {
6098       return TokError(Twine("expected an even number of values in ") +
6099                       Twine(PALMD::AssemblerDirective));
6100     }
6101     if (ParseAsAbsoluteExpression(Value)) {
6102       return TokError(Twine("invalid value in ") +
6103                       Twine(PALMD::AssemblerDirective));
6104     }
6105     PALMetadata->setRegister(Key, Value);
6106     if (!trySkipToken(AsmToken::Comma))
6107       break;
6108   }
6109   return false;
6110 }
6111 
6112 /// ParseDirectiveAMDGPULDS
6113 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6114 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6115   if (getParser().checkForValidSection())
6116     return true;
6117 
6118   StringRef Name;
6119   SMLoc NameLoc = getLoc();
6120   if (getParser().parseIdentifier(Name))
6121     return TokError("expected identifier in directive");
6122 
6123   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6124   if (getParser().parseComma())
6125     return true;
6126 
6127   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6128 
6129   int64_t Size;
6130   SMLoc SizeLoc = getLoc();
6131   if (getParser().parseAbsoluteExpression(Size))
6132     return true;
6133   if (Size < 0)
6134     return Error(SizeLoc, "size must be non-negative");
6135   if (Size > LocalMemorySize)
6136     return Error(SizeLoc, "size is too large");
6137 
6138   int64_t Alignment = 4;
6139   if (trySkipToken(AsmToken::Comma)) {
6140     SMLoc AlignLoc = getLoc();
6141     if (getParser().parseAbsoluteExpression(Alignment))
6142       return true;
6143     if (Alignment < 0 || !isPowerOf2_64(Alignment))
6144       return Error(AlignLoc, "alignment must be a power of two");
6145 
6146     // Alignment larger than the size of LDS is possible in theory, as long
6147     // as the linker manages to place to symbol at address 0, but we do want
6148     // to make sure the alignment fits nicely into a 32-bit integer.
6149     if (Alignment >= 1u << 31)
6150       return Error(AlignLoc, "alignment is too large");
6151   }
6152 
6153   if (parseEOL())
6154     return true;
6155 
6156   Symbol->redefineIfPossible();
6157   if (!Symbol->isUndefined())
6158     return Error(NameLoc, "invalid symbol redefinition");
6159 
6160   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6161   return false;
6162 }
6163 
6164 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6165   StringRef IDVal = DirectiveID.getString();
6166 
6167   if (isHsaAbi(getSTI())) {
6168     if (IDVal == ".amdhsa_kernel")
6169      return ParseDirectiveAMDHSAKernel();
6170 
6171     if (IDVal == ".amdhsa_code_object_version")
6172       return ParseDirectiveAMDHSACodeObjectVersion();
6173 
6174     // TODO: Restructure/combine with PAL metadata directive.
6175     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
6176       return ParseDirectiveHSAMetadata();
6177   } else {
6178     if (IDVal == ".amd_kernel_code_t")
6179       return ParseDirectiveAMDKernelCodeT();
6180 
6181     if (IDVal == ".amdgpu_hsa_kernel")
6182       return ParseDirectiveAMDGPUHsaKernel();
6183 
6184     if (IDVal == ".amd_amdgpu_isa")
6185       return ParseDirectiveISAVersion();
6186 
6187     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
6188       return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6189                               Twine(" directive is "
6190                                     "not available on non-amdhsa OSes"))
6191                                  .str());
6192     }
6193   }
6194 
6195   if (IDVal == ".amdgcn_target")
6196     return ParseDirectiveAMDGCNTarget();
6197 
6198   if (IDVal == ".amdgpu_lds")
6199     return ParseDirectiveAMDGPULDS();
6200 
6201   if (IDVal == PALMD::AssemblerDirectiveBegin)
6202     return ParseDirectivePALMetadataBegin();
6203 
6204   if (IDVal == PALMD::AssemblerDirective)
6205     return ParseDirectivePALMetadata();
6206 
6207   return true;
6208 }
6209 
6210 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6211                                            unsigned RegNo) {
6212 
6213   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6214     return isGFX9Plus();
6215 
6216   // GFX10+ has 2 more SGPRs 104 and 105.
6217   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6218     return hasSGPR104_SGPR105();
6219 
6220   switch (RegNo) {
6221   case AMDGPU::SRC_SHARED_BASE_LO:
6222   case AMDGPU::SRC_SHARED_BASE:
6223   case AMDGPU::SRC_SHARED_LIMIT_LO:
6224   case AMDGPU::SRC_SHARED_LIMIT:
6225   case AMDGPU::SRC_PRIVATE_BASE_LO:
6226   case AMDGPU::SRC_PRIVATE_BASE:
6227   case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6228   case AMDGPU::SRC_PRIVATE_LIMIT:
6229     return isGFX9Plus();
6230   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6231     return isGFX9Plus() && !isGFX11Plus();
6232   case AMDGPU::TBA:
6233   case AMDGPU::TBA_LO:
6234   case AMDGPU::TBA_HI:
6235   case AMDGPU::TMA:
6236   case AMDGPU::TMA_LO:
6237   case AMDGPU::TMA_HI:
6238     return !isGFX9Plus();
6239   case AMDGPU::XNACK_MASK:
6240   case AMDGPU::XNACK_MASK_LO:
6241   case AMDGPU::XNACK_MASK_HI:
6242     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6243   case AMDGPU::SGPR_NULL:
6244     return isGFX10Plus();
6245   default:
6246     break;
6247   }
6248 
6249   if (isCI())
6250     return true;
6251 
6252   if (isSI() || isGFX10Plus()) {
6253     // No flat_scr on SI.
6254     // On GFX10Plus flat scratch is not a valid register operand and can only be
6255     // accessed with s_setreg/s_getreg.
6256     switch (RegNo) {
6257     case AMDGPU::FLAT_SCR:
6258     case AMDGPU::FLAT_SCR_LO:
6259     case AMDGPU::FLAT_SCR_HI:
6260       return false;
6261     default:
6262       return true;
6263     }
6264   }
6265 
6266   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6267   // SI/CI have.
6268   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6269     return hasSGPR102_SGPR103();
6270 
6271   return true;
6272 }
6273 
6274 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6275                                           StringRef Mnemonic,
6276                                           OperandMode Mode) {
6277   ParseStatus Res = parseVOPD(Operands);
6278   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6279     return Res;
6280 
6281   // Try to parse with a custom parser
6282   Res = MatchOperandParserImpl(Operands, Mnemonic);
6283 
6284   // If we successfully parsed the operand or if there as an error parsing,
6285   // we are done.
6286   //
6287   // If we are parsing after we reach EndOfStatement then this means we
6288   // are appending default values to the Operands list.  This is only done
6289   // by custom parser, so we shouldn't continue on to the generic parsing.
6290   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6291     return Res;
6292 
6293   SMLoc RBraceLoc;
6294   SMLoc LBraceLoc = getLoc();
6295   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6296     unsigned Prefix = Operands.size();
6297 
6298     for (;;) {
6299       auto Loc = getLoc();
6300       Res = parseReg(Operands);
6301       if (Res.isNoMatch())
6302         Error(Loc, "expected a register");
6303       if (!Res.isSuccess())
6304         return ParseStatus::Failure;
6305 
6306       RBraceLoc = getLoc();
6307       if (trySkipToken(AsmToken::RBrac))
6308         break;
6309 
6310       if (!skipToken(AsmToken::Comma,
6311                      "expected a comma or a closing square bracket"))
6312         return ParseStatus::Failure;
6313     }
6314 
6315     if (Operands.size() - Prefix > 1) {
6316       Operands.insert(Operands.begin() + Prefix,
6317                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6318       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6319     }
6320 
6321     return ParseStatus::Success;
6322   }
6323 
6324   return parseRegOrImm(Operands);
6325 }
6326 
6327 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6328   // Clear any forced encodings from the previous instruction.
6329   setForcedEncodingSize(0);
6330   setForcedDPP(false);
6331   setForcedSDWA(false);
6332 
6333   if (Name.ends_with("_e64_dpp")) {
6334     setForcedDPP(true);
6335     setForcedEncodingSize(64);
6336     return Name.substr(0, Name.size() - 8);
6337   }
6338   if (Name.ends_with("_e64")) {
6339     setForcedEncodingSize(64);
6340     return Name.substr(0, Name.size() - 4);
6341   }
6342   if (Name.ends_with("_e32")) {
6343     setForcedEncodingSize(32);
6344     return Name.substr(0, Name.size() - 4);
6345   }
6346   if (Name.ends_with("_dpp")) {
6347     setForcedDPP(true);
6348     return Name.substr(0, Name.size() - 4);
6349   }
6350   if (Name.ends_with("_sdwa")) {
6351     setForcedSDWA(true);
6352     return Name.substr(0, Name.size() - 5);
6353   }
6354   return Name;
6355 }
6356 
6357 static void applyMnemonicAliases(StringRef &Mnemonic,
6358                                  const FeatureBitset &Features,
6359                                  unsigned VariantID);
6360 
6361 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6362                                        StringRef Name,
6363                                        SMLoc NameLoc, OperandVector &Operands) {
6364   // Add the instruction mnemonic
6365   Name = parseMnemonicSuffix(Name);
6366 
6367   // If the target architecture uses MnemonicAlias, call it here to parse
6368   // operands correctly.
6369   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6370 
6371   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6372 
6373   bool IsMIMG = Name.starts_with("image_");
6374 
6375   while (!trySkipToken(AsmToken::EndOfStatement)) {
6376     OperandMode Mode = OperandMode_Default;
6377     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6378       Mode = OperandMode_NSA;
6379     ParseStatus Res = parseOperand(Operands, Name, Mode);
6380 
6381     if (!Res.isSuccess()) {
6382       checkUnsupportedInstruction(Name, NameLoc);
6383       if (!Parser.hasPendingError()) {
6384         // FIXME: use real operand location rather than the current location.
6385         StringRef Msg = Res.isFailure() ? "failed parsing operand."
6386                                         : "not a valid operand.";
6387         Error(getLoc(), Msg);
6388       }
6389       while (!trySkipToken(AsmToken::EndOfStatement)) {
6390         lex();
6391       }
6392       return true;
6393     }
6394 
6395     // Eat the comma or space if there is one.
6396     trySkipToken(AsmToken::Comma);
6397   }
6398 
6399   return false;
6400 }
6401 
6402 //===----------------------------------------------------------------------===//
6403 // Utility functions
6404 //===----------------------------------------------------------------------===//
6405 
6406 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6407                                           OperandVector &Operands) {
6408   SMLoc S = getLoc();
6409   if (!trySkipId(Name))
6410     return ParseStatus::NoMatch;
6411 
6412   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6413   return ParseStatus::Success;
6414 }
6415 
6416 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6417                                                 int64_t &IntVal) {
6418 
6419   if (!trySkipId(Prefix, AsmToken::Colon))
6420     return ParseStatus::NoMatch;
6421 
6422   return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6423 }
6424 
6425 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6426     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6427     std::function<bool(int64_t &)> ConvertResult) {
6428   SMLoc S = getLoc();
6429   int64_t Value = 0;
6430 
6431   ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6432   if (!Res.isSuccess())
6433     return Res;
6434 
6435   if (ConvertResult && !ConvertResult(Value)) {
6436     Error(S, "invalid " + StringRef(Prefix) + " value.");
6437   }
6438 
6439   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6440   return ParseStatus::Success;
6441 }
6442 
6443 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6444     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6445     bool (*ConvertResult)(int64_t &)) {
6446   SMLoc S = getLoc();
6447   if (!trySkipId(Prefix, AsmToken::Colon))
6448     return ParseStatus::NoMatch;
6449 
6450   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6451     return ParseStatus::Failure;
6452 
6453   unsigned Val = 0;
6454   const unsigned MaxSize = 4;
6455 
6456   // FIXME: How to verify the number of elements matches the number of src
6457   // operands?
6458   for (int I = 0; ; ++I) {
6459     int64_t Op;
6460     SMLoc Loc = getLoc();
6461     if (!parseExpr(Op))
6462       return ParseStatus::Failure;
6463 
6464     if (Op != 0 && Op != 1)
6465       return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6466 
6467     Val |= (Op << I);
6468 
6469     if (trySkipToken(AsmToken::RBrac))
6470       break;
6471 
6472     if (I + 1 == MaxSize)
6473       return Error(getLoc(), "expected a closing square bracket");
6474 
6475     if (!skipToken(AsmToken::Comma, "expected a comma"))
6476       return ParseStatus::Failure;
6477   }
6478 
6479   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6480   return ParseStatus::Success;
6481 }
6482 
6483 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6484                                            OperandVector &Operands,
6485                                            AMDGPUOperand::ImmTy ImmTy) {
6486   int64_t Bit;
6487   SMLoc S = getLoc();
6488 
6489   if (trySkipId(Name)) {
6490     Bit = 1;
6491   } else if (trySkipId("no", Name)) {
6492     Bit = 0;
6493   } else {
6494     return ParseStatus::NoMatch;
6495   }
6496 
6497   if (Name == "r128" && !hasMIMG_R128())
6498     return Error(S, "r128 modifier is not supported on this GPU");
6499   if (Name == "a16" && !hasA16())
6500     return Error(S, "a16 modifier is not supported on this GPU");
6501 
6502   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6503     ImmTy = AMDGPUOperand::ImmTyR128A16;
6504 
6505   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6506   return ParseStatus::Success;
6507 }
6508 
6509 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6510                                       bool &Disabling) const {
6511   Disabling = Id.consume_front("no");
6512 
6513   if (isGFX940() && !Mnemo.starts_with("s_")) {
6514     return StringSwitch<unsigned>(Id)
6515         .Case("nt", AMDGPU::CPol::NT)
6516         .Case("sc0", AMDGPU::CPol::SC0)
6517         .Case("sc1", AMDGPU::CPol::SC1)
6518         .Default(0);
6519   }
6520 
6521   return StringSwitch<unsigned>(Id)
6522       .Case("dlc", AMDGPU::CPol::DLC)
6523       .Case("glc", AMDGPU::CPol::GLC)
6524       .Case("scc", AMDGPU::CPol::SCC)
6525       .Case("slc", AMDGPU::CPol::SLC)
6526       .Default(0);
6527 }
6528 
6529 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6530   if (isGFX12Plus()) {
6531     SMLoc StringLoc = getLoc();
6532 
6533     int64_t CPolVal = 0;
6534     ParseStatus ResTH = ParseStatus::NoMatch;
6535     ParseStatus ResScope = ParseStatus::NoMatch;
6536 
6537     for (;;) {
6538       if (ResTH.isNoMatch()) {
6539         int64_t TH;
6540         ResTH = parseTH(Operands, TH);
6541         if (ResTH.isFailure())
6542           return ResTH;
6543         if (ResTH.isSuccess()) {
6544           CPolVal |= TH;
6545           continue;
6546         }
6547       }
6548 
6549       if (ResScope.isNoMatch()) {
6550         int64_t Scope;
6551         ResScope = parseScope(Operands, Scope);
6552         if (ResScope.isFailure())
6553           return ResScope;
6554         if (ResScope.isSuccess()) {
6555           CPolVal |= Scope;
6556           continue;
6557         }
6558       }
6559 
6560       break;
6561     }
6562 
6563     if (ResTH.isNoMatch() && ResScope.isNoMatch())
6564       return ParseStatus::NoMatch;
6565 
6566     Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6567                                                 AMDGPUOperand::ImmTyCPol));
6568     return ParseStatus::Success;
6569   }
6570 
6571   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6572   SMLoc OpLoc = getLoc();
6573   unsigned Enabled = 0, Seen = 0;
6574   for (;;) {
6575     SMLoc S = getLoc();
6576     bool Disabling;
6577     unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6578     if (!CPol)
6579       break;
6580 
6581     lex();
6582 
6583     if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6584       return Error(S, "dlc modifier is not supported on this GPU");
6585 
6586     if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6587       return Error(S, "scc modifier is not supported on this GPU");
6588 
6589     if (Seen & CPol)
6590       return Error(S, "duplicate cache policy modifier");
6591 
6592     if (!Disabling)
6593       Enabled |= CPol;
6594 
6595     Seen |= CPol;
6596   }
6597 
6598   if (!Seen)
6599     return ParseStatus::NoMatch;
6600 
6601   Operands.push_back(
6602       AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6603   return ParseStatus::Success;
6604 }
6605 
6606 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6607                                         int64_t &Scope) {
6608   Scope = AMDGPU::CPol::SCOPE_CU; // default;
6609 
6610   StringRef Value;
6611   SMLoc StringLoc;
6612   ParseStatus Res;
6613 
6614   Res = parseStringWithPrefix("scope", Value, StringLoc);
6615   if (!Res.isSuccess())
6616     return Res;
6617 
6618   Scope = StringSwitch<int64_t>(Value)
6619               .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6620               .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6621               .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6622               .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6623               .Default(0xffffffff);
6624 
6625   if (Scope == 0xffffffff)
6626     return Error(StringLoc, "invalid scope value");
6627 
6628   return ParseStatus::Success;
6629 }
6630 
6631 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6632   TH = AMDGPU::CPol::TH_RT; // default
6633 
6634   StringRef Value;
6635   SMLoc StringLoc;
6636   ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6637   if (!Res.isSuccess())
6638     return Res;
6639 
6640   if (Value == "TH_DEFAULT")
6641     TH = AMDGPU::CPol::TH_RT;
6642   else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6643            Value == "TH_LOAD_NT_WB") {
6644     return Error(StringLoc, "invalid th value");
6645   } else if (Value.consume_front("TH_ATOMIC_")) {
6646     TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6647   } else if (Value.consume_front("TH_LOAD_")) {
6648     TH = AMDGPU::CPol::TH_TYPE_LOAD;
6649   } else if (Value.consume_front("TH_STORE_")) {
6650     TH = AMDGPU::CPol::TH_TYPE_STORE;
6651   } else {
6652     return Error(StringLoc, "invalid th value");
6653   }
6654 
6655   if (Value == "BYPASS")
6656     TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6657 
6658   if (TH != 0) {
6659     if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
6660       TH |= StringSwitch<int64_t>(Value)
6661                 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6662                 .Case("RT", AMDGPU::CPol::TH_RT)
6663                 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6664                 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
6665                 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6666                                        AMDGPU::CPol::TH_ATOMIC_RETURN)
6667                 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
6668                 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6669                                         AMDGPU::CPol::TH_ATOMIC_NT)
6670                 .Default(0xffffffff);
6671     else
6672       TH |= StringSwitch<int64_t>(Value)
6673                 .Case("RT", AMDGPU::CPol::TH_RT)
6674                 .Case("NT", AMDGPU::CPol::TH_NT)
6675                 .Case("HT", AMDGPU::CPol::TH_HT)
6676                 .Case("LU", AMDGPU::CPol::TH_LU)
6677                 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6678                 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6679                 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6680                 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6681                 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6682                 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6683                 .Default(0xffffffff);
6684   }
6685 
6686   if (TH == 0xffffffff)
6687     return Error(StringLoc, "invalid th value");
6688 
6689   return ParseStatus::Success;
6690 }
6691 
6692 static void addOptionalImmOperand(
6693   MCInst& Inst, const OperandVector& Operands,
6694   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6695   AMDGPUOperand::ImmTy ImmT,
6696   int64_t Default = 0) {
6697   auto i = OptionalIdx.find(ImmT);
6698   if (i != OptionalIdx.end()) {
6699     unsigned Idx = i->second;
6700     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6701   } else {
6702     Inst.addOperand(MCOperand::createImm(Default));
6703   }
6704 }
6705 
6706 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6707                                                    StringRef &Value,
6708                                                    SMLoc &StringLoc) {
6709   if (!trySkipId(Prefix, AsmToken::Colon))
6710     return ParseStatus::NoMatch;
6711 
6712   StringLoc = getLoc();
6713   return parseId(Value, "expected an identifier") ? ParseStatus::Success
6714                                                   : ParseStatus::Failure;
6715 }
6716 
6717 //===----------------------------------------------------------------------===//
6718 // MTBUF format
6719 //===----------------------------------------------------------------------===//
6720 
6721 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6722                                   int64_t MaxVal,
6723                                   int64_t &Fmt) {
6724   int64_t Val;
6725   SMLoc Loc = getLoc();
6726 
6727   auto Res = parseIntWithPrefix(Pref, Val);
6728   if (Res.isFailure())
6729     return false;
6730   if (Res.isNoMatch())
6731     return true;
6732 
6733   if (Val < 0 || Val > MaxVal) {
6734     Error(Loc, Twine("out of range ", StringRef(Pref)));
6735     return false;
6736   }
6737 
6738   Fmt = Val;
6739   return true;
6740 }
6741 
6742 ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6743                                               AMDGPUOperand::ImmTy ImmTy) {
6744   const char *Pref = "index_key";
6745   int64_t ImmVal = 0;
6746   SMLoc Loc = getLoc();
6747   auto Res = parseIntWithPrefix(Pref, ImmVal);
6748   if (!Res.isSuccess())
6749     return Res;
6750 
6751   if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6752     return Error(Loc, Twine("out of range ", StringRef(Pref)));
6753 
6754   if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6755     return Error(Loc, Twine("out of range ", StringRef(Pref)));
6756 
6757   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6758   return ParseStatus::Success;
6759 }
6760 
6761 ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6762   return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6763 }
6764 
6765 ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6766   return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6767 }
6768 
6769 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6770 // values to live in a joint format operand in the MCInst encoding.
6771 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6772   using namespace llvm::AMDGPU::MTBUFFormat;
6773 
6774   int64_t Dfmt = DFMT_UNDEF;
6775   int64_t Nfmt = NFMT_UNDEF;
6776 
6777   // dfmt and nfmt can appear in either order, and each is optional.
6778   for (int I = 0; I < 2; ++I) {
6779     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6780       return ParseStatus::Failure;
6781 
6782     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6783       return ParseStatus::Failure;
6784 
6785     // Skip optional comma between dfmt/nfmt
6786     // but guard against 2 commas following each other.
6787     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6788         !peekToken().is(AsmToken::Comma)) {
6789       trySkipToken(AsmToken::Comma);
6790     }
6791   }
6792 
6793   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6794     return ParseStatus::NoMatch;
6795 
6796   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6797   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6798 
6799   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6800   return ParseStatus::Success;
6801 }
6802 
6803 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6804   using namespace llvm::AMDGPU::MTBUFFormat;
6805 
6806   int64_t Fmt = UFMT_UNDEF;
6807 
6808   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6809     return ParseStatus::Failure;
6810 
6811   if (Fmt == UFMT_UNDEF)
6812     return ParseStatus::NoMatch;
6813 
6814   Format = Fmt;
6815   return ParseStatus::Success;
6816 }
6817 
6818 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6819                                     int64_t &Nfmt,
6820                                     StringRef FormatStr,
6821                                     SMLoc Loc) {
6822   using namespace llvm::AMDGPU::MTBUFFormat;
6823   int64_t Format;
6824 
6825   Format = getDfmt(FormatStr);
6826   if (Format != DFMT_UNDEF) {
6827     Dfmt = Format;
6828     return true;
6829   }
6830 
6831   Format = getNfmt(FormatStr, getSTI());
6832   if (Format != NFMT_UNDEF) {
6833     Nfmt = Format;
6834     return true;
6835   }
6836 
6837   Error(Loc, "unsupported format");
6838   return false;
6839 }
6840 
6841 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6842                                                       SMLoc FormatLoc,
6843                                                       int64_t &Format) {
6844   using namespace llvm::AMDGPU::MTBUFFormat;
6845 
6846   int64_t Dfmt = DFMT_UNDEF;
6847   int64_t Nfmt = NFMT_UNDEF;
6848   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6849     return ParseStatus::Failure;
6850 
6851   if (trySkipToken(AsmToken::Comma)) {
6852     StringRef Str;
6853     SMLoc Loc = getLoc();
6854     if (!parseId(Str, "expected a format string") ||
6855         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6856       return ParseStatus::Failure;
6857     if (Dfmt == DFMT_UNDEF)
6858       return Error(Loc, "duplicate numeric format");
6859     if (Nfmt == NFMT_UNDEF)
6860       return Error(Loc, "duplicate data format");
6861   }
6862 
6863   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6864   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6865 
6866   if (isGFX10Plus()) {
6867     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6868     if (Ufmt == UFMT_UNDEF)
6869       return Error(FormatLoc, "unsupported format");
6870     Format = Ufmt;
6871   } else {
6872     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6873   }
6874 
6875   return ParseStatus::Success;
6876 }
6877 
6878 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6879                                                         SMLoc Loc,
6880                                                         int64_t &Format) {
6881   using namespace llvm::AMDGPU::MTBUFFormat;
6882 
6883   auto Id = getUnifiedFormat(FormatStr, getSTI());
6884   if (Id == UFMT_UNDEF)
6885     return ParseStatus::NoMatch;
6886 
6887   if (!isGFX10Plus())
6888     return Error(Loc, "unified format is not supported on this GPU");
6889 
6890   Format = Id;
6891   return ParseStatus::Success;
6892 }
6893 
6894 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6895   using namespace llvm::AMDGPU::MTBUFFormat;
6896   SMLoc Loc = getLoc();
6897 
6898   if (!parseExpr(Format))
6899     return ParseStatus::Failure;
6900   if (!isValidFormatEncoding(Format, getSTI()))
6901     return Error(Loc, "out of range format");
6902 
6903   return ParseStatus::Success;
6904 }
6905 
6906 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6907   using namespace llvm::AMDGPU::MTBUFFormat;
6908 
6909   if (!trySkipId("format", AsmToken::Colon))
6910     return ParseStatus::NoMatch;
6911 
6912   if (trySkipToken(AsmToken::LBrac)) {
6913     StringRef FormatStr;
6914     SMLoc Loc = getLoc();
6915     if (!parseId(FormatStr, "expected a format string"))
6916       return ParseStatus::Failure;
6917 
6918     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6919     if (Res.isNoMatch())
6920       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6921     if (!Res.isSuccess())
6922       return Res;
6923 
6924     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6925       return ParseStatus::Failure;
6926 
6927     return ParseStatus::Success;
6928   }
6929 
6930   return parseNumericFormat(Format);
6931 }
6932 
6933 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6934   using namespace llvm::AMDGPU::MTBUFFormat;
6935 
6936   int64_t Format = getDefaultFormatEncoding(getSTI());
6937   ParseStatus Res;
6938   SMLoc Loc = getLoc();
6939 
6940   // Parse legacy format syntax.
6941   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6942   if (Res.isFailure())
6943     return Res;
6944 
6945   bool FormatFound = Res.isSuccess();
6946 
6947   Operands.push_back(
6948     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6949 
6950   if (FormatFound)
6951     trySkipToken(AsmToken::Comma);
6952 
6953   if (isToken(AsmToken::EndOfStatement)) {
6954     // We are expecting an soffset operand,
6955     // but let matcher handle the error.
6956     return ParseStatus::Success;
6957   }
6958 
6959   // Parse soffset.
6960   Res = parseRegOrImm(Operands);
6961   if (!Res.isSuccess())
6962     return Res;
6963 
6964   trySkipToken(AsmToken::Comma);
6965 
6966   if (!FormatFound) {
6967     Res = parseSymbolicOrNumericFormat(Format);
6968     if (Res.isFailure())
6969       return Res;
6970     if (Res.isSuccess()) {
6971       auto Size = Operands.size();
6972       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6973       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6974       Op.setImm(Format);
6975     }
6976     return ParseStatus::Success;
6977   }
6978 
6979   if (isId("format") && peekToken().is(AsmToken::Colon))
6980     return Error(getLoc(), "duplicate format");
6981   return ParseStatus::Success;
6982 }
6983 
6984 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6985   ParseStatus Res =
6986       parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6987   if (Res.isNoMatch()) {
6988     Res = parseIntWithPrefix("inst_offset", Operands,
6989                              AMDGPUOperand::ImmTyInstOffset);
6990   }
6991   return Res;
6992 }
6993 
6994 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6995   ParseStatus Res =
6996       parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6997   if (Res.isNoMatch())
6998     Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6999   return Res;
7000 }
7001 
7002 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7003   ParseStatus Res =
7004       parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7005   if (Res.isNoMatch()) {
7006     Res =
7007         parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7008   }
7009   return Res;
7010 }
7011 
7012 //===----------------------------------------------------------------------===//
7013 // Exp
7014 //===----------------------------------------------------------------------===//
7015 
7016 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7017   OptionalImmIndexMap OptionalIdx;
7018 
7019   unsigned OperandIdx[4];
7020   unsigned EnMask = 0;
7021   int SrcIdx = 0;
7022 
7023   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7024     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7025 
7026     // Add the register arguments
7027     if (Op.isReg()) {
7028       assert(SrcIdx < 4);
7029       OperandIdx[SrcIdx] = Inst.size();
7030       Op.addRegOperands(Inst, 1);
7031       ++SrcIdx;
7032       continue;
7033     }
7034 
7035     if (Op.isOff()) {
7036       assert(SrcIdx < 4);
7037       OperandIdx[SrcIdx] = Inst.size();
7038       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
7039       ++SrcIdx;
7040       continue;
7041     }
7042 
7043     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7044       Op.addImmOperands(Inst, 1);
7045       continue;
7046     }
7047 
7048     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7049       continue;
7050 
7051     // Handle optional arguments
7052     OptionalIdx[Op.getImmTy()] = i;
7053   }
7054 
7055   assert(SrcIdx == 4);
7056 
7057   bool Compr = false;
7058   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7059     Compr = true;
7060     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7061     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
7062     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
7063   }
7064 
7065   for (auto i = 0; i < SrcIdx; ++i) {
7066     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
7067       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7068     }
7069   }
7070 
7071   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7072   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7073 
7074   Inst.addOperand(MCOperand::createImm(EnMask));
7075 }
7076 
7077 //===----------------------------------------------------------------------===//
7078 // s_waitcnt
7079 //===----------------------------------------------------------------------===//
7080 
7081 static bool
7082 encodeCnt(
7083   const AMDGPU::IsaVersion ISA,
7084   int64_t &IntVal,
7085   int64_t CntVal,
7086   bool Saturate,
7087   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7088   unsigned (*decode)(const IsaVersion &Version, unsigned))
7089 {
7090   bool Failed = false;
7091 
7092   IntVal = encode(ISA, IntVal, CntVal);
7093   if (CntVal != decode(ISA, IntVal)) {
7094     if (Saturate) {
7095       IntVal = encode(ISA, IntVal, -1);
7096     } else {
7097       Failed = true;
7098     }
7099   }
7100   return Failed;
7101 }
7102 
7103 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7104 
7105   SMLoc CntLoc = getLoc();
7106   StringRef CntName = getTokenStr();
7107 
7108   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7109       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7110     return false;
7111 
7112   int64_t CntVal;
7113   SMLoc ValLoc = getLoc();
7114   if (!parseExpr(CntVal))
7115     return false;
7116 
7117   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7118 
7119   bool Failed = true;
7120   bool Sat = CntName.ends_with("_sat");
7121 
7122   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7123     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7124   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7125     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7126   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7127     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7128   } else {
7129     Error(CntLoc, "invalid counter name " + CntName);
7130     return false;
7131   }
7132 
7133   if (Failed) {
7134     Error(ValLoc, "too large value for " + CntName);
7135     return false;
7136   }
7137 
7138   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7139     return false;
7140 
7141   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7142     if (isToken(AsmToken::EndOfStatement)) {
7143       Error(getLoc(), "expected a counter name");
7144       return false;
7145     }
7146   }
7147 
7148   return true;
7149 }
7150 
7151 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7152   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7153   int64_t Waitcnt = getWaitcntBitMask(ISA);
7154   SMLoc S = getLoc();
7155 
7156   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7157     while (!isToken(AsmToken::EndOfStatement)) {
7158       if (!parseCnt(Waitcnt))
7159         return ParseStatus::Failure;
7160     }
7161   } else {
7162     if (!parseExpr(Waitcnt))
7163       return ParseStatus::Failure;
7164   }
7165 
7166   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7167   return ParseStatus::Success;
7168 }
7169 
7170 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7171   SMLoc FieldLoc = getLoc();
7172   StringRef FieldName = getTokenStr();
7173   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7174       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7175     return false;
7176 
7177   SMLoc ValueLoc = getLoc();
7178   StringRef ValueName = getTokenStr();
7179   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7180       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7181     return false;
7182 
7183   unsigned Shift;
7184   if (FieldName == "instid0") {
7185     Shift = 0;
7186   } else if (FieldName == "instskip") {
7187     Shift = 4;
7188   } else if (FieldName == "instid1") {
7189     Shift = 7;
7190   } else {
7191     Error(FieldLoc, "invalid field name " + FieldName);
7192     return false;
7193   }
7194 
7195   int Value;
7196   if (Shift == 4) {
7197     // Parse values for instskip.
7198     Value = StringSwitch<int>(ValueName)
7199                 .Case("SAME", 0)
7200                 .Case("NEXT", 1)
7201                 .Case("SKIP_1", 2)
7202                 .Case("SKIP_2", 3)
7203                 .Case("SKIP_3", 4)
7204                 .Case("SKIP_4", 5)
7205                 .Default(-1);
7206   } else {
7207     // Parse values for instid0 and instid1.
7208     Value = StringSwitch<int>(ValueName)
7209                 .Case("NO_DEP", 0)
7210                 .Case("VALU_DEP_1", 1)
7211                 .Case("VALU_DEP_2", 2)
7212                 .Case("VALU_DEP_3", 3)
7213                 .Case("VALU_DEP_4", 4)
7214                 .Case("TRANS32_DEP_1", 5)
7215                 .Case("TRANS32_DEP_2", 6)
7216                 .Case("TRANS32_DEP_3", 7)
7217                 .Case("FMA_ACCUM_CYCLE_1", 8)
7218                 .Case("SALU_CYCLE_1", 9)
7219                 .Case("SALU_CYCLE_2", 10)
7220                 .Case("SALU_CYCLE_3", 11)
7221                 .Default(-1);
7222   }
7223   if (Value < 0) {
7224     Error(ValueLoc, "invalid value name " + ValueName);
7225     return false;
7226   }
7227 
7228   Delay |= Value << Shift;
7229   return true;
7230 }
7231 
7232 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7233   int64_t Delay = 0;
7234   SMLoc S = getLoc();
7235 
7236   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7237     do {
7238       if (!parseDelay(Delay))
7239         return ParseStatus::Failure;
7240     } while (trySkipToken(AsmToken::Pipe));
7241   } else {
7242     if (!parseExpr(Delay))
7243       return ParseStatus::Failure;
7244   }
7245 
7246   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7247   return ParseStatus::Success;
7248 }
7249 
7250 bool
7251 AMDGPUOperand::isSWaitCnt() const {
7252   return isImm();
7253 }
7254 
7255 bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7256 
7257 //===----------------------------------------------------------------------===//
7258 // DepCtr
7259 //===----------------------------------------------------------------------===//
7260 
7261 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7262                                   StringRef DepCtrName) {
7263   switch (ErrorId) {
7264   case OPR_ID_UNKNOWN:
7265     Error(Loc, Twine("invalid counter name ", DepCtrName));
7266     return;
7267   case OPR_ID_UNSUPPORTED:
7268     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7269     return;
7270   case OPR_ID_DUPLICATE:
7271     Error(Loc, Twine("duplicate counter name ", DepCtrName));
7272     return;
7273   case OPR_VAL_INVALID:
7274     Error(Loc, Twine("invalid value for ", DepCtrName));
7275     return;
7276   default:
7277     assert(false);
7278   }
7279 }
7280 
7281 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7282 
7283   using namespace llvm::AMDGPU::DepCtr;
7284 
7285   SMLoc DepCtrLoc = getLoc();
7286   StringRef DepCtrName = getTokenStr();
7287 
7288   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7289       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7290     return false;
7291 
7292   int64_t ExprVal;
7293   if (!parseExpr(ExprVal))
7294     return false;
7295 
7296   unsigned PrevOprMask = UsedOprMask;
7297   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7298 
7299   if (CntVal < 0) {
7300     depCtrError(DepCtrLoc, CntVal, DepCtrName);
7301     return false;
7302   }
7303 
7304   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7305     return false;
7306 
7307   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7308     if (isToken(AsmToken::EndOfStatement)) {
7309       Error(getLoc(), "expected a counter name");
7310       return false;
7311     }
7312   }
7313 
7314   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7315   DepCtr = (DepCtr & ~CntValMask) | CntVal;
7316   return true;
7317 }
7318 
7319 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7320   using namespace llvm::AMDGPU::DepCtr;
7321 
7322   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7323   SMLoc Loc = getLoc();
7324 
7325   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7326     unsigned UsedOprMask = 0;
7327     while (!isToken(AsmToken::EndOfStatement)) {
7328       if (!parseDepCtr(DepCtr, UsedOprMask))
7329         return ParseStatus::Failure;
7330     }
7331   } else {
7332     if (!parseExpr(DepCtr))
7333       return ParseStatus::Failure;
7334   }
7335 
7336   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7337   return ParseStatus::Success;
7338 }
7339 
7340 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7341 
7342 //===----------------------------------------------------------------------===//
7343 // hwreg
7344 //===----------------------------------------------------------------------===//
7345 
7346 ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7347                                             OperandInfoTy &Offset,
7348                                             OperandInfoTy &Width) {
7349   using namespace llvm::AMDGPU::Hwreg;
7350 
7351   if (!trySkipId("hwreg", AsmToken::LParen))
7352     return ParseStatus::NoMatch;
7353 
7354   // The register may be specified by name or using a numeric code
7355   HwReg.Loc = getLoc();
7356   if (isToken(AsmToken::Identifier) &&
7357       (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7358     HwReg.IsSymbolic = true;
7359     lex(); // skip register name
7360   } else if (!parseExpr(HwReg.Val, "a register name")) {
7361     return ParseStatus::Failure;
7362   }
7363 
7364   if (trySkipToken(AsmToken::RParen))
7365     return ParseStatus::Success;
7366 
7367   // parse optional params
7368   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7369     return ParseStatus::Failure;
7370 
7371   Offset.Loc = getLoc();
7372   if (!parseExpr(Offset.Val))
7373     return ParseStatus::Failure;
7374 
7375   if (!skipToken(AsmToken::Comma, "expected a comma"))
7376     return ParseStatus::Failure;
7377 
7378   Width.Loc = getLoc();
7379   if (!parseExpr(Width.Val) ||
7380       !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7381     return ParseStatus::Failure;
7382 
7383   return ParseStatus::Success;
7384 }
7385 
7386 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7387   using namespace llvm::AMDGPU::Hwreg;
7388 
7389   int64_t ImmVal = 0;
7390   SMLoc Loc = getLoc();
7391 
7392   StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7393                           HwregId::Default);
7394   StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7395                            HwregOffset::Default);
7396   struct : StructuredOpField {
7397     using StructuredOpField::StructuredOpField;
7398     bool validate(AMDGPUAsmParser &Parser) const override {
7399       if (!isUIntN(Width, Val - 1))
7400         return Error(Parser, "only values from 1 to 32 are legal");
7401       return true;
7402     }
7403   } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7404   ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7405 
7406   if (Res.isNoMatch())
7407     Res = parseHwregFunc(HwReg, Offset, Width);
7408 
7409   if (Res.isSuccess()) {
7410     if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7411       return ParseStatus::Failure;
7412     ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7413   }
7414 
7415   if (Res.isNoMatch() &&
7416       parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7417     Res = ParseStatus::Success;
7418 
7419   if (!Res.isSuccess())
7420     return ParseStatus::Failure;
7421 
7422   if (!isUInt<16>(ImmVal))
7423     return Error(Loc, "invalid immediate: only 16-bit values are legal");
7424   Operands.push_back(
7425       AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7426   return ParseStatus::Success;
7427 }
7428 
7429 bool AMDGPUOperand::isHwreg() const {
7430   return isImmTy(ImmTyHwreg);
7431 }
7432 
7433 //===----------------------------------------------------------------------===//
7434 // sendmsg
7435 //===----------------------------------------------------------------------===//
7436 
7437 bool
7438 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7439                                   OperandInfoTy &Op,
7440                                   OperandInfoTy &Stream) {
7441   using namespace llvm::AMDGPU::SendMsg;
7442 
7443   Msg.Loc = getLoc();
7444   if (isToken(AsmToken::Identifier) &&
7445       (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7446     Msg.IsSymbolic = true;
7447     lex(); // skip message name
7448   } else if (!parseExpr(Msg.Val, "a message name")) {
7449     return false;
7450   }
7451 
7452   if (trySkipToken(AsmToken::Comma)) {
7453     Op.IsDefined = true;
7454     Op.Loc = getLoc();
7455     if (isToken(AsmToken::Identifier) &&
7456         (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7457             OPR_ID_UNKNOWN) {
7458       lex(); // skip operation name
7459     } else if (!parseExpr(Op.Val, "an operation name")) {
7460       return false;
7461     }
7462 
7463     if (trySkipToken(AsmToken::Comma)) {
7464       Stream.IsDefined = true;
7465       Stream.Loc = getLoc();
7466       if (!parseExpr(Stream.Val))
7467         return false;
7468     }
7469   }
7470 
7471   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7472 }
7473 
7474 bool
7475 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7476                                  const OperandInfoTy &Op,
7477                                  const OperandInfoTy &Stream) {
7478   using namespace llvm::AMDGPU::SendMsg;
7479 
7480   // Validation strictness depends on whether message is specified
7481   // in a symbolic or in a numeric form. In the latter case
7482   // only encoding possibility is checked.
7483   bool Strict = Msg.IsSymbolic;
7484 
7485   if (Strict) {
7486     if (Msg.Val == OPR_ID_UNSUPPORTED) {
7487       Error(Msg.Loc, "specified message id is not supported on this GPU");
7488       return false;
7489     }
7490   } else {
7491     if (!isValidMsgId(Msg.Val, getSTI())) {
7492       Error(Msg.Loc, "invalid message id");
7493       return false;
7494     }
7495   }
7496   if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7497     if (Op.IsDefined) {
7498       Error(Op.Loc, "message does not support operations");
7499     } else {
7500       Error(Msg.Loc, "missing message operation");
7501     }
7502     return false;
7503   }
7504   if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7505     if (Op.Val == OPR_ID_UNSUPPORTED)
7506       Error(Op.Loc, "specified operation id is not supported on this GPU");
7507     else
7508       Error(Op.Loc, "invalid operation id");
7509     return false;
7510   }
7511   if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7512       Stream.IsDefined) {
7513     Error(Stream.Loc, "message operation does not support streams");
7514     return false;
7515   }
7516   if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7517     Error(Stream.Loc, "invalid message stream id");
7518     return false;
7519   }
7520   return true;
7521 }
7522 
7523 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7524   using namespace llvm::AMDGPU::SendMsg;
7525 
7526   int64_t ImmVal = 0;
7527   SMLoc Loc = getLoc();
7528 
7529   if (trySkipId("sendmsg", AsmToken::LParen)) {
7530     OperandInfoTy Msg(OPR_ID_UNKNOWN);
7531     OperandInfoTy Op(OP_NONE_);
7532     OperandInfoTy Stream(STREAM_ID_NONE_);
7533     if (parseSendMsgBody(Msg, Op, Stream) &&
7534         validateSendMsg(Msg, Op, Stream)) {
7535       ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7536     } else {
7537       return ParseStatus::Failure;
7538     }
7539   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7540     if (ImmVal < 0 || !isUInt<16>(ImmVal))
7541       return Error(Loc, "invalid immediate: only 16-bit values are legal");
7542   } else {
7543     return ParseStatus::Failure;
7544   }
7545 
7546   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7547   return ParseStatus::Success;
7548 }
7549 
7550 bool AMDGPUOperand::isSendMsg() const {
7551   return isImmTy(ImmTySendMsg);
7552 }
7553 
7554 //===----------------------------------------------------------------------===//
7555 // v_interp
7556 //===----------------------------------------------------------------------===//
7557 
7558 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7559   StringRef Str;
7560   SMLoc S = getLoc();
7561 
7562   if (!parseId(Str))
7563     return ParseStatus::NoMatch;
7564 
7565   int Slot = StringSwitch<int>(Str)
7566     .Case("p10", 0)
7567     .Case("p20", 1)
7568     .Case("p0", 2)
7569     .Default(-1);
7570 
7571   if (Slot == -1)
7572     return Error(S, "invalid interpolation slot");
7573 
7574   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7575                                               AMDGPUOperand::ImmTyInterpSlot));
7576   return ParseStatus::Success;
7577 }
7578 
7579 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7580   StringRef Str;
7581   SMLoc S = getLoc();
7582 
7583   if (!parseId(Str))
7584     return ParseStatus::NoMatch;
7585 
7586   if (!Str.starts_with("attr"))
7587     return Error(S, "invalid interpolation attribute");
7588 
7589   StringRef Chan = Str.take_back(2);
7590   int AttrChan = StringSwitch<int>(Chan)
7591     .Case(".x", 0)
7592     .Case(".y", 1)
7593     .Case(".z", 2)
7594     .Case(".w", 3)
7595     .Default(-1);
7596   if (AttrChan == -1)
7597     return Error(S, "invalid or missing interpolation attribute channel");
7598 
7599   Str = Str.drop_back(2).drop_front(4);
7600 
7601   uint8_t Attr;
7602   if (Str.getAsInteger(10, Attr))
7603     return Error(S, "invalid or missing interpolation attribute number");
7604 
7605   if (Attr > 32)
7606     return Error(S, "out of bounds interpolation attribute number");
7607 
7608   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7609 
7610   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7611                                               AMDGPUOperand::ImmTyInterpAttr));
7612   Operands.push_back(AMDGPUOperand::CreateImm(
7613       this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7614   return ParseStatus::Success;
7615 }
7616 
7617 //===----------------------------------------------------------------------===//
7618 // exp
7619 //===----------------------------------------------------------------------===//
7620 
7621 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7622   using namespace llvm::AMDGPU::Exp;
7623 
7624   StringRef Str;
7625   SMLoc S = getLoc();
7626 
7627   if (!parseId(Str))
7628     return ParseStatus::NoMatch;
7629 
7630   unsigned Id = getTgtId(Str);
7631   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7632     return Error(S, (Id == ET_INVALID)
7633                         ? "invalid exp target"
7634                         : "exp target is not supported on this GPU");
7635 
7636   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7637                                               AMDGPUOperand::ImmTyExpTgt));
7638   return ParseStatus::Success;
7639 }
7640 
7641 //===----------------------------------------------------------------------===//
7642 // parser helpers
7643 //===----------------------------------------------------------------------===//
7644 
7645 bool
7646 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7647   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7648 }
7649 
7650 bool
7651 AMDGPUAsmParser::isId(const StringRef Id) const {
7652   return isId(getToken(), Id);
7653 }
7654 
7655 bool
7656 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7657   return getTokenKind() == Kind;
7658 }
7659 
7660 StringRef AMDGPUAsmParser::getId() const {
7661   return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7662 }
7663 
7664 bool
7665 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7666   if (isId(Id)) {
7667     lex();
7668     return true;
7669   }
7670   return false;
7671 }
7672 
7673 bool
7674 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7675   if (isToken(AsmToken::Identifier)) {
7676     StringRef Tok = getTokenStr();
7677     if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7678       lex();
7679       return true;
7680     }
7681   }
7682   return false;
7683 }
7684 
7685 bool
7686 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7687   if (isId(Id) && peekToken().is(Kind)) {
7688     lex();
7689     lex();
7690     return true;
7691   }
7692   return false;
7693 }
7694 
7695 bool
7696 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7697   if (isToken(Kind)) {
7698     lex();
7699     return true;
7700   }
7701   return false;
7702 }
7703 
7704 bool
7705 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7706                            const StringRef ErrMsg) {
7707   if (!trySkipToken(Kind)) {
7708     Error(getLoc(), ErrMsg);
7709     return false;
7710   }
7711   return true;
7712 }
7713 
7714 bool
7715 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7716   SMLoc S = getLoc();
7717 
7718   const MCExpr *Expr;
7719   if (Parser.parseExpression(Expr))
7720     return false;
7721 
7722   if (Expr->evaluateAsAbsolute(Imm))
7723     return true;
7724 
7725   if (Expected.empty()) {
7726     Error(S, "expected absolute expression");
7727   } else {
7728     Error(S, Twine("expected ", Expected) +
7729              Twine(" or an absolute expression"));
7730   }
7731   return false;
7732 }
7733 
7734 bool
7735 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7736   SMLoc S = getLoc();
7737 
7738   const MCExpr *Expr;
7739   if (Parser.parseExpression(Expr))
7740     return false;
7741 
7742   int64_t IntVal;
7743   if (Expr->evaluateAsAbsolute(IntVal)) {
7744     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7745   } else {
7746     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7747   }
7748   return true;
7749 }
7750 
7751 bool
7752 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7753   if (isToken(AsmToken::String)) {
7754     Val = getToken().getStringContents();
7755     lex();
7756     return true;
7757   }
7758   Error(getLoc(), ErrMsg);
7759   return false;
7760 }
7761 
7762 bool
7763 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7764   if (isToken(AsmToken::Identifier)) {
7765     Val = getTokenStr();
7766     lex();
7767     return true;
7768   }
7769   if (!ErrMsg.empty())
7770     Error(getLoc(), ErrMsg);
7771   return false;
7772 }
7773 
7774 AsmToken
7775 AMDGPUAsmParser::getToken() const {
7776   return Parser.getTok();
7777 }
7778 
7779 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7780   return isToken(AsmToken::EndOfStatement)
7781              ? getToken()
7782              : getLexer().peekTok(ShouldSkipSpace);
7783 }
7784 
7785 void
7786 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7787   auto TokCount = getLexer().peekTokens(Tokens);
7788 
7789   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7790     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7791 }
7792 
7793 AsmToken::TokenKind
7794 AMDGPUAsmParser::getTokenKind() const {
7795   return getLexer().getKind();
7796 }
7797 
7798 SMLoc
7799 AMDGPUAsmParser::getLoc() const {
7800   return getToken().getLoc();
7801 }
7802 
7803 StringRef
7804 AMDGPUAsmParser::getTokenStr() const {
7805   return getToken().getString();
7806 }
7807 
7808 void
7809 AMDGPUAsmParser::lex() {
7810   Parser.Lex();
7811 }
7812 
7813 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7814   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7815 }
7816 
7817 SMLoc
7818 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7819                                const OperandVector &Operands) const {
7820   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7821     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7822     if (Test(Op))
7823       return Op.getStartLoc();
7824   }
7825   return getInstLoc(Operands);
7826 }
7827 
7828 SMLoc
7829 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7830                            const OperandVector &Operands) const {
7831   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7832   return getOperandLoc(Test, Operands);
7833 }
7834 
7835 SMLoc
7836 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7837                            const OperandVector &Operands) const {
7838   auto Test = [=](const AMDGPUOperand& Op) {
7839     return Op.isRegKind() && Op.getReg() == Reg;
7840   };
7841   return getOperandLoc(Test, Operands);
7842 }
7843 
7844 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7845                                  bool SearchMandatoryLiterals) const {
7846   auto Test = [](const AMDGPUOperand& Op) {
7847     return Op.IsImmKindLiteral() || Op.isExpr();
7848   };
7849   SMLoc Loc = getOperandLoc(Test, Operands);
7850   if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7851     Loc = getMandatoryLitLoc(Operands);
7852   return Loc;
7853 }
7854 
7855 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7856   auto Test = [](const AMDGPUOperand &Op) {
7857     return Op.IsImmKindMandatoryLiteral();
7858   };
7859   return getOperandLoc(Test, Operands);
7860 }
7861 
7862 SMLoc
7863 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7864   auto Test = [](const AMDGPUOperand& Op) {
7865     return Op.isImmKindConst();
7866   };
7867   return getOperandLoc(Test, Operands);
7868 }
7869 
7870 ParseStatus
7871 AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7872   if (!trySkipToken(AsmToken::LCurly))
7873     return ParseStatus::NoMatch;
7874 
7875   bool First = true;
7876   while (!trySkipToken(AsmToken::RCurly)) {
7877     if (!First &&
7878         !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7879       return ParseStatus::Failure;
7880 
7881     StringRef Id = getTokenStr();
7882     SMLoc IdLoc = getLoc();
7883     if (!skipToken(AsmToken::Identifier, "field name expected") ||
7884         !skipToken(AsmToken::Colon, "colon expected"))
7885       return ParseStatus::Failure;
7886 
7887     auto I =
7888         find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
7889     if (I == Fields.end())
7890       return Error(IdLoc, "unknown field");
7891     if ((*I)->IsDefined)
7892       return Error(IdLoc, "duplicate field");
7893 
7894     // TODO: Support symbolic values.
7895     (*I)->Loc = getLoc();
7896     if (!parseExpr((*I)->Val))
7897       return ParseStatus::Failure;
7898     (*I)->IsDefined = true;
7899 
7900     First = false;
7901   }
7902   return ParseStatus::Success;
7903 }
7904 
7905 bool AMDGPUAsmParser::validateStructuredOpFields(
7906     ArrayRef<const StructuredOpField *> Fields) {
7907   return all_of(Fields, [this](const StructuredOpField *F) {
7908     return F->validate(*this);
7909   });
7910 }
7911 
7912 //===----------------------------------------------------------------------===//
7913 // swizzle
7914 //===----------------------------------------------------------------------===//
7915 
7916 LLVM_READNONE
7917 static unsigned
7918 encodeBitmaskPerm(const unsigned AndMask,
7919                   const unsigned OrMask,
7920                   const unsigned XorMask) {
7921   using namespace llvm::AMDGPU::Swizzle;
7922 
7923   return BITMASK_PERM_ENC |
7924          (AndMask << BITMASK_AND_SHIFT) |
7925          (OrMask  << BITMASK_OR_SHIFT)  |
7926          (XorMask << BITMASK_XOR_SHIFT);
7927 }
7928 
7929 bool
7930 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7931                                      const unsigned MinVal,
7932                                      const unsigned MaxVal,
7933                                      const StringRef ErrMsg,
7934                                      SMLoc &Loc) {
7935   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7936     return false;
7937   }
7938   Loc = getLoc();
7939   if (!parseExpr(Op)) {
7940     return false;
7941   }
7942   if (Op < MinVal || Op > MaxVal) {
7943     Error(Loc, ErrMsg);
7944     return false;
7945   }
7946 
7947   return true;
7948 }
7949 
7950 bool
7951 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7952                                       const unsigned MinVal,
7953                                       const unsigned MaxVal,
7954                                       const StringRef ErrMsg) {
7955   SMLoc Loc;
7956   for (unsigned i = 0; i < OpNum; ++i) {
7957     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7958       return false;
7959   }
7960 
7961   return true;
7962 }
7963 
7964 bool
7965 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7966   using namespace llvm::AMDGPU::Swizzle;
7967 
7968   int64_t Lane[LANE_NUM];
7969   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7970                            "expected a 2-bit lane id")) {
7971     Imm = QUAD_PERM_ENC;
7972     for (unsigned I = 0; I < LANE_NUM; ++I) {
7973       Imm |= Lane[I] << (LANE_SHIFT * I);
7974     }
7975     return true;
7976   }
7977   return false;
7978 }
7979 
7980 bool
7981 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7982   using namespace llvm::AMDGPU::Swizzle;
7983 
7984   SMLoc Loc;
7985   int64_t GroupSize;
7986   int64_t LaneIdx;
7987 
7988   if (!parseSwizzleOperand(GroupSize,
7989                            2, 32,
7990                            "group size must be in the interval [2,32]",
7991                            Loc)) {
7992     return false;
7993   }
7994   if (!isPowerOf2_64(GroupSize)) {
7995     Error(Loc, "group size must be a power of two");
7996     return false;
7997   }
7998   if (parseSwizzleOperand(LaneIdx,
7999                           0, GroupSize - 1,
8000                           "lane id must be in the interval [0,group size - 1]",
8001                           Loc)) {
8002     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8003     return true;
8004   }
8005   return false;
8006 }
8007 
8008 bool
8009 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8010   using namespace llvm::AMDGPU::Swizzle;
8011 
8012   SMLoc Loc;
8013   int64_t GroupSize;
8014 
8015   if (!parseSwizzleOperand(GroupSize,
8016                            2, 32,
8017                            "group size must be in the interval [2,32]",
8018                            Loc)) {
8019     return false;
8020   }
8021   if (!isPowerOf2_64(GroupSize)) {
8022     Error(Loc, "group size must be a power of two");
8023     return false;
8024   }
8025 
8026   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8027   return true;
8028 }
8029 
8030 bool
8031 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8032   using namespace llvm::AMDGPU::Swizzle;
8033 
8034   SMLoc Loc;
8035   int64_t GroupSize;
8036 
8037   if (!parseSwizzleOperand(GroupSize,
8038                            1, 16,
8039                            "group size must be in the interval [1,16]",
8040                            Loc)) {
8041     return false;
8042   }
8043   if (!isPowerOf2_64(GroupSize)) {
8044     Error(Loc, "group size must be a power of two");
8045     return false;
8046   }
8047 
8048   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8049   return true;
8050 }
8051 
8052 bool
8053 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8054   using namespace llvm::AMDGPU::Swizzle;
8055 
8056   if (!skipToken(AsmToken::Comma, "expected a comma")) {
8057     return false;
8058   }
8059 
8060   StringRef Ctl;
8061   SMLoc StrLoc = getLoc();
8062   if (!parseString(Ctl)) {
8063     return false;
8064   }
8065   if (Ctl.size() != BITMASK_WIDTH) {
8066     Error(StrLoc, "expected a 5-character mask");
8067     return false;
8068   }
8069 
8070   unsigned AndMask = 0;
8071   unsigned OrMask = 0;
8072   unsigned XorMask = 0;
8073 
8074   for (size_t i = 0; i < Ctl.size(); ++i) {
8075     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8076     switch(Ctl[i]) {
8077     default:
8078       Error(StrLoc, "invalid mask");
8079       return false;
8080     case '0':
8081       break;
8082     case '1':
8083       OrMask |= Mask;
8084       break;
8085     case 'p':
8086       AndMask |= Mask;
8087       break;
8088     case 'i':
8089       AndMask |= Mask;
8090       XorMask |= Mask;
8091       break;
8092     }
8093   }
8094 
8095   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8096   return true;
8097 }
8098 
8099 bool
8100 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8101 
8102   SMLoc OffsetLoc = getLoc();
8103 
8104   if (!parseExpr(Imm, "a swizzle macro")) {
8105     return false;
8106   }
8107   if (!isUInt<16>(Imm)) {
8108     Error(OffsetLoc, "expected a 16-bit offset");
8109     return false;
8110   }
8111   return true;
8112 }
8113 
8114 bool
8115 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8116   using namespace llvm::AMDGPU::Swizzle;
8117 
8118   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
8119 
8120     SMLoc ModeLoc = getLoc();
8121     bool Ok = false;
8122 
8123     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8124       Ok = parseSwizzleQuadPerm(Imm);
8125     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8126       Ok = parseSwizzleBitmaskPerm(Imm);
8127     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8128       Ok = parseSwizzleBroadcast(Imm);
8129     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8130       Ok = parseSwizzleSwap(Imm);
8131     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8132       Ok = parseSwizzleReverse(Imm);
8133     } else {
8134       Error(ModeLoc, "expected a swizzle mode");
8135     }
8136 
8137     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8138   }
8139 
8140   return false;
8141 }
8142 
8143 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8144   SMLoc S = getLoc();
8145   int64_t Imm = 0;
8146 
8147   if (trySkipId("offset")) {
8148 
8149     bool Ok = false;
8150     if (skipToken(AsmToken::Colon, "expected a colon")) {
8151       if (trySkipId("swizzle")) {
8152         Ok = parseSwizzleMacro(Imm);
8153       } else {
8154         Ok = parseSwizzleOffset(Imm);
8155       }
8156     }
8157 
8158     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8159 
8160     return Ok ? ParseStatus::Success : ParseStatus::Failure;
8161   }
8162   return ParseStatus::NoMatch;
8163 }
8164 
8165 bool
8166 AMDGPUOperand::isSwizzle() const {
8167   return isImmTy(ImmTySwizzle);
8168 }
8169 
8170 //===----------------------------------------------------------------------===//
8171 // VGPR Index Mode
8172 //===----------------------------------------------------------------------===//
8173 
8174 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8175 
8176   using namespace llvm::AMDGPU::VGPRIndexMode;
8177 
8178   if (trySkipToken(AsmToken::RParen)) {
8179     return OFF;
8180   }
8181 
8182   int64_t Imm = 0;
8183 
8184   while (true) {
8185     unsigned Mode = 0;
8186     SMLoc S = getLoc();
8187 
8188     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8189       if (trySkipId(IdSymbolic[ModeId])) {
8190         Mode = 1 << ModeId;
8191         break;
8192       }
8193     }
8194 
8195     if (Mode == 0) {
8196       Error(S, (Imm == 0)?
8197                "expected a VGPR index mode or a closing parenthesis" :
8198                "expected a VGPR index mode");
8199       return UNDEF;
8200     }
8201 
8202     if (Imm & Mode) {
8203       Error(S, "duplicate VGPR index mode");
8204       return UNDEF;
8205     }
8206     Imm |= Mode;
8207 
8208     if (trySkipToken(AsmToken::RParen))
8209       break;
8210     if (!skipToken(AsmToken::Comma,
8211                    "expected a comma or a closing parenthesis"))
8212       return UNDEF;
8213   }
8214 
8215   return Imm;
8216 }
8217 
8218 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8219 
8220   using namespace llvm::AMDGPU::VGPRIndexMode;
8221 
8222   int64_t Imm = 0;
8223   SMLoc S = getLoc();
8224 
8225   if (trySkipId("gpr_idx", AsmToken::LParen)) {
8226     Imm = parseGPRIdxMacro();
8227     if (Imm == UNDEF)
8228       return ParseStatus::Failure;
8229   } else {
8230     if (getParser().parseAbsoluteExpression(Imm))
8231       return ParseStatus::Failure;
8232     if (Imm < 0 || !isUInt<4>(Imm))
8233       return Error(S, "invalid immediate: only 4-bit values are legal");
8234   }
8235 
8236   Operands.push_back(
8237       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8238   return ParseStatus::Success;
8239 }
8240 
8241 bool AMDGPUOperand::isGPRIdxMode() const {
8242   return isImmTy(ImmTyGprIdxMode);
8243 }
8244 
8245 //===----------------------------------------------------------------------===//
8246 // sopp branch targets
8247 //===----------------------------------------------------------------------===//
8248 
8249 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8250 
8251   // Make sure we are not parsing something
8252   // that looks like a label or an expression but is not.
8253   // This will improve error messages.
8254   if (isRegister() || isModifier())
8255     return ParseStatus::NoMatch;
8256 
8257   if (!parseExpr(Operands))
8258     return ParseStatus::Failure;
8259 
8260   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8261   assert(Opr.isImm() || Opr.isExpr());
8262   SMLoc Loc = Opr.getStartLoc();
8263 
8264   // Currently we do not support arbitrary expressions as branch targets.
8265   // Only labels and absolute expressions are accepted.
8266   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8267     Error(Loc, "expected an absolute expression or a label");
8268   } else if (Opr.isImm() && !Opr.isS16Imm()) {
8269     Error(Loc, "expected a 16-bit signed jump offset");
8270   }
8271 
8272   return ParseStatus::Success;
8273 }
8274 
8275 //===----------------------------------------------------------------------===//
8276 // Boolean holding registers
8277 //===----------------------------------------------------------------------===//
8278 
8279 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8280   return parseReg(Operands);
8281 }
8282 
8283 //===----------------------------------------------------------------------===//
8284 // mubuf
8285 //===----------------------------------------------------------------------===//
8286 
8287 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8288                                    const OperandVector &Operands,
8289                                    bool IsAtomic) {
8290   OptionalImmIndexMap OptionalIdx;
8291   unsigned FirstOperandIdx = 1;
8292   bool IsAtomicReturn = false;
8293 
8294   if (IsAtomic) {
8295     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
8296                       SIInstrFlags::IsAtomicRet;
8297   }
8298 
8299   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8300     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8301 
8302     // Add the register arguments
8303     if (Op.isReg()) {
8304       Op.addRegOperands(Inst, 1);
8305       // Insert a tied src for atomic return dst.
8306       // This cannot be postponed as subsequent calls to
8307       // addImmOperands rely on correct number of MC operands.
8308       if (IsAtomicReturn && i == FirstOperandIdx)
8309         Op.addRegOperands(Inst, 1);
8310       continue;
8311     }
8312 
8313     // Handle the case where soffset is an immediate
8314     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8315       Op.addImmOperands(Inst, 1);
8316       continue;
8317     }
8318 
8319     // Handle tokens like 'offen' which are sometimes hard-coded into the
8320     // asm string.  There are no MCInst operands for these.
8321     if (Op.isToken()) {
8322       continue;
8323     }
8324     assert(Op.isImm());
8325 
8326     // Handle optional arguments
8327     OptionalIdx[Op.getImmTy()] = i;
8328   }
8329 
8330   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8331   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8332 }
8333 
8334 //===----------------------------------------------------------------------===//
8335 // smrd
8336 //===----------------------------------------------------------------------===//
8337 
8338 bool AMDGPUOperand::isSMRDOffset8() const {
8339   return isImmLiteral() && isUInt<8>(getImm());
8340 }
8341 
8342 bool AMDGPUOperand::isSMEMOffset() const {
8343   // Offset range is checked later by validator.
8344   return isImmLiteral();
8345 }
8346 
8347 bool AMDGPUOperand::isSMRDLiteralOffset() const {
8348   // 32-bit literals are only supported on CI and we only want to use them
8349   // when the offset is > 8-bits.
8350   return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8351 }
8352 
8353 //===----------------------------------------------------------------------===//
8354 // vop3
8355 //===----------------------------------------------------------------------===//
8356 
/// Convert an output-modifier multiplier in place: 1, 2 and 4 become
/// 0, 1 and 2. Any other value is left untouched and false is returned.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
8364 
/// Convert an output-modifier divisor in place: 1 becomes 0 and 2 becomes
/// 3. Any other value is left untouched and false is returned.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
8378 
8379 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8380 // This is intentional and ensures compatibility with sp3.
8381 // See bug 35397 for details.
8382 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8383   if (BoundCtrl == 0 || BoundCtrl == 1) {
8384     if (!isGFX11Plus())
8385       BoundCtrl = 1;
8386     return true;
8387   }
8388   return false;
8389 }
8390 
8391 void AMDGPUAsmParser::onBeginOfFile() {
8392   if (!getParser().getStreamer().getTargetStreamer() ||
8393       getSTI().getTargetTriple().getArch() == Triple::r600)
8394     return;
8395 
8396   if (!getTargetStreamer().getTargetID())
8397     getTargetStreamer().initializeTargetID(getSTI(),
8398                                            getSTI().getFeatureString());
8399 
8400   if (isHsaAbi(getSTI()))
8401     getTargetStreamer().EmitDirectiveAMDGCNTarget();
8402 }
8403 
8404 /// Parse AMDGPU specific expressions.
8405 ///
8406 ///  expr ::= or(expr, ...) |
8407 ///           max(expr, ...)
8408 ///
8409 bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8410   using AGVK = AMDGPUMCExpr::VariantKind;
8411 
8412   if (isToken(AsmToken::Identifier)) {
8413     StringRef TokenId = getTokenStr();
8414     AGVK VK = StringSwitch<AGVK>(TokenId)
8415                   .Case("max", AGVK::AGVK_Max)
8416                   .Case("or", AGVK::AGVK_Or)
8417                   .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
8418                   .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
8419                   .Case("alignto", AGVK::AGVK_AlignTo)
8420                   .Case("occupancy", AGVK::AGVK_Occupancy)
8421                   .Default(AGVK::AGVK_None);
8422 
8423     if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8424       SmallVector<const MCExpr *, 4> Exprs;
8425       uint64_t CommaCount = 0;
8426       lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
8427       lex(); // Eat '('
8428       while (true) {
8429         if (trySkipToken(AsmToken::RParen)) {
8430           if (Exprs.empty()) {
8431             Error(getToken().getLoc(),
8432                   "empty " + Twine(TokenId) + " expression");
8433             return true;
8434           }
8435           if (CommaCount + 1 != Exprs.size()) {
8436             Error(getToken().getLoc(),
8437                   "mismatch of commas in " + Twine(TokenId) + " expression");
8438             return true;
8439           }
8440           Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
8441           return false;
8442         }
8443         const MCExpr *Expr;
8444         if (getParser().parseExpression(Expr, EndLoc))
8445           return true;
8446         Exprs.push_back(Expr);
8447         bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8448         if (LastTokenWasComma)
8449           CommaCount++;
8450         if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8451           Error(getToken().getLoc(),
8452                 "unexpected token in " + Twine(TokenId) + " expression");
8453           return true;
8454         }
8455       }
8456     }
8457   }
8458   return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8459 }
8460 
8461 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8462   StringRef Name = getTokenStr();
8463   if (Name == "mul") {
8464     return parseIntWithPrefix("mul", Operands,
8465                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8466   }
8467 
8468   if (Name == "div") {
8469     return parseIntWithPrefix("div", Operands,
8470                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8471   }
8472 
8473   return ParseStatus::NoMatch;
8474 }
8475 
8476 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8477 // the number of src operands present, then copies that bit into src0_modifiers.
8478 static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8479   int Opc = Inst.getOpcode();
8480   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8481   if (OpSelIdx == -1)
8482     return;
8483 
8484   int SrcNum;
8485   const int Ops[] = { AMDGPU::OpName::src0,
8486                       AMDGPU::OpName::src1,
8487                       AMDGPU::OpName::src2 };
8488   for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8489        ++SrcNum)
8490     ;
8491   assert(SrcNum > 0);
8492 
8493   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8494 
8495   int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8496   if (DstIdx == -1)
8497     return;
8498 
8499   const MCOperand &DstOp = Inst.getOperand(DstIdx);
8500   int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8501   uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8502   if (DstOp.isReg() &&
8503       MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8504     if (AMDGPU::isHi(DstOp.getReg(), MRI))
8505       ModVal |= SISrcMods::DST_OP_SEL;
8506   } else {
8507     if ((OpSel & (1 << SrcNum)) != 0)
8508       ModVal |= SISrcMods::DST_OP_SEL;
8509   }
8510   Inst.getOperand(ModIdx).setImm(ModVal);
8511 }
8512 
8513 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8514                                    const OperandVector &Operands) {
8515   cvtVOP3P(Inst, Operands);
8516   cvtVOP3DstOpSelOnly(Inst, *getMRI());
8517 }
8518 
8519 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8520                                    OptionalImmIndexMap &OptionalIdx) {
8521   cvtVOP3P(Inst, Operands, OptionalIdx);
8522   cvtVOP3DstOpSelOnly(Inst, *getMRI());
8523 }
8524 
8525 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8526   return
8527       // 1. This operand is input modifiers
8528       Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8529       // 2. This is not last operand
8530       && Desc.NumOperands > (OpNum + 1)
8531       // 3. Next operand is register class
8532       && Desc.operands()[OpNum + 1].RegClass != -1
8533       // 4. Next register is not tied to any other operand
8534       && Desc.getOperandConstraint(OpNum + 1,
8535                                    MCOI::OperandConstraint::TIED_TO) == -1;
8536 }
8537 
8538 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8539 {
8540   OptionalImmIndexMap OptionalIdx;
8541   unsigned Opc = Inst.getOpcode();
8542 
8543   unsigned I = 1;
8544   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8545   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8546     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8547   }
8548 
8549   for (unsigned E = Operands.size(); I != E; ++I) {
8550     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8551     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8552       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8553     } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8554                Op.isInterpAttrChan()) {
8555       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8556     } else if (Op.isImmModifier()) {
8557       OptionalIdx[Op.getImmTy()] = I;
8558     } else {
8559       llvm_unreachable("unhandled operand type");
8560     }
8561   }
8562 
8563   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8564     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8565                           AMDGPUOperand::ImmTyHigh);
8566 
8567   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8568     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8569                           AMDGPUOperand::ImmTyClamp);
8570 
8571   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8572     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8573                           AMDGPUOperand::ImmTyOModSI);
8574 }
8575 
8576 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8577 {
8578   OptionalImmIndexMap OptionalIdx;
8579   unsigned Opc = Inst.getOpcode();
8580 
8581   unsigned I = 1;
8582   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8583   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8584     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8585   }
8586 
8587   for (unsigned E = Operands.size(); I != E; ++I) {
8588     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8589     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8590       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8591     } else if (Op.isImmModifier()) {
8592       OptionalIdx[Op.getImmTy()] = I;
8593     } else {
8594       llvm_unreachable("unhandled operand type");
8595     }
8596   }
8597 
8598   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
8599 
8600   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8601   if (OpSelIdx != -1)
8602     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8603 
8604   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8605 
8606   if (OpSelIdx == -1)
8607     return;
8608 
8609   const int Ops[] = { AMDGPU::OpName::src0,
8610                       AMDGPU::OpName::src1,
8611                       AMDGPU::OpName::src2 };
8612   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8613                          AMDGPU::OpName::src1_modifiers,
8614                          AMDGPU::OpName::src2_modifiers };
8615 
8616   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8617 
8618   for (int J = 0; J < 3; ++J) {
8619     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8620     if (OpIdx == -1)
8621       break;
8622 
8623     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8624     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8625 
8626     if ((OpSel & (1 << J)) != 0)
8627       ModVal |= SISrcMods::OP_SEL_0;
8628     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8629         (OpSel & (1 << 3)) != 0)
8630       ModVal |= SISrcMods::DST_OP_SEL;
8631 
8632     Inst.getOperand(ModIdx).setImm(ModVal);
8633   }
8634 }
8635 
8636 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8637                               OptionalImmIndexMap &OptionalIdx) {
8638   unsigned Opc = Inst.getOpcode();
8639 
8640   unsigned I = 1;
8641   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8642   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8643     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8644   }
8645 
8646   for (unsigned E = Operands.size(); I != E; ++I) {
8647     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8648     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8649       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8650     } else if (Op.isImmModifier()) {
8651       OptionalIdx[Op.getImmTy()] = I;
8652     } else if (Op.isRegOrImm()) {
8653       Op.addRegOrImmOperands(Inst, 1);
8654     } else {
8655       llvm_unreachable("unhandled operand type");
8656     }
8657   }
8658 
8659   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
8660     if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
8661       Inst.addOperand(Inst.getOperand(0));
8662     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8663                           AMDGPUOperand::ImmTyByteSel);
8664   }
8665 
8666   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8667     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8668                           AMDGPUOperand::ImmTyClamp);
8669 
8670   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8671     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8672                           AMDGPUOperand::ImmTyOModSI);
8673 
8674   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8675   // it has src2 register operand that is tied to dst operand
8676   // we don't allow modifiers for this operand in assembler so src2_modifiers
8677   // should be 0.
8678   if (isMAC(Opc)) {
8679     auto it = Inst.begin();
8680     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8681     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8682     ++it;
8683     // Copy the operand to ensure it's not invalidated when Inst grows.
8684     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8685   }
8686 }
8687 
8688 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8689   OptionalImmIndexMap OptionalIdx;
8690   cvtVOP3(Inst, Operands, OptionalIdx);
8691 }
8692 
8693 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8694                                OptionalImmIndexMap &OptIdx) {
8695   const int Opc = Inst.getOpcode();
8696   const MCInstrDesc &Desc = MII.get(Opc);
8697 
8698   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8699 
8700   if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8701       Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8702       Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8703       Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8704     Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8705     Inst.addOperand(Inst.getOperand(0));
8706   }
8707 
8708   // Adding vdst_in operand is already covered for these DPP instructions in
8709   // cvtVOP3DPP.
8710   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8711       !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8712         Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8713         Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8714         Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8715         Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8716         Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8717         Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8718         Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8719     assert(!IsPacked);
8720     Inst.addOperand(Inst.getOperand(0));
8721   }
8722 
8723   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8724   // instruction, and then figure out where to actually put the modifiers
8725 
8726   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8727   if (OpSelIdx != -1) {
8728     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8729   }
8730 
8731   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8732   if (OpSelHiIdx != -1) {
8733     int DefaultVal = IsPacked ? -1 : 0;
8734     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8735                           DefaultVal);
8736   }
8737 
8738   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8739   if (NegLoIdx != -1)
8740     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8741 
8742   int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8743   if (NegHiIdx != -1)
8744     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8745 
8746   const int Ops[] = { AMDGPU::OpName::src0,
8747                       AMDGPU::OpName::src1,
8748                       AMDGPU::OpName::src2 };
8749   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8750                          AMDGPU::OpName::src1_modifiers,
8751                          AMDGPU::OpName::src2_modifiers };
8752 
8753   unsigned OpSel = 0;
8754   unsigned OpSelHi = 0;
8755   unsigned NegLo = 0;
8756   unsigned NegHi = 0;
8757 
8758   if (OpSelIdx != -1)
8759     OpSel = Inst.getOperand(OpSelIdx).getImm();
8760 
8761   if (OpSelHiIdx != -1)
8762     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8763 
8764   if (NegLoIdx != -1)
8765     NegLo = Inst.getOperand(NegLoIdx).getImm();
8766 
8767   if (NegHiIdx != -1)
8768     NegHi = Inst.getOperand(NegHiIdx).getImm();
8769 
8770   for (int J = 0; J < 3; ++J) {
8771     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8772     if (OpIdx == -1)
8773       break;
8774 
8775     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8776 
8777     if (ModIdx == -1)
8778       continue;
8779 
8780     uint32_t ModVal = 0;
8781 
8782     const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8783     if (SrcOp.isReg() && getMRI()
8784                              ->getRegClass(AMDGPU::VGPR_16RegClassID)
8785                              .contains(SrcOp.getReg())) {
8786       bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
8787       if (VGPRSuffixIsHi)
8788         ModVal |= SISrcMods::OP_SEL_0;
8789     } else {
8790       if ((OpSel & (1 << J)) != 0)
8791         ModVal |= SISrcMods::OP_SEL_0;
8792     }
8793 
8794     if ((OpSelHi & (1 << J)) != 0)
8795       ModVal |= SISrcMods::OP_SEL_1;
8796 
8797     if ((NegLo & (1 << J)) != 0)
8798       ModVal |= SISrcMods::NEG;
8799 
8800     if ((NegHi & (1 << J)) != 0)
8801       ModVal |= SISrcMods::NEG_HI;
8802 
8803     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8804   }
8805 }
8806 
8807 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8808   OptionalImmIndexMap OptIdx;
8809   cvtVOP3(Inst, Operands, OptIdx);
8810   cvtVOP3P(Inst, Operands, OptIdx);
8811 }
8812 
8813 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8814                                   unsigned i, unsigned Opc, unsigned OpName) {
8815   if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8816     ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8817   else
8818     ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8819 }
8820 
8821 void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8822   unsigned Opc = Inst.getOpcode();
8823 
8824   ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8825   addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8826   addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8827   ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8828   ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8829 
8830   OptionalImmIndexMap OptIdx;
8831   for (unsigned i = 5; i < Operands.size(); ++i) {
8832     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8833     OptIdx[Op.getImmTy()] = i;
8834   }
8835 
8836   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8837     addOptionalImmOperand(Inst, Operands, OptIdx,
8838                           AMDGPUOperand::ImmTyIndexKey8bit);
8839 
8840   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8841     addOptionalImmOperand(Inst, Operands, OptIdx,
8842                           AMDGPUOperand::ImmTyIndexKey16bit);
8843 
8844   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8845     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
8846 
8847   cvtVOP3P(Inst, Operands, OptIdx);
8848 }
8849 
8850 //===----------------------------------------------------------------------===//
8851 // VOPD
8852 //===----------------------------------------------------------------------===//
8853 
8854 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8855   if (!hasVOPD(getSTI()))
8856     return ParseStatus::NoMatch;
8857 
8858   if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8859     SMLoc S = getLoc();
8860     lex();
8861     lex();
8862     Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8863     SMLoc OpYLoc = getLoc();
8864     StringRef OpYName;
8865     if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8866       Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8867       return ParseStatus::Success;
8868     }
8869     return Error(OpYLoc, "expected a VOPDY instruction after ::");
8870   }
8871   return ParseStatus::NoMatch;
8872 }
8873 
8874 // Create VOPD MCInst operands using parsed assembler operands.
8875 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8876   auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8877     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8878     if (Op.isReg()) {
8879       Op.addRegOperands(Inst, 1);
8880       return;
8881     }
8882     if (Op.isImm()) {
8883       Op.addImmOperands(Inst, 1);
8884       return;
8885     }
8886     llvm_unreachable("Unhandled operand type in cvtVOPD");
8887   };
8888 
8889   const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8890 
8891   // MCInst operands are ordered as follows:
8892   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8893 
8894   for (auto CompIdx : VOPD::COMPONENTS) {
8895     addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8896   }
8897 
8898   for (auto CompIdx : VOPD::COMPONENTS) {
8899     const auto &CInfo = InstInfo[CompIdx];
8900     auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8901     for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8902       addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8903     if (CInfo.hasSrc2Acc())
8904       addOp(CInfo.getIndexOfDstInParsedOperands());
8905   }
8906 }
8907 
8908 //===----------------------------------------------------------------------===//
8909 // dpp
8910 //===----------------------------------------------------------------------===//
8911 
8912 bool AMDGPUOperand::isDPP8() const {
8913   return isImmTy(ImmTyDPP8);
8914 }
8915 
8916 bool AMDGPUOperand::isDPPCtrl() const {
8917   using namespace AMDGPU::DPP;
8918 
8919   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8920   if (result) {
8921     int64_t Imm = getImm();
8922     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8923            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8924            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8925            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8926            (Imm == DppCtrl::WAVE_SHL1) ||
8927            (Imm == DppCtrl::WAVE_ROL1) ||
8928            (Imm == DppCtrl::WAVE_SHR1) ||
8929            (Imm == DppCtrl::WAVE_ROR1) ||
8930            (Imm == DppCtrl::ROW_MIRROR) ||
8931            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8932            (Imm == DppCtrl::BCAST15) ||
8933            (Imm == DppCtrl::BCAST31) ||
8934            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8935            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8936   }
8937   return false;
8938 }
8939 
8940 //===----------------------------------------------------------------------===//
8941 // mAI
8942 //===----------------------------------------------------------------------===//
8943 
8944 bool AMDGPUOperand::isBLGP() const {
8945   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8946 }
8947 
8948 bool AMDGPUOperand::isS16Imm() const {
8949   return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8950 }
8951 
8952 bool AMDGPUOperand::isU16Imm() const {
8953   return isImmLiteral() && isUInt<16>(getImm());
8954 }
8955 
8956 //===----------------------------------------------------------------------===//
8957 // dim
8958 //===----------------------------------------------------------------------===//
8959 
8960 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8961   // We want to allow "dim:1D" etc.,
8962   // but the initial 1 is tokenized as an integer.
8963   std::string Token;
8964   if (isToken(AsmToken::Integer)) {
8965     SMLoc Loc = getToken().getEndLoc();
8966     Token = std::string(getTokenStr());
8967     lex();
8968     if (getLoc() != Loc)
8969       return false;
8970   }
8971 
8972   StringRef Suffix;
8973   if (!parseId(Suffix))
8974     return false;
8975   Token += Suffix;
8976 
8977   StringRef DimId = Token;
8978   if (DimId.starts_with("SQ_RSRC_IMG_"))
8979     DimId = DimId.drop_front(12);
8980 
8981   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8982   if (!DimInfo)
8983     return false;
8984 
8985   Encoding = DimInfo->Encoding;
8986   return true;
8987 }
8988 
8989 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8990   if (!isGFX10Plus())
8991     return ParseStatus::NoMatch;
8992 
8993   SMLoc S = getLoc();
8994 
8995   if (!trySkipId("dim", AsmToken::Colon))
8996     return ParseStatus::NoMatch;
8997 
8998   unsigned Encoding;
8999   SMLoc Loc = getLoc();
9000   if (!parseDimId(Encoding))
9001     return Error(Loc, "invalid dim value");
9002 
9003   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9004                                               AMDGPUOperand::ImmTyDim));
9005   return ParseStatus::Success;
9006 }
9007 
9008 //===----------------------------------------------------------------------===//
9009 // dpp
9010 //===----------------------------------------------------------------------===//
9011 
9012 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9013   SMLoc S = getLoc();
9014 
9015   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9016     return ParseStatus::NoMatch;
9017 
9018   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9019 
9020   int64_t Sels[8];
9021 
9022   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9023     return ParseStatus::Failure;
9024 
9025   for (size_t i = 0; i < 8; ++i) {
9026     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9027       return ParseStatus::Failure;
9028 
9029     SMLoc Loc = getLoc();
9030     if (getParser().parseAbsoluteExpression(Sels[i]))
9031       return ParseStatus::Failure;
9032     if (0 > Sels[i] || 7 < Sels[i])
9033       return Error(Loc, "expected a 3-bit value");
9034   }
9035 
9036   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9037     return ParseStatus::Failure;
9038 
9039   unsigned DPP8 = 0;
9040   for (size_t i = 0; i < 8; ++i)
9041     DPP8 |= (Sels[i] << (i * 3));
9042 
9043   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9044   return ParseStatus::Success;
9045 }
9046 
9047 bool
9048 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9049                                     const OperandVector &Operands) {
9050   if (Ctrl == "row_newbcast")
9051     return isGFX90A();
9052 
9053   if (Ctrl == "row_share" ||
9054       Ctrl == "row_xmask")
9055     return isGFX10Plus();
9056 
9057   if (Ctrl == "wave_shl" ||
9058       Ctrl == "wave_shr" ||
9059       Ctrl == "wave_rol" ||
9060       Ctrl == "wave_ror" ||
9061       Ctrl == "row_bcast")
9062     return isVI() || isGFX9();
9063 
9064   return Ctrl == "row_mirror" ||
9065          Ctrl == "row_half_mirror" ||
9066          Ctrl == "quad_perm" ||
9067          Ctrl == "row_shl" ||
9068          Ctrl == "row_shr" ||
9069          Ctrl == "row_ror";
9070 }
9071 
9072 int64_t
9073 AMDGPUAsmParser::parseDPPCtrlPerm() {
9074   // quad_perm:[%d,%d,%d,%d]
9075 
9076   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9077     return -1;
9078 
9079   int64_t Val = 0;
9080   for (int i = 0; i < 4; ++i) {
9081     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9082       return -1;
9083 
9084     int64_t Temp;
9085     SMLoc Loc = getLoc();
9086     if (getParser().parseAbsoluteExpression(Temp))
9087       return -1;
9088     if (Temp < 0 || Temp > 3) {
9089       Error(Loc, "expected a 2-bit value");
9090       return -1;
9091     }
9092 
9093     Val += (Temp << i * 2);
9094   }
9095 
9096   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9097     return -1;
9098 
9099   return Val;
9100 }
9101 
9102 int64_t
9103 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9104   using namespace AMDGPU::DPP;
9105 
9106   // sel:%d
9107 
9108   int64_t Val;
9109   SMLoc Loc = getLoc();
9110 
9111   if (getParser().parseAbsoluteExpression(Val))
9112     return -1;
9113 
9114   struct DppCtrlCheck {
9115     int64_t Ctrl;
9116     int Lo;
9117     int Hi;
9118   };
9119 
9120   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9121     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
9122     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
9123     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
9124     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
9125     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
9126     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
9127     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
9128     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9129     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9130     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9131     .Default({-1, 0, 0});
9132 
9133   bool Valid;
9134   if (Check.Ctrl == -1) {
9135     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9136     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9137   } else {
9138     Valid = Check.Lo <= Val && Val <= Check.Hi;
9139     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9140   }
9141 
9142   if (!Valid) {
9143     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9144     return -1;
9145   }
9146 
9147   return Val;
9148 }
9149 
9150 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9151   using namespace AMDGPU::DPP;
9152 
9153   if (!isToken(AsmToken::Identifier) ||
9154       !isSupportedDPPCtrl(getTokenStr(), Operands))
9155     return ParseStatus::NoMatch;
9156 
9157   SMLoc S = getLoc();
9158   int64_t Val = -1;
9159   StringRef Ctrl;
9160 
9161   parseId(Ctrl);
9162 
9163   if (Ctrl == "row_mirror") {
9164     Val = DppCtrl::ROW_MIRROR;
9165   } else if (Ctrl == "row_half_mirror") {
9166     Val = DppCtrl::ROW_HALF_MIRROR;
9167   } else {
9168     if (skipToken(AsmToken::Colon, "expected a colon")) {
9169       if (Ctrl == "quad_perm") {
9170         Val = parseDPPCtrlPerm();
9171       } else {
9172         Val = parseDPPCtrlSel(Ctrl);
9173       }
9174     }
9175   }
9176 
9177   if (Val == -1)
9178     return ParseStatus::Failure;
9179 
9180   Operands.push_back(
9181     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9182   return ParseStatus::Success;
9183 }
9184 
9185 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9186                                  bool IsDPP8) {
9187   OptionalImmIndexMap OptionalIdx;
9188   unsigned Opc = Inst.getOpcode();
9189   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9190 
9191   // MAC instructions are special because they have 'old'
9192   // operand which is not tied to dst (but assumed to be).
9193   // They also have dummy unused src2_modifiers.
9194   int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9195   int Src2ModIdx =
9196       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9197   bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9198                Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9199 
9200   unsigned I = 1;
9201   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9202     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9203   }
9204 
9205   int Fi = 0;
9206   for (unsigned E = Operands.size(); I != E; ++I) {
9207 
9208     if (IsMAC) {
9209       int NumOperands = Inst.getNumOperands();
9210       if (OldIdx == NumOperands) {
9211         // Handle old operand
9212         constexpr int DST_IDX = 0;
9213         Inst.addOperand(Inst.getOperand(DST_IDX));
9214       } else if (Src2ModIdx == NumOperands) {
9215         // Add unused dummy src2_modifiers
9216         Inst.addOperand(MCOperand::createImm(0));
9217       }
9218     }
9219 
9220     int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9221     if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9222       Inst.addOperand(Inst.getOperand(0));
9223     }
9224 
9225     bool IsVOP3CvtSrDpp =
9226         Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9227         Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9228         Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9229         Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9230     if (IsVOP3CvtSrDpp) {
9231       if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9232         Inst.addOperand(MCOperand::createImm(0));
9233         Inst.addOperand(MCOperand::createReg(0));
9234       }
9235     }
9236 
9237     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9238                                             MCOI::TIED_TO);
9239     if (TiedTo != -1) {
9240       assert((unsigned)TiedTo < Inst.getNumOperands());
9241       // handle tied old or src2 for MAC instructions
9242       Inst.addOperand(Inst.getOperand(TiedTo));
9243     }
9244     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9245     // Add the register arguments
9246     if (IsDPP8 && Op.isDppFI()) {
9247       Fi = Op.getImm();
9248     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9249       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9250     } else if (Op.isReg()) {
9251       Op.addRegOperands(Inst, 1);
9252     } else if (Op.isImm() &&
9253                Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9254       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9255       Op.addImmOperands(Inst, 1);
9256     } else if (Op.isImm()) {
9257       OptionalIdx[Op.getImmTy()] = I;
9258     } else {
9259       llvm_unreachable("unhandled operand type");
9260     }
9261   }
9262 
9263   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
9264     addOptionalImmOperand(Inst, Operands, OptionalIdx,
9265                           AMDGPUOperand::ImmTyByteSel);
9266 
9267   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9268     addOptionalImmOperand(Inst, Operands, OptionalIdx,
9269                           AMDGPUOperand::ImmTyClamp);
9270 
9271   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9272     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9273 
9274   if (Desc.TSFlags & SIInstrFlags::VOP3P)
9275     cvtVOP3P(Inst, Operands, OptionalIdx);
9276   else if (Desc.TSFlags & SIInstrFlags::VOP3)
9277     cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9278   else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9279     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9280   }
9281 
9282   if (IsDPP8) {
9283     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9284     using namespace llvm::AMDGPU::DPP;
9285     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9286   } else {
9287     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9288     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9289     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9290     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9291 
9292     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9293       addOptionalImmOperand(Inst, Operands, OptionalIdx,
9294                             AMDGPUOperand::ImmTyDppFI);
9295   }
9296 }
9297 
9298 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9299   OptionalImmIndexMap OptionalIdx;
9300 
9301   unsigned I = 1;
9302   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9303   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9304     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9305   }
9306 
9307   int Fi = 0;
9308   for (unsigned E = Operands.size(); I != E; ++I) {
9309     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9310                                             MCOI::TIED_TO);
9311     if (TiedTo != -1) {
9312       assert((unsigned)TiedTo < Inst.getNumOperands());
9313       // handle tied old or src2 for MAC instructions
9314       Inst.addOperand(Inst.getOperand(TiedTo));
9315     }
9316     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9317     // Add the register arguments
9318     if (Op.isReg() && validateVccOperand(Op.getReg())) {
9319       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
9320       // Skip it.
9321       continue;
9322     }
9323 
9324     if (IsDPP8) {
9325       if (Op.isDPP8()) {
9326         Op.addImmOperands(Inst, 1);
9327       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9328         Op.addRegWithFPInputModsOperands(Inst, 2);
9329       } else if (Op.isDppFI()) {
9330         Fi = Op.getImm();
9331       } else if (Op.isReg()) {
9332         Op.addRegOperands(Inst, 1);
9333       } else {
9334         llvm_unreachable("Invalid operand type");
9335       }
9336     } else {
9337       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9338         Op.addRegWithFPInputModsOperands(Inst, 2);
9339       } else if (Op.isReg()) {
9340         Op.addRegOperands(Inst, 1);
9341       } else if (Op.isDPPCtrl()) {
9342         Op.addImmOperands(Inst, 1);
9343       } else if (Op.isImm()) {
9344         // Handle optional arguments
9345         OptionalIdx[Op.getImmTy()] = I;
9346       } else {
9347         llvm_unreachable("Invalid operand type");
9348       }
9349     }
9350   }
9351 
9352   if (IsDPP8) {
9353     using namespace llvm::AMDGPU::DPP;
9354     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9355   } else {
9356     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9357     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9358     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9359     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9360       addOptionalImmOperand(Inst, Operands, OptionalIdx,
9361                             AMDGPUOperand::ImmTyDppFI);
9362     }
9363   }
9364 }
9365 
9366 //===----------------------------------------------------------------------===//
9367 // sdwa
9368 //===----------------------------------------------------------------------===//
9369 
9370 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9371                                           StringRef Prefix,
9372                                           AMDGPUOperand::ImmTy Type) {
9373   using namespace llvm::AMDGPU::SDWA;
9374 
9375   SMLoc S = getLoc();
9376   StringRef Value;
9377 
9378   SMLoc StringLoc;
9379   ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9380   if (!Res.isSuccess())
9381     return Res;
9382 
9383   int64_t Int;
9384   Int = StringSwitch<int64_t>(Value)
9385         .Case("BYTE_0", SdwaSel::BYTE_0)
9386         .Case("BYTE_1", SdwaSel::BYTE_1)
9387         .Case("BYTE_2", SdwaSel::BYTE_2)
9388         .Case("BYTE_3", SdwaSel::BYTE_3)
9389         .Case("WORD_0", SdwaSel::WORD_0)
9390         .Case("WORD_1", SdwaSel::WORD_1)
9391         .Case("DWORD", SdwaSel::DWORD)
9392         .Default(0xffffffff);
9393 
9394   if (Int == 0xffffffff)
9395     return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9396 
9397   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9398   return ParseStatus::Success;
9399 }
9400 
9401 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9402   using namespace llvm::AMDGPU::SDWA;
9403 
9404   SMLoc S = getLoc();
9405   StringRef Value;
9406 
9407   SMLoc StringLoc;
9408   ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9409   if (!Res.isSuccess())
9410     return Res;
9411 
9412   int64_t Int;
9413   Int = StringSwitch<int64_t>(Value)
9414         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9415         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9416         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9417         .Default(0xffffffff);
9418 
9419   if (Int == 0xffffffff)
9420     return Error(StringLoc, "invalid dst_unused value");
9421 
9422   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9423   return ParseStatus::Success;
9424 }
9425 
9426 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9427   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9428 }
9429 
9430 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9431   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9432 }
9433 
9434 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9435   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9436 }
9437 
9438 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9439   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9440 }
9441 
9442 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9443   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9444 }
9445 
9446 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9447                               uint64_t BasicInstType,
9448                               bool SkipDstVcc,
9449                               bool SkipSrcVcc) {
9450   using namespace llvm::AMDGPU::SDWA;
9451 
9452   OptionalImmIndexMap OptionalIdx;
9453   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9454   bool SkippedVcc = false;
9455 
9456   unsigned I = 1;
9457   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9458   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9459     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9460   }
9461 
9462   for (unsigned E = Operands.size(); I != E; ++I) {
9463     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9464     if (SkipVcc && !SkippedVcc && Op.isReg() &&
9465         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9466       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
9467       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9468       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9469       // Skip VCC only if we didn't skip it on previous iteration.
9470       // Note that src0 and src1 occupy 2 slots each because of modifiers.
9471       if (BasicInstType == SIInstrFlags::VOP2 &&
9472           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9473            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9474         SkippedVcc = true;
9475         continue;
9476       }
9477       if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
9478         SkippedVcc = true;
9479         continue;
9480       }
9481     }
9482     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9483       Op.addRegOrImmWithInputModsOperands(Inst, 2);
9484     } else if (Op.isImm()) {
9485       // Handle optional arguments
9486       OptionalIdx[Op.getImmTy()] = I;
9487     } else {
9488       llvm_unreachable("Invalid operand type");
9489     }
9490     SkippedVcc = false;
9491   }
9492 
9493   const unsigned Opc = Inst.getOpcode();
9494   if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9495       Opc != AMDGPU::V_NOP_sdwa_vi) {
9496     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
9497     switch (BasicInstType) {
9498     case SIInstrFlags::VOP1:
9499       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9500         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9501                               AMDGPUOperand::ImmTyClamp, 0);
9502 
9503       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9504         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9505                               AMDGPUOperand::ImmTyOModSI, 0);
9506 
9507       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9508         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9509                               AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9510 
9511       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9512         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9513                               AMDGPUOperand::ImmTySDWADstUnused,
9514                               DstUnused::UNUSED_PRESERVE);
9515 
9516       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9517       break;
9518 
9519     case SIInstrFlags::VOP2:
9520       addOptionalImmOperand(Inst, Operands, OptionalIdx,
9521                             AMDGPUOperand::ImmTyClamp, 0);
9522 
9523       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9524         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9525 
9526       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9527       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9528       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9529       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9530       break;
9531 
9532     case SIInstrFlags::VOPC:
9533       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9534         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9535                               AMDGPUOperand::ImmTyClamp, 0);
9536       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9537       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9538       break;
9539 
9540     default:
9541       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9542     }
9543   }
9544 
9545   // special case v_mac_{f16, f32}:
9546   // it has src2 register operand that is tied to dst operand
9547   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9548       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
9549     auto it = Inst.begin();
9550     std::advance(
9551       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9552     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9553   }
9554 }
9555 
9556 /// Force static initialization.
9557 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9558   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9559   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9560 }
9561 
9562 #define GET_REGISTER_MATCHER
9563 #define GET_MATCHER_IMPLEMENTATION
9564 #define GET_MNEMONIC_SPELL_CHECKER
9565 #define GET_MNEMONIC_CHECKER
9566 #include "AMDGPUGenAsmMatcher.inc"
9567 
9568 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9569                                                 unsigned MCK) {
9570   switch (MCK) {
9571   case MCK_addr64:
9572     return parseTokenOp("addr64", Operands);
9573   case MCK_done:
9574     return parseTokenOp("done", Operands);
9575   case MCK_idxen:
9576     return parseTokenOp("idxen", Operands);
9577   case MCK_lds:
9578     return parseTokenOp("lds", Operands);
9579   case MCK_offen:
9580     return parseTokenOp("offen", Operands);
9581   case MCK_off:
9582     return parseTokenOp("off", Operands);
9583   case MCK_row_95_en:
9584     return parseTokenOp("row_en", Operands);
9585   case MCK_gds:
9586     return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9587   case MCK_tfe:
9588     return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9589   }
9590   return tryCustomParseOperand(Operands, MCK);
9591 }
9592 
9593 // This function should be defined after auto-generated include so that we have
9594 // MatchClassKind enum defined
9595 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9596                                                      unsigned Kind) {
9597   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9598   // But MatchInstructionImpl() expects to meet token and fails to validate
9599   // operand. This method checks if we are given immediate operand but expect to
9600   // get corresponding token.
9601   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9602   switch (Kind) {
9603   case MCK_addr64:
9604     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9605   case MCK_gds:
9606     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9607   case MCK_lds:
9608     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9609   case MCK_idxen:
9610     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9611   case MCK_offen:
9612     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9613   case MCK_tfe:
9614     return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9615   case MCK_SSrc_b32:
9616     // When operands have expression values, they will return true for isToken,
9617     // because it is not possible to distinguish between a token and an
9618     // expression at parse time. MatchInstructionImpl() will always try to
9619     // match an operand as a token, when isToken returns true, and when the
9620     // name of the expression is not a valid token, the match will fail,
9621     // so we need to handle it here.
9622     return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9623   case MCK_SSrc_f32:
9624     return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9625   case MCK_SOPPBrTarget:
9626     return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9627   case MCK_VReg32OrOff:
9628     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9629   case MCK_InterpSlot:
9630     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9631   case MCK_InterpAttr:
9632     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9633   case MCK_InterpAttrChan:
9634     return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9635   case MCK_SReg_64:
9636   case MCK_SReg_64_XEXEC:
9637     // Null is defined as a 32-bit register but
9638     // it should also be enabled with 64-bit operands.
9639     // The following code enables it for SReg_64 operands
9640     // used as source and destination. Remaining source
9641     // operands are handled in isInlinableImm.
9642     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9643   default:
9644     return Match_InvalidOperand;
9645   }
9646 }
9647 
9648 //===----------------------------------------------------------------------===//
9649 // endpgm
9650 //===----------------------------------------------------------------------===//
9651 
9652 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9653   SMLoc S = getLoc();
9654   int64_t Imm = 0;
9655 
9656   if (!parseExpr(Imm)) {
9657     // The operand is optional, if not present default to 0
9658     Imm = 0;
9659   }
9660 
9661   if (!isUInt<16>(Imm))
9662     return Error(S, "expected a 16-bit value");
9663 
9664   Operands.push_back(
9665       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9666   return ParseStatus::Success;
9667 }
9668 
9669 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9670 
9671 //===----------------------------------------------------------------------===//
9672 // Split Barrier
9673 //===----------------------------------------------------------------------===//
9674 
9675 bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
9676