xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (revision c9539b89010900499a200cdd6c0265ea5d950875)
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCInstrDesc.h"
29 #include "llvm/MC/MCParser/MCAsmLexer.h"
30 #include "llvm/MC/MCParser/MCAsmParser.h"
31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
32 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/AMDGPUMetadata.h"
36 #include "llvm/Support/AMDHSAKernelDescriptor.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/MachineValueType.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/TargetParser.h"
41 
42 using namespace llvm;
43 using namespace llvm::AMDGPU;
44 using namespace llvm::amdhsa;
45 
46 namespace {
47 
48 class AMDGPUAsmParser;
49 
/// Register categories the parser tells apart: unknown, VGPR, SGPR, AGPR,
/// TTMP and special registers.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
51 
52 //===----------------------------------------------------------------------===//
53 // Operand
54 //===----------------------------------------------------------------------===//
55 
56 class AMDGPUOperand : public MCParsedAsmOperand {
/// Discriminator for the operand payload: the accessors check it before
/// reading the matching union member (Tok, Imm, Reg or Expr).
enum KindTy { Token, Immediate, Register, Expression } Kind;
63 
64   SMLoc StartLoc, EndLoc;
65   const AMDGPUAsmParser *AsmParser;
66 
67 public:
68   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
69       : Kind(Kind_), AsmParser(AsmParser_) {}
70 
71   using Ptr = std::unique_ptr<AMDGPUOperand>;
72 
73   struct Modifiers {
74     bool Abs = false;
75     bool Neg = false;
76     bool Sext = false;
77 
78     bool hasFPModifiers() const { return Abs || Neg; }
79     bool hasIntModifiers() const { return Sext; }
80     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
81 
82     int64_t getFPModifiersOperand() const {
83       int64_t Operand = 0;
84       Operand |= Abs ? SISrcMods::ABS : 0u;
85       Operand |= Neg ? SISrcMods::NEG : 0u;
86       return Operand;
87     }
88 
89     int64_t getIntModifiersOperand() const {
90       int64_t Operand = 0;
91       Operand |= Sext ? SISrcMods::SEXT : 0u;
92       return Operand;
93     }
94 
95     int64_t getModifiersOperand() const {
96       assert(!(hasFPModifiers() && hasIntModifiers())
97            && "fp and int modifiers should not be used simultaneously");
98       if (hasFPModifiers()) {
99         return getFPModifiersOperand();
100       } else if (hasIntModifiers()) {
101         return getIntModifiersOperand();
102       } else {
103         return 0;
104       }
105     }
106 
107     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
108   };
109 
/// Tag stored with every immediate operand. ImmTyNone is the default used by
/// CreateImm; any other value makes isImmModifier() return true.
/// The enumerator order is unchanged, so the numeric values stay the same.
enum ImmTy {
  ImmTyNone = 0,
  ImmTyGDS, ImmTyLDS,
  ImmTyOffen, ImmTyIdxen, ImmTyAddr64,
  ImmTyOffset, ImmTyInstOffset, ImmTyOffset0, ImmTyOffset1,
  ImmTyCPol, ImmTySWZ, ImmTyTFE, ImmTyD16,
  ImmTyClampSI, ImmTyOModSI,
  ImmTySdwaDstSel, ImmTySdwaSrc0Sel, ImmTySdwaSrc1Sel, ImmTySdwaDstUnused,
  ImmTyDMask, ImmTyDim, ImmTyUNorm, ImmTyDA, ImmTyR128A16, ImmTyA16, ImmTyLWE,
  ImmTyExpTgt, ImmTyExpCompr, ImmTyExpVM,
  ImmTyFORMAT,
  ImmTyHwreg,
  ImmTyOff,
  ImmTySendMsg,
  ImmTyInterpSlot, ImmTyInterpAttr, ImmTyAttrChan,
  ImmTyOpSel, ImmTyOpSelHi, ImmTyNegLo, ImmTyNegHi,
  ImmTyDPP8, ImmTyDppCtrl, ImmTyDppRowMask, ImmTyDppBankMask,
  ImmTyDppBoundCtrl, ImmTyDppFi,
  ImmTySwizzle,
  ImmTyGprIdxMode,
  ImmTyHigh,
  ImmTyBLGP, ImmTyCBSZ, ImmTyABID,
  ImmTyEndpgm,
  ImmTyWaitVDST, ImmTyWaitEXP,
};
168 
/// Classification recorded on an immediate through setImmKindNone(),
/// setImmKindLiteral() and setImmKindConst().
enum ImmKindTy { ImmKindTyNone, ImmKindTyLiteral, ImmKindTyConst };
174 
175 private:
/// Payload of a Token operand: pointer to the token text plus its length.
/// getToken() turns this pair into a StringRef.
struct TokOp {
  const char *Data; // first character of the token
  unsigned Length;  // number of characters
};
180 
181   struct ImmOp {
182     int64_t Val;
183     ImmTy Type;
184     bool IsFPImm;
185     mutable ImmKindTy Kind;
186     Modifiers Mods;
187   };
188 
189   struct RegOp {
190     unsigned RegNo;
191     Modifiers Mods;
192   };
193 
194   union {
195     TokOp Tok;
196     ImmOp Imm;
197     RegOp Reg;
198     const MCExpr *Expr;
199   };
200 
201 public:
202   bool isToken() const override {
203     if (Kind == Token)
204       return true;
205 
206     // When parsing operands, we can't always tell if something was meant to be
207     // a token, like 'gds', or an expression that references a global variable.
208     // In this case, we assume the string is an expression, and if we need to
209     // interpret is a token, then we treat the symbol name as the token.
210     return isSymbolRefExpr();
211   }
212 
213   bool isSymbolRefExpr() const {
214     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
215   }
216 
217   bool isImm() const override {
218     return Kind == Immediate;
219   }
220 
221   void setImmKindNone() const {
222     assert(isImm());
223     Imm.Kind = ImmKindTyNone;
224   }
225 
226   void setImmKindLiteral() const {
227     assert(isImm());
228     Imm.Kind = ImmKindTyLiteral;
229   }
230 
231   void setImmKindConst() const {
232     assert(isImm());
233     Imm.Kind = ImmKindTyConst;
234   }
235 
236   bool IsImmKindLiteral() const {
237     return isImm() && Imm.Kind == ImmKindTyLiteral;
238   }
239 
240   bool isImmKindConst() const {
241     return isImm() && Imm.Kind == ImmKindTyConst;
242   }
243 
244   bool isInlinableImm(MVT type) const;
245   bool isLiteralImm(MVT type) const;
246 
247   bool isRegKind() const {
248     return Kind == Register;
249   }
250 
251   bool isReg() const override {
252     return isRegKind() && !hasModifiers();
253   }
254 
255   bool isRegOrInline(unsigned RCID, MVT type) const {
256     return isRegClass(RCID) || isInlinableImm(type);
257   }
258 
259   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
260     return isRegOrInline(RCID, type) || isLiteralImm(type);
261   }
262 
263   bool isRegOrImmWithInt16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
265   }
266 
267   bool isRegOrImmWithInt32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
269   }
270 
271   bool isRegOrInlineImmWithInt16InputMods() const {
272     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
273   }
274 
275   bool isRegOrInlineImmWithInt32InputMods() const {
276     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
277   }
278 
279   bool isRegOrImmWithInt64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
281   }
282 
283   bool isRegOrImmWithFP16InputMods() const {
284     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
285   }
286 
287   bool isRegOrImmWithFP32InputMods() const {
288     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289   }
290 
291   bool isRegOrImmWithFP64InputMods() const {
292     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293   }
294 
295   bool isRegOrInlineImmWithFP16InputMods() const {
296     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
297   }
298 
299   bool isRegOrInlineImmWithFP32InputMods() const {
300     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
301   }
302 
303 
304   bool isVReg() const {
305     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
306            isRegClass(AMDGPU::VReg_64RegClassID) ||
307            isRegClass(AMDGPU::VReg_96RegClassID) ||
308            isRegClass(AMDGPU::VReg_128RegClassID) ||
309            isRegClass(AMDGPU::VReg_160RegClassID) ||
310            isRegClass(AMDGPU::VReg_192RegClassID) ||
311            isRegClass(AMDGPU::VReg_256RegClassID) ||
312            isRegClass(AMDGPU::VReg_512RegClassID) ||
313            isRegClass(AMDGPU::VReg_1024RegClassID);
314   }
315 
316   bool isVReg32() const {
317     return isRegClass(AMDGPU::VGPR_32RegClassID);
318   }
319 
320   bool isVReg32OrOff() const {
321     return isOff() || isVReg32();
322   }
323 
324   bool isNull() const {
325     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
326   }
327 
328   bool isVRegWithInputMods() const;
329 
330   bool isSDWAOperand(MVT type) const;
331   bool isSDWAFP16Operand() const;
332   bool isSDWAFP32Operand() const;
333   bool isSDWAInt16Operand() const;
334   bool isSDWAInt32Operand() const;
335 
336   bool isImmTy(ImmTy ImmT) const {
337     return isImm() && Imm.Type == ImmT;
338   }
339 
340   bool isImmModifier() const {
341     return isImm() && Imm.Type != ImmTyNone;
342   }
343 
344   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
345   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
346   bool isDMask() const { return isImmTy(ImmTyDMask); }
347   bool isDim() const { return isImmTy(ImmTyDim); }
348   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
349   bool isDA() const { return isImmTy(ImmTyDA); }
350   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
351   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
352   bool isLWE() const { return isImmTy(ImmTyLWE); }
353   bool isOff() const { return isImmTy(ImmTyOff); }
354   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
355   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
356   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
357   bool isOffen() const { return isImmTy(ImmTyOffen); }
358   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
359   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
360   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
361   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
362   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
363 
364   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
365   bool isGDS() const { return isImmTy(ImmTyGDS); }
366   bool isLDS() const { return isImmTy(ImmTyLDS); }
367   bool isCPol() const { return isImmTy(ImmTyCPol); }
368   bool isSWZ() const { return isImmTy(ImmTySWZ); }
369   bool isTFE() const { return isImmTy(ImmTyTFE); }
370   bool isD16() const { return isImmTy(ImmTyD16); }
371   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
372   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
373   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
374   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
375   bool isFI() const { return isImmTy(ImmTyDppFi); }
376   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
377   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
378   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
379   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
380   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
381   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
382   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
383   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
384   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
385   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
386   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
387   bool isHigh() const { return isImmTy(ImmTyHigh); }
388 
389   bool isMod() const {
390     return isClampSI() || isOModSI();
391   }
392 
393   bool isRegOrImm() const {
394     return isReg() || isImm();
395   }
396 
397   bool isRegClass(unsigned RCID) const;
398 
399   bool isInlineValue() const;
400 
401   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
402     return isRegOrInline(RCID, type) && !hasModifiers();
403   }
404 
405   bool isSCSrcB16() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
407   }
408 
409   bool isSCSrcV2B16() const {
410     return isSCSrcB16();
411   }
412 
413   bool isSCSrcB32() const {
414     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
415   }
416 
417   bool isSCSrcB64() const {
418     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
419   }
420 
421   bool isBoolReg() const;
422 
423   bool isSCSrcF16() const {
424     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
425   }
426 
427   bool isSCSrcV2F16() const {
428     return isSCSrcF16();
429   }
430 
431   bool isSCSrcF32() const {
432     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
433   }
434 
435   bool isSCSrcF64() const {
436     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
437   }
438 
439   bool isSSrcB32() const {
440     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
441   }
442 
443   bool isSSrcB16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::i16);
445   }
446 
447   bool isSSrcV2B16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcB16();
450   }
451 
452   bool isSSrcB64() const {
453     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
454     // See isVSrc64().
455     return isSCSrcB64() || isLiteralImm(MVT::i64);
456   }
457 
458   bool isSSrcF32() const {
459     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
460   }
461 
462   bool isSSrcF64() const {
463     return isSCSrcB64() || isLiteralImm(MVT::f64);
464   }
465 
466   bool isSSrcF16() const {
467     return isSCSrcB16() || isLiteralImm(MVT::f16);
468   }
469 
470   bool isSSrcV2F16() const {
471     llvm_unreachable("cannot happen");
472     return isSSrcF16();
473   }
474 
475   bool isSSrcV2FP32() const {
476     llvm_unreachable("cannot happen");
477     return isSSrcF32();
478   }
479 
480   bool isSCSrcV2FP32() const {
481     llvm_unreachable("cannot happen");
482     return isSCSrcF32();
483   }
484 
485   bool isSSrcV2INT32() const {
486     llvm_unreachable("cannot happen");
487     return isSSrcB32();
488   }
489 
490   bool isSCSrcV2INT32() const {
491     llvm_unreachable("cannot happen");
492     return isSCSrcB32();
493   }
494 
495   bool isSSrcOrLdsB32() const {
496     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
497            isLiteralImm(MVT::i32) || isExpr();
498   }
499 
500   bool isVCSrcB32() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
502   }
503 
504   bool isVCSrcB64() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
506   }
507 
508   bool isVCSrcB16() const {
509     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
510   }
511 
512   bool isVCSrcV2B16() const {
513     return isVCSrcB16();
514   }
515 
516   bool isVCSrcF32() const {
517     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
518   }
519 
520   bool isVCSrcF64() const {
521     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
522   }
523 
524   bool isVCSrcF16() const {
525     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
526   }
527 
528   bool isVCSrcV2F16() const {
529     return isVCSrcF16();
530   }
531 
532   bool isVSrcB32() const {
533     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
534   }
535 
536   bool isVSrcB64() const {
537     return isVCSrcF64() || isLiteralImm(MVT::i64);
538   }
539 
540   bool isVSrcB16() const {
541     return isVCSrcB16() || isLiteralImm(MVT::i16);
542   }
543 
544   bool isVSrcV2B16() const {
545     return isVSrcB16() || isLiteralImm(MVT::v2i16);
546   }
547 
548   bool isVCSrcV2FP32() const {
549     return isVCSrcF64();
550   }
551 
552   bool isVSrcV2FP32() const {
553     return isVSrcF64() || isLiteralImm(MVT::v2f32);
554   }
555 
556   bool isVCSrcV2INT32() const {
557     return isVCSrcB64();
558   }
559 
560   bool isVSrcV2INT32() const {
561     return isVSrcB64() || isLiteralImm(MVT::v2i32);
562   }
563 
564   bool isVSrcF32() const {
565     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
566   }
567 
568   bool isVSrcF64() const {
569     return isVCSrcF64() || isLiteralImm(MVT::f64);
570   }
571 
572   bool isVSrcF16() const {
573     return isVCSrcF16() || isLiteralImm(MVT::f16);
574   }
575 
576   bool isVSrcV2F16() const {
577     return isVSrcF16() || isLiteralImm(MVT::v2f16);
578   }
579 
580   bool isVISrcB32() const {
581     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
582   }
583 
584   bool isVISrcB16() const {
585     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
586   }
587 
588   bool isVISrcV2B16() const {
589     return isVISrcB16();
590   }
591 
592   bool isVISrcF32() const {
593     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
594   }
595 
596   bool isVISrcF16() const {
597     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
598   }
599 
600   bool isVISrcV2F16() const {
601     return isVISrcF16() || isVISrcB32();
602   }
603 
604   bool isVISrc_64B64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
606   }
607 
608   bool isVISrc_64F64() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
610   }
611 
612   bool isVISrc_64V2FP32() const {
613     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
614   }
615 
616   bool isVISrc_64V2INT32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_256B64() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
622   }
623 
624   bool isVISrc_256F64() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
626   }
627 
628   bool isVISrc_128B16() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
630   }
631 
632   bool isVISrc_128V2B16() const {
633     return isVISrc_128B16();
634   }
635 
636   bool isVISrc_128B32() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
638   }
639 
640   bool isVISrc_128F32() const {
641     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
642   }
643 
644   bool isVISrc_256V2FP32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_256V2INT32() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
650   }
651 
652   bool isVISrc_512B32() const {
653     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
654   }
655 
656   bool isVISrc_512B16() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
658   }
659 
660   bool isVISrc_512V2B16() const {
661     return isVISrc_512B16();
662   }
663 
664   bool isVISrc_512F32() const {
665     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
666   }
667 
668   bool isVISrc_512F16() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
670   }
671 
672   bool isVISrc_512V2F16() const {
673     return isVISrc_512F16() || isVISrc_512B32();
674   }
675 
676   bool isVISrc_1024B32() const {
677     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
678   }
679 
680   bool isVISrc_1024B16() const {
681     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
682   }
683 
684   bool isVISrc_1024V2B16() const {
685     return isVISrc_1024B16();
686   }
687 
688   bool isVISrc_1024F32() const {
689     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
690   }
691 
692   bool isVISrc_1024F16() const {
693     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
694   }
695 
696   bool isVISrc_1024V2F16() const {
697     return isVISrc_1024F16() || isVISrc_1024B32();
698   }
699 
700   bool isAISrcB32() const {
701     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
702   }
703 
704   bool isAISrcB16() const {
705     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
706   }
707 
708   bool isAISrcV2B16() const {
709     return isAISrcB16();
710   }
711 
712   bool isAISrcF32() const {
713     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
714   }
715 
716   bool isAISrcF16() const {
717     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
718   }
719 
720   bool isAISrcV2F16() const {
721     return isAISrcF16() || isAISrcB32();
722   }
723 
724   bool isAISrc_64B64() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
726   }
727 
728   bool isAISrc_64F64() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
730   }
731 
732   bool isAISrc_128B32() const {
733     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
734   }
735 
736   bool isAISrc_128B16() const {
737     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
738   }
739 
740   bool isAISrc_128V2B16() const {
741     return isAISrc_128B16();
742   }
743 
744   bool isAISrc_128F32() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
746   }
747 
748   bool isAISrc_128F16() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
750   }
751 
752   bool isAISrc_128V2F16() const {
753     return isAISrc_128F16() || isAISrc_128B32();
754   }
755 
756   bool isVISrc_128F16() const {
757     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
758   }
759 
760   bool isVISrc_128V2F16() const {
761     return isVISrc_128F16() || isVISrc_128B32();
762   }
763 
764   bool isAISrc_256B64() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
766   }
767 
768   bool isAISrc_256F64() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
770   }
771 
772   bool isAISrc_512B32() const {
773     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
774   }
775 
776   bool isAISrc_512B16() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
778   }
779 
780   bool isAISrc_512V2B16() const {
781     return isAISrc_512B16();
782   }
783 
784   bool isAISrc_512F32() const {
785     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
786   }
787 
788   bool isAISrc_512F16() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
790   }
791 
792   bool isAISrc_512V2F16() const {
793     return isAISrc_512F16() || isAISrc_512B32();
794   }
795 
796   bool isAISrc_1024B32() const {
797     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
798   }
799 
800   bool isAISrc_1024B16() const {
801     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
802   }
803 
804   bool isAISrc_1024V2B16() const {
805     return isAISrc_1024B16();
806   }
807 
808   bool isAISrc_1024F32() const {
809     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
810   }
811 
812   bool isAISrc_1024F16() const {
813     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
814   }
815 
816   bool isAISrc_1024V2F16() const {
817     return isAISrc_1024F16() || isAISrc_1024B32();
818   }
819 
820   bool isKImmFP32() const {
821     return isLiteralImm(MVT::f32);
822   }
823 
824   bool isKImmFP16() const {
825     return isLiteralImm(MVT::f16);
826   }
827 
828   bool isMem() const override {
829     return false;
830   }
831 
832   bool isExpr() const {
833     return Kind == Expression;
834   }
835 
836   bool isSoppBrTarget() const {
837     return isExpr() || isImm();
838   }
839 
840   bool isSWaitCnt() const;
841   bool isDepCtr() const;
842   bool isSDelayAlu() const;
843   bool isHwreg() const;
844   bool isSendMsg() const;
845   bool isSwizzle() const;
846   bool isSMRDOffset8() const;
847   bool isSMEMOffset() const;
848   bool isSMRDLiteralOffset() const;
849   bool isDPP8() const;
850   bool isDPPCtrl() const;
851   bool isBLGP() const;
852   bool isCBSZ() const;
853   bool isABID() const;
854   bool isGPRIdxMode() const;
855   bool isS16Imm() const;
856   bool isU16Imm() const;
857   bool isEndpgm() const;
858   bool isWaitVDST() const;
859   bool isWaitEXP() const;
860 
861   StringRef getExpressionAsToken() const {
862     assert(isExpr());
863     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
864     return S->getSymbol().getName();
865   }
866 
867   StringRef getToken() const {
868     assert(isToken());
869 
870     if (Kind == Expression)
871       return getExpressionAsToken();
872 
873     return StringRef(Tok.Data, Tok.Length);
874   }
875 
876   int64_t getImm() const {
877     assert(isImm());
878     return Imm.Val;
879   }
880 
881   void setImm(int64_t Val) {
882     assert(isImm());
883     Imm.Val = Val;
884   }
885 
886   ImmTy getImmTy() const {
887     assert(isImm());
888     return Imm.Type;
889   }
890 
891   unsigned getReg() const override {
892     assert(isRegKind());
893     return Reg.RegNo;
894   }
895 
896   SMLoc getStartLoc() const override {
897     return StartLoc;
898   }
899 
900   SMLoc getEndLoc() const override {
901     return EndLoc;
902   }
903 
904   SMRange getLocRange() const {
905     return SMRange(StartLoc, EndLoc);
906   }
907 
908   Modifiers getModifiers() const {
909     assert(isRegKind() || isImmTy(ImmTyNone));
910     return isRegKind() ? Reg.Mods : Imm.Mods;
911   }
912 
913   void setModifiers(Modifiers Mods) {
914     assert(isRegKind() || isImmTy(ImmTyNone));
915     if (isRegKind())
916       Reg.Mods = Mods;
917     else
918       Imm.Mods = Mods;
919   }
920 
921   bool hasModifiers() const {
922     return getModifiers().hasModifiers();
923   }
924 
925   bool hasFPModifiers() const {
926     return getModifiers().hasFPModifiers();
927   }
928 
929   bool hasIntModifiers() const {
930     return getModifiers().hasIntModifiers();
931   }
932 
933   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
934 
935   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
936 
937   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
938 
939   template <unsigned Bitwidth>
940   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
941 
942   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
943     addKImmFPOperands<16>(Inst, N);
944   }
945 
946   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
947     addKImmFPOperands<32>(Inst, N);
948   }
949 
950   void addRegOperands(MCInst &Inst, unsigned N) const;
951 
952   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
953     addRegOperands(Inst, N);
954   }
955 
956   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
957     if (isRegKind())
958       addRegOperands(Inst, N);
959     else if (isExpr())
960       Inst.addOperand(MCOperand::createExpr(Expr));
961     else
962       addImmOperands(Inst, N);
963   }
964 
965   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
966     Modifiers Mods = getModifiers();
967     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
968     if (isRegKind()) {
969       addRegOperands(Inst, N);
970     } else {
971       addImmOperands(Inst, N, false);
972     }
973   }
974 
975   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
976     assert(!hasIntModifiers());
977     addRegOrImmWithInputModsOperands(Inst, N);
978   }
979 
980   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
981     assert(!hasFPModifiers());
982     addRegOrImmWithInputModsOperands(Inst, N);
983   }
984 
985   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
986     Modifiers Mods = getModifiers();
987     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
988     assert(isRegKind());
989     addRegOperands(Inst, N);
990   }
991 
992   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
993     assert(!hasIntModifiers());
994     addRegWithInputModsOperands(Inst, N);
995   }
996 
997   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
998     assert(!hasFPModifiers());
999     addRegWithInputModsOperands(Inst, N);
1000   }
1001 
1002   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
1003     if (isImm())
1004       addImmOperands(Inst, N);
1005     else {
1006       assert(isExpr());
1007       Inst.addOperand(MCOperand::createExpr(Expr));
1008     }
1009   }
1010 
1011   static void printImmTy(raw_ostream& OS, ImmTy Type) {
1012     switch (Type) {
1013     case ImmTyNone: OS << "None"; break;
1014     case ImmTyGDS: OS << "GDS"; break;
1015     case ImmTyLDS: OS << "LDS"; break;
1016     case ImmTyOffen: OS << "Offen"; break;
1017     case ImmTyIdxen: OS << "Idxen"; break;
1018     case ImmTyAddr64: OS << "Addr64"; break;
1019     case ImmTyOffset: OS << "Offset"; break;
1020     case ImmTyInstOffset: OS << "InstOffset"; break;
1021     case ImmTyOffset0: OS << "Offset0"; break;
1022     case ImmTyOffset1: OS << "Offset1"; break;
1023     case ImmTyCPol: OS << "CPol"; break;
1024     case ImmTySWZ: OS << "SWZ"; break;
1025     case ImmTyTFE: OS << "TFE"; break;
1026     case ImmTyD16: OS << "D16"; break;
1027     case ImmTyFORMAT: OS << "FORMAT"; break;
1028     case ImmTyClampSI: OS << "ClampSI"; break;
1029     case ImmTyOModSI: OS << "OModSI"; break;
1030     case ImmTyDPP8: OS << "DPP8"; break;
1031     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1032     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1033     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1034     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1035     case ImmTyDppFi: OS << "FI"; break;
1036     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1037     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1038     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1039     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1040     case ImmTyDMask: OS << "DMask"; break;
1041     case ImmTyDim: OS << "Dim"; break;
1042     case ImmTyUNorm: OS << "UNorm"; break;
1043     case ImmTyDA: OS << "DA"; break;
1044     case ImmTyR128A16: OS << "R128A16"; break;
1045     case ImmTyA16: OS << "A16"; break;
1046     case ImmTyLWE: OS << "LWE"; break;
1047     case ImmTyOff: OS << "Off"; break;
1048     case ImmTyExpTgt: OS << "ExpTgt"; break;
1049     case ImmTyExpCompr: OS << "ExpCompr"; break;
1050     case ImmTyExpVM: OS << "ExpVM"; break;
1051     case ImmTyHwreg: OS << "Hwreg"; break;
1052     case ImmTySendMsg: OS << "SendMsg"; break;
1053     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1054     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1055     case ImmTyAttrChan: OS << "AttrChan"; break;
1056     case ImmTyOpSel: OS << "OpSel"; break;
1057     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1058     case ImmTyNegLo: OS << "NegLo"; break;
1059     case ImmTyNegHi: OS << "NegHi"; break;
1060     case ImmTySwizzle: OS << "Swizzle"; break;
1061     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1062     case ImmTyHigh: OS << "High"; break;
1063     case ImmTyBLGP: OS << "BLGP"; break;
1064     case ImmTyCBSZ: OS << "CBSZ"; break;
1065     case ImmTyABID: OS << "ABID"; break;
1066     case ImmTyEndpgm: OS << "Endpgm"; break;
1067     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1068     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1069     }
1070   }
1071 
1072   void print(raw_ostream &OS) const override {
         // Writes a short bracketed dump of this operand to OS. The layout is
         // selected by Kind:
         //   Register   : <register N mods: M>
         //   Immediate  : <V mods: M>, with a type field inserted when typed
         //   Token      : the token text between single quotes
         //   Expression : <expr E>
         // The switch has no default case, so a Kind not listed here prints
         // nothing.
1073     switch (Kind) {
1074     case Register:
1075       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1076       break;
1077     case Immediate:
1078       OS << '<' << getImm();
           // The type field is only emitted for immediates whose ImmTy is not
           // ImmTyNone; printImmTy supplies the type name.
1079       if (getImmTy() != ImmTyNone) {
1080         OS << " type: "; printImmTy(OS, getImmTy());
1081       }
1082       OS << " mods: " << Imm.Mods << '>';
1083       break;
1084     case Token:
1085       OS << '\'' << getToken() << '\'';
1086       break;
1087     case Expression:
1088       OS << "<expr " << *Expr << '>';
1089       break;
1090     }
1091   }
1092 
1093   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1094                                       int64_t Val, SMLoc Loc,
1095                                       ImmTy Type = ImmTyNone,
1096                                       bool IsFPImm = false) {
         // Factory for an operand of kind Immediate.
         //   AsmParser : forwarded to the AMDGPUOperand constructor.
         //   Val       : stored as Imm.Val.
         //   Loc       : used for both StartLoc and EndLoc.
         //   Type      : stored as Imm.Type (defaults to ImmTyNone).
         //   IsFPImm   : stored as Imm.IsFPImm (defaults to false).
         // Imm.Kind always starts as ImmKindTyNone and Imm.Mods as a
         // default-constructed Modifiers. Returns the newly allocated operand.
1097     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1098     Op->Imm.Val = Val;
1099     Op->Imm.IsFPImm = IsFPImm;
1100     Op->Imm.Kind = ImmKindTyNone;
1101     Op->Imm.Type = Type;
1102     Op->Imm.Mods = Modifiers();
1103     Op->StartLoc = Loc;
1104     Op->EndLoc = Loc;
1105     return Op;
1106   }
1107 
1108   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1109                                         StringRef Str, SMLoc Loc,
1110                                         bool HasExplicitEncodingSize = true) {
         // Factory for an operand of kind Token covering the text Str.
         // Only Str.data() and Str.size() are recorded; the characters are not
         // copied here, so the buffer behind Str presumably has to outlive the
         // operand (TODO confirm against the Tok field declaration).
         // Loc is used for both StartLoc and EndLoc.
         // NOTE(review): HasExplicitEncodingSize is accepted but never read in
         // this body.
1111     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1112     Res->Tok.Data = Str.data();
1113     Res->Tok.Length = Str.size();
1114     Res->StartLoc = Loc;
1115     Res->EndLoc = Loc;
1116     return Res;
1117   }
1118 
1119   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1120                                       unsigned RegNo, SMLoc S,
1121                                       SMLoc E) {
         // Factory for an operand of kind Register.
         //   RegNo : stored as Reg.RegNo.
         //   S, E  : stored as StartLoc and EndLoc respectively.
         // Reg.Mods starts as a default-constructed Modifiers.
1122     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1123     Op->Reg.RegNo = RegNo;
1124     Op->Reg.Mods = Modifiers();
1125     Op->StartLoc = S;
1126     Op->EndLoc = E;
1127     return Op;
1128   }
1129 
1130   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1131                                        const class MCExpr *Expr, SMLoc S) {
         // Factory for an operand of kind Expression. The Expr pointer is stored
         // as given (no copy is made here), and S is used for both StartLoc and
         // EndLoc.
1132     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1133     Op->Expr = Expr;
1134     Op->StartLoc = S;
1135     Op->EndLoc = S;
1136     return Op;
1137   }
1138 };
1139 
1140 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
       // Streams the three modifier fields as labelled values on one line and
       // returns OS so that calls can be chained. Mods is taken by value.
       // NOTE(review): the neg label is followed by a space while the abs and
       // sext labels are not; the text is left as is because it is runtime
       // output.
1141   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1142   return OS;
1143 }
1144 
1145 //===----------------------------------------------------------------------===//
1146 // AsmParser
1147 //===----------------------------------------------------------------------===//
1148 
1149 // Holds info related to the current kernel, e.g. count of SGPRs used.
1150 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1151 // .amdgpu_hsa_kernel or at EOF.
1152 class KernelScopeInfo {
1153   int SgprIndexUnusedMin = -1;
1154   int VgprIndexUnusedMin = -1;
1155   int AgprIndexUnusedMin = -1;
1156   MCContext *Ctx = nullptr;
1157   MCSubtargetInfo const *MSTI = nullptr;
1158 
1159   void usesSgprAt(int i) {
1160     if (i >= SgprIndexUnusedMin) {
1161       SgprIndexUnusedMin = ++i;
1162       if (Ctx) {
1163         MCSymbol* const Sym =
1164           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1165         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1166       }
1167     }
1168   }
1169 
1170   void usesVgprAt(int i) {
1171     if (i >= VgprIndexUnusedMin) {
1172       VgprIndexUnusedMin = ++i;
1173       if (Ctx) {
1174         MCSymbol* const Sym =
1175           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1176         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1177                                          VgprIndexUnusedMin);
1178         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1179       }
1180     }
1181   }
1182 
1183   void usesAgprAt(int i) {
1184     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1185     if (!hasMAIInsts(*MSTI))
1186       return;
1187 
1188     if (i >= AgprIndexUnusedMin) {
1189       AgprIndexUnusedMin = ++i;
1190       if (Ctx) {
1191         MCSymbol* const Sym =
1192           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1193         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1194 
1195         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1196         MCSymbol* const vSym =
1197           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1198         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1199                                          VgprIndexUnusedMin);
1200         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1201       }
1202     }
1203   }
1204 
1205 public:
1206   KernelScopeInfo() = default;
1207 
1208   void initialize(MCContext &Context) {
1209     Ctx = &Context;
1210     MSTI = Ctx->getSubtargetInfo();
1211 
1212     usesSgprAt(SgprIndexUnusedMin = -1);
1213     usesVgprAt(VgprIndexUnusedMin = -1);
1214     if (hasMAIInsts(*MSTI)) {
1215       usesAgprAt(AgprIndexUnusedMin = -1);
1216     }
1217   }
1218 
1219   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1220                     unsigned RegWidth) {
1221     switch (RegKind) {
1222     case IS_SGPR:
1223       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1224       break;
1225     case IS_AGPR:
1226       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1227       break;
1228     case IS_VGPR:
1229       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1230       break;
1231     default:
1232       break;
1233     }
1234   }
1235 };
1236 
1237 class AMDGPUAsmParser : public MCTargetAsmParser {
1238   MCAsmParser &Parser;
1239 
1240   // Number of extra operands parsed after the first optional operand.
1241   // This may be necessary to skip hardcoded mandatory operands.
1242   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1243 
1244   unsigned ForcedEncodingSize = 0;
1245   bool ForcedDPP = false;
1246   bool ForcedSDWA = false;
1247   KernelScopeInfo KernelScope;
       // Given an initializer like the other state members above: the
       // constructor does not assign it, so without one its value would be
       // indeterminate until the first write.
1248   unsigned CPolSeen = 0;
1249 
1250   /// @name Auto-generated Match Functions
1251   /// {
1252 
1253 #define GET_ASSEMBLER_HEADER
1254 #include "AMDGPUGenAsmMatcher.inc"
1255 
1256   /// }
1257 
1258 private:
1259   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1260   bool OutOfRangeError(SMRange Range);
1261   /// Calculate VGPR/SGPR blocks required for given target, reserved
1262   /// registers, and user-specified NextFreeXGPR values.
1263   ///
1264   /// \param Features [in] Target features, used for bug corrections.
1265   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1266   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1267   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1268   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1269   /// descriptor field, if valid.
1270   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1271   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1272   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1273   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1274   /// \param VGPRBlocks [out] Result VGPR block count.
1275   /// \param SGPRBlocks [out] Result SGPR block count.
1276   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1277                           bool FlatScrUsed, bool XNACKUsed,
1278                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1279                           SMRange VGPRRange, unsigned NextFreeSGPR,
1280                           SMRange SGPRRange, unsigned &VGPRBlocks,
1281                           unsigned &SGPRBlocks);
1282   bool ParseDirectiveAMDGCNTarget();
1283   bool ParseDirectiveAMDHSAKernel();
1284   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1285   bool ParseDirectiveHSACodeObjectVersion();
1286   bool ParseDirectiveHSACodeObjectISA();
1287   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1288   bool ParseDirectiveAMDKernelCodeT();
1289   // TODO: Possibly make subtargetHasRegister const.
1290   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1291   bool ParseDirectiveAMDGPUHsaKernel();
1292 
1293   bool ParseDirectiveISAVersion();
1294   bool ParseDirectiveHSAMetadata();
1295   bool ParseDirectivePALMetadataBegin();
1296   bool ParseDirectivePALMetadata();
1297   bool ParseDirectiveAMDGPULDS();
1298 
1299   /// Common code to parse out a block of text (typically YAML) between start and
1300   /// end directives.
1301   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1302                            const char *AssemblerDirectiveEnd,
1303                            std::string &CollectString);
1304 
1305   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1306                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1307   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1308                            unsigned &RegNum, unsigned &RegWidth,
1309                            bool RestoreOnFailure = false);
1310   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1311                            unsigned &RegNum, unsigned &RegWidth,
1312                            SmallVectorImpl<AsmToken> &Tokens);
1313   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1314                            unsigned &RegWidth,
1315                            SmallVectorImpl<AsmToken> &Tokens);
1316   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1317                            unsigned &RegWidth,
1318                            SmallVectorImpl<AsmToken> &Tokens);
1319   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1320                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1321   bool ParseRegRange(unsigned& Num, unsigned& Width);
1322   unsigned getRegularReg(RegisterKind RegKind,
1323                          unsigned RegNum,
1324                          unsigned RegWidth,
1325                          SMLoc Loc);
1326 
1327   bool isRegister();
1328   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1329   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1330   void initializeGprCountSymbol(RegisterKind RegKind);
1331   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1332                              unsigned RegWidth);
1333   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1334                     bool IsAtomic, bool IsLds = false);
1335   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1336                  bool IsGdsHardcoded);
1337 
1338 public:
1339   enum AMDGPUMatchResultTy {
1340     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1341   };
1342   enum OperandMode {
1343     OperandMode_Default,
1344     OperandMode_NSA,
1345   };
1346 
1347   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1348 
1349   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1350                const MCInstrInfo &MII,
1351                const MCTargetOptions &Options)
1352       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
         // Construction steps, in order:
         //   1. register this object as an extension of the generic parser;
         //   2. if the subtarget has no feature bits set at all, toggle the
         //      southern-islands feature through copySTI();
         //   3. derive the available-feature set from the feature bits;
         //   4. define the ISA version symbols and set up register counting.
1353     MCAsmParserExtension::Initialize(Parser);
1354 
1355     if (getFeatureBits().none()) {
1356       // Set default features.
1357       copySTI().ToggleFeature("southern-islands");
1358     }
1359 
1360     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1361 
1362     {
1363       // TODO: make those pre-defined variables read-only.
1364       // Currently there is none suitable machinery in the core llvm-mc for this.
1365       // MCSymbol::isRedefinable is intended for another purpose, and
1366       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1367       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1368       MCContext &Ctx = getContext();
           // The ISA major/minor/stepping values are exported under one of two
           // symbol families: .amdgcn.gfx_generation_* when the major version
           // is at least 6 and isHsaAbiVersion3AndAbove holds, and
           // .option.machine_version_* otherwise.
1369       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1370         MCSymbol *Sym =
1371             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1372         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1373         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1374         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1375         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1376         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1377       } else {
1378         MCSymbol *Sym =
1379             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1380         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1381         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1382         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1383         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1384         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1385       }
           // Same condition as above, evaluated a second time: in the first
           // case the VGPR and SGPR count symbols are initialized, otherwise
           // the KernelScope tracker is attached to the context.
1386       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1387         initializeGprCountSymbol(IS_VGPR);
1388         initializeGprCountSymbol(IS_SGPR);
1389       } else
1390         KernelScope.initialize(getContext());
1391     }
1392   }
1393 
1394   bool hasMIMG_R128() const {
         // This method and the three after it are thin wrappers: each forwards
         // to the AMDGPU:: helper of the same name, passing this parser's
         // subtarget info (getSTI()), and returns its result unchanged.
1395     return AMDGPU::hasMIMG_R128(getSTI());
1396   }
1397 
1398   bool hasPackedD16() const {
1399     return AMDGPU::hasPackedD16(getSTI());
1400   }
1401 
1402   bool hasGFX10A16() const {
1403     return AMDGPU::hasGFX10A16(getSTI());
1404   }
1405 
1406   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1407 
1408   bool isSI() const {
         // From here through isGFX10_BEncoding: each predicate forwards to the
         // AMDGPU:: helper of the same name with this parser's subtarget info
         // (getSTI()) and returns its result unchanged.
1409     return AMDGPU::isSI(getSTI());
1410   }
1411 
1412   bool isCI() const {
1413     return AMDGPU::isCI(getSTI());
1414   }
1415 
1416   bool isVI() const {
1417     return AMDGPU::isVI(getSTI());
1418   }
1419 
1420   bool isGFX9() const {
1421     return AMDGPU::isGFX9(getSTI());
1422   }
1423 
1424   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1425   bool isGFX90A() const {
1426     return AMDGPU::isGFX90A(getSTI());
1427   }
1428 
1429   bool isGFX940() const {
1430     return AMDGPU::isGFX940(getSTI());
1431   }
1432 
1433   bool isGFX9Plus() const {
1434     return AMDGPU::isGFX9Plus(getSTI());
1435   }
1436 
1437   bool isGFX10() const {
1438     return AMDGPU::isGFX10(getSTI());
1439   }
1440 
1441   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1442 
1443   bool isGFX11() const {
1444     return AMDGPU::isGFX11(getSTI());
1445   }
1446 
1447   bool isGFX11Plus() const {
1448     return AMDGPU::isGFX11Plus(getSTI());
1449   }
1450 
1451   bool isGFX10_BEncoding() const {
1452     return AMDGPU::isGFX10_BEncoding(getSTI());
1453   }
1454 
1455   bool hasInv2PiInlineImm() const {
         // Reads a single bit of the subtarget feature set; hasFlatOffsets,
         // hasArchitectedFlatScratch and hasIntClamp below do the same for
         // their respective AMDGPU::Feature* bits.
1456     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1457   }
1458 
1459   bool hasFlatOffsets() const {
1460     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1461   }
1462 
1463   bool hasArchitectedFlatScratch() const {
1464     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1465   }
1466 
1467   bool hasSGPR102_SGPR103() const {
         // True for every subtarget except those for which isVI() or isGFX9()
         // holds.
1468     return !isVI() && !isGFX9();
1469   }
1470 
       // Same answer as isGFX10Plus().
1471   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1472 
1473   bool hasIntClamp() const {
1474     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1475   }
1476 
1477   AMDGPUTargetStreamer &getTargetStreamer() {
         // Fetches the target streamer attached to the parser's MCStreamer and
         // downcasts it with static_cast. The pointer returned by the streamer
         // is dereferenced without a null check and the cast is unchecked, so
         // callers rely on an AMDGPU target streamer being installed.
1478     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1479     return static_cast<AMDGPUTargetStreamer &>(TS);
1480   }
1481 
1482   const MCRegisterInfo *getMRI() const {
         // Returns the register info owned by the parser's MCContext. Constness
         // is cast away only to be able to call getContext(); nothing is
         // modified through the non-const pointer here.
1483     // We need this const_cast because for some reason getContext() is not const
1484     // in MCAsmParser.
1485     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1486   }
1487 
1488   const MCInstrInfo *getMII() const {
         // Returns the address of the MII member. That member is not declared in
         // the visible part of this class; presumably it is inherited from
         // MCTargetAsmParser, which receives MII in the constructor.
1489     return &MII;
1490   }
1491 
1492   const FeatureBitset &getFeatureBits() const {
         // Shorthand for the feature bitset of the current subtarget info.
1493     return getSTI().getFeatureBits();
1494   }
1495 
1496   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
       // The setters above and below store their argument unchanged in the
       // member of the matching name.
1497   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1498   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1499 
       // Getters return the stored value as is. isForcedVOP3 is derived from
       // the encoding size: it is true exactly when that size equals 64.
1500   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1501   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1502   bool isForcedDPP() const { return ForcedDPP; }
1503   bool isForcedSDWA() const { return ForcedSDWA; }
1504   ArrayRef<unsigned> getMatchedVariants() const;
1505   StringRef getMatchedVariantName() const;
1506 
1507   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1508   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1509                      bool RestoreOnFailure);
1510   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1511   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1512                                         SMLoc &EndLoc) override;
1513   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1514   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1515                                       unsigned Kind) override;
1516   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1517                                OperandVector &Operands, MCStreamer &Out,
1518                                uint64_t &ErrorInfo,
1519                                bool MatchingInlineAsm) override;
1520   bool ParseDirective(AsmToken DirectiveID) override;
1521   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1522                                     OperandMode Mode = OperandMode_Default);
1523   StringRef parseMnemonicSuffix(StringRef Name);
1524   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1525                         SMLoc NameLoc, OperandVector &Operands) override;
1526   //bool ProcessInstruction(MCInst &Inst);
1527 
1528   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1529 
1530   OperandMatchResultTy
1531   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1532                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1533                      bool (*ConvertResult)(int64_t &) = nullptr);
1534 
1535   OperandMatchResultTy
1536   parseOperandArrayWithPrefix(const char *Prefix,
1537                               OperandVector &Operands,
1538                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1539                               bool (*ConvertResult)(int64_t&) = nullptr);
1540 
1541   OperandMatchResultTy
1542   parseNamedBit(StringRef Name, OperandVector &Operands,
1543                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1544   OperandMatchResultTy parseCPol(OperandVector &Operands);
1545   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1546                                              StringRef &Value,
1547                                              SMLoc &StringLoc);
1548 
1549   bool isModifier();
1550   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1551   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1552   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1553   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1554   bool parseSP3NegModifier();
1555   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1556   OperandMatchResultTy parseReg(OperandVector &Operands);
1557   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1558   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1559   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1560   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1561   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1562   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1563   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1564   OperandMatchResultTy parseUfmt(int64_t &Format);
1565   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1566   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1567   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1568   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1569   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1570   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1571   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1572 
1573   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1574   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
       // cvtDS (above) and cvtDSGds (below) both forward to cvtDSImpl; they
       // differ only in the IsGdsHardcoded argument: false for cvtDS, true for
       // cvtDSGds.
1575   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1576   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1577 
1578   bool parseCnt(int64_t &IntVal);
1579   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1580 
1581   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1582   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1583   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1584 
1585   bool parseDelay(int64_t &Delay);
1586   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1587 
1588   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1589 
1590 private:
1591   struct OperandInfoTy {
         // Small record passed by reference to the sendmsg and hwreg body
         // parsers and validators declared right after it.
         //   Loc        : source location; default-constructed by the
         //                constructor below.
         //   Id         : numeric value; the only field set by the constructor.
         //   IsSymbolic : starts false; presumably set when the value was
         //                written as a name - TODO confirm in the parsers.
         //   IsDefined  : starts false; presumably set when the field was
         //                present in the source - TODO confirm in the parsers.
1592     SMLoc Loc;
1593     int64_t Id;
1594     bool IsSymbolic = false;
1595     bool IsDefined = false;
1596 
         // Not explicit, so an int64_t converts implicitly to OperandInfoTy.
1597     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1598   };
1599 
1600   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1601   bool validateSendMsg(const OperandInfoTy &Msg,
1602                        const OperandInfoTy &Op,
1603                        const OperandInfoTy &Stream);
1604 
1605   bool parseHwregBody(OperandInfoTy &HwReg,
1606                       OperandInfoTy &Offset,
1607                       OperandInfoTy &Width);
1608   bool validateHwreg(const OperandInfoTy &HwReg,
1609                      const OperandInfoTy &Offset,
1610                      const OperandInfoTy &Width);
1611 
1612   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1613   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1614   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1615 
1616   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1617                       const OperandVector &Operands) const;
1618   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1619   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1620   SMLoc getLitLoc(const OperandVector &Operands) const;
1621   SMLoc getConstLoc(const OperandVector &Operands) const;
1622 
1623   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1624   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1625   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1626   bool validateSOPLiteral(const MCInst &Inst) const;
1627   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1628   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1629   bool validateIntClampSupported(const MCInst &Inst);
1630   bool validateMIMGAtomicDMask(const MCInst &Inst);
1631   bool validateMIMGGatherDMask(const MCInst &Inst);
1632   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1633   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1634   bool validateMIMGAddrSize(const MCInst &Inst);
1635   bool validateMIMGD16(const MCInst &Inst);
1636   bool validateMIMGDim(const MCInst &Inst);
1637   bool validateMIMGMSAA(const MCInst &Inst);
1638   bool validateOpSel(const MCInst &Inst);
1639   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1640   bool validateVccOperand(unsigned Reg) const;
1641   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1642   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1643   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1644   bool validateAGPRLdSt(const MCInst &Inst) const;
1645   bool validateVGPRAlign(const MCInst &Inst) const;
1646   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1647   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1648   bool validateDivScale(const MCInst &Inst);
1649   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1650                              const SMLoc &IDLoc);
1651   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1652                           const SMLoc &IDLoc);
1653   bool validateExeczVcczOperands(const OperandVector &Operands);
1654   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1655   unsigned getConstantBusLimit(unsigned Opcode) const;
1656   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1657   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1658   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1659 
1660   bool isSupportedMnemo(StringRef Mnemo,
1661                         const FeatureBitset &FBS);
1662   bool isSupportedMnemo(StringRef Mnemo,
1663                         const FeatureBitset &FBS,
1664                         ArrayRef<unsigned> Variants);
1665   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1666 
1667   bool isId(const StringRef Id) const;
1668   bool isId(const AsmToken &Token, const StringRef Id) const;
1669   bool isToken(const AsmToken::TokenKind Kind) const;
1670   bool trySkipId(const StringRef Id);
1671   bool trySkipId(const StringRef Pref, const StringRef Id);
1672   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1673   bool trySkipToken(const AsmToken::TokenKind Kind);
1674   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1675   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1676   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1677 
1678   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1679   AsmToken::TokenKind getTokenKind() const;
1680   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1681   bool parseExpr(OperandVector &Operands);
1682   StringRef getTokenStr() const;
1683   AsmToken peekToken(bool ShouldSkipSpace = true);
1684   AsmToken getToken() const;
1685   SMLoc getLoc() const;
1686   void lex();
1687 
1688 public:
1689   void onBeginOfFile() override;
1690 
1691   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1692   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1693 
1694   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1695   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1696   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1697   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1698   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1699   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1700 
1701   bool parseSwizzleOperand(int64_t &Op,
1702                            const unsigned MinVal,
1703                            const unsigned MaxVal,
1704                            const StringRef ErrMsg,
1705                            SMLoc &Loc);
1706   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1707                             const unsigned MinVal,
1708                             const unsigned MaxVal,
1709                             const StringRef ErrMsg);
1710   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1711   bool parseSwizzleOffset(int64_t &Imm);
1712   bool parseSwizzleMacro(int64_t &Imm);
1713   bool parseSwizzleQuadPerm(int64_t &Imm);
1714   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1715   bool parseSwizzleBroadcast(int64_t &Imm);
1716   bool parseSwizzleSwap(int64_t &Imm);
1717   bool parseSwizzleReverse(int64_t &Imm);
1718 
1719   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1720   int64_t parseGPRIdxMacro();
1721 
1722   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
       // All three cvtMubuf* converters forward to cvtMubufImpl and differ only
       // in its flags: cvtMubuf passes IsAtomic = false, cvtMubufAtomic passes
       // IsAtomic = true, and cvtMubufLds passes IsAtomic = false together with
       // IsLds = true (IsLds otherwise keeps its default of false).
1723   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1724   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1725   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1726 
1727   AMDGPUOperand::Ptr defaultCPol() const;
1728 
1729   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1730   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1731   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1732   AMDGPUOperand::Ptr defaultFlatOffset() const;
1733 
1734   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1735 
1736   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1737                OptionalImmIndexMap &OptionalIdx);
1738   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1739   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1740   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1741   void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1742   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1743                     OptionalImmIndexMap &OptionalIdx);
1744   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1745                 OptionalImmIndexMap &OptionalIdx);
1746 
1747   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1748   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1749 
1750   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1751                bool IsAtomic = false);
1752   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1753   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1754 
1755   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1756 
1757   bool parseDimId(unsigned &Encoding);
1758   OperandMatchResultTy parseDim(OperandVector &Operands);
1759   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1760   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1761   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1762   int64_t parseDPPCtrlSel(StringRef Ctrl);
1763   int64_t parseDPPCtrlPerm();
1764   AMDGPUOperand::Ptr defaultRowMask() const;
1765   AMDGPUOperand::Ptr defaultBankMask() const;
1766   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1767   AMDGPUOperand::Ptr defaultFI() const;
1768   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1769   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1770     cvtDPP(Inst, Operands, true);
1771   }
1772   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1773                   bool IsDPP8 = false);
1774   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1775     cvtVOP3DPP(Inst, Operands, true);
1776   }
1777 
1778   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1779                                     AMDGPUOperand::ImmTy Type);
1780   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1781   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1782   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1783   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1784   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1785   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1786   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1787                uint64_t BasicInstType,
1788                bool SkipDstVcc = false,
1789                bool SkipSrcVcc = false);
1790 
1791   AMDGPUOperand::Ptr defaultBLGP() const;
1792   AMDGPUOperand::Ptr defaultCBSZ() const;
1793   AMDGPUOperand::Ptr defaultABID() const;
1794 
1795   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1796   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1797 
1798   AMDGPUOperand::Ptr defaultWaitVDST() const;
1799   AMDGPUOperand::Ptr defaultWaitEXP() const;
1800   OperandMatchResultTy parseVOPD(OperandVector &Operands);
1801 };
1802 
1803 struct OptionalOperand {
1804   const char *Name;
1805   AMDGPUOperand::ImmTy Type;
1806   bool IsBit;
1807   bool (*ConvertResult)(int64_t&);
1808 };
1809 
1810 } // end anonymous namespace
1811 
1812 // May be called with integer type with equivalent bitwidth.
1813 static const fltSemantics *getFltSemantics(unsigned Size) {
1814   switch (Size) {
1815   case 4:
1816     return &APFloat::IEEEsingle();
1817   case 8:
1818     return &APFloat::IEEEdouble();
1819   case 2:
1820     return &APFloat::IEEEhalf();
1821   default:
1822     llvm_unreachable("unsupported fp type");
1823   }
1824 }
1825 
1826 static const fltSemantics *getFltSemantics(MVT VT) {
1827   return getFltSemantics(VT.getSizeInBits() / 8);
1828 }
1829 
1830 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1831   switch (OperandType) {
1832   case AMDGPU::OPERAND_REG_IMM_INT32:
1833   case AMDGPU::OPERAND_REG_IMM_FP32:
1834   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1835   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1836   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1837   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1838   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1839   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1840   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1841   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1842   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1843   case AMDGPU::OPERAND_KIMM32:
1844     return &APFloat::IEEEsingle();
1845   case AMDGPU::OPERAND_REG_IMM_INT64:
1846   case AMDGPU::OPERAND_REG_IMM_FP64:
1847   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1848   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1849   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1850     return &APFloat::IEEEdouble();
1851   case AMDGPU::OPERAND_REG_IMM_INT16:
1852   case AMDGPU::OPERAND_REG_IMM_FP16:
1853   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1854   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1855   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1856   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1857   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1858   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1859   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1860   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1861   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1862   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1863   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1864   case AMDGPU::OPERAND_KIMM16:
1865     return &APFloat::IEEEhalf();
1866   default:
1867     llvm_unreachable("unsupported fp type");
1868   }
1869 }
1870 
1871 //===----------------------------------------------------------------------===//
1872 // Operand
1873 //===----------------------------------------------------------------------===//
1874 
1875 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1876   bool Lost;
1877 
1878   // Convert literal to single precision
1879   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1880                                                APFloat::rmNearestTiesToEven,
1881                                                &Lost);
1882   // We allow precision lost but not overflow or underflow
1883   if (Status != APFloat::opOK &&
1884       Lost &&
1885       ((Status & APFloat::opOverflow)  != 0 ||
1886        (Status & APFloat::opUnderflow) != 0)) {
1887     return false;
1888   }
1889 
1890   return true;
1891 }
1892 
1893 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1894   return isUIntN(Size, Val) || isIntN(Size, Val);
1895 }
1896 
1897 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1898   if (VT.getScalarType() == MVT::i16) {
1899     // FP immediate values are broken.
1900     return isInlinableIntLiteral(Val);
1901   }
1902 
1903   // f16/v2f16 operands work correctly for all values.
1904   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1905 }
1906 
1907 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1908 
1909   // This is a hack to enable named inline values like
1910   // shared_base with both 32-bit and 64-bit operands.
1911   // Note that these values are defined as
1912   // 32-bit operands only.
1913   if (isInlineValue()) {
1914     return true;
1915   }
1916 
1917   if (!isImmTy(ImmTyNone)) {
1918     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1919     return false;
1920   }
1921   // TODO: We should avoid using host float here. It would be better to
1922   // check the float bit values which is what a few other places do.
1923   // We've had bot failures before due to weird NaN support on mips hosts.
1924 
1925   APInt Literal(64, Imm.Val);
1926 
1927   if (Imm.IsFPImm) { // We got fp literal token
1928     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1929       return AMDGPU::isInlinableLiteral64(Imm.Val,
1930                                           AsmParser->hasInv2PiInlineImm());
1931     }
1932 
1933     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1934     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1935       return false;
1936 
1937     if (type.getScalarSizeInBits() == 16) {
1938       return isInlineableLiteralOp16(
1939         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1940         type, AsmParser->hasInv2PiInlineImm());
1941     }
1942 
1943     // Check if single precision literal is inlinable
1944     return AMDGPU::isInlinableLiteral32(
1945       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1946       AsmParser->hasInv2PiInlineImm());
1947   }
1948 
1949   // We got int literal token.
1950   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1951     return AMDGPU::isInlinableLiteral64(Imm.Val,
1952                                         AsmParser->hasInv2PiInlineImm());
1953   }
1954 
1955   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1956     return false;
1957   }
1958 
1959   if (type.getScalarSizeInBits() == 16) {
1960     return isInlineableLiteralOp16(
1961       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1962       type, AsmParser->hasInv2PiInlineImm());
1963   }
1964 
1965   return AMDGPU::isInlinableLiteral32(
1966     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1967     AsmParser->hasInv2PiInlineImm());
1968 }
1969 
1970 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1971   // Check that this immediate can be added as literal
1972   if (!isImmTy(ImmTyNone)) {
1973     return false;
1974   }
1975 
1976   if (!Imm.IsFPImm) {
1977     // We got int literal token.
1978 
1979     if (type == MVT::f64 && hasFPModifiers()) {
1980       // Cannot apply fp modifiers to int literals preserving the same semantics
1981       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1982       // disable these cases.
1983       return false;
1984     }
1985 
1986     unsigned Size = type.getSizeInBits();
1987     if (Size == 64)
1988       Size = 32;
1989 
1990     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1991     // types.
1992     return isSafeTruncation(Imm.Val, Size);
1993   }
1994 
1995   // We got fp literal token
1996   if (type == MVT::f64) { // Expected 64-bit fp operand
1997     // We would set low 64-bits of literal to zeroes but we accept this literals
1998     return true;
1999   }
2000 
2001   if (type == MVT::i64) { // Expected 64-bit int operand
2002     // We don't allow fp literals in 64-bit integer instructions. It is
2003     // unclear how we should encode them.
2004     return false;
2005   }
2006 
2007   // We allow fp literals with f16x2 operands assuming that the specified
2008   // literal goes into the lower half and the upper half is zero. We also
2009   // require that the literal may be losslessly converted to f16.
2010   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2011                      (type == MVT::v2i16)? MVT::i16 :
2012                      (type == MVT::v2f32)? MVT::f32 : type;
2013 
2014   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2015   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2016 }
2017 
2018 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2019   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2020 }
2021 
2022 bool AMDGPUOperand::isVRegWithInputMods() const {
2023   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2024          // GFX90A allows DPP on 64-bit operands.
2025          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2026           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2027 }
2028 
2029 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2030   if (AsmParser->isVI())
2031     return isVReg32();
2032   else if (AsmParser->isGFX9Plus())
2033     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2034   else
2035     return false;
2036 }
2037 
2038 bool AMDGPUOperand::isSDWAFP16Operand() const {
2039   return isSDWAOperand(MVT::f16);
2040 }
2041 
2042 bool AMDGPUOperand::isSDWAFP32Operand() const {
2043   return isSDWAOperand(MVT::f32);
2044 }
2045 
2046 bool AMDGPUOperand::isSDWAInt16Operand() const {
2047   return isSDWAOperand(MVT::i16);
2048 }
2049 
2050 bool AMDGPUOperand::isSDWAInt32Operand() const {
2051   return isSDWAOperand(MVT::i32);
2052 }
2053 
2054 bool AMDGPUOperand::isBoolReg() const {
2055   auto FB = AsmParser->getFeatureBits();
2056   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2057                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2058 }
2059 
2060 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2061 {
2062   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2063   assert(Size == 2 || Size == 4 || Size == 8);
2064 
2065   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2066 
2067   if (Imm.Mods.Abs) {
2068     Val &= ~FpSignMask;
2069   }
2070   if (Imm.Mods.Neg) {
2071     Val ^= FpSignMask;
2072   }
2073 
2074   return Val;
2075 }
2076 
2077 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2078   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2079                              Inst.getNumOperands())) {
2080     addLiteralImmOperand(Inst, Imm.Val,
2081                          ApplyModifiers &
2082                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2083   } else {
2084     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2085     Inst.addOperand(MCOperand::createImm(Imm.Val));
2086     setImmKindNone();
2087   }
2088 }
2089 
2090 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2091   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2092   auto OpNum = Inst.getNumOperands();
2093   // Check that this operand accepts literals
2094   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2095 
2096   if (ApplyModifiers) {
2097     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2098     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2099     Val = applyInputFPModifiers(Val, Size);
2100   }
2101 
2102   APInt Literal(64, Val);
2103   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2104 
2105   if (Imm.IsFPImm) { // We got fp literal token
2106     switch (OpTy) {
2107     case AMDGPU::OPERAND_REG_IMM_INT64:
2108     case AMDGPU::OPERAND_REG_IMM_FP64:
2109     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2110     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2111     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2112       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2113                                        AsmParser->hasInv2PiInlineImm())) {
2114         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2115         setImmKindConst();
2116         return;
2117       }
2118 
2119       // Non-inlineable
2120       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2121         // For fp operands we check if low 32 bits are zeros
2122         if (Literal.getLoBits(32) != 0) {
2123           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2124           "Can't encode literal as exact 64-bit floating-point operand. "
2125           "Low 32-bits will be set to zero");
2126         }
2127 
2128         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2129         setImmKindLiteral();
2130         return;
2131       }
2132 
2133       // We don't allow fp literals in 64-bit integer instructions. It is
2134       // unclear how we should encode them. This case should be checked earlier
2135       // in predicate methods (isLiteralImm())
2136       llvm_unreachable("fp literal in 64-bit integer instruction.");
2137 
2138     case AMDGPU::OPERAND_REG_IMM_INT32:
2139     case AMDGPU::OPERAND_REG_IMM_FP32:
2140     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2141     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2142     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2143     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2144     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2145     case AMDGPU::OPERAND_REG_IMM_INT16:
2146     case AMDGPU::OPERAND_REG_IMM_FP16:
2147     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2148     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2149     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2150     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2151     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2152     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2153     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2154     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2155     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2156     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2157     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2158     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2159     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2160     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2161     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2162     case AMDGPU::OPERAND_KIMM32:
2163     case AMDGPU::OPERAND_KIMM16: {
2164       bool lost;
2165       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2166       // Convert literal to single precision
2167       FPLiteral.convert(*getOpFltSemantics(OpTy),
2168                         APFloat::rmNearestTiesToEven, &lost);
2169       // We allow precision lost but not overflow or underflow. This should be
2170       // checked earlier in isLiteralImm()
2171 
2172       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2173       Inst.addOperand(MCOperand::createImm(ImmVal));
2174       setImmKindLiteral();
2175       return;
2176     }
2177     default:
2178       llvm_unreachable("invalid operand size");
2179     }
2180 
2181     return;
2182   }
2183 
2184   // We got int literal token.
2185   // Only sign extend inline immediates.
2186   switch (OpTy) {
2187   case AMDGPU::OPERAND_REG_IMM_INT32:
2188   case AMDGPU::OPERAND_REG_IMM_FP32:
2189   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2190   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2191   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2192   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2193   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2194   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2195   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2196   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2197   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2198   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2199   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2200     if (isSafeTruncation(Val, 32) &&
2201         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2202                                      AsmParser->hasInv2PiInlineImm())) {
2203       Inst.addOperand(MCOperand::createImm(Val));
2204       setImmKindConst();
2205       return;
2206     }
2207 
2208     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2209     setImmKindLiteral();
2210     return;
2211 
2212   case AMDGPU::OPERAND_REG_IMM_INT64:
2213   case AMDGPU::OPERAND_REG_IMM_FP64:
2214   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2215   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2216   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2217     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2218       Inst.addOperand(MCOperand::createImm(Val));
2219       setImmKindConst();
2220       return;
2221     }
2222 
2223     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2224     setImmKindLiteral();
2225     return;
2226 
2227   case AMDGPU::OPERAND_REG_IMM_INT16:
2228   case AMDGPU::OPERAND_REG_IMM_FP16:
2229   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2230   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2231   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2232   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2233   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2234     if (isSafeTruncation(Val, 16) &&
2235         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2236                                      AsmParser->hasInv2PiInlineImm())) {
2237       Inst.addOperand(MCOperand::createImm(Val));
2238       setImmKindConst();
2239       return;
2240     }
2241 
2242     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2243     setImmKindLiteral();
2244     return;
2245 
2246   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2247   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2248   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2249   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2250     assert(isSafeTruncation(Val, 16));
2251     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2252                                         AsmParser->hasInv2PiInlineImm()));
2253 
2254     Inst.addOperand(MCOperand::createImm(Val));
2255     return;
2256   }
2257   case AMDGPU::OPERAND_KIMM32:
2258     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2259     setImmKindNone();
2260     return;
2261   case AMDGPU::OPERAND_KIMM16:
2262     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2263     setImmKindNone();
2264     return;
2265   default:
2266     llvm_unreachable("invalid operand size");
2267   }
2268 }
2269 
2270 template <unsigned Bitwidth>
2271 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2272   APInt Literal(64, Imm.Val);
2273   setImmKindNone();
2274 
2275   if (!Imm.IsFPImm) {
2276     // We got int literal token.
2277     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2278     return;
2279   }
2280 
2281   bool Lost;
2282   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2283   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2284                     APFloat::rmNearestTiesToEven, &Lost);
2285   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2286 }
2287 
2288 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2289   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2290 }
2291 
2292 static bool isInlineValue(unsigned Reg) {
2293   switch (Reg) {
2294   case AMDGPU::SRC_SHARED_BASE:
2295   case AMDGPU::SRC_SHARED_LIMIT:
2296   case AMDGPU::SRC_PRIVATE_BASE:
2297   case AMDGPU::SRC_PRIVATE_LIMIT:
2298   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2299     return true;
2300   case AMDGPU::SRC_VCCZ:
2301   case AMDGPU::SRC_EXECZ:
2302   case AMDGPU::SRC_SCC:
2303     return true;
2304   case AMDGPU::SGPR_NULL:
2305     return true;
2306   default:
2307     return false;
2308   }
2309 }
2310 
2311 bool AMDGPUOperand::isInlineValue() const {
2312   return isRegKind() && ::isInlineValue(getReg());
2313 }
2314 
2315 //===----------------------------------------------------------------------===//
2316 // AsmParser
2317 //===----------------------------------------------------------------------===//
2318 
2319 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2320   if (Is == IS_VGPR) {
2321     switch (RegWidth) {
2322       default: return -1;
2323       case 32:
2324         return AMDGPU::VGPR_32RegClassID;
2325       case 64:
2326         return AMDGPU::VReg_64RegClassID;
2327       case 96:
2328         return AMDGPU::VReg_96RegClassID;
2329       case 128:
2330         return AMDGPU::VReg_128RegClassID;
2331       case 160:
2332         return AMDGPU::VReg_160RegClassID;
2333       case 192:
2334         return AMDGPU::VReg_192RegClassID;
2335       case 224:
2336         return AMDGPU::VReg_224RegClassID;
2337       case 256:
2338         return AMDGPU::VReg_256RegClassID;
2339       case 512:
2340         return AMDGPU::VReg_512RegClassID;
2341       case 1024:
2342         return AMDGPU::VReg_1024RegClassID;
2343     }
2344   } else if (Is == IS_TTMP) {
2345     switch (RegWidth) {
2346       default: return -1;
2347       case 32:
2348         return AMDGPU::TTMP_32RegClassID;
2349       case 64:
2350         return AMDGPU::TTMP_64RegClassID;
2351       case 128:
2352         return AMDGPU::TTMP_128RegClassID;
2353       case 256:
2354         return AMDGPU::TTMP_256RegClassID;
2355       case 512:
2356         return AMDGPU::TTMP_512RegClassID;
2357     }
2358   } else if (Is == IS_SGPR) {
2359     switch (RegWidth) {
2360       default: return -1;
2361       case 32:
2362         return AMDGPU::SGPR_32RegClassID;
2363       case 64:
2364         return AMDGPU::SGPR_64RegClassID;
2365       case 96:
2366         return AMDGPU::SGPR_96RegClassID;
2367       case 128:
2368         return AMDGPU::SGPR_128RegClassID;
2369       case 160:
2370         return AMDGPU::SGPR_160RegClassID;
2371       case 192:
2372         return AMDGPU::SGPR_192RegClassID;
2373       case 224:
2374         return AMDGPU::SGPR_224RegClassID;
2375       case 256:
2376         return AMDGPU::SGPR_256RegClassID;
2377       case 512:
2378         return AMDGPU::SGPR_512RegClassID;
2379     }
2380   } else if (Is == IS_AGPR) {
2381     switch (RegWidth) {
2382       default: return -1;
2383       case 32:
2384         return AMDGPU::AGPR_32RegClassID;
2385       case 64:
2386         return AMDGPU::AReg_64RegClassID;
2387       case 96:
2388         return AMDGPU::AReg_96RegClassID;
2389       case 128:
2390         return AMDGPU::AReg_128RegClassID;
2391       case 160:
2392         return AMDGPU::AReg_160RegClassID;
2393       case 192:
2394         return AMDGPU::AReg_192RegClassID;
2395       case 224:
2396         return AMDGPU::AReg_224RegClassID;
2397       case 256:
2398         return AMDGPU::AReg_256RegClassID;
2399       case 512:
2400         return AMDGPU::AReg_512RegClassID;
2401       case 1024:
2402         return AMDGPU::AReg_1024RegClassID;
2403     }
2404   }
2405   return -1;
2406 }
2407 
2408 static unsigned getSpecialRegForName(StringRef RegName) {
2409   return StringSwitch<unsigned>(RegName)
2410     .Case("exec", AMDGPU::EXEC)
2411     .Case("vcc", AMDGPU::VCC)
2412     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2413     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2414     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2415     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2416     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2417     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2418     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2419     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2420     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2421     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2422     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2423     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2424     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2425     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2426     .Case("m0", AMDGPU::M0)
2427     .Case("vccz", AMDGPU::SRC_VCCZ)
2428     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2429     .Case("execz", AMDGPU::SRC_EXECZ)
2430     .Case("src_execz", AMDGPU::SRC_EXECZ)
2431     .Case("scc", AMDGPU::SRC_SCC)
2432     .Case("src_scc", AMDGPU::SRC_SCC)
2433     .Case("tba", AMDGPU::TBA)
2434     .Case("tma", AMDGPU::TMA)
2435     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2436     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2437     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2438     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2439     .Case("vcc_lo", AMDGPU::VCC_LO)
2440     .Case("vcc_hi", AMDGPU::VCC_HI)
2441     .Case("exec_lo", AMDGPU::EXEC_LO)
2442     .Case("exec_hi", AMDGPU::EXEC_HI)
2443     .Case("tma_lo", AMDGPU::TMA_LO)
2444     .Case("tma_hi", AMDGPU::TMA_HI)
2445     .Case("tba_lo", AMDGPU::TBA_LO)
2446     .Case("tba_hi", AMDGPU::TBA_HI)
2447     .Case("pc", AMDGPU::PC_REG)
2448     .Case("null", AMDGPU::SGPR_NULL)
2449     .Default(AMDGPU::NoRegister);
2450 }
2451 
2452 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2453                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2454   auto R = parseRegister();
2455   if (!R) return true;
2456   assert(R->isReg());
2457   RegNo = R->getReg();
2458   StartLoc = R->getStartLoc();
2459   EndLoc = R->getEndLoc();
2460   return false;
2461 }
2462 
2463 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2464                                     SMLoc &EndLoc) {
2465   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2466 }
2467 
2468 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2469                                                        SMLoc &StartLoc,
2470                                                        SMLoc &EndLoc) {
2471   bool Result =
2472       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2473   bool PendingErrors = getParser().hasPendingError();
2474   getParser().clearPendingErrors();
2475   if (PendingErrors)
2476     return MatchOperand_ParseFail;
2477   if (Result)
2478     return MatchOperand_NoMatch;
2479   return MatchOperand_Success;
2480 }
2481 
2482 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2483                                             RegisterKind RegKind, unsigned Reg1,
2484                                             SMLoc Loc) {
2485   switch (RegKind) {
2486   case IS_SPECIAL:
2487     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2488       Reg = AMDGPU::EXEC;
2489       RegWidth = 64;
2490       return true;
2491     }
2492     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2493       Reg = AMDGPU::FLAT_SCR;
2494       RegWidth = 64;
2495       return true;
2496     }
2497     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2498       Reg = AMDGPU::XNACK_MASK;
2499       RegWidth = 64;
2500       return true;
2501     }
2502     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2503       Reg = AMDGPU::VCC;
2504       RegWidth = 64;
2505       return true;
2506     }
2507     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2508       Reg = AMDGPU::TBA;
2509       RegWidth = 64;
2510       return true;
2511     }
2512     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2513       Reg = AMDGPU::TMA;
2514       RegWidth = 64;
2515       return true;
2516     }
2517     Error(Loc, "register does not fit in the list");
2518     return false;
2519   case IS_VGPR:
2520   case IS_SGPR:
2521   case IS_AGPR:
2522   case IS_TTMP:
2523     if (Reg1 != Reg + RegWidth / 32) {
2524       Error(Loc, "registers in a list must have consecutive indices");
2525       return false;
2526     }
2527     RegWidth += 32;
2528     return true;
2529   default:
2530     llvm_unreachable("unexpected register kind");
2531   }
2532 }
2533 
2534 struct RegInfo {
2535   StringLiteral Name;
2536   RegisterKind Kind;
2537 };
2538 
2539 static constexpr RegInfo RegularRegisters[] = {
2540   {{"v"},    IS_VGPR},
2541   {{"s"},    IS_SGPR},
2542   {{"ttmp"}, IS_TTMP},
2543   {{"acc"},  IS_AGPR},
2544   {{"a"},    IS_AGPR},
2545 };
2546 
2547 static bool isRegularReg(RegisterKind Kind) {
2548   return Kind == IS_VGPR ||
2549          Kind == IS_SGPR ||
2550          Kind == IS_TTMP ||
2551          Kind == IS_AGPR;
2552 }
2553 
2554 static const RegInfo* getRegularRegInfo(StringRef Str) {
2555   for (const RegInfo &Reg : RegularRegisters)
2556     if (Str.startswith(Reg.Name))
2557       return &Reg;
2558   return nullptr;
2559 }
2560 
2561 static bool getRegNum(StringRef Str, unsigned& Num) {
2562   return !Str.getAsInteger(10, Num);
2563 }
2564 
2565 bool
2566 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2567                             const AsmToken &NextToken) const {
2568 
2569   // A list of consecutive registers: [s0,s1,s2,s3]
2570   if (Token.is(AsmToken::LBrac))
2571     return true;
2572 
2573   if (!Token.is(AsmToken::Identifier))
2574     return false;
2575 
2576   // A single register like s0 or a range of registers like s[0:1]
2577 
2578   StringRef Str = Token.getString();
2579   const RegInfo *Reg = getRegularRegInfo(Str);
2580   if (Reg) {
2581     StringRef RegName = Reg->Name;
2582     StringRef RegSuffix = Str.substr(RegName.size());
2583     if (!RegSuffix.empty()) {
2584       unsigned Num;
2585       // A single register with an index: rXX
2586       if (getRegNum(RegSuffix, Num))
2587         return true;
2588     } else {
2589       // A range of registers: r[XX:YY].
2590       if (NextToken.is(AsmToken::LBrac))
2591         return true;
2592     }
2593   }
2594 
2595   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2596 }
2597 
2598 bool
2599 AMDGPUAsmParser::isRegister()
2600 {
2601   return isRegister(getToken(), peekToken());
2602 }
2603 
2604 unsigned
2605 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2606                                unsigned RegNum,
2607                                unsigned RegWidth,
2608                                SMLoc Loc) {
2609 
2610   assert(isRegularReg(RegKind));
2611 
2612   unsigned AlignSize = 1;
2613   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2614     // SGPR and TTMP registers must be aligned.
2615     // Max required alignment is 4 dwords.
2616     AlignSize = std::min(RegWidth / 32, 4u);
2617   }
2618 
2619   if (RegNum % AlignSize != 0) {
2620     Error(Loc, "invalid register alignment");
2621     return AMDGPU::NoRegister;
2622   }
2623 
2624   unsigned RegIdx = RegNum / AlignSize;
2625   int RCID = getRegClass(RegKind, RegWidth);
2626   if (RCID == -1) {
2627     Error(Loc, "invalid or unsupported register size");
2628     return AMDGPU::NoRegister;
2629   }
2630 
2631   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2632   const MCRegisterClass RC = TRI->getRegClass(RCID);
2633   if (RegIdx >= RC.getNumRegs()) {
2634     Error(Loc, "register index is out of range");
2635     return AMDGPU::NoRegister;
2636   }
2637 
2638   return RC.getRegister(RegIdx);
2639 }
2640 
2641 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2642   int64_t RegLo, RegHi;
2643   if (!skipToken(AsmToken::LBrac, "missing register index"))
2644     return false;
2645 
2646   SMLoc FirstIdxLoc = getLoc();
2647   SMLoc SecondIdxLoc;
2648 
2649   if (!parseExpr(RegLo))
2650     return false;
2651 
2652   if (trySkipToken(AsmToken::Colon)) {
2653     SecondIdxLoc = getLoc();
2654     if (!parseExpr(RegHi))
2655       return false;
2656   } else {
2657     RegHi = RegLo;
2658   }
2659 
2660   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2661     return false;
2662 
2663   if (!isUInt<32>(RegLo)) {
2664     Error(FirstIdxLoc, "invalid register index");
2665     return false;
2666   }
2667 
2668   if (!isUInt<32>(RegHi)) {
2669     Error(SecondIdxLoc, "invalid register index");
2670     return false;
2671   }
2672 
2673   if (RegLo > RegHi) {
2674     Error(FirstIdxLoc, "first register index should not exceed second index");
2675     return false;
2676   }
2677 
2678   Num = static_cast<unsigned>(RegLo);
2679   RegWidth = 32 * ((RegHi - RegLo) + 1);
2680   return true;
2681 }
2682 
2683 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2684                                           unsigned &RegNum, unsigned &RegWidth,
2685                                           SmallVectorImpl<AsmToken> &Tokens) {
2686   assert(isToken(AsmToken::Identifier));
2687   unsigned Reg = getSpecialRegForName(getTokenStr());
2688   if (Reg) {
2689     RegNum = 0;
2690     RegWidth = 32;
2691     RegKind = IS_SPECIAL;
2692     Tokens.push_back(getToken());
2693     lex(); // skip register name
2694   }
2695   return Reg;
2696 }
2697 
2698 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2699                                           unsigned &RegNum, unsigned &RegWidth,
2700                                           SmallVectorImpl<AsmToken> &Tokens) {
2701   assert(isToken(AsmToken::Identifier));
2702   StringRef RegName = getTokenStr();
2703   auto Loc = getLoc();
2704 
2705   const RegInfo *RI = getRegularRegInfo(RegName);
2706   if (!RI) {
2707     Error(Loc, "invalid register name");
2708     return AMDGPU::NoRegister;
2709   }
2710 
2711   Tokens.push_back(getToken());
2712   lex(); // skip register name
2713 
2714   RegKind = RI->Kind;
2715   StringRef RegSuffix = RegName.substr(RI->Name.size());
2716   if (!RegSuffix.empty()) {
2717     // Single 32-bit register: vXX.
2718     if (!getRegNum(RegSuffix, RegNum)) {
2719       Error(Loc, "invalid register index");
2720       return AMDGPU::NoRegister;
2721     }
2722     RegWidth = 32;
2723   } else {
2724     // Range of registers: v[XX:YY]. ":YY" is optional.
2725     if (!ParseRegRange(RegNum, RegWidth))
2726       return AMDGPU::NoRegister;
2727   }
2728 
2729   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2730 }
2731 
2732 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2733                                        unsigned &RegWidth,
2734                                        SmallVectorImpl<AsmToken> &Tokens) {
2735   unsigned Reg = AMDGPU::NoRegister;
2736   auto ListLoc = getLoc();
2737 
2738   if (!skipToken(AsmToken::LBrac,
2739                  "expected a register or a list of registers")) {
2740     return AMDGPU::NoRegister;
2741   }
2742 
2743   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2744 
2745   auto Loc = getLoc();
2746   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2747     return AMDGPU::NoRegister;
2748   if (RegWidth != 32) {
2749     Error(Loc, "expected a single 32-bit register");
2750     return AMDGPU::NoRegister;
2751   }
2752 
2753   for (; trySkipToken(AsmToken::Comma); ) {
2754     RegisterKind NextRegKind;
2755     unsigned NextReg, NextRegNum, NextRegWidth;
2756     Loc = getLoc();
2757 
2758     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2759                              NextRegNum, NextRegWidth,
2760                              Tokens)) {
2761       return AMDGPU::NoRegister;
2762     }
2763     if (NextRegWidth != 32) {
2764       Error(Loc, "expected a single 32-bit register");
2765       return AMDGPU::NoRegister;
2766     }
2767     if (NextRegKind != RegKind) {
2768       Error(Loc, "registers in a list must be of the same kind");
2769       return AMDGPU::NoRegister;
2770     }
2771     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2772       return AMDGPU::NoRegister;
2773   }
2774 
2775   if (!skipToken(AsmToken::RBrac,
2776                  "expected a comma or a closing square bracket")) {
2777     return AMDGPU::NoRegister;
2778   }
2779 
2780   if (isRegularReg(RegKind))
2781     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2782 
2783   return Reg;
2784 }
2785 
2786 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2787                                           unsigned &RegNum, unsigned &RegWidth,
2788                                           SmallVectorImpl<AsmToken> &Tokens) {
2789   auto Loc = getLoc();
2790   Reg = AMDGPU::NoRegister;
2791 
2792   if (isToken(AsmToken::Identifier)) {
2793     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2794     if (Reg == AMDGPU::NoRegister)
2795       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2796   } else {
2797     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2798   }
2799 
2800   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2801   if (Reg == AMDGPU::NoRegister) {
2802     assert(Parser.hasPendingError());
2803     return false;
2804   }
2805 
2806   if (!subtargetHasRegister(*TRI, Reg)) {
2807     if (Reg == AMDGPU::SGPR_NULL) {
2808       Error(Loc, "'null' operand is not supported on this GPU");
2809     } else {
2810       Error(Loc, "register not available on this GPU");
2811     }
2812     return false;
2813   }
2814 
2815   return true;
2816 }
2817 
2818 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2819                                           unsigned &RegNum, unsigned &RegWidth,
2820                                           bool RestoreOnFailure /*=false*/) {
2821   Reg = AMDGPU::NoRegister;
2822 
2823   SmallVector<AsmToken, 1> Tokens;
2824   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2825     if (RestoreOnFailure) {
2826       while (!Tokens.empty()) {
2827         getLexer().UnLex(Tokens.pop_back_val());
2828       }
2829     }
2830     return true;
2831   }
2832   return false;
2833 }
2834 
2835 Optional<StringRef>
2836 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2837   switch (RegKind) {
2838   case IS_VGPR:
2839     return StringRef(".amdgcn.next_free_vgpr");
2840   case IS_SGPR:
2841     return StringRef(".amdgcn.next_free_sgpr");
2842   default:
2843     return None;
2844   }
2845 }
2846 
2847 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2848   auto SymbolName = getGprCountSymbolName(RegKind);
2849   assert(SymbolName && "initializing invalid register kind");
2850   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2851   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2852 }
2853 
2854 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2855                                             unsigned DwordRegIndex,
2856                                             unsigned RegWidth) {
2857   // Symbols are only defined for GCN targets
2858   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2859     return true;
2860 
2861   auto SymbolName = getGprCountSymbolName(RegKind);
2862   if (!SymbolName)
2863     return true;
2864   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2865 
2866   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2867   int64_t OldCount;
2868 
2869   if (!Sym->isVariable())
2870     return !Error(getLoc(),
2871                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2872   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2873     return !Error(
2874         getLoc(),
2875         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2876 
2877   if (OldCount <= NewMax)
2878     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2879 
2880   return true;
2881 }
2882 
2883 std::unique_ptr<AMDGPUOperand>
2884 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2885   const auto &Tok = getToken();
2886   SMLoc StartLoc = Tok.getLoc();
2887   SMLoc EndLoc = Tok.getEndLoc();
2888   RegisterKind RegKind;
2889   unsigned Reg, RegNum, RegWidth;
2890 
2891   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2892     return nullptr;
2893   }
2894   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2895     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2896       return nullptr;
2897   } else
2898     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2899   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2900 }
2901 
2902 OperandMatchResultTy
2903 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2904   // TODO: add syntactic sugar for 1/(2*PI)
2905 
2906   if (isRegister())
2907     return MatchOperand_NoMatch;
2908   assert(!isModifier());
2909 
2910   const auto& Tok = getToken();
2911   const auto& NextTok = peekToken();
2912   bool IsReal = Tok.is(AsmToken::Real);
2913   SMLoc S = getLoc();
2914   bool Negate = false;
2915 
2916   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2917     lex();
2918     IsReal = true;
2919     Negate = true;
2920   }
2921 
2922   if (IsReal) {
2923     // Floating-point expressions are not supported.
2924     // Can only allow floating-point literals with an
2925     // optional sign.
2926 
2927     StringRef Num = getTokenStr();
2928     lex();
2929 
2930     APFloat RealVal(APFloat::IEEEdouble());
2931     auto roundMode = APFloat::rmNearestTiesToEven;
2932     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2933       return MatchOperand_ParseFail;
2934     }
2935     if (Negate)
2936       RealVal.changeSign();
2937 
2938     Operands.push_back(
2939       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2940                                AMDGPUOperand::ImmTyNone, true));
2941 
2942     return MatchOperand_Success;
2943 
2944   } else {
2945     int64_t IntVal;
2946     const MCExpr *Expr;
2947     SMLoc S = getLoc();
2948 
2949     if (HasSP3AbsModifier) {
2950       // This is a workaround for handling expressions
2951       // as arguments of SP3 'abs' modifier, for example:
2952       //     |1.0|
2953       //     |-1|
2954       //     |1+x|
2955       // This syntax is not compatible with syntax of standard
2956       // MC expressions (due to the trailing '|').
2957       SMLoc EndLoc;
2958       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2959         return MatchOperand_ParseFail;
2960     } else {
2961       if (Parser.parseExpression(Expr))
2962         return MatchOperand_ParseFail;
2963     }
2964 
2965     if (Expr->evaluateAsAbsolute(IntVal)) {
2966       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2967     } else {
2968       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2969     }
2970 
2971     return MatchOperand_Success;
2972   }
2973 
2974   return MatchOperand_NoMatch;
2975 }
2976 
2977 OperandMatchResultTy
2978 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2979   if (!isRegister())
2980     return MatchOperand_NoMatch;
2981 
2982   if (auto R = parseRegister()) {
2983     assert(R->isReg());
2984     Operands.push_back(std::move(R));
2985     return MatchOperand_Success;
2986   }
2987   return MatchOperand_ParseFail;
2988 }
2989 
2990 OperandMatchResultTy
2991 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2992   auto res = parseReg(Operands);
2993   if (res != MatchOperand_NoMatch) {
2994     return res;
2995   } else if (isModifier()) {
2996     return MatchOperand_NoMatch;
2997   } else {
2998     return parseImm(Operands, HasSP3AbsMod);
2999   }
3000 }
3001 
3002 bool
3003 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3004   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3005     const auto &str = Token.getString();
3006     return str == "abs" || str == "neg" || str == "sext";
3007   }
3008   return false;
3009 }
3010 
3011 bool
3012 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3013   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3014 }
3015 
3016 bool
3017 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3018   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3019 }
3020 
3021 bool
3022 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3023   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3024 }
3025 
3026 // Check if this is an operand modifier or an opcode modifier
3027 // which may look like an expression but it is not. We should
3028 // avoid parsing these modifiers as expressions. Currently
3029 // recognized sequences are:
3030 //   |...|
3031 //   abs(...)
3032 //   neg(...)
3033 //   sext(...)
3034 //   -reg
3035 //   -|...|
3036 //   -abs(...)
3037 //   name:...
3038 // Note that simple opcode modifiers like 'gds' may be parsed as
3039 // expressions; this is a special case. See getExpressionAsToken.
3040 //
3041 bool
3042 AMDGPUAsmParser::isModifier() {
3043 
3044   AsmToken Tok = getToken();
3045   AsmToken NextToken[2];
3046   peekTokens(NextToken);
3047 
3048   return isOperandModifier(Tok, NextToken[0]) ||
3049          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3050          isOpcodeModifierWithVal(Tok, NextToken[0]);
3051 }
3052 
3053 // Check if the current token is an SP3 'neg' modifier.
3054 // Currently this modifier is allowed in the following context:
3055 //
3056 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3057 // 2. Before an 'abs' modifier: -abs(...)
3058 // 3. Before an SP3 'abs' modifier: -|...|
3059 //
3060 // In all other cases "-" is handled as a part
3061 // of an expression that follows the sign.
3062 //
3063 // Note: When "-" is followed by an integer literal,
3064 // this is interpreted as integer negation rather
3065 // than a floating-point NEG modifier applied to N.
3066 // Beside being contr-intuitive, such use of floating-point
3067 // NEG modifier would have resulted in different meaning
3068 // of integer literals used with VOP1/2/C and VOP3,
3069 // for example:
3070 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3071 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3072 // Negative fp literals with preceding "-" are
3073 // handled likewise for uniformity
3074 //
3075 bool
3076 AMDGPUAsmParser::parseSP3NegModifier() {
3077 
3078   AsmToken NextToken[2];
3079   peekTokens(NextToken);
3080 
3081   if (isToken(AsmToken::Minus) &&
3082       (isRegister(NextToken[0], NextToken[1]) ||
3083        NextToken[0].is(AsmToken::Pipe) ||
3084        isId(NextToken[0], "abs"))) {
3085     lex();
3086     return true;
3087   }
3088 
3089   return false;
3090 }
3091 
3092 OperandMatchResultTy
3093 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3094                                               bool AllowImm) {
3095   bool Neg, SP3Neg;
3096   bool Abs, SP3Abs;
3097   SMLoc Loc;
3098 
3099   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3100   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3101     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3102     return MatchOperand_ParseFail;
3103   }
3104 
3105   SP3Neg = parseSP3NegModifier();
3106 
3107   Loc = getLoc();
3108   Neg = trySkipId("neg");
3109   if (Neg && SP3Neg) {
3110     Error(Loc, "expected register or immediate");
3111     return MatchOperand_ParseFail;
3112   }
3113   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3114     return MatchOperand_ParseFail;
3115 
3116   Abs = trySkipId("abs");
3117   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3118     return MatchOperand_ParseFail;
3119 
3120   Loc = getLoc();
3121   SP3Abs = trySkipToken(AsmToken::Pipe);
3122   if (Abs && SP3Abs) {
3123     Error(Loc, "expected register or immediate");
3124     return MatchOperand_ParseFail;
3125   }
3126 
3127   OperandMatchResultTy Res;
3128   if (AllowImm) {
3129     Res = parseRegOrImm(Operands, SP3Abs);
3130   } else {
3131     Res = parseReg(Operands);
3132   }
3133   if (Res != MatchOperand_Success) {
3134     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3135   }
3136 
3137   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3138     return MatchOperand_ParseFail;
3139   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3140     return MatchOperand_ParseFail;
3141   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3142     return MatchOperand_ParseFail;
3143 
3144   AMDGPUOperand::Modifiers Mods;
3145   Mods.Abs = Abs || SP3Abs;
3146   Mods.Neg = Neg || SP3Neg;
3147 
3148   if (Mods.hasFPModifiers()) {
3149     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3150     if (Op.isExpr()) {
3151       Error(Op.getStartLoc(), "expected an absolute expression");
3152       return MatchOperand_ParseFail;
3153     }
3154     Op.setModifiers(Mods);
3155   }
3156   return MatchOperand_Success;
3157 }
3158 
3159 OperandMatchResultTy
3160 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3161                                                bool AllowImm) {
3162   bool Sext = trySkipId("sext");
3163   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3164     return MatchOperand_ParseFail;
3165 
3166   OperandMatchResultTy Res;
3167   if (AllowImm) {
3168     Res = parseRegOrImm(Operands);
3169   } else {
3170     Res = parseReg(Operands);
3171   }
3172   if (Res != MatchOperand_Success) {
3173     return Sext? MatchOperand_ParseFail : Res;
3174   }
3175 
3176   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3177     return MatchOperand_ParseFail;
3178 
3179   AMDGPUOperand::Modifiers Mods;
3180   Mods.Sext = Sext;
3181 
3182   if (Mods.hasIntModifiers()) {
3183     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3184     if (Op.isExpr()) {
3185       Error(Op.getStartLoc(), "expected an absolute expression");
3186       return MatchOperand_ParseFail;
3187     }
3188     Op.setModifiers(Mods);
3189   }
3190 
3191   return MatchOperand_Success;
3192 }
3193 
3194 OperandMatchResultTy
3195 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3196   return parseRegOrImmWithFPInputMods(Operands, false);
3197 }
3198 
3199 OperandMatchResultTy
3200 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3201   return parseRegOrImmWithIntInputMods(Operands, false);
3202 }
3203 
3204 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3205   auto Loc = getLoc();
3206   if (trySkipId("off")) {
3207     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3208                                                 AMDGPUOperand::ImmTyOff, false));
3209     return MatchOperand_Success;
3210   }
3211 
3212   if (!isRegister())
3213     return MatchOperand_NoMatch;
3214 
3215   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3216   if (Reg) {
3217     Operands.push_back(std::move(Reg));
3218     return MatchOperand_Success;
3219   }
3220 
3221   return MatchOperand_ParseFail;
3222 
3223 }
3224 
3225 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3226   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3227 
3228   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3229       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3230       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3231       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3232     return Match_InvalidOperand;
3233 
3234   if ((TSFlags & SIInstrFlags::VOP3) &&
3235       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3236       getForcedEncodingSize() != 64)
3237     return Match_PreferE32;
3238 
3239   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3240       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3241     // v_mac_f32/16 allow only dst_sel == DWORD;
3242     auto OpNum =
3243         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3244     const auto &Op = Inst.getOperand(OpNum);
3245     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3246       return Match_InvalidOperand;
3247     }
3248   }
3249 
3250   return Match_Success;
3251 }
3252 
3253 static ArrayRef<unsigned> getAllVariants() {
3254   static const unsigned Variants[] = {
3255     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3256     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3257     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3258   };
3259 
3260   return makeArrayRef(Variants);
3261 }
3262 
3263 // What asm variants we should check
3264 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3265   if (isForcedDPP() && isForcedVOP3()) {
3266     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3267     return makeArrayRef(Variants);
3268   }
3269   if (getForcedEncodingSize() == 32) {
3270     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3271     return makeArrayRef(Variants);
3272   }
3273 
3274   if (isForcedVOP3()) {
3275     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3276     return makeArrayRef(Variants);
3277   }
3278 
3279   if (isForcedSDWA()) {
3280     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3281                                         AMDGPUAsmVariants::SDWA9};
3282     return makeArrayRef(Variants);
3283   }
3284 
3285   if (isForcedDPP()) {
3286     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3287     return makeArrayRef(Variants);
3288   }
3289 
3290   return getAllVariants();
3291 }
3292 
3293 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3294   if (isForcedDPP() && isForcedVOP3())
3295     return "e64_dpp";
3296 
3297   if (getForcedEncodingSize() == 32)
3298     return "e32";
3299 
3300   if (isForcedVOP3())
3301     return "e64";
3302 
3303   if (isForcedSDWA())
3304     return "sdwa";
3305 
3306   if (isForcedDPP())
3307     return "dpp";
3308 
3309   return "";
3310 }
3311 
3312 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3313   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3314   const unsigned Num = Desc.getNumImplicitUses();
3315   for (unsigned i = 0; i < Num; ++i) {
3316     unsigned Reg = Desc.ImplicitUses[i];
3317     switch (Reg) {
3318     case AMDGPU::FLAT_SCR:
3319     case AMDGPU::VCC:
3320     case AMDGPU::VCC_LO:
3321     case AMDGPU::VCC_HI:
3322     case AMDGPU::M0:
3323       return Reg;
3324     default:
3325       break;
3326     }
3327   }
3328   return AMDGPU::NoRegister;
3329 }
3330 
3331 // NB: This code is correct only when used to check constant
3332 // bus limitations because GFX7 support no f16 inline constants.
3333 // Note that there are no cases when a GFX7 opcode violates
3334 // constant bus limitations due to the use of an f16 constant.
3335 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3336                                        unsigned OpIdx) const {
3337   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3338 
3339   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3340     return false;
3341   }
3342 
3343   const MCOperand &MO = Inst.getOperand(OpIdx);
3344 
3345   int64_t Val = MO.getImm();
3346   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3347 
3348   switch (OpSize) { // expected operand size
3349   case 8:
3350     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3351   case 4:
3352     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3353   case 2: {
3354     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3355     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3356         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3357         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3358       return AMDGPU::isInlinableIntLiteral(Val);
3359 
3360     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3361         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3362         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3363       return AMDGPU::isInlinableIntLiteralV216(Val);
3364 
3365     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3366         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3367         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3368       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3369 
3370     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3371   }
3372   default:
3373     llvm_unreachable("invalid operand size");
3374   }
3375 }
3376 
3377 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3378   if (!isGFX10Plus())
3379     return 1;
3380 
3381   switch (Opcode) {
3382   // 64-bit shift instructions can use only one scalar value input
3383   case AMDGPU::V_LSHLREV_B64_e64:
3384   case AMDGPU::V_LSHLREV_B64_gfx10:
3385   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3386   case AMDGPU::V_LSHRREV_B64_e64:
3387   case AMDGPU::V_LSHRREV_B64_gfx10:
3388   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3389   case AMDGPU::V_ASHRREV_I64_e64:
3390   case AMDGPU::V_ASHRREV_I64_gfx10:
3391   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3392   case AMDGPU::V_LSHL_B64_e64:
3393   case AMDGPU::V_LSHR_B64_e64:
3394   case AMDGPU::V_ASHR_I64_e64:
3395     return 1;
3396   default:
3397     return 2;
3398   }
3399 }
3400 
3401 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3402   const MCOperand &MO = Inst.getOperand(OpIdx);
3403   if (MO.isImm()) {
3404     return !isInlineConstant(Inst, OpIdx);
3405   } else if (MO.isReg()) {
3406     auto Reg = MO.getReg();
3407     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3408     auto PReg = mc2PseudoReg(Reg);
3409     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3410   } else {
3411     return true;
3412   }
3413 }
3414 
3415 bool
3416 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3417                                                 const OperandVector &Operands) {
3418   const unsigned Opcode = Inst.getOpcode();
3419   const MCInstrDesc &Desc = MII.get(Opcode);
3420   unsigned LastSGPR = AMDGPU::NoRegister;
3421   unsigned ConstantBusUseCount = 0;
3422   unsigned NumLiterals = 0;
3423   unsigned LiteralSize;
3424 
3425   if (Desc.TSFlags &
3426       (SIInstrFlags::VOPC |
3427        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3428        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3429        SIInstrFlags::SDWA)) {
3430     // Check special imm operands (used by madmk, etc)
3431     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3432       ++NumLiterals;
3433       LiteralSize = 4;
3434     }
3435 
3436     SmallDenseSet<unsigned> SGPRsUsed;
3437     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3438     if (SGPRUsed != AMDGPU::NoRegister) {
3439       SGPRsUsed.insert(SGPRUsed);
3440       ++ConstantBusUseCount;
3441     }
3442 
3443     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3444     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3445     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3446 
3447     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3448 
3449     for (int OpIdx : OpIndices) {
3450       if (OpIdx == -1) break;
3451 
3452       const MCOperand &MO = Inst.getOperand(OpIdx);
3453       if (usesConstantBus(Inst, OpIdx)) {
3454         if (MO.isReg()) {
3455           LastSGPR = mc2PseudoReg(MO.getReg());
3456           // Pairs of registers with a partial intersections like these
3457           //   s0, s[0:1]
3458           //   flat_scratch_lo, flat_scratch
3459           //   flat_scratch_lo, flat_scratch_hi
3460           // are theoretically valid but they are disabled anyway.
3461           // Note that this code mimics SIInstrInfo::verifyInstruction
3462           if (SGPRsUsed.insert(LastSGPR).second) {
3463             ++ConstantBusUseCount;
3464           }
3465         } else { // Expression or a literal
3466 
3467           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3468             continue; // special operand like VINTERP attr_chan
3469 
3470           // An instruction may use only one literal.
3471           // This has been validated on the previous step.
3472           // See validateVOPLiteral.
3473           // This literal may be used as more than one operand.
3474           // If all these operands are of the same size,
3475           // this literal counts as one scalar value.
3476           // Otherwise it counts as 2 scalar values.
3477           // See "GFX10 Shader Programming", section 3.6.2.3.
3478 
3479           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3480           if (Size < 4) Size = 4;
3481 
3482           if (NumLiterals == 0) {
3483             NumLiterals = 1;
3484             LiteralSize = Size;
3485           } else if (LiteralSize != Size) {
3486             NumLiterals = 2;
3487           }
3488         }
3489       }
3490     }
3491   }
3492   ConstantBusUseCount += NumLiterals;
3493 
3494   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3495     return true;
3496 
3497   SMLoc LitLoc = getLitLoc(Operands);
3498   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3499   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3500   Error(Loc, "invalid operand (violates constant bus restrictions)");
3501   return false;
3502 }
3503 
3504 bool
3505 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3506                                                  const OperandVector &Operands) {
3507   const unsigned Opcode = Inst.getOpcode();
3508   const MCInstrDesc &Desc = MII.get(Opcode);
3509 
3510   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3511   if (DstIdx == -1 ||
3512       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3513     return true;
3514   }
3515 
3516   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3517 
3518   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3519   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3520   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3521 
3522   assert(DstIdx != -1);
3523   const MCOperand &Dst = Inst.getOperand(DstIdx);
3524   assert(Dst.isReg());
3525 
3526   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3527 
3528   for (int SrcIdx : SrcIndices) {
3529     if (SrcIdx == -1) break;
3530     const MCOperand &Src = Inst.getOperand(SrcIdx);
3531     if (Src.isReg()) {
3532       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3533         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3534         Error(getRegLoc(SrcReg, Operands),
3535           "destination must be different than all sources");
3536         return false;
3537       }
3538     }
3539   }
3540 
3541   return true;
3542 }
3543 
3544 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3545 
3546   const unsigned Opc = Inst.getOpcode();
3547   const MCInstrDesc &Desc = MII.get(Opc);
3548 
3549   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3550     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3551     assert(ClampIdx != -1);
3552     return Inst.getOperand(ClampIdx).getImm() == 0;
3553   }
3554 
3555   return true;
3556 }
3557 
3558 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3559 
3560   const unsigned Opc = Inst.getOpcode();
3561   const MCInstrDesc &Desc = MII.get(Opc);
3562 
3563   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3564     return None;
3565 
3566   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3567   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3568   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3569 
3570   assert(VDataIdx != -1);
3571 
3572   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3573     return None;
3574 
3575   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3576   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3577   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3578   if (DMask == 0)
3579     DMask = 1;
3580 
3581   bool isPackedD16 = false;
3582   unsigned DataSize =
3583     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3584   if (hasPackedD16()) {
3585     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3586     isPackedD16 = D16Idx >= 0;
3587     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3588       DataSize = (DataSize + 1) / 2;
3589   }
3590 
3591   if ((VDataSize / 4) == DataSize + TFESize)
3592     return None;
3593 
3594   return StringRef(isPackedD16
3595                        ? "image data size does not match dmask, d16 and tfe"
3596                        : "image data size does not match dmask and tfe");
3597 }
3598 
3599 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3600   const unsigned Opc = Inst.getOpcode();
3601   const MCInstrDesc &Desc = MII.get(Opc);
3602 
3603   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3604     return true;
3605 
3606   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3607 
3608   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3609       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3610   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3611   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3612   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3613   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3614 
3615   assert(VAddr0Idx != -1);
3616   assert(SrsrcIdx != -1);
3617   assert(SrsrcIdx > VAddr0Idx);
3618 
3619   if (DimIdx == -1)
3620     return true; // intersect_ray
3621 
3622   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3623   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3624   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3625   unsigned ActualAddrSize =
3626       IsNSA ? SrsrcIdx - VAddr0Idx
3627             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3628   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3629 
3630   unsigned ExpectedAddrSize =
3631       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3632 
3633   if (!IsNSA) {
3634     if (ExpectedAddrSize > 8)
3635       ExpectedAddrSize = 16;
3636 
3637     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3638     // This provides backward compatibility for assembly created
3639     // before 160b/192b/224b types were directly supported.
3640     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3641       return true;
3642   }
3643 
3644   return ActualAddrSize == ExpectedAddrSize;
3645 }
3646 
3647 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3648 
3649   const unsigned Opc = Inst.getOpcode();
3650   const MCInstrDesc &Desc = MII.get(Opc);
3651 
3652   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3653     return true;
3654   if (!Desc.mayLoad() || !Desc.mayStore())
3655     return true; // Not atomic
3656 
3657   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3658   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3659 
3660   // This is an incomplete check because image_atomic_cmpswap
3661   // may only use 0x3 and 0xf while other atomic operations
3662   // may use 0x1 and 0x3. However these limitations are
3663   // verified when we check that dmask matches dst size.
3664   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3665 }
3666 
3667 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3668 
3669   const unsigned Opc = Inst.getOpcode();
3670   const MCInstrDesc &Desc = MII.get(Opc);
3671 
3672   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3673     return true;
3674 
3675   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3676   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3677 
3678   // GATHER4 instructions use dmask in a different fashion compared to
3679   // other MIMG instructions. The only useful DMASK values are
3680   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3681   // (red,red,red,red) etc.) The ISA document doesn't mention
3682   // this.
3683   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3684 }
3685 
3686 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3687   const unsigned Opc = Inst.getOpcode();
3688   const MCInstrDesc &Desc = MII.get(Opc);
3689 
3690   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3691     return true;
3692 
3693   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3694   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3695       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3696 
3697   if (!BaseOpcode->MSAA)
3698     return true;
3699 
3700   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3701   assert(DimIdx != -1);
3702 
3703   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3704   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3705 
3706   return DimInfo->MSAA;
3707 }
3708 
3709 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3710 {
3711   switch (Opcode) {
3712   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3713   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3714   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3715     return true;
3716   default:
3717     return false;
3718   }
3719 }
3720 
3721 // movrels* opcodes should only allow VGPRS as src0.
3722 // This is specified in .td description for vop1/vop3,
3723 // but sdwa is handled differently. See isSDWAOperand.
3724 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3725                                       const OperandVector &Operands) {
3726 
3727   const unsigned Opc = Inst.getOpcode();
3728   const MCInstrDesc &Desc = MII.get(Opc);
3729 
3730   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3731     return true;
3732 
3733   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3734   assert(Src0Idx != -1);
3735 
3736   SMLoc ErrLoc;
3737   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3738   if (Src0.isReg()) {
3739     auto Reg = mc2PseudoReg(Src0.getReg());
3740     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3741     if (!isSGPR(Reg, TRI))
3742       return true;
3743     ErrLoc = getRegLoc(Reg, Operands);
3744   } else {
3745     ErrLoc = getConstLoc(Operands);
3746   }
3747 
3748   Error(ErrLoc, "source operand must be a VGPR");
3749   return false;
3750 }
3751 
3752 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3753                                           const OperandVector &Operands) {
3754 
3755   const unsigned Opc = Inst.getOpcode();
3756 
3757   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3758     return true;
3759 
3760   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3761   assert(Src0Idx != -1);
3762 
3763   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3764   if (!Src0.isReg())
3765     return true;
3766 
3767   auto Reg = mc2PseudoReg(Src0.getReg());
3768   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3769   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3770     Error(getRegLoc(Reg, Operands),
3771           "source operand must be either a VGPR or an inline constant");
3772     return false;
3773   }
3774 
3775   return true;
3776 }
3777 
3778 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3779                                    const OperandVector &Operands) {
3780   const unsigned Opc = Inst.getOpcode();
3781   const MCInstrDesc &Desc = MII.get(Opc);
3782 
3783   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3784     return true;
3785 
3786   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3787   if (Src2Idx == -1)
3788     return true;
3789 
3790   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3791   if (!Src2.isReg())
3792     return true;
3793 
3794   MCRegister Src2Reg = Src2.getReg();
3795   MCRegister DstReg = Inst.getOperand(0).getReg();
3796   if (Src2Reg == DstReg)
3797     return true;
3798 
3799   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3800   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3801     return true;
3802 
3803   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3804     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3805           "source 2 operand must not partially overlap with dst");
3806     return false;
3807   }
3808 
3809   return true;
3810 }
3811 
3812 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3813   switch (Inst.getOpcode()) {
3814   default:
3815     return true;
3816   case V_DIV_SCALE_F32_gfx6_gfx7:
3817   case V_DIV_SCALE_F32_vi:
3818   case V_DIV_SCALE_F32_gfx10:
3819   case V_DIV_SCALE_F64_gfx6_gfx7:
3820   case V_DIV_SCALE_F64_vi:
3821   case V_DIV_SCALE_F64_gfx10:
3822     break;
3823   }
3824 
3825   // TODO: Check that src0 = src1 or src2.
3826 
3827   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3828                     AMDGPU::OpName::src2_modifiers,
3829                     AMDGPU::OpName::src2_modifiers}) {
3830     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3831             .getImm() &
3832         SISrcMods::ABS) {
3833       return false;
3834     }
3835   }
3836 
3837   return true;
3838 }
3839 
3840 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3841 
3842   const unsigned Opc = Inst.getOpcode();
3843   const MCInstrDesc &Desc = MII.get(Opc);
3844 
3845   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3846     return true;
3847 
3848   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3849   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3850     if (isCI() || isSI())
3851       return false;
3852   }
3853 
3854   return true;
3855 }
3856 
3857 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3858   const unsigned Opc = Inst.getOpcode();
3859   const MCInstrDesc &Desc = MII.get(Opc);
3860 
3861   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3862     return true;
3863 
3864   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3865   if (DimIdx < 0)
3866     return true;
3867 
3868   long Imm = Inst.getOperand(DimIdx).getImm();
3869   if (Imm < 0 || Imm >= 8)
3870     return false;
3871 
3872   return true;
3873 }
3874 
3875 static bool IsRevOpcode(const unsigned Opcode)
3876 {
3877   switch (Opcode) {
3878   case AMDGPU::V_SUBREV_F32_e32:
3879   case AMDGPU::V_SUBREV_F32_e64:
3880   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3881   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3882   case AMDGPU::V_SUBREV_F32_e32_vi:
3883   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3884   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3885   case AMDGPU::V_SUBREV_F32_e64_vi:
3886 
3887   case AMDGPU::V_SUBREV_CO_U32_e32:
3888   case AMDGPU::V_SUBREV_CO_U32_e64:
3889   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3890   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3891 
3892   case AMDGPU::V_SUBBREV_U32_e32:
3893   case AMDGPU::V_SUBBREV_U32_e64:
3894   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3895   case AMDGPU::V_SUBBREV_U32_e32_vi:
3896   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3897   case AMDGPU::V_SUBBREV_U32_e64_vi:
3898 
3899   case AMDGPU::V_SUBREV_U32_e32:
3900   case AMDGPU::V_SUBREV_U32_e64:
3901   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3902   case AMDGPU::V_SUBREV_U32_e32_vi:
3903   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3904   case AMDGPU::V_SUBREV_U32_e64_vi:
3905 
3906   case AMDGPU::V_SUBREV_F16_e32:
3907   case AMDGPU::V_SUBREV_F16_e64:
3908   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3909   case AMDGPU::V_SUBREV_F16_e32_vi:
3910   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3911   case AMDGPU::V_SUBREV_F16_e64_vi:
3912 
3913   case AMDGPU::V_SUBREV_U16_e32:
3914   case AMDGPU::V_SUBREV_U16_e64:
3915   case AMDGPU::V_SUBREV_U16_e32_vi:
3916   case AMDGPU::V_SUBREV_U16_e64_vi:
3917 
3918   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3919   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3920   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3921 
3922   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3923   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3924 
3925   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3926   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3927 
3928   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3929   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3930 
3931   case AMDGPU::V_LSHRREV_B32_e32:
3932   case AMDGPU::V_LSHRREV_B32_e64:
3933   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3934   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3935   case AMDGPU::V_LSHRREV_B32_e32_vi:
3936   case AMDGPU::V_LSHRREV_B32_e64_vi:
3937   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3938   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3939 
3940   case AMDGPU::V_ASHRREV_I32_e32:
3941   case AMDGPU::V_ASHRREV_I32_e64:
3942   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3943   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3944   case AMDGPU::V_ASHRREV_I32_e32_vi:
3945   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3946   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3947   case AMDGPU::V_ASHRREV_I32_e64_vi:
3948 
3949   case AMDGPU::V_LSHLREV_B32_e32:
3950   case AMDGPU::V_LSHLREV_B32_e64:
3951   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3952   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3953   case AMDGPU::V_LSHLREV_B32_e32_vi:
3954   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3955   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3956   case AMDGPU::V_LSHLREV_B32_e64_vi:
3957 
3958   case AMDGPU::V_LSHLREV_B16_e32:
3959   case AMDGPU::V_LSHLREV_B16_e64:
3960   case AMDGPU::V_LSHLREV_B16_e32_vi:
3961   case AMDGPU::V_LSHLREV_B16_e64_vi:
3962   case AMDGPU::V_LSHLREV_B16_gfx10:
3963 
3964   case AMDGPU::V_LSHRREV_B16_e32:
3965   case AMDGPU::V_LSHRREV_B16_e64:
3966   case AMDGPU::V_LSHRREV_B16_e32_vi:
3967   case AMDGPU::V_LSHRREV_B16_e64_vi:
3968   case AMDGPU::V_LSHRREV_B16_gfx10:
3969 
3970   case AMDGPU::V_ASHRREV_I16_e32:
3971   case AMDGPU::V_ASHRREV_I16_e64:
3972   case AMDGPU::V_ASHRREV_I16_e32_vi:
3973   case AMDGPU::V_ASHRREV_I16_e64_vi:
3974   case AMDGPU::V_ASHRREV_I16_gfx10:
3975 
3976   case AMDGPU::V_LSHLREV_B64_e64:
3977   case AMDGPU::V_LSHLREV_B64_gfx10:
3978   case AMDGPU::V_LSHLREV_B64_vi:
3979 
3980   case AMDGPU::V_LSHRREV_B64_e64:
3981   case AMDGPU::V_LSHRREV_B64_gfx10:
3982   case AMDGPU::V_LSHRREV_B64_vi:
3983 
3984   case AMDGPU::V_ASHRREV_I64_e64:
3985   case AMDGPU::V_ASHRREV_I64_gfx10:
3986   case AMDGPU::V_ASHRREV_I64_vi:
3987 
3988   case AMDGPU::V_PK_LSHLREV_B16:
3989   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3990   case AMDGPU::V_PK_LSHLREV_B16_vi:
3991 
3992   case AMDGPU::V_PK_LSHRREV_B16:
3993   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3994   case AMDGPU::V_PK_LSHRREV_B16_vi:
3995   case AMDGPU::V_PK_ASHRREV_I16:
3996   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3997   case AMDGPU::V_PK_ASHRREV_I16_vi:
3998     return true;
3999   default:
4000     return false;
4001   }
4002 }
4003 
4004 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4005 
4006   using namespace SIInstrFlags;
4007   const unsigned Opcode = Inst.getOpcode();
4008   const MCInstrDesc &Desc = MII.get(Opcode);
4009 
4010   // lds_direct register is defined so that it can be used
4011   // with 9-bit operands only. Ignore encodings which do not accept these.
4012   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4013   if ((Desc.TSFlags & Enc) == 0)
4014     return None;
4015 
4016   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4017     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4018     if (SrcIdx == -1)
4019       break;
4020     const auto &Src = Inst.getOperand(SrcIdx);
4021     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4022 
4023       if (isGFX90A() || isGFX11Plus())
4024         return StringRef("lds_direct is not supported on this GPU");
4025 
4026       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4027         return StringRef("lds_direct cannot be used with this instruction");
4028 
4029       if (SrcName != OpName::src0)
4030         return StringRef("lds_direct may be used as src0 only");
4031     }
4032   }
4033 
4034   return None;
4035 }
4036 
4037 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4038   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4039     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4040     if (Op.isFlatOffset())
4041       return Op.getStartLoc();
4042   }
4043   return getLoc();
4044 }
4045 
4046 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4047                                          const OperandVector &Operands) {
4048   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4049   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4050     return true;
4051 
4052   auto Opcode = Inst.getOpcode();
4053   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4054   assert(OpNum != -1);
4055 
4056   const auto &Op = Inst.getOperand(OpNum);
4057   if (!hasFlatOffsets() && Op.getImm() != 0) {
4058     Error(getFlatOffsetLoc(Operands),
4059           "flat offset modifier is not supported on this GPU");
4060     return false;
4061   }
4062 
4063   // For FLAT segment the offset must be positive;
4064   // MSB is ignored and forced to zero.
4065   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4066     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4067     if (!isIntN(OffsetSize, Op.getImm())) {
4068       Error(getFlatOffsetLoc(Operands),
4069             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4070       return false;
4071     }
4072   } else {
4073     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4074     if (!isUIntN(OffsetSize, Op.getImm())) {
4075       Error(getFlatOffsetLoc(Operands),
4076             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4077       return false;
4078     }
4079   }
4080 
4081   return true;
4082 }
4083 
4084 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4085   // Start with second operand because SMEM Offset cannot be dst or src0.
4086   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4087     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4088     if (Op.isSMEMOffset())
4089       return Op.getStartLoc();
4090   }
4091   return getLoc();
4092 }
4093 
4094 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4095                                          const OperandVector &Operands) {
4096   if (isCI() || isSI())
4097     return true;
4098 
4099   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4100   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4101     return true;
4102 
4103   auto Opcode = Inst.getOpcode();
4104   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4105   if (OpNum == -1)
4106     return true;
4107 
4108   const auto &Op = Inst.getOperand(OpNum);
4109   if (!Op.isImm())
4110     return true;
4111 
4112   uint64_t Offset = Op.getImm();
4113   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4114   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4115       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4116     return true;
4117 
4118   Error(getSMEMOffsetLoc(Operands),
4119         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4120                                "expected a 21-bit signed offset");
4121 
4122   return false;
4123 }
4124 
4125 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4126   unsigned Opcode = Inst.getOpcode();
4127   const MCInstrDesc &Desc = MII.get(Opcode);
4128   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4129     return true;
4130 
4131   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4132   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4133 
4134   const int OpIndices[] = { Src0Idx, Src1Idx };
4135 
4136   unsigned NumExprs = 0;
4137   unsigned NumLiterals = 0;
4138   uint32_t LiteralValue;
4139 
4140   for (int OpIdx : OpIndices) {
4141     if (OpIdx == -1) break;
4142 
4143     const MCOperand &MO = Inst.getOperand(OpIdx);
4144     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4145     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4146       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4147         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4148         if (NumLiterals == 0 || LiteralValue != Value) {
4149           LiteralValue = Value;
4150           ++NumLiterals;
4151         }
4152       } else if (MO.isExpr()) {
4153         ++NumExprs;
4154       }
4155     }
4156   }
4157 
4158   return NumLiterals + NumExprs <= 1;
4159 }
4160 
4161 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4162   const unsigned Opc = Inst.getOpcode();
4163   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4164       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4165     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4166     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4167 
4168     if (OpSel & ~3)
4169       return false;
4170   }
4171 
4172   uint64_t TSFlags = MII.get(Opc).TSFlags;
4173 
4174   if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4175     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4176     if (OpSelIdx != -1) {
4177       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4178         return false;
4179     }
4180     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4181     if (OpSelHiIdx != -1) {
4182       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4183         return false;
4184     }
4185   }
4186 
4187   // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4188   if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) &&
4189       !(TSFlags & SIInstrFlags::VOP3P)) {
4190     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4191     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4192     if (OpSel & 3)
4193       return false;
4194   }
4195 
4196   return true;
4197 }
4198 
4199 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4200                                   const OperandVector &Operands) {
4201   const unsigned Opc = Inst.getOpcode();
4202   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4203   if (DppCtrlIdx < 0)
4204     return true;
4205   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4206 
4207   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4208     // DPP64 is supported for row_newbcast only.
4209     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4210     if (Src0Idx >= 0 &&
4211         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4212       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4213       Error(S, "64 bit dpp only supports row_newbcast");
4214       return false;
4215     }
4216   }
4217 
4218   return true;
4219 }
4220 
4221 // Check if VCC register matches wavefront size
4222 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4223   auto FB = getFeatureBits();
4224   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4225     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4226 }
4227 
4228 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4229 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4230                                          const OperandVector &Operands) {
4231   unsigned Opcode = Inst.getOpcode();
4232   const MCInstrDesc &Desc = MII.get(Opcode);
4233   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4234   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4235       ImmIdx == -1)
4236     return true;
4237 
4238   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4239   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4240   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4241 
4242   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4243 
4244   unsigned NumExprs = 0;
4245   unsigned NumLiterals = 0;
4246   uint32_t LiteralValue;
4247 
4248   for (int OpIdx : OpIndices) {
4249     if (OpIdx == -1)
4250       continue;
4251 
4252     const MCOperand &MO = Inst.getOperand(OpIdx);
4253     if (!MO.isImm() && !MO.isExpr())
4254       continue;
4255     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4256       continue;
4257 
4258     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4259         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4260       Error(getConstLoc(Operands),
4261             "inline constants are not allowed for this operand");
4262       return false;
4263     }
4264 
4265     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4266       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4267       if (NumLiterals == 0 || LiteralValue != Value) {
4268         LiteralValue = Value;
4269         ++NumLiterals;
4270       }
4271     } else if (MO.isExpr()) {
4272       ++NumExprs;
4273     }
4274   }
4275   NumLiterals += NumExprs;
4276 
4277   if (!NumLiterals)
4278     return true;
4279 
4280   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4281     Error(getLitLoc(Operands), "literal operands are not supported");
4282     return false;
4283   }
4284 
4285   if (NumLiterals > 1) {
4286     Error(getLitLoc(Operands), "only one literal operand is allowed");
4287     return false;
4288   }
4289 
4290   return true;
4291 }
4292 
4293 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4294 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4295                          const MCRegisterInfo *MRI) {
4296   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4297   if (OpIdx < 0)
4298     return -1;
4299 
4300   const MCOperand &Op = Inst.getOperand(OpIdx);
4301   if (!Op.isReg())
4302     return -1;
4303 
4304   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4305   auto Reg = Sub ? Sub : Op.getReg();
4306   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4307   return AGPR32.contains(Reg) ? 1 : 0;
4308 }
4309 
4310 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4311   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4312   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4313                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4314                   SIInstrFlags::DS)) == 0)
4315     return true;
4316 
4317   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4318                                                       : AMDGPU::OpName::vdata;
4319 
4320   const MCRegisterInfo *MRI = getMRI();
4321   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4322   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4323 
4324   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4325     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4326     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4327       return false;
4328   }
4329 
4330   auto FB = getFeatureBits();
4331   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4332     if (DataAreg < 0 || DstAreg < 0)
4333       return true;
4334     return DstAreg == DataAreg;
4335   }
4336 
4337   return DstAreg < 1 && DataAreg < 1;
4338 }
4339 
4340 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4341   auto FB = getFeatureBits();
4342   if (!FB[AMDGPU::FeatureGFX90AInsts])
4343     return true;
4344 
4345   const MCRegisterInfo *MRI = getMRI();
4346   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4347   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4348   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4349     const MCOperand &Op = Inst.getOperand(I);
4350     if (!Op.isReg())
4351       continue;
4352 
4353     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4354     if (!Sub)
4355       continue;
4356 
4357     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4358       return false;
4359     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4360       return false;
4361   }
4362 
4363   return true;
4364 }
4365 
4366 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4367   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4368     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4369     if (Op.isBLGP())
4370       return Op.getStartLoc();
4371   }
4372   return SMLoc();
4373 }
4374 
4375 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4376                                    const OperandVector &Operands) {
4377   unsigned Opc = Inst.getOpcode();
4378   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4379   if (BlgpIdx == -1)
4380     return true;
4381   SMLoc BLGPLoc = getBLGPLoc(Operands);
4382   if (!BLGPLoc.isValid())
4383     return true;
4384   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4385   auto FB = getFeatureBits();
4386   bool UsesNeg = false;
4387   if (FB[AMDGPU::FeatureGFX940Insts]) {
4388     switch (Opc) {
4389     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4390     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4391     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4392     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4393       UsesNeg = true;
4394     }
4395   }
4396 
4397   if (IsNeg == UsesNeg)
4398     return true;
4399 
4400   Error(BLGPLoc,
4401         UsesNeg ? "invalid modifier: blgp is not supported"
4402                 : "invalid modifier: neg is not supported");
4403 
4404   return false;
4405 }
4406 
4407 // gfx90a has an undocumented limitation:
4408 // DS_GWS opcodes must use even aligned registers.
4409 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4410                                   const OperandVector &Operands) {
4411   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4412     return true;
4413 
4414   int Opc = Inst.getOpcode();
4415   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4416       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4417     return true;
4418 
4419   const MCRegisterInfo *MRI = getMRI();
4420   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4421   int Data0Pos =
4422       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4423   assert(Data0Pos != -1);
4424   auto Reg = Inst.getOperand(Data0Pos).getReg();
4425   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4426   if (RegIdx & 1) {
4427     SMLoc RegLoc = getRegLoc(Reg, Operands);
4428     Error(RegLoc, "vgpr must be even aligned");
4429     return false;
4430   }
4431 
4432   return true;
4433 }
4434 
4435 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4436                                             const OperandVector &Operands,
4437                                             const SMLoc &IDLoc) {
4438   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4439                                            AMDGPU::OpName::cpol);
4440   if (CPolPos == -1)
4441     return true;
4442 
4443   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4444 
4445   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4446   if (TSFlags & SIInstrFlags::SMRD) {
4447     if (CPol && (isSI() || isCI())) {
4448       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4449       Error(S, "cache policy is not supported for SMRD instructions");
4450       return false;
4451     }
4452     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4453       Error(IDLoc, "invalid cache policy for SMEM instruction");
4454       return false;
4455     }
4456   }
4457 
4458   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4459     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4460     StringRef CStr(S.getPointer());
4461     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4462     Error(S, "scc is not supported on this GPU");
4463     return false;
4464   }
4465 
4466   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4467     return true;
4468 
4469   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4470     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4471       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4472                               : "instruction must use glc");
4473       return false;
4474     }
4475   } else {
4476     if (CPol & CPol::GLC) {
4477       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4478       StringRef CStr(S.getPointer());
4479       S = SMLoc::getFromPointer(
4480           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4481       Error(S, isGFX940() ? "instruction must not use sc0"
4482                           : "instruction must not use glc");
4483       return false;
4484     }
4485   }
4486 
4487   return true;
4488 }
4489 
4490 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4491                                          const OperandVector &Operands,
4492                                          const SMLoc &IDLoc) {
4493   if (isGFX940())
4494     return true;
4495 
4496   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4497   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4498       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4499     return true;
4500   // This is FLAT LDS DMA.
4501 
4502   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4503   StringRef CStr(S.getPointer());
4504   if (!CStr.startswith("lds")) {
4505     // This is incorrectly selected LDS DMA version of a FLAT load opcode.
4506     // And LDS version should have 'lds' modifier, but it follows optional
4507     // operands so its absense is ignored by the matcher.
4508     Error(IDLoc, "invalid operands for instruction");
4509     return false;
4510   }
4511 
4512   return true;
4513 }
4514 
4515 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4516   if (!isGFX11Plus())
4517     return true;
4518   for (auto &Operand : Operands) {
4519     if (!Operand->isReg())
4520       continue;
4521     unsigned Reg = Operand->getReg();
4522     if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4523       Error(getRegLoc(Reg, Operands),
4524             "execz and vccz are not supported on this GPU");
4525       return false;
4526     }
4527   }
4528   return true;
4529 }
4530 
4531 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4532                                           const SMLoc &IDLoc,
4533                                           const OperandVector &Operands) {
4534   if (auto ErrMsg = validateLdsDirect(Inst)) {
4535     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4536     return false;
4537   }
4538   if (!validateSOPLiteral(Inst)) {
4539     Error(getLitLoc(Operands),
4540       "only one literal operand is allowed");
4541     return false;
4542   }
4543   if (!validateVOPLiteral(Inst, Operands)) {
4544     return false;
4545   }
4546   if (!validateConstantBusLimitations(Inst, Operands)) {
4547     return false;
4548   }
4549   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4550     return false;
4551   }
4552   if (!validateIntClampSupported(Inst)) {
4553     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4554       "integer clamping is not supported on this GPU");
4555     return false;
4556   }
4557   if (!validateOpSel(Inst)) {
4558     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4559       "invalid op_sel operand");
4560     return false;
4561   }
4562   if (!validateDPP(Inst, Operands)) {
4563     return false;
4564   }
4565   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4566   if (!validateMIMGD16(Inst)) {
4567     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4568       "d16 modifier is not supported on this GPU");
4569     return false;
4570   }
4571   if (!validateMIMGDim(Inst)) {
4572     Error(IDLoc, "dim modifier is required on this GPU");
4573     return false;
4574   }
4575   if (!validateMIMGMSAA(Inst)) {
4576     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4577           "invalid dim; must be MSAA type");
4578     return false;
4579   }
4580   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4581     Error(IDLoc, *ErrMsg);
4582     return false;
4583   }
4584   if (!validateMIMGAddrSize(Inst)) {
4585     Error(IDLoc,
4586       "image address size does not match dim and a16");
4587     return false;
4588   }
4589   if (!validateMIMGAtomicDMask(Inst)) {
4590     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4591       "invalid atomic image dmask");
4592     return false;
4593   }
4594   if (!validateMIMGGatherDMask(Inst)) {
4595     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4596       "invalid image_gather dmask: only one bit must be set");
4597     return false;
4598   }
4599   if (!validateMovrels(Inst, Operands)) {
4600     return false;
4601   }
4602   if (!validateFlatOffset(Inst, Operands)) {
4603     return false;
4604   }
4605   if (!validateSMEMOffset(Inst, Operands)) {
4606     return false;
4607   }
4608   if (!validateMAIAccWrite(Inst, Operands)) {
4609     return false;
4610   }
4611   if (!validateMFMA(Inst, Operands)) {
4612     return false;
4613   }
4614   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4615     return false;
4616   }
4617 
4618   if (!validateAGPRLdSt(Inst)) {
4619     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4620     ? "invalid register class: data and dst should be all VGPR or AGPR"
4621     : "invalid register class: agpr loads and stores not supported on this GPU"
4622     );
4623     return false;
4624   }
4625   if (!validateVGPRAlign(Inst)) {
4626     Error(IDLoc,
4627       "invalid register class: vgpr tuples must be 64 bit aligned");
4628     return false;
4629   }
4630   if (!validateGWS(Inst, Operands)) {
4631     return false;
4632   }
4633 
4634   if (!validateBLGP(Inst, Operands)) {
4635     return false;
4636   }
4637 
4638   if (!validateDivScale(Inst)) {
4639     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4640     return false;
4641   }
4642   if (!validateExeczVcczOperands(Operands)) {
4643     return false;
4644   }
4645 
4646   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4647     return false;
4648   }
4649 
4650   return true;
4651 }
4652 
4653 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4654                                             const FeatureBitset &FBS,
4655                                             unsigned VariantID = 0);
4656 
4657 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4658                                 const FeatureBitset &AvailableFeatures,
4659                                 unsigned VariantID);
4660 
4661 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4662                                        const FeatureBitset &FBS) {
4663   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4664 }
4665 
4666 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4667                                        const FeatureBitset &FBS,
4668                                        ArrayRef<unsigned> Variants) {
4669   for (auto Variant : Variants) {
4670     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4671       return true;
4672   }
4673 
4674   return false;
4675 }
4676 
4677 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4678                                                   const SMLoc &IDLoc) {
4679   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4680 
4681   // Check if requested instruction variant is supported.
4682   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4683     return false;
4684 
4685   // This instruction is not supported.
4686   // Clear any other pending errors because they are no longer relevant.
4687   getParser().clearPendingErrors();
4688 
4689   // Requested instruction variant is not supported.
4690   // Check if any other variants are supported.
4691   StringRef VariantName = getMatchedVariantName();
4692   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4693     return Error(IDLoc,
4694                  Twine(VariantName,
4695                        " variant of this instruction is not supported"));
4696   }
4697 
4698   // Finally check if this instruction is supported on any other GPU.
4699   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4700     return Error(IDLoc, "instruction not supported on this GPU");
4701   }
4702 
4703   // Instruction not supported on any GPU. Probably a typo.
4704   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4705   return Error(IDLoc, "invalid instruction" + Suggestion);
4706 }
4707 
4708 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4709                                               OperandVector &Operands,
4710                                               MCStreamer &Out,
4711                                               uint64_t &ErrorInfo,
4712                                               bool MatchingInlineAsm) {
4713   MCInst Inst;
4714   unsigned Result = Match_Success;
4715   for (auto Variant : getMatchedVariants()) {
4716     uint64_t EI;
4717     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4718                                   Variant);
4719     // We order match statuses from least to most specific. We use most specific
4720     // status as resulting
4721     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4722     if ((R == Match_Success) ||
4723         (R == Match_PreferE32) ||
4724         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4725         (R == Match_InvalidOperand && Result != Match_MissingFeature
4726                                    && Result != Match_PreferE32) ||
4727         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4728                                    && Result != Match_MissingFeature
4729                                    && Result != Match_PreferE32)) {
4730       Result = R;
4731       ErrorInfo = EI;
4732     }
4733     if (R == Match_Success)
4734       break;
4735   }
4736 
4737   if (Result == Match_Success) {
4738     if (!validateInstruction(Inst, IDLoc, Operands)) {
4739       return true;
4740     }
4741     Inst.setLoc(IDLoc);
4742     Out.emitInstruction(Inst, getSTI());
4743     return false;
4744   }
4745 
4746   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4747   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4748     return true;
4749   }
4750 
4751   switch (Result) {
4752   default: break;
4753   case Match_MissingFeature:
4754     // It has been verified that the specified instruction
4755     // mnemonic is valid. A match was found but it requires
4756     // features which are not supported on this GPU.
4757     return Error(IDLoc, "operands are not valid for this GPU or mode");
4758 
4759   case Match_InvalidOperand: {
4760     SMLoc ErrorLoc = IDLoc;
4761     if (ErrorInfo != ~0ULL) {
4762       if (ErrorInfo >= Operands.size()) {
4763         return Error(IDLoc, "too few operands for instruction");
4764       }
4765       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4766       if (ErrorLoc == SMLoc())
4767         ErrorLoc = IDLoc;
4768     }
4769     return Error(ErrorLoc, "invalid operand for instruction");
4770   }
4771 
4772   case Match_PreferE32:
4773     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4774                         "should be encoded as e32");
4775   case Match_MnemonicFail:
4776     llvm_unreachable("Invalid instructions should have been handled already");
4777   }
4778   llvm_unreachable("Implement any new match types added!");
4779 }
4780 
4781 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4782   int64_t Tmp = -1;
4783   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4784     return true;
4785   }
4786   if (getParser().parseAbsoluteExpression(Tmp)) {
4787     return true;
4788   }
4789   Ret = static_cast<uint32_t>(Tmp);
4790   return false;
4791 }
4792 
4793 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4794                                                uint32_t &Minor) {
4795   if (ParseAsAbsoluteExpression(Major))
4796     return TokError("invalid major version");
4797 
4798   if (!trySkipToken(AsmToken::Comma))
4799     return TokError("minor version number required, comma expected");
4800 
4801   if (ParseAsAbsoluteExpression(Minor))
4802     return TokError("invalid minor version");
4803 
4804   return false;
4805 }
4806 
4807 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4808   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4809     return TokError("directive only supported for amdgcn architecture");
4810 
4811   std::string TargetIDDirective;
4812   SMLoc TargetStart = getTok().getLoc();
4813   if (getParser().parseEscapedString(TargetIDDirective))
4814     return true;
4815 
4816   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4817   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4818     return getParser().Error(TargetRange.Start,
4819         (Twine(".amdgcn_target directive's target id ") +
4820          Twine(TargetIDDirective) +
4821          Twine(" does not match the specified target id ") +
4822          Twine(getTargetStreamer().getTargetID()->toString())).str());
4823 
4824   return false;
4825 }
4826 
4827 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4828   return Error(Range.Start, "value out of range", Range);
4829 }
4830 
4831 bool AMDGPUAsmParser::calculateGPRBlocks(
4832     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4833     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4834     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4835     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4836   // TODO(scott.linder): These calculations are duplicated from
4837   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4838   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4839 
4840   unsigned NumVGPRs = NextFreeVGPR;
4841   unsigned NumSGPRs = NextFreeSGPR;
4842 
4843   if (Version.Major >= 10)
4844     NumSGPRs = 0;
4845   else {
4846     unsigned MaxAddressableNumSGPRs =
4847         IsaInfo::getAddressableNumSGPRs(&getSTI());
4848 
4849     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4850         NumSGPRs > MaxAddressableNumSGPRs)
4851       return OutOfRangeError(SGPRRange);
4852 
4853     NumSGPRs +=
4854         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4855 
4856     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4857         NumSGPRs > MaxAddressableNumSGPRs)
4858       return OutOfRangeError(SGPRRange);
4859 
4860     if (Features.test(FeatureSGPRInitBug))
4861       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4862   }
4863 
4864   VGPRBlocks =
4865       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4866   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4867 
4868   return false;
4869 }
4870 
4871 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4872   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4873     return TokError("directive only supported for amdgcn architecture");
4874 
4875   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4876     return TokError("directive only supported for amdhsa OS");
4877 
4878   StringRef KernelName;
4879   if (getParser().parseIdentifier(KernelName))
4880     return true;
4881 
4882   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4883 
4884   StringSet<> Seen;
4885 
4886   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4887 
4888   SMRange VGPRRange;
4889   uint64_t NextFreeVGPR = 0;
4890   uint64_t AccumOffset = 0;
4891   uint64_t SharedVGPRCount = 0;
4892   SMRange SGPRRange;
4893   uint64_t NextFreeSGPR = 0;
4894 
4895   // Count the number of user SGPRs implied from the enabled feature bits.
4896   unsigned ImpliedUserSGPRCount = 0;
4897 
4898   // Track if the asm explicitly contains the directive for the user SGPR
4899   // count.
4900   Optional<unsigned> ExplicitUserSGPRCount;
4901   bool ReserveVCC = true;
4902   bool ReserveFlatScr = true;
4903   Optional<bool> EnableWavefrontSize32;
4904 
4905   while (true) {
4906     while (trySkipToken(AsmToken::EndOfStatement));
4907 
4908     StringRef ID;
4909     SMRange IDRange = getTok().getLocRange();
4910     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4911       return true;
4912 
4913     if (ID == ".end_amdhsa_kernel")
4914       break;
4915 
4916     if (!Seen.insert(ID).second)
4917       return TokError(".amdhsa_ directives cannot be repeated");
4918 
4919     SMLoc ValStart = getLoc();
4920     int64_t IVal;
4921     if (getParser().parseAbsoluteExpression(IVal))
4922       return true;
4923     SMLoc ValEnd = getLoc();
4924     SMRange ValRange = SMRange(ValStart, ValEnd);
4925 
4926     if (IVal < 0)
4927       return OutOfRangeError(ValRange);
4928 
4929     uint64_t Val = IVal;
4930 
4931 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4932   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4933     return OutOfRangeError(RANGE);                                             \
4934   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4935 
4936     if (ID == ".amdhsa_group_segment_fixed_size") {
4937       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4938         return OutOfRangeError(ValRange);
4939       KD.group_segment_fixed_size = Val;
4940     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4941       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4942         return OutOfRangeError(ValRange);
4943       KD.private_segment_fixed_size = Val;
4944     } else if (ID == ".amdhsa_kernarg_size") {
4945       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4946         return OutOfRangeError(ValRange);
4947       KD.kernarg_size = Val;
4948     } else if (ID == ".amdhsa_user_sgpr_count") {
4949       ExplicitUserSGPRCount = Val;
4950     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4951       if (hasArchitectedFlatScratch())
4952         return Error(IDRange.Start,
4953                      "directive is not supported with architected flat scratch",
4954                      IDRange);
4955       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4956                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4957                        Val, ValRange);
4958       if (Val)
4959         ImpliedUserSGPRCount += 4;
4960     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4961       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4962                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4963                        ValRange);
4964       if (Val)
4965         ImpliedUserSGPRCount += 2;
4966     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4967       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4968                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4969                        ValRange);
4970       if (Val)
4971         ImpliedUserSGPRCount += 2;
4972     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4973       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4974                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4975                        Val, ValRange);
4976       if (Val)
4977         ImpliedUserSGPRCount += 2;
4978     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4979       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4980                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4981                        ValRange);
4982       if (Val)
4983         ImpliedUserSGPRCount += 2;
4984     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4985       if (hasArchitectedFlatScratch())
4986         return Error(IDRange.Start,
4987                      "directive is not supported with architected flat scratch",
4988                      IDRange);
4989       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4990                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4991                        ValRange);
4992       if (Val)
4993         ImpliedUserSGPRCount += 2;
4994     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4995       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4996                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4997                        Val, ValRange);
4998       if (Val)
4999         ImpliedUserSGPRCount += 1;
5000     } else if (ID == ".amdhsa_wavefront_size32") {
5001       if (IVersion.Major < 10)
5002         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5003       EnableWavefrontSize32 = Val;
5004       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5005                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5006                        Val, ValRange);
5007     } else if (ID == ".amdhsa_uses_dynamic_stack") {
5008       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5009                        KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5010     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5011       if (hasArchitectedFlatScratch())
5012         return Error(IDRange.Start,
5013                      "directive is not supported with architected flat scratch",
5014                      IDRange);
5015       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5016                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5017     } else if (ID == ".amdhsa_enable_private_segment") {
5018       if (!hasArchitectedFlatScratch())
5019         return Error(
5020             IDRange.Start,
5021             "directive is not supported without architected flat scratch",
5022             IDRange);
5023       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5024                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5025     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5026       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5027                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5028                        ValRange);
5029     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5030       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5031                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5032                        ValRange);
5033     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5034       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5035                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5036                        ValRange);
5037     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5038       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5039                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5040                        ValRange);
5041     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5042       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5043                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5044                        ValRange);
5045     } else if (ID == ".amdhsa_next_free_vgpr") {
5046       VGPRRange = ValRange;
5047       NextFreeVGPR = Val;
5048     } else if (ID == ".amdhsa_next_free_sgpr") {
5049       SGPRRange = ValRange;
5050       NextFreeSGPR = Val;
5051     } else if (ID == ".amdhsa_accum_offset") {
5052       if (!isGFX90A())
5053         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5054       AccumOffset = Val;
5055     } else if (ID == ".amdhsa_reserve_vcc") {
5056       if (!isUInt<1>(Val))
5057         return OutOfRangeError(ValRange);
5058       ReserveVCC = Val;
5059     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5060       if (IVersion.Major < 7)
5061         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5062       if (hasArchitectedFlatScratch())
5063         return Error(IDRange.Start,
5064                      "directive is not supported with architected flat scratch",
5065                      IDRange);
5066       if (!isUInt<1>(Val))
5067         return OutOfRangeError(ValRange);
5068       ReserveFlatScr = Val;
5069     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5070       if (IVersion.Major < 8)
5071         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5072       if (!isUInt<1>(Val))
5073         return OutOfRangeError(ValRange);
5074       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5075         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5076                                  IDRange);
5077     } else if (ID == ".amdhsa_float_round_mode_32") {
5078       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5079                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5080     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5081       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5082                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5083     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5084       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5085                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5086     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5087       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5088                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5089                        ValRange);
5090     } else if (ID == ".amdhsa_dx10_clamp") {
5091       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5092                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5093     } else if (ID == ".amdhsa_ieee_mode") {
5094       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5095                        Val, ValRange);
5096     } else if (ID == ".amdhsa_fp16_overflow") {
5097       if (IVersion.Major < 9)
5098         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5099       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5100                        ValRange);
5101     } else if (ID == ".amdhsa_tg_split") {
5102       if (!isGFX90A())
5103         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5104       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5105                        ValRange);
5106     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5107       if (IVersion.Major < 10)
5108         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5109       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5110                        ValRange);
5111     } else if (ID == ".amdhsa_memory_ordered") {
5112       if (IVersion.Major < 10)
5113         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5114       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5115                        ValRange);
5116     } else if (ID == ".amdhsa_forward_progress") {
5117       if (IVersion.Major < 10)
5118         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5119       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5120                        ValRange);
5121     } else if (ID == ".amdhsa_shared_vgpr_count") {
5122       if (IVersion.Major < 10)
5123         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5124       SharedVGPRCount = Val;
5125       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5126                        COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5127                        ValRange);
5128     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5129       PARSE_BITS_ENTRY(
5130           KD.compute_pgm_rsrc2,
5131           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5132           ValRange);
5133     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5134       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5135                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5136                        Val, ValRange);
5137     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5138       PARSE_BITS_ENTRY(
5139           KD.compute_pgm_rsrc2,
5140           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5141           ValRange);
5142     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5143       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5144                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5145                        Val, ValRange);
5146     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5147       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5148                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5149                        Val, ValRange);
5150     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5151       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5152                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5153                        Val, ValRange);
5154     } else if (ID == ".amdhsa_exception_int_div_zero") {
5155       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5156                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5157                        Val, ValRange);
5158     } else {
5159       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5160     }
5161 
5162 #undef PARSE_BITS_ENTRY
5163   }
5164 
5165   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5166     return TokError(".amdhsa_next_free_vgpr directive is required");
5167 
5168   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5169     return TokError(".amdhsa_next_free_sgpr directive is required");
5170 
5171   unsigned VGPRBlocks;
5172   unsigned SGPRBlocks;
5173   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5174                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5175                          EnableWavefrontSize32, NextFreeVGPR,
5176                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5177                          SGPRBlocks))
5178     return true;
5179 
5180   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5181           VGPRBlocks))
5182     return OutOfRangeError(VGPRRange);
5183   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5184                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5185 
5186   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5187           SGPRBlocks))
5188     return OutOfRangeError(SGPRRange);
5189   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5190                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5191                   SGPRBlocks);
5192 
5193   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5194     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5195                     "enabled user SGPRs");
5196 
5197   unsigned UserSGPRCount =
5198       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5199 
5200   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5201     return TokError("too many user SGPRs enabled");
5202   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5203                   UserSGPRCount);
5204 
5205   if (isGFX90A()) {
5206     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5207       return TokError(".amdhsa_accum_offset directive is required");
5208     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5209       return TokError("accum_offset should be in range [4..256] in "
5210                       "increments of 4");
5211     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5212       return TokError("accum_offset exceeds total VGPR allocation");
5213     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5214                     (AccumOffset / 4 - 1));
5215   }
5216 
5217   if (IVersion.Major == 10) {
5218     // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5219     if (SharedVGPRCount && EnableWavefrontSize32) {
5220       return TokError("shared_vgpr_count directive not valid on "
5221                       "wavefront size 32");
5222     }
5223     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5224       return TokError("shared_vgpr_count*2 + "
5225                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5226                       "exceed 63\n");
5227     }
5228   }
5229 
5230   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5231       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5232       ReserveFlatScr);
5233   return false;
5234 }
5235 
5236 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5237   uint32_t Major;
5238   uint32_t Minor;
5239 
5240   if (ParseDirectiveMajorMinor(Major, Minor))
5241     return true;
5242 
5243   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5244   return false;
5245 }
5246 
5247 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5248   uint32_t Major;
5249   uint32_t Minor;
5250   uint32_t Stepping;
5251   StringRef VendorName;
5252   StringRef ArchName;
5253 
5254   // If this directive has no arguments, then use the ISA version for the
5255   // targeted GPU.
5256   if (isToken(AsmToken::EndOfStatement)) {
5257     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5258     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5259                                                         ISA.Stepping,
5260                                                         "AMD", "AMDGPU");
5261     return false;
5262   }
5263 
5264   if (ParseDirectiveMajorMinor(Major, Minor))
5265     return true;
5266 
5267   if (!trySkipToken(AsmToken::Comma))
5268     return TokError("stepping version number required, comma expected");
5269 
5270   if (ParseAsAbsoluteExpression(Stepping))
5271     return TokError("invalid stepping version");
5272 
5273   if (!trySkipToken(AsmToken::Comma))
5274     return TokError("vendor name required, comma expected");
5275 
5276   if (!parseString(VendorName, "invalid vendor name"))
5277     return true;
5278 
5279   if (!trySkipToken(AsmToken::Comma))
5280     return TokError("arch name required, comma expected");
5281 
5282   if (!parseString(ArchName, "invalid arch name"))
5283     return true;
5284 
5285   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5286                                                       VendorName, ArchName);
5287   return false;
5288 }
5289 
5290 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5291                                                amd_kernel_code_t &Header) {
5292   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5293   // assembly for backwards compatibility.
5294   if (ID == "max_scratch_backing_memory_byte_size") {
5295     Parser.eatToEndOfStatement();
5296     return false;
5297   }
5298 
5299   SmallString<40> ErrStr;
5300   raw_svector_ostream Err(ErrStr);
5301   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5302     return TokError(Err.str());
5303   }
5304   Lex();
5305 
5306   if (ID == "enable_wavefront_size32") {
5307     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5308       if (!isGFX10Plus())
5309         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5310       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5311         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5312     } else {
5313       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5314         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5315     }
5316   }
5317 
5318   if (ID == "wavefront_size") {
5319     if (Header.wavefront_size == 5) {
5320       if (!isGFX10Plus())
5321         return TokError("wavefront_size=5 is only allowed on GFX10+");
5322       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5323         return TokError("wavefront_size=5 requires +WavefrontSize32");
5324     } else if (Header.wavefront_size == 6) {
5325       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5326         return TokError("wavefront_size=6 requires +WavefrontSize64");
5327     }
5328   }
5329 
5330   if (ID == "enable_wgp_mode") {
5331     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5332         !isGFX10Plus())
5333       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5334   }
5335 
5336   if (ID == "enable_mem_ordered") {
5337     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5338         !isGFX10Plus())
5339       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5340   }
5341 
5342   if (ID == "enable_fwd_progress") {
5343     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5344         !isGFX10Plus())
5345       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5346   }
5347 
5348   return false;
5349 }
5350 
5351 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5352   amd_kernel_code_t Header;
5353   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5354 
5355   while (true) {
5356     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5357     // will set the current token to EndOfStatement.
5358     while(trySkipToken(AsmToken::EndOfStatement));
5359 
5360     StringRef ID;
5361     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5362       return true;
5363 
5364     if (ID == ".end_amd_kernel_code_t")
5365       break;
5366 
5367     if (ParseAMDKernelCodeTValue(ID, Header))
5368       return true;
5369   }
5370 
5371   getTargetStreamer().EmitAMDKernelCodeT(Header);
5372 
5373   return false;
5374 }
5375 
5376 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5377   StringRef KernelName;
5378   if (!parseId(KernelName, "expected symbol name"))
5379     return true;
5380 
5381   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5382                                            ELF::STT_AMDGPU_HSA_KERNEL);
5383 
5384   KernelScope.initialize(getContext());
5385   return false;
5386 }
5387 
5388 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5389   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5390     return Error(getLoc(),
5391                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5392                  "architectures");
5393   }
5394 
5395   auto TargetIDDirective = getLexer().getTok().getStringContents();
5396   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5397     return Error(getParser().getTok().getLoc(), "target id must match options");
5398 
5399   getTargetStreamer().EmitISAVersion();
5400   Lex();
5401 
5402   return false;
5403 }
5404 
5405 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5406   const char *AssemblerDirectiveBegin;
5407   const char *AssemblerDirectiveEnd;
5408   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5409       isHsaAbiVersion3AndAbove(&getSTI())
5410           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5411                             HSAMD::V3::AssemblerDirectiveEnd)
5412           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5413                             HSAMD::AssemblerDirectiveEnd);
5414 
5415   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5416     return Error(getLoc(),
5417                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5418                  "not available on non-amdhsa OSes")).str());
5419   }
5420 
5421   std::string HSAMetadataString;
5422   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5423                           HSAMetadataString))
5424     return true;
5425 
5426   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5427     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5428       return Error(getLoc(), "invalid HSA metadata");
5429   } else {
5430     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5431       return Error(getLoc(), "invalid HSA metadata");
5432   }
5433 
5434   return false;
5435 }
5436 
5437 /// Common code to parse out a block of text (typically YAML) between start and
5438 /// end directives.
5439 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5440                                           const char *AssemblerDirectiveEnd,
5441                                           std::string &CollectString) {
5442 
5443   raw_string_ostream CollectStream(CollectString);
5444 
5445   getLexer().setSkipSpace(false);
5446 
5447   bool FoundEnd = false;
5448   while (!isToken(AsmToken::Eof)) {
5449     while (isToken(AsmToken::Space)) {
5450       CollectStream << getTokenStr();
5451       Lex();
5452     }
5453 
5454     if (trySkipId(AssemblerDirectiveEnd)) {
5455       FoundEnd = true;
5456       break;
5457     }
5458 
5459     CollectStream << Parser.parseStringToEndOfStatement()
5460                   << getContext().getAsmInfo()->getSeparatorString();
5461 
5462     Parser.eatToEndOfStatement();
5463   }
5464 
5465   getLexer().setSkipSpace(true);
5466 
5467   if (isToken(AsmToken::Eof) && !FoundEnd) {
5468     return TokError(Twine("expected directive ") +
5469                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5470   }
5471 
5472   CollectStream.flush();
5473   return false;
5474 }
5475 
5476 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5477 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5478   std::string String;
5479   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5480                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5481     return true;
5482 
5483   auto PALMetadata = getTargetStreamer().getPALMetadata();
5484   if (!PALMetadata->setFromString(String))
5485     return Error(getLoc(), "invalid PAL metadata");
5486   return false;
5487 }
5488 
5489 /// Parse the assembler directive for old linear-format PAL metadata.
5490 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5491   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5492     return Error(getLoc(),
5493                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5494                  "not available on non-amdpal OSes")).str());
5495   }
5496 
5497   auto PALMetadata = getTargetStreamer().getPALMetadata();
5498   PALMetadata->setLegacy();
5499   for (;;) {
5500     uint32_t Key, Value;
5501     if (ParseAsAbsoluteExpression(Key)) {
5502       return TokError(Twine("invalid value in ") +
5503                       Twine(PALMD::AssemblerDirective));
5504     }
5505     if (!trySkipToken(AsmToken::Comma)) {
5506       return TokError(Twine("expected an even number of values in ") +
5507                       Twine(PALMD::AssemblerDirective));
5508     }
5509     if (ParseAsAbsoluteExpression(Value)) {
5510       return TokError(Twine("invalid value in ") +
5511                       Twine(PALMD::AssemblerDirective));
5512     }
5513     PALMetadata->setRegister(Key, Value);
5514     if (!trySkipToken(AsmToken::Comma))
5515       break;
5516   }
5517   return false;
5518 }
5519 
5520 /// ParseDirectiveAMDGPULDS
5521 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5522 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5523   if (getParser().checkForValidSection())
5524     return true;
5525 
5526   StringRef Name;
5527   SMLoc NameLoc = getLoc();
5528   if (getParser().parseIdentifier(Name))
5529     return TokError("expected identifier in directive");
5530 
5531   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5532   if (parseToken(AsmToken::Comma, "expected ','"))
5533     return true;
5534 
5535   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5536 
5537   int64_t Size;
5538   SMLoc SizeLoc = getLoc();
5539   if (getParser().parseAbsoluteExpression(Size))
5540     return true;
5541   if (Size < 0)
5542     return Error(SizeLoc, "size must be non-negative");
5543   if (Size > LocalMemorySize)
5544     return Error(SizeLoc, "size is too large");
5545 
5546   int64_t Alignment = 4;
5547   if (trySkipToken(AsmToken::Comma)) {
5548     SMLoc AlignLoc = getLoc();
5549     if (getParser().parseAbsoluteExpression(Alignment))
5550       return true;
5551     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5552       return Error(AlignLoc, "alignment must be a power of two");
5553 
5554     // Alignment larger than the size of LDS is possible in theory, as long
5555     // as the linker manages to place to symbol at address 0, but we do want
5556     // to make sure the alignment fits nicely into a 32-bit integer.
5557     if (Alignment >= 1u << 31)
5558       return Error(AlignLoc, "alignment is too large");
5559   }
5560 
5561   if (parseEOL())
5562     return true;
5563 
5564   Symbol->redefineIfPossible();
5565   if (!Symbol->isUndefined())
5566     return Error(NameLoc, "invalid symbol redefinition");
5567 
5568   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5569   return false;
5570 }
5571 
5572 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5573   StringRef IDVal = DirectiveID.getString();
5574 
5575   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5576     if (IDVal == ".amdhsa_kernel")
5577      return ParseDirectiveAMDHSAKernel();
5578 
5579     // TODO: Restructure/combine with PAL metadata directive.
5580     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5581       return ParseDirectiveHSAMetadata();
5582   } else {
5583     if (IDVal == ".hsa_code_object_version")
5584       return ParseDirectiveHSACodeObjectVersion();
5585 
5586     if (IDVal == ".hsa_code_object_isa")
5587       return ParseDirectiveHSACodeObjectISA();
5588 
5589     if (IDVal == ".amd_kernel_code_t")
5590       return ParseDirectiveAMDKernelCodeT();
5591 
5592     if (IDVal == ".amdgpu_hsa_kernel")
5593       return ParseDirectiveAMDGPUHsaKernel();
5594 
5595     if (IDVal == ".amd_amdgpu_isa")
5596       return ParseDirectiveISAVersion();
5597 
5598     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5599       return ParseDirectiveHSAMetadata();
5600   }
5601 
5602   if (IDVal == ".amdgcn_target")
5603     return ParseDirectiveAMDGCNTarget();
5604 
5605   if (IDVal == ".amdgpu_lds")
5606     return ParseDirectiveAMDGPULDS();
5607 
5608   if (IDVal == PALMD::AssemblerDirectiveBegin)
5609     return ParseDirectivePALMetadataBegin();
5610 
5611   if (IDVal == PALMD::AssemblerDirective)
5612     return ParseDirectivePALMetadata();
5613 
5614   return true;
5615 }
5616 
5617 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5618                                            unsigned RegNo) {
5619 
5620   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5621     return isGFX9Plus();
5622 
5623   // GFX10+ has 2 more SGPRs 104 and 105.
5624   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5625     return hasSGPR104_SGPR105();
5626 
5627   switch (RegNo) {
5628   case AMDGPU::SRC_SHARED_BASE:
5629   case AMDGPU::SRC_SHARED_LIMIT:
5630   case AMDGPU::SRC_PRIVATE_BASE:
5631   case AMDGPU::SRC_PRIVATE_LIMIT:
5632     return isGFX9Plus();
5633   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5634     return isGFX9Plus() && !isGFX11Plus();
5635   case AMDGPU::TBA:
5636   case AMDGPU::TBA_LO:
5637   case AMDGPU::TBA_HI:
5638   case AMDGPU::TMA:
5639   case AMDGPU::TMA_LO:
5640   case AMDGPU::TMA_HI:
5641     return !isGFX9Plus();
5642   case AMDGPU::XNACK_MASK:
5643   case AMDGPU::XNACK_MASK_LO:
5644   case AMDGPU::XNACK_MASK_HI:
5645     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5646   case AMDGPU::SGPR_NULL:
5647     return isGFX10Plus();
5648   default:
5649     break;
5650   }
5651 
5652   if (isCI())
5653     return true;
5654 
5655   if (isSI() || isGFX10Plus()) {
5656     // No flat_scr on SI.
5657     // On GFX10Plus flat scratch is not a valid register operand and can only be
5658     // accessed with s_setreg/s_getreg.
5659     switch (RegNo) {
5660     case AMDGPU::FLAT_SCR:
5661     case AMDGPU::FLAT_SCR_LO:
5662     case AMDGPU::FLAT_SCR_HI:
5663       return false;
5664     default:
5665       return true;
5666     }
5667   }
5668 
5669   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5670   // SI/CI have.
5671   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5672     return hasSGPR102_SGPR103();
5673 
5674   return true;
5675 }
5676 
5677 OperandMatchResultTy
5678 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5679                               OperandMode Mode) {
5680   OperandMatchResultTy ResTy = parseVOPD(Operands);
5681   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5682       isToken(AsmToken::EndOfStatement))
5683     return ResTy;
5684 
5685   // Try to parse with a custom parser
5686   ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5687 
5688   // If we successfully parsed the operand or if there as an error parsing,
5689   // we are done.
5690   //
5691   // If we are parsing after we reach EndOfStatement then this means we
5692   // are appending default values to the Operands list.  This is only done
5693   // by custom parser, so we shouldn't continue on to the generic parsing.
5694   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5695       isToken(AsmToken::EndOfStatement))
5696     return ResTy;
5697 
5698   SMLoc RBraceLoc;
5699   SMLoc LBraceLoc = getLoc();
5700   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5701     unsigned Prefix = Operands.size();
5702 
5703     for (;;) {
5704       auto Loc = getLoc();
5705       ResTy = parseReg(Operands);
5706       if (ResTy == MatchOperand_NoMatch)
5707         Error(Loc, "expected a register");
5708       if (ResTy != MatchOperand_Success)
5709         return MatchOperand_ParseFail;
5710 
5711       RBraceLoc = getLoc();
5712       if (trySkipToken(AsmToken::RBrac))
5713         break;
5714 
5715       if (!skipToken(AsmToken::Comma,
5716                      "expected a comma or a closing square bracket")) {
5717         return MatchOperand_ParseFail;
5718       }
5719     }
5720 
5721     if (Operands.size() - Prefix > 1) {
5722       Operands.insert(Operands.begin() + Prefix,
5723                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5724       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5725     }
5726 
5727     return MatchOperand_Success;
5728   }
5729 
5730   return parseRegOrImm(Operands);
5731 }
5732 
5733 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5734   // Clear any forced encodings from the previous instruction.
5735   setForcedEncodingSize(0);
5736   setForcedDPP(false);
5737   setForcedSDWA(false);
5738 
5739   if (Name.endswith("_e64_dpp")) {
5740     setForcedDPP(true);
5741     setForcedEncodingSize(64);
5742     return Name.substr(0, Name.size() - 8);
5743   } else if (Name.endswith("_e64")) {
5744     setForcedEncodingSize(64);
5745     return Name.substr(0, Name.size() - 4);
5746   } else if (Name.endswith("_e32")) {
5747     setForcedEncodingSize(32);
5748     return Name.substr(0, Name.size() - 4);
5749   } else if (Name.endswith("_dpp")) {
5750     setForcedDPP(true);
5751     return Name.substr(0, Name.size() - 4);
5752   } else if (Name.endswith("_sdwa")) {
5753     setForcedSDWA(true);
5754     return Name.substr(0, Name.size() - 5);
5755   }
5756   return Name;
5757 }
5758 
5759 static void applyMnemonicAliases(StringRef &Mnemonic,
5760                                  const FeatureBitset &Features,
5761                                  unsigned VariantID);
5762 
5763 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5764                                        StringRef Name,
5765                                        SMLoc NameLoc, OperandVector &Operands) {
5766   // Add the instruction mnemonic
5767   Name = parseMnemonicSuffix(Name);
5768 
5769   // If the target architecture uses MnemonicAlias, call it here to parse
5770   // operands correctly.
5771   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5772 
5773   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5774 
5775   bool IsMIMG = Name.startswith("image_");
5776 
5777   while (!trySkipToken(AsmToken::EndOfStatement)) {
5778     OperandMode Mode = OperandMode_Default;
5779     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5780       Mode = OperandMode_NSA;
5781     CPolSeen = 0;
5782     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5783 
5784     if (Res != MatchOperand_Success) {
5785       checkUnsupportedInstruction(Name, NameLoc);
5786       if (!Parser.hasPendingError()) {
5787         // FIXME: use real operand location rather than the current location.
5788         StringRef Msg =
5789           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5790                                             "not a valid operand.";
5791         Error(getLoc(), Msg);
5792       }
5793       while (!trySkipToken(AsmToken::EndOfStatement)) {
5794         lex();
5795       }
5796       return true;
5797     }
5798 
5799     // Eat the comma or space if there is one.
5800     trySkipToken(AsmToken::Comma);
5801   }
5802 
5803   return false;
5804 }
5805 
5806 //===----------------------------------------------------------------------===//
5807 // Utility functions
5808 //===----------------------------------------------------------------------===//
5809 
5810 OperandMatchResultTy
5811 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5812 
5813   if (!trySkipId(Prefix, AsmToken::Colon))
5814     return MatchOperand_NoMatch;
5815 
5816   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5817 }
5818 
5819 OperandMatchResultTy
5820 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5821                                     AMDGPUOperand::ImmTy ImmTy,
5822                                     bool (*ConvertResult)(int64_t&)) {
5823   SMLoc S = getLoc();
5824   int64_t Value = 0;
5825 
5826   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5827   if (Res != MatchOperand_Success)
5828     return Res;
5829 
5830   if (ConvertResult && !ConvertResult(Value)) {
5831     Error(S, "invalid " + StringRef(Prefix) + " value.");
5832   }
5833 
5834   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5835   return MatchOperand_Success;
5836 }
5837 
5838 OperandMatchResultTy
5839 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5840                                              OperandVector &Operands,
5841                                              AMDGPUOperand::ImmTy ImmTy,
5842                                              bool (*ConvertResult)(int64_t&)) {
5843   SMLoc S = getLoc();
5844   if (!trySkipId(Prefix, AsmToken::Colon))
5845     return MatchOperand_NoMatch;
5846 
5847   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5848     return MatchOperand_ParseFail;
5849 
5850   unsigned Val = 0;
5851   const unsigned MaxSize = 4;
5852 
5853   // FIXME: How to verify the number of elements matches the number of src
5854   // operands?
5855   for (int I = 0; ; ++I) {
5856     int64_t Op;
5857     SMLoc Loc = getLoc();
5858     if (!parseExpr(Op))
5859       return MatchOperand_ParseFail;
5860 
5861     if (Op != 0 && Op != 1) {
5862       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5863       return MatchOperand_ParseFail;
5864     }
5865 
5866     Val |= (Op << I);
5867 
5868     if (trySkipToken(AsmToken::RBrac))
5869       break;
5870 
5871     if (I + 1 == MaxSize) {
5872       Error(getLoc(), "expected a closing square bracket");
5873       return MatchOperand_ParseFail;
5874     }
5875 
5876     if (!skipToken(AsmToken::Comma, "expected a comma"))
5877       return MatchOperand_ParseFail;
5878   }
5879 
5880   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5881   return MatchOperand_Success;
5882 }
5883 
5884 OperandMatchResultTy
5885 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5886                                AMDGPUOperand::ImmTy ImmTy) {
5887   int64_t Bit;
5888   SMLoc S = getLoc();
5889 
5890   if (trySkipId(Name)) {
5891     Bit = 1;
5892   } else if (trySkipId("no", Name)) {
5893     Bit = 0;
5894   } else {
5895     return MatchOperand_NoMatch;
5896   }
5897 
5898   if (Name == "r128" && !hasMIMG_R128()) {
5899     Error(S, "r128 modifier is not supported on this GPU");
5900     return MatchOperand_ParseFail;
5901   }
5902   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5903     Error(S, "a16 modifier is not supported on this GPU");
5904     return MatchOperand_ParseFail;
5905   }
5906 
5907   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5908     ImmTy = AMDGPUOperand::ImmTyR128A16;
5909 
5910   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5911   return MatchOperand_Success;
5912 }
5913 
5914 OperandMatchResultTy
5915 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5916   unsigned CPolOn = 0;
5917   unsigned CPolOff = 0;
5918   SMLoc S = getLoc();
5919 
5920   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5921   if (isGFX940() && !Mnemo.startswith("s_")) {
5922     if (trySkipId("sc0"))
5923       CPolOn = AMDGPU::CPol::SC0;
5924     else if (trySkipId("nosc0"))
5925       CPolOff = AMDGPU::CPol::SC0;
5926     else if (trySkipId("nt"))
5927       CPolOn = AMDGPU::CPol::NT;
5928     else if (trySkipId("nont"))
5929       CPolOff = AMDGPU::CPol::NT;
5930     else if (trySkipId("sc1"))
5931       CPolOn = AMDGPU::CPol::SC1;
5932     else if (trySkipId("nosc1"))
5933       CPolOff = AMDGPU::CPol::SC1;
5934     else
5935       return MatchOperand_NoMatch;
5936   }
5937   else if (trySkipId("glc"))
5938     CPolOn = AMDGPU::CPol::GLC;
5939   else if (trySkipId("noglc"))
5940     CPolOff = AMDGPU::CPol::GLC;
5941   else if (trySkipId("slc"))
5942     CPolOn = AMDGPU::CPol::SLC;
5943   else if (trySkipId("noslc"))
5944     CPolOff = AMDGPU::CPol::SLC;
5945   else if (trySkipId("dlc"))
5946     CPolOn = AMDGPU::CPol::DLC;
5947   else if (trySkipId("nodlc"))
5948     CPolOff = AMDGPU::CPol::DLC;
5949   else if (trySkipId("scc"))
5950     CPolOn = AMDGPU::CPol::SCC;
5951   else if (trySkipId("noscc"))
5952     CPolOff = AMDGPU::CPol::SCC;
5953   else
5954     return MatchOperand_NoMatch;
5955 
5956   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5957     Error(S, "dlc modifier is not supported on this GPU");
5958     return MatchOperand_ParseFail;
5959   }
5960 
5961   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5962     Error(S, "scc modifier is not supported on this GPU");
5963     return MatchOperand_ParseFail;
5964   }
5965 
5966   if (CPolSeen & (CPolOn | CPolOff)) {
5967     Error(S, "duplicate cache policy modifier");
5968     return MatchOperand_ParseFail;
5969   }
5970 
5971   CPolSeen |= (CPolOn | CPolOff);
5972 
5973   for (unsigned I = 1; I != Operands.size(); ++I) {
5974     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5975     if (Op.isCPol()) {
5976       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5977       return MatchOperand_Success;
5978     }
5979   }
5980 
5981   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5982                                               AMDGPUOperand::ImmTyCPol));
5983 
5984   return MatchOperand_Success;
5985 }
5986 
5987 static void addOptionalImmOperand(
5988   MCInst& Inst, const OperandVector& Operands,
5989   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5990   AMDGPUOperand::ImmTy ImmT,
5991   int64_t Default = 0) {
5992   auto i = OptionalIdx.find(ImmT);
5993   if (i != OptionalIdx.end()) {
5994     unsigned Idx = i->second;
5995     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5996   } else {
5997     Inst.addOperand(MCOperand::createImm(Default));
5998   }
5999 }
6000 
6001 OperandMatchResultTy
6002 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6003                                        StringRef &Value,
6004                                        SMLoc &StringLoc) {
6005   if (!trySkipId(Prefix, AsmToken::Colon))
6006     return MatchOperand_NoMatch;
6007 
6008   StringLoc = getLoc();
6009   return parseId(Value, "expected an identifier") ? MatchOperand_Success
6010                                                   : MatchOperand_ParseFail;
6011 }
6012 
6013 //===----------------------------------------------------------------------===//
6014 // MTBUF format
6015 //===----------------------------------------------------------------------===//
6016 
6017 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6018                                   int64_t MaxVal,
6019                                   int64_t &Fmt) {
6020   int64_t Val;
6021   SMLoc Loc = getLoc();
6022 
6023   auto Res = parseIntWithPrefix(Pref, Val);
6024   if (Res == MatchOperand_ParseFail)
6025     return false;
6026   if (Res == MatchOperand_NoMatch)
6027     return true;
6028 
6029   if (Val < 0 || Val > MaxVal) {
6030     Error(Loc, Twine("out of range ", StringRef(Pref)));
6031     return false;
6032   }
6033 
6034   Fmt = Val;
6035   return true;
6036 }
6037 
6038 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6039 // values to live in a joint format operand in the MCInst encoding.
6040 OperandMatchResultTy
6041 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6042   using namespace llvm::AMDGPU::MTBUFFormat;
6043 
6044   int64_t Dfmt = DFMT_UNDEF;
6045   int64_t Nfmt = NFMT_UNDEF;
6046 
6047   // dfmt and nfmt can appear in either order, and each is optional.
6048   for (int I = 0; I < 2; ++I) {
6049     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6050       return MatchOperand_ParseFail;
6051 
6052     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
6053       return MatchOperand_ParseFail;
6054     }
6055     // Skip optional comma between dfmt/nfmt
6056     // but guard against 2 commas following each other.
6057     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6058         !peekToken().is(AsmToken::Comma)) {
6059       trySkipToken(AsmToken::Comma);
6060     }
6061   }
6062 
6063   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6064     return MatchOperand_NoMatch;
6065 
6066   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6067   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6068 
6069   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6070   return MatchOperand_Success;
6071 }
6072 
6073 OperandMatchResultTy
6074 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6075   using namespace llvm::AMDGPU::MTBUFFormat;
6076 
6077   int64_t Fmt = UFMT_UNDEF;
6078 
6079   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6080     return MatchOperand_ParseFail;
6081 
6082   if (Fmt == UFMT_UNDEF)
6083     return MatchOperand_NoMatch;
6084 
6085   Format = Fmt;
6086   return MatchOperand_Success;
6087 }
6088 
6089 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6090                                     int64_t &Nfmt,
6091                                     StringRef FormatStr,
6092                                     SMLoc Loc) {
6093   using namespace llvm::AMDGPU::MTBUFFormat;
6094   int64_t Format;
6095 
6096   Format = getDfmt(FormatStr);
6097   if (Format != DFMT_UNDEF) {
6098     Dfmt = Format;
6099     return true;
6100   }
6101 
6102   Format = getNfmt(FormatStr, getSTI());
6103   if (Format != NFMT_UNDEF) {
6104     Nfmt = Format;
6105     return true;
6106   }
6107 
6108   Error(Loc, "unsupported format");
6109   return false;
6110 }
6111 
6112 OperandMatchResultTy
6113 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6114                                           SMLoc FormatLoc,
6115                                           int64_t &Format) {
6116   using namespace llvm::AMDGPU::MTBUFFormat;
6117 
6118   int64_t Dfmt = DFMT_UNDEF;
6119   int64_t Nfmt = NFMT_UNDEF;
6120   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6121     return MatchOperand_ParseFail;
6122 
6123   if (trySkipToken(AsmToken::Comma)) {
6124     StringRef Str;
6125     SMLoc Loc = getLoc();
6126     if (!parseId(Str, "expected a format string") ||
6127         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6128       return MatchOperand_ParseFail;
6129     }
6130     if (Dfmt == DFMT_UNDEF) {
6131       Error(Loc, "duplicate numeric format");
6132       return MatchOperand_ParseFail;
6133     } else if (Nfmt == NFMT_UNDEF) {
6134       Error(Loc, "duplicate data format");
6135       return MatchOperand_ParseFail;
6136     }
6137   }
6138 
6139   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6140   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6141 
6142   if (isGFX10Plus()) {
6143     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6144     if (Ufmt == UFMT_UNDEF) {
6145       Error(FormatLoc, "unsupported format");
6146       return MatchOperand_ParseFail;
6147     }
6148     Format = Ufmt;
6149   } else {
6150     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6151   }
6152 
6153   return MatchOperand_Success;
6154 }
6155 
6156 OperandMatchResultTy
6157 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6158                                             SMLoc Loc,
6159                                             int64_t &Format) {
6160   using namespace llvm::AMDGPU::MTBUFFormat;
6161 
6162   auto Id = getUnifiedFormat(FormatStr, getSTI());
6163   if (Id == UFMT_UNDEF)
6164     return MatchOperand_NoMatch;
6165 
6166   if (!isGFX10Plus()) {
6167     Error(Loc, "unified format is not supported on this GPU");
6168     return MatchOperand_ParseFail;
6169   }
6170 
6171   Format = Id;
6172   return MatchOperand_Success;
6173 }
6174 
6175 OperandMatchResultTy
6176 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6177   using namespace llvm::AMDGPU::MTBUFFormat;
6178   SMLoc Loc = getLoc();
6179 
6180   if (!parseExpr(Format))
6181     return MatchOperand_ParseFail;
6182   if (!isValidFormatEncoding(Format, getSTI())) {
6183     Error(Loc, "out of range format");
6184     return MatchOperand_ParseFail;
6185   }
6186 
6187   return MatchOperand_Success;
6188 }
6189 
6190 OperandMatchResultTy
6191 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6192   using namespace llvm::AMDGPU::MTBUFFormat;
6193 
6194   if (!trySkipId("format", AsmToken::Colon))
6195     return MatchOperand_NoMatch;
6196 
6197   if (trySkipToken(AsmToken::LBrac)) {
6198     StringRef FormatStr;
6199     SMLoc Loc = getLoc();
6200     if (!parseId(FormatStr, "expected a format string"))
6201       return MatchOperand_ParseFail;
6202 
6203     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6204     if (Res == MatchOperand_NoMatch)
6205       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6206     if (Res != MatchOperand_Success)
6207       return Res;
6208 
6209     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6210       return MatchOperand_ParseFail;
6211 
6212     return MatchOperand_Success;
6213   }
6214 
6215   return parseNumericFormat(Format);
6216 }
6217 
6218 OperandMatchResultTy
6219 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6220   using namespace llvm::AMDGPU::MTBUFFormat;
6221 
6222   int64_t Format = getDefaultFormatEncoding(getSTI());
6223   OperandMatchResultTy Res;
6224   SMLoc Loc = getLoc();
6225 
6226   // Parse legacy format syntax.
6227   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6228   if (Res == MatchOperand_ParseFail)
6229     return Res;
6230 
6231   bool FormatFound = (Res == MatchOperand_Success);
6232 
6233   Operands.push_back(
6234     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6235 
6236   if (FormatFound)
6237     trySkipToken(AsmToken::Comma);
6238 
6239   if (isToken(AsmToken::EndOfStatement)) {
6240     // We are expecting an soffset operand,
6241     // but let matcher handle the error.
6242     return MatchOperand_Success;
6243   }
6244 
6245   // Parse soffset.
6246   Res = parseRegOrImm(Operands);
6247   if (Res != MatchOperand_Success)
6248     return Res;
6249 
6250   trySkipToken(AsmToken::Comma);
6251 
6252   if (!FormatFound) {
6253     Res = parseSymbolicOrNumericFormat(Format);
6254     if (Res == MatchOperand_ParseFail)
6255       return Res;
6256     if (Res == MatchOperand_Success) {
6257       auto Size = Operands.size();
6258       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6259       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6260       Op.setImm(Format);
6261     }
6262     return MatchOperand_Success;
6263   }
6264 
6265   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6266     Error(getLoc(), "duplicate format");
6267     return MatchOperand_ParseFail;
6268   }
6269   return MatchOperand_Success;
6270 }
6271 
6272 //===----------------------------------------------------------------------===//
6273 // ds
6274 //===----------------------------------------------------------------------===//
6275 
6276 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6277                                     const OperandVector &Operands) {
6278   OptionalImmIndexMap OptionalIdx;
6279 
6280   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6281     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6282 
6283     // Add the register arguments
6284     if (Op.isReg()) {
6285       Op.addRegOperands(Inst, 1);
6286       continue;
6287     }
6288 
6289     // Handle optional arguments
6290     OptionalIdx[Op.getImmTy()] = i;
6291   }
6292 
6293   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6294   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6295   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6296 
6297   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6298 }
6299 
6300 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6301                                 bool IsGdsHardcoded) {
6302   OptionalImmIndexMap OptionalIdx;
6303   AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset;
6304 
6305   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6306     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6307 
6308     // Add the register arguments
6309     if (Op.isReg()) {
6310       Op.addRegOperands(Inst, 1);
6311       continue;
6312     }
6313 
6314     if (Op.isToken() && Op.getToken() == "gds") {
6315       IsGdsHardcoded = true;
6316       continue;
6317     }
6318 
6319     // Handle optional arguments
6320     OptionalIdx[Op.getImmTy()] = i;
6321 
6322     if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle)
6323       OffsetType = AMDGPUOperand::ImmTySwizzle;
6324   }
6325 
6326   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6327 
6328   if (!IsGdsHardcoded) {
6329     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6330   }
6331   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6332 }
6333 
6334 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6335   OptionalImmIndexMap OptionalIdx;
6336 
6337   unsigned OperandIdx[4];
6338   unsigned EnMask = 0;
6339   int SrcIdx = 0;
6340 
6341   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6342     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6343 
6344     // Add the register arguments
6345     if (Op.isReg()) {
6346       assert(SrcIdx < 4);
6347       OperandIdx[SrcIdx] = Inst.size();
6348       Op.addRegOperands(Inst, 1);
6349       ++SrcIdx;
6350       continue;
6351     }
6352 
6353     if (Op.isOff()) {
6354       assert(SrcIdx < 4);
6355       OperandIdx[SrcIdx] = Inst.size();
6356       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6357       ++SrcIdx;
6358       continue;
6359     }
6360 
6361     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6362       Op.addImmOperands(Inst, 1);
6363       continue;
6364     }
6365 
6366     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6367       continue;
6368 
6369     // Handle optional arguments
6370     OptionalIdx[Op.getImmTy()] = i;
6371   }
6372 
6373   assert(SrcIdx == 4);
6374 
6375   bool Compr = false;
6376   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6377     Compr = true;
6378     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6379     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6380     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6381   }
6382 
6383   for (auto i = 0; i < SrcIdx; ++i) {
6384     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6385       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6386     }
6387   }
6388 
6389   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6390   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6391 
6392   Inst.addOperand(MCOperand::createImm(EnMask));
6393 }
6394 
6395 //===----------------------------------------------------------------------===//
6396 // s_waitcnt
6397 //===----------------------------------------------------------------------===//
6398 
6399 static bool
6400 encodeCnt(
6401   const AMDGPU::IsaVersion ISA,
6402   int64_t &IntVal,
6403   int64_t CntVal,
6404   bool Saturate,
6405   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6406   unsigned (*decode)(const IsaVersion &Version, unsigned))
6407 {
6408   bool Failed = false;
6409 
6410   IntVal = encode(ISA, IntVal, CntVal);
6411   if (CntVal != decode(ISA, IntVal)) {
6412     if (Saturate) {
6413       IntVal = encode(ISA, IntVal, -1);
6414     } else {
6415       Failed = true;
6416     }
6417   }
6418   return Failed;
6419 }
6420 
6421 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6422 
6423   SMLoc CntLoc = getLoc();
6424   StringRef CntName = getTokenStr();
6425 
6426   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6427       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6428     return false;
6429 
6430   int64_t CntVal;
6431   SMLoc ValLoc = getLoc();
6432   if (!parseExpr(CntVal))
6433     return false;
6434 
6435   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6436 
6437   bool Failed = true;
6438   bool Sat = CntName.endswith("_sat");
6439 
6440   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6441     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6442   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6443     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6444   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6445     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6446   } else {
6447     Error(CntLoc, "invalid counter name " + CntName);
6448     return false;
6449   }
6450 
6451   if (Failed) {
6452     Error(ValLoc, "too large value for " + CntName);
6453     return false;
6454   }
6455 
6456   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6457     return false;
6458 
6459   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6460     if (isToken(AsmToken::EndOfStatement)) {
6461       Error(getLoc(), "expected a counter name");
6462       return false;
6463     }
6464   }
6465 
6466   return true;
6467 }
6468 
6469 OperandMatchResultTy
6470 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6471   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6472   int64_t Waitcnt = getWaitcntBitMask(ISA);
6473   SMLoc S = getLoc();
6474 
6475   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6476     while (!isToken(AsmToken::EndOfStatement)) {
6477       if (!parseCnt(Waitcnt))
6478         return MatchOperand_ParseFail;
6479     }
6480   } else {
6481     if (!parseExpr(Waitcnt))
6482       return MatchOperand_ParseFail;
6483   }
6484 
6485   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6486   return MatchOperand_Success;
6487 }
6488 
6489 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6490   SMLoc FieldLoc = getLoc();
6491   StringRef FieldName = getTokenStr();
6492   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6493       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6494     return false;
6495 
6496   SMLoc ValueLoc = getLoc();
6497   StringRef ValueName = getTokenStr();
6498   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6499       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6500     return false;
6501 
6502   unsigned Shift;
6503   if (FieldName == "instid0") {
6504     Shift = 0;
6505   } else if (FieldName == "instskip") {
6506     Shift = 4;
6507   } else if (FieldName == "instid1") {
6508     Shift = 7;
6509   } else {
6510     Error(FieldLoc, "invalid field name " + FieldName);
6511     return false;
6512   }
6513 
6514   int Value;
6515   if (Shift == 4) {
6516     // Parse values for instskip.
6517     Value = StringSwitch<int>(ValueName)
6518                 .Case("SAME", 0)
6519                 .Case("NEXT", 1)
6520                 .Case("SKIP_1", 2)
6521                 .Case("SKIP_2", 3)
6522                 .Case("SKIP_3", 4)
6523                 .Case("SKIP_4", 5)
6524                 .Default(-1);
6525   } else {
6526     // Parse values for instid0 and instid1.
6527     Value = StringSwitch<int>(ValueName)
6528                 .Case("NO_DEP", 0)
6529                 .Case("VALU_DEP_1", 1)
6530                 .Case("VALU_DEP_2", 2)
6531                 .Case("VALU_DEP_3", 3)
6532                 .Case("VALU_DEP_4", 4)
6533                 .Case("TRANS32_DEP_1", 5)
6534                 .Case("TRANS32_DEP_2", 6)
6535                 .Case("TRANS32_DEP_3", 7)
6536                 .Case("FMA_ACCUM_CYCLE_1", 8)
6537                 .Case("SALU_CYCLE_1", 9)
6538                 .Case("SALU_CYCLE_2", 10)
6539                 .Case("SALU_CYCLE_3", 11)
6540                 .Default(-1);
6541   }
6542   if (Value < 0) {
6543     Error(ValueLoc, "invalid value name " + ValueName);
6544     return false;
6545   }
6546 
6547   Delay |= Value << Shift;
6548   return true;
6549 }
6550 
6551 OperandMatchResultTy
6552 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6553   int64_t Delay = 0;
6554   SMLoc S = getLoc();
6555 
6556   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6557     do {
6558       if (!parseDelay(Delay))
6559         return MatchOperand_ParseFail;
6560     } while (trySkipToken(AsmToken::Pipe));
6561   } else {
6562     if (!parseExpr(Delay))
6563       return MatchOperand_ParseFail;
6564   }
6565 
6566   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6567   return MatchOperand_Success;
6568 }
6569 
6570 bool
6571 AMDGPUOperand::isSWaitCnt() const {
6572   return isImm();
6573 }
6574 
6575 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6576 
6577 //===----------------------------------------------------------------------===//
6578 // DepCtr
6579 //===----------------------------------------------------------------------===//
6580 
6581 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6582                                   StringRef DepCtrName) {
6583   switch (ErrorId) {
6584   case OPR_ID_UNKNOWN:
6585     Error(Loc, Twine("invalid counter name ", DepCtrName));
6586     return;
6587   case OPR_ID_UNSUPPORTED:
6588     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6589     return;
6590   case OPR_ID_DUPLICATE:
6591     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6592     return;
6593   case OPR_VAL_INVALID:
6594     Error(Loc, Twine("invalid value for ", DepCtrName));
6595     return;
6596   default:
6597     assert(false);
6598   }
6599 }
6600 
6601 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6602 
6603   using namespace llvm::AMDGPU::DepCtr;
6604 
6605   SMLoc DepCtrLoc = getLoc();
6606   StringRef DepCtrName = getTokenStr();
6607 
6608   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6609       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6610     return false;
6611 
6612   int64_t ExprVal;
6613   if (!parseExpr(ExprVal))
6614     return false;
6615 
6616   unsigned PrevOprMask = UsedOprMask;
6617   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6618 
6619   if (CntVal < 0) {
6620     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6621     return false;
6622   }
6623 
6624   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6625     return false;
6626 
6627   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6628     if (isToken(AsmToken::EndOfStatement)) {
6629       Error(getLoc(), "expected a counter name");
6630       return false;
6631     }
6632   }
6633 
6634   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6635   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6636   return true;
6637 }
6638 
6639 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6640   using namespace llvm::AMDGPU::DepCtr;
6641 
6642   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6643   SMLoc Loc = getLoc();
6644 
6645   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6646     unsigned UsedOprMask = 0;
6647     while (!isToken(AsmToken::EndOfStatement)) {
6648       if (!parseDepCtr(DepCtr, UsedOprMask))
6649         return MatchOperand_ParseFail;
6650     }
6651   } else {
6652     if (!parseExpr(DepCtr))
6653       return MatchOperand_ParseFail;
6654   }
6655 
6656   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6657   return MatchOperand_Success;
6658 }
6659 
6660 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6661 
6662 //===----------------------------------------------------------------------===//
6663 // hwreg
6664 //===----------------------------------------------------------------------===//
6665 
6666 bool
6667 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6668                                 OperandInfoTy &Offset,
6669                                 OperandInfoTy &Width) {
6670   using namespace llvm::AMDGPU::Hwreg;
6671 
6672   // The register may be specified by name or using a numeric code
6673   HwReg.Loc = getLoc();
6674   if (isToken(AsmToken::Identifier) &&
6675       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6676     HwReg.IsSymbolic = true;
6677     lex(); // skip register name
6678   } else if (!parseExpr(HwReg.Id, "a register name")) {
6679     return false;
6680   }
6681 
6682   if (trySkipToken(AsmToken::RParen))
6683     return true;
6684 
6685   // parse optional params
6686   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6687     return false;
6688 
6689   Offset.Loc = getLoc();
6690   if (!parseExpr(Offset.Id))
6691     return false;
6692 
6693   if (!skipToken(AsmToken::Comma, "expected a comma"))
6694     return false;
6695 
6696   Width.Loc = getLoc();
6697   return parseExpr(Width.Id) &&
6698          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6699 }
6700 
6701 bool
6702 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6703                                const OperandInfoTy &Offset,
6704                                const OperandInfoTy &Width) {
6705 
6706   using namespace llvm::AMDGPU::Hwreg;
6707 
6708   if (HwReg.IsSymbolic) {
6709     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6710       Error(HwReg.Loc,
6711             "specified hardware register is not supported on this GPU");
6712       return false;
6713     }
6714   } else {
6715     if (!isValidHwreg(HwReg.Id)) {
6716       Error(HwReg.Loc,
6717             "invalid code of hardware register: only 6-bit values are legal");
6718       return false;
6719     }
6720   }
6721   if (!isValidHwregOffset(Offset.Id)) {
6722     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6723     return false;
6724   }
6725   if (!isValidHwregWidth(Width.Id)) {
6726     Error(Width.Loc,
6727           "invalid bitfield width: only values from 1 to 32 are legal");
6728     return false;
6729   }
6730   return true;
6731 }
6732 
6733 OperandMatchResultTy
6734 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6735   using namespace llvm::AMDGPU::Hwreg;
6736 
6737   int64_t ImmVal = 0;
6738   SMLoc Loc = getLoc();
6739 
6740   if (trySkipId("hwreg", AsmToken::LParen)) {
6741     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6742     OperandInfoTy Offset(OFFSET_DEFAULT_);
6743     OperandInfoTy Width(WIDTH_DEFAULT_);
6744     if (parseHwregBody(HwReg, Offset, Width) &&
6745         validateHwreg(HwReg, Offset, Width)) {
6746       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6747     } else {
6748       return MatchOperand_ParseFail;
6749     }
6750   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6751     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6752       Error(Loc, "invalid immediate: only 16-bit values are legal");
6753       return MatchOperand_ParseFail;
6754     }
6755   } else {
6756     return MatchOperand_ParseFail;
6757   }
6758 
6759   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6760   return MatchOperand_Success;
6761 }
6762 
6763 bool AMDGPUOperand::isHwreg() const {
6764   return isImmTy(ImmTyHwreg);
6765 }
6766 
6767 //===----------------------------------------------------------------------===//
6768 // sendmsg
6769 //===----------------------------------------------------------------------===//
6770 
6771 bool
6772 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6773                                   OperandInfoTy &Op,
6774                                   OperandInfoTy &Stream) {
6775   using namespace llvm::AMDGPU::SendMsg;
6776 
6777   Msg.Loc = getLoc();
6778   if (isToken(AsmToken::Identifier) &&
6779       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6780     Msg.IsSymbolic = true;
6781     lex(); // skip message name
6782   } else if (!parseExpr(Msg.Id, "a message name")) {
6783     return false;
6784   }
6785 
6786   if (trySkipToken(AsmToken::Comma)) {
6787     Op.IsDefined = true;
6788     Op.Loc = getLoc();
6789     if (isToken(AsmToken::Identifier) &&
6790         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6791       lex(); // skip operation name
6792     } else if (!parseExpr(Op.Id, "an operation name")) {
6793       return false;
6794     }
6795 
6796     if (trySkipToken(AsmToken::Comma)) {
6797       Stream.IsDefined = true;
6798       Stream.Loc = getLoc();
6799       if (!parseExpr(Stream.Id))
6800         return false;
6801     }
6802   }
6803 
6804   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6805 }
6806 
6807 bool
6808 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6809                                  const OperandInfoTy &Op,
6810                                  const OperandInfoTy &Stream) {
6811   using namespace llvm::AMDGPU::SendMsg;
6812 
6813   // Validation strictness depends on whether message is specified
6814   // in a symbolic or in a numeric form. In the latter case
6815   // only encoding possibility is checked.
6816   bool Strict = Msg.IsSymbolic;
6817 
6818   if (Strict) {
6819     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6820       Error(Msg.Loc, "specified message id is not supported on this GPU");
6821       return false;
6822     }
6823   } else {
6824     if (!isValidMsgId(Msg.Id, getSTI())) {
6825       Error(Msg.Loc, "invalid message id");
6826       return false;
6827     }
6828   }
6829   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6830     if (Op.IsDefined) {
6831       Error(Op.Loc, "message does not support operations");
6832     } else {
6833       Error(Msg.Loc, "missing message operation");
6834     }
6835     return false;
6836   }
6837   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6838     Error(Op.Loc, "invalid operation id");
6839     return false;
6840   }
6841   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6842       Stream.IsDefined) {
6843     Error(Stream.Loc, "message operation does not support streams");
6844     return false;
6845   }
6846   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6847     Error(Stream.Loc, "invalid message stream id");
6848     return false;
6849   }
6850   return true;
6851 }
6852 
6853 OperandMatchResultTy
6854 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6855   using namespace llvm::AMDGPU::SendMsg;
6856 
6857   int64_t ImmVal = 0;
6858   SMLoc Loc = getLoc();
6859 
6860   if (trySkipId("sendmsg", AsmToken::LParen)) {
6861     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6862     OperandInfoTy Op(OP_NONE_);
6863     OperandInfoTy Stream(STREAM_ID_NONE_);
6864     if (parseSendMsgBody(Msg, Op, Stream) &&
6865         validateSendMsg(Msg, Op, Stream)) {
6866       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6867     } else {
6868       return MatchOperand_ParseFail;
6869     }
6870   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6871     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6872       Error(Loc, "invalid immediate: only 16-bit values are legal");
6873       return MatchOperand_ParseFail;
6874     }
6875   } else {
6876     return MatchOperand_ParseFail;
6877   }
6878 
6879   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6880   return MatchOperand_Success;
6881 }
6882 
6883 bool AMDGPUOperand::isSendMsg() const {
6884   return isImmTy(ImmTySendMsg);
6885 }
6886 
6887 //===----------------------------------------------------------------------===//
6888 // v_interp
6889 //===----------------------------------------------------------------------===//
6890 
6891 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6892   StringRef Str;
6893   SMLoc S = getLoc();
6894 
6895   if (!parseId(Str))
6896     return MatchOperand_NoMatch;
6897 
6898   int Slot = StringSwitch<int>(Str)
6899     .Case("p10", 0)
6900     .Case("p20", 1)
6901     .Case("p0", 2)
6902     .Default(-1);
6903 
6904   if (Slot == -1) {
6905     Error(S, "invalid interpolation slot");
6906     return MatchOperand_ParseFail;
6907   }
6908 
6909   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6910                                               AMDGPUOperand::ImmTyInterpSlot));
6911   return MatchOperand_Success;
6912 }
6913 
6914 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6915   StringRef Str;
6916   SMLoc S = getLoc();
6917 
6918   if (!parseId(Str))
6919     return MatchOperand_NoMatch;
6920 
6921   if (!Str.startswith("attr")) {
6922     Error(S, "invalid interpolation attribute");
6923     return MatchOperand_ParseFail;
6924   }
6925 
6926   StringRef Chan = Str.take_back(2);
6927   int AttrChan = StringSwitch<int>(Chan)
6928     .Case(".x", 0)
6929     .Case(".y", 1)
6930     .Case(".z", 2)
6931     .Case(".w", 3)
6932     .Default(-1);
6933   if (AttrChan == -1) {
6934     Error(S, "invalid or missing interpolation attribute channel");
6935     return MatchOperand_ParseFail;
6936   }
6937 
6938   Str = Str.drop_back(2).drop_front(4);
6939 
6940   uint8_t Attr;
6941   if (Str.getAsInteger(10, Attr)) {
6942     Error(S, "invalid or missing interpolation attribute number");
6943     return MatchOperand_ParseFail;
6944   }
6945 
6946   if (Attr > 63) {
6947     Error(S, "out of bounds interpolation attribute number");
6948     return MatchOperand_ParseFail;
6949   }
6950 
6951   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6952 
6953   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6954                                               AMDGPUOperand::ImmTyInterpAttr));
6955   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6956                                               AMDGPUOperand::ImmTyAttrChan));
6957   return MatchOperand_Success;
6958 }
6959 
6960 //===----------------------------------------------------------------------===//
6961 // exp
6962 //===----------------------------------------------------------------------===//
6963 
6964 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6965   using namespace llvm::AMDGPU::Exp;
6966 
6967   StringRef Str;
6968   SMLoc S = getLoc();
6969 
6970   if (!parseId(Str))
6971     return MatchOperand_NoMatch;
6972 
6973   unsigned Id = getTgtId(Str);
6974   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6975     Error(S, (Id == ET_INVALID) ?
6976                 "invalid exp target" :
6977                 "exp target is not supported on this GPU");
6978     return MatchOperand_ParseFail;
6979   }
6980 
6981   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6982                                               AMDGPUOperand::ImmTyExpTgt));
6983   return MatchOperand_Success;
6984 }
6985 
6986 //===----------------------------------------------------------------------===//
6987 // parser helpers
6988 //===----------------------------------------------------------------------===//
6989 
6990 bool
6991 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6992   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6993 }
6994 
6995 bool
6996 AMDGPUAsmParser::isId(const StringRef Id) const {
6997   return isId(getToken(), Id);
6998 }
6999 
7000 bool
7001 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7002   return getTokenKind() == Kind;
7003 }
7004 
7005 bool
7006 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7007   if (isId(Id)) {
7008     lex();
7009     return true;
7010   }
7011   return false;
7012 }
7013 
7014 bool
7015 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7016   if (isToken(AsmToken::Identifier)) {
7017     StringRef Tok = getTokenStr();
7018     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
7019       lex();
7020       return true;
7021     }
7022   }
7023   return false;
7024 }
7025 
7026 bool
7027 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7028   if (isId(Id) && peekToken().is(Kind)) {
7029     lex();
7030     lex();
7031     return true;
7032   }
7033   return false;
7034 }
7035 
7036 bool
7037 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7038   if (isToken(Kind)) {
7039     lex();
7040     return true;
7041   }
7042   return false;
7043 }
7044 
7045 bool
7046 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7047                            const StringRef ErrMsg) {
7048   if (!trySkipToken(Kind)) {
7049     Error(getLoc(), ErrMsg);
7050     return false;
7051   }
7052   return true;
7053 }
7054 
7055 bool
7056 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7057   SMLoc S = getLoc();
7058 
7059   const MCExpr *Expr;
7060   if (Parser.parseExpression(Expr))
7061     return false;
7062 
7063   if (Expr->evaluateAsAbsolute(Imm))
7064     return true;
7065 
7066   if (Expected.empty()) {
7067     Error(S, "expected absolute expression");
7068   } else {
7069     Error(S, Twine("expected ", Expected) +
7070              Twine(" or an absolute expression"));
7071   }
7072   return false;
7073 }
7074 
7075 bool
7076 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7077   SMLoc S = getLoc();
7078 
7079   const MCExpr *Expr;
7080   if (Parser.parseExpression(Expr))
7081     return false;
7082 
7083   int64_t IntVal;
7084   if (Expr->evaluateAsAbsolute(IntVal)) {
7085     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7086   } else {
7087     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7088   }
7089   return true;
7090 }
7091 
7092 bool
7093 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7094   if (isToken(AsmToken::String)) {
7095     Val = getToken().getStringContents();
7096     lex();
7097     return true;
7098   } else {
7099     Error(getLoc(), ErrMsg);
7100     return false;
7101   }
7102 }
7103 
7104 bool
7105 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7106   if (isToken(AsmToken::Identifier)) {
7107     Val = getTokenStr();
7108     lex();
7109     return true;
7110   } else {
7111     if (!ErrMsg.empty())
7112       Error(getLoc(), ErrMsg);
7113     return false;
7114   }
7115 }
7116 
7117 AsmToken
7118 AMDGPUAsmParser::getToken() const {
7119   return Parser.getTok();
7120 }
7121 
7122 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7123   return isToken(AsmToken::EndOfStatement)
7124              ? getToken()
7125              : getLexer().peekTok(ShouldSkipSpace);
7126 }
7127 
7128 void
7129 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7130   auto TokCount = getLexer().peekTokens(Tokens);
7131 
7132   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7133     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7134 }
7135 
7136 AsmToken::TokenKind
7137 AMDGPUAsmParser::getTokenKind() const {
7138   return getLexer().getKind();
7139 }
7140 
7141 SMLoc
7142 AMDGPUAsmParser::getLoc() const {
7143   return getToken().getLoc();
7144 }
7145 
7146 StringRef
7147 AMDGPUAsmParser::getTokenStr() const {
7148   return getToken().getString();
7149 }
7150 
7151 void
7152 AMDGPUAsmParser::lex() {
7153   Parser.Lex();
7154 }
7155 
7156 SMLoc
7157 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7158                                const OperandVector &Operands) const {
7159   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7160     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7161     if (Test(Op))
7162       return Op.getStartLoc();
7163   }
7164   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7165 }
7166 
7167 SMLoc
7168 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7169                            const OperandVector &Operands) const {
7170   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7171   return getOperandLoc(Test, Operands);
7172 }
7173 
7174 SMLoc
7175 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7176                            const OperandVector &Operands) const {
7177   auto Test = [=](const AMDGPUOperand& Op) {
7178     return Op.isRegKind() && Op.getReg() == Reg;
7179   };
7180   return getOperandLoc(Test, Operands);
7181 }
7182 
7183 SMLoc
7184 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7185   auto Test = [](const AMDGPUOperand& Op) {
7186     return Op.IsImmKindLiteral() || Op.isExpr();
7187   };
7188   return getOperandLoc(Test, Operands);
7189 }
7190 
7191 SMLoc
7192 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7193   auto Test = [](const AMDGPUOperand& Op) {
7194     return Op.isImmKindConst();
7195   };
7196   return getOperandLoc(Test, Operands);
7197 }
7198 
7199 //===----------------------------------------------------------------------===//
7200 // swizzle
7201 //===----------------------------------------------------------------------===//
7202 
7203 LLVM_READNONE
7204 static unsigned
7205 encodeBitmaskPerm(const unsigned AndMask,
7206                   const unsigned OrMask,
7207                   const unsigned XorMask) {
7208   using namespace llvm::AMDGPU::Swizzle;
7209 
7210   return BITMASK_PERM_ENC |
7211          (AndMask << BITMASK_AND_SHIFT) |
7212          (OrMask  << BITMASK_OR_SHIFT)  |
7213          (XorMask << BITMASK_XOR_SHIFT);
7214 }
7215 
7216 bool
7217 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7218                                      const unsigned MinVal,
7219                                      const unsigned MaxVal,
7220                                      const StringRef ErrMsg,
7221                                      SMLoc &Loc) {
7222   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7223     return false;
7224   }
7225   Loc = getLoc();
7226   if (!parseExpr(Op)) {
7227     return false;
7228   }
7229   if (Op < MinVal || Op > MaxVal) {
7230     Error(Loc, ErrMsg);
7231     return false;
7232   }
7233 
7234   return true;
7235 }
7236 
7237 bool
7238 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7239                                       const unsigned MinVal,
7240                                       const unsigned MaxVal,
7241                                       const StringRef ErrMsg) {
7242   SMLoc Loc;
7243   for (unsigned i = 0; i < OpNum; ++i) {
7244     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7245       return false;
7246   }
7247 
7248   return true;
7249 }
7250 
7251 bool
7252 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7253   using namespace llvm::AMDGPU::Swizzle;
7254 
7255   int64_t Lane[LANE_NUM];
7256   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7257                            "expected a 2-bit lane id")) {
7258     Imm = QUAD_PERM_ENC;
7259     for (unsigned I = 0; I < LANE_NUM; ++I) {
7260       Imm |= Lane[I] << (LANE_SHIFT * I);
7261     }
7262     return true;
7263   }
7264   return false;
7265 }
7266 
7267 bool
7268 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7269   using namespace llvm::AMDGPU::Swizzle;
7270 
7271   SMLoc Loc;
7272   int64_t GroupSize;
7273   int64_t LaneIdx;
7274 
7275   if (!parseSwizzleOperand(GroupSize,
7276                            2, 32,
7277                            "group size must be in the interval [2,32]",
7278                            Loc)) {
7279     return false;
7280   }
7281   if (!isPowerOf2_64(GroupSize)) {
7282     Error(Loc, "group size must be a power of two");
7283     return false;
7284   }
7285   if (parseSwizzleOperand(LaneIdx,
7286                           0, GroupSize - 1,
7287                           "lane id must be in the interval [0,group size - 1]",
7288                           Loc)) {
7289     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7290     return true;
7291   }
7292   return false;
7293 }
7294 
7295 bool
7296 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7297   using namespace llvm::AMDGPU::Swizzle;
7298 
7299   SMLoc Loc;
7300   int64_t GroupSize;
7301 
7302   if (!parseSwizzleOperand(GroupSize,
7303                            2, 32,
7304                            "group size must be in the interval [2,32]",
7305                            Loc)) {
7306     return false;
7307   }
7308   if (!isPowerOf2_64(GroupSize)) {
7309     Error(Loc, "group size must be a power of two");
7310     return false;
7311   }
7312 
7313   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7314   return true;
7315 }
7316 
7317 bool
7318 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7319   using namespace llvm::AMDGPU::Swizzle;
7320 
7321   SMLoc Loc;
7322   int64_t GroupSize;
7323 
7324   if (!parseSwizzleOperand(GroupSize,
7325                            1, 16,
7326                            "group size must be in the interval [1,16]",
7327                            Loc)) {
7328     return false;
7329   }
7330   if (!isPowerOf2_64(GroupSize)) {
7331     Error(Loc, "group size must be a power of two");
7332     return false;
7333   }
7334 
7335   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7336   return true;
7337 }
7338 
7339 bool
7340 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7341   using namespace llvm::AMDGPU::Swizzle;
7342 
7343   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7344     return false;
7345   }
7346 
7347   StringRef Ctl;
7348   SMLoc StrLoc = getLoc();
7349   if (!parseString(Ctl)) {
7350     return false;
7351   }
7352   if (Ctl.size() != BITMASK_WIDTH) {
7353     Error(StrLoc, "expected a 5-character mask");
7354     return false;
7355   }
7356 
7357   unsigned AndMask = 0;
7358   unsigned OrMask = 0;
7359   unsigned XorMask = 0;
7360 
7361   for (size_t i = 0; i < Ctl.size(); ++i) {
7362     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7363     switch(Ctl[i]) {
7364     default:
7365       Error(StrLoc, "invalid mask");
7366       return false;
7367     case '0':
7368       break;
7369     case '1':
7370       OrMask |= Mask;
7371       break;
7372     case 'p':
7373       AndMask |= Mask;
7374       break;
7375     case 'i':
7376       AndMask |= Mask;
7377       XorMask |= Mask;
7378       break;
7379     }
7380   }
7381 
7382   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7383   return true;
7384 }
7385 
7386 bool
7387 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7388 
7389   SMLoc OffsetLoc = getLoc();
7390 
7391   if (!parseExpr(Imm, "a swizzle macro")) {
7392     return false;
7393   }
7394   if (!isUInt<16>(Imm)) {
7395     Error(OffsetLoc, "expected a 16-bit offset");
7396     return false;
7397   }
7398   return true;
7399 }
7400 
7401 bool
7402 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7403   using namespace llvm::AMDGPU::Swizzle;
7404 
7405   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
7406 
7407     SMLoc ModeLoc = getLoc();
7408     bool Ok = false;
7409 
7410     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7411       Ok = parseSwizzleQuadPerm(Imm);
7412     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7413       Ok = parseSwizzleBitmaskPerm(Imm);
7414     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7415       Ok = parseSwizzleBroadcast(Imm);
7416     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7417       Ok = parseSwizzleSwap(Imm);
7418     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7419       Ok = parseSwizzleReverse(Imm);
7420     } else {
7421       Error(ModeLoc, "expected a swizzle mode");
7422     }
7423 
7424     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
7425   }
7426 
7427   return false;
7428 }
7429 
7430 OperandMatchResultTy
7431 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7432   SMLoc S = getLoc();
7433   int64_t Imm = 0;
7434 
7435   if (trySkipId("offset")) {
7436 
7437     bool Ok = false;
7438     if (skipToken(AsmToken::Colon, "expected a colon")) {
7439       if (trySkipId("swizzle")) {
7440         Ok = parseSwizzleMacro(Imm);
7441       } else {
7442         Ok = parseSwizzleOffset(Imm);
7443       }
7444     }
7445 
7446     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7447 
7448     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7449   } else {
7450     // Swizzle "offset" operand is optional.
7451     // If it is omitted, try parsing other optional operands.
7452     return parseOptionalOpr(Operands);
7453   }
7454 }
7455 
7456 bool
7457 AMDGPUOperand::isSwizzle() const {
7458   return isImmTy(ImmTySwizzle);
7459 }
7460 
7461 //===----------------------------------------------------------------------===//
7462 // VGPR Index Mode
7463 //===----------------------------------------------------------------------===//
7464 
7465 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7466 
7467   using namespace llvm::AMDGPU::VGPRIndexMode;
7468 
7469   if (trySkipToken(AsmToken::RParen)) {
7470     return OFF;
7471   }
7472 
7473   int64_t Imm = 0;
7474 
7475   while (true) {
7476     unsigned Mode = 0;
7477     SMLoc S = getLoc();
7478 
7479     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7480       if (trySkipId(IdSymbolic[ModeId])) {
7481         Mode = 1 << ModeId;
7482         break;
7483       }
7484     }
7485 
7486     if (Mode == 0) {
7487       Error(S, (Imm == 0)?
7488                "expected a VGPR index mode or a closing parenthesis" :
7489                "expected a VGPR index mode");
7490       return UNDEF;
7491     }
7492 
7493     if (Imm & Mode) {
7494       Error(S, "duplicate VGPR index mode");
7495       return UNDEF;
7496     }
7497     Imm |= Mode;
7498 
7499     if (trySkipToken(AsmToken::RParen))
7500       break;
7501     if (!skipToken(AsmToken::Comma,
7502                    "expected a comma or a closing parenthesis"))
7503       return UNDEF;
7504   }
7505 
7506   return Imm;
7507 }
7508 
7509 OperandMatchResultTy
7510 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7511 
7512   using namespace llvm::AMDGPU::VGPRIndexMode;
7513 
7514   int64_t Imm = 0;
7515   SMLoc S = getLoc();
7516 
7517   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7518     Imm = parseGPRIdxMacro();
7519     if (Imm == UNDEF)
7520       return MatchOperand_ParseFail;
7521   } else {
7522     if (getParser().parseAbsoluteExpression(Imm))
7523       return MatchOperand_ParseFail;
7524     if (Imm < 0 || !isUInt<4>(Imm)) {
7525       Error(S, "invalid immediate: only 4-bit values are legal");
7526       return MatchOperand_ParseFail;
7527     }
7528   }
7529 
7530   Operands.push_back(
7531       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7532   return MatchOperand_Success;
7533 }
7534 
7535 bool AMDGPUOperand::isGPRIdxMode() const {
7536   return isImmTy(ImmTyGprIdxMode);
7537 }
7538 
7539 //===----------------------------------------------------------------------===//
7540 // sopp branch targets
7541 //===----------------------------------------------------------------------===//
7542 
7543 OperandMatchResultTy
7544 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7545 
7546   // Make sure we are not parsing something
7547   // that looks like a label or an expression but is not.
7548   // This will improve error messages.
7549   if (isRegister() || isModifier())
7550     return MatchOperand_NoMatch;
7551 
7552   if (!parseExpr(Operands))
7553     return MatchOperand_ParseFail;
7554 
7555   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7556   assert(Opr.isImm() || Opr.isExpr());
7557   SMLoc Loc = Opr.getStartLoc();
7558 
7559   // Currently we do not support arbitrary expressions as branch targets.
7560   // Only labels and absolute expressions are accepted.
7561   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7562     Error(Loc, "expected an absolute expression or a label");
7563   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7564     Error(Loc, "expected a 16-bit signed jump offset");
7565   }
7566 
7567   return MatchOperand_Success;
7568 }
7569 
7570 //===----------------------------------------------------------------------===//
7571 // Boolean holding registers
7572 //===----------------------------------------------------------------------===//
7573 
7574 OperandMatchResultTy
7575 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7576   return parseReg(Operands);
7577 }
7578 
7579 //===----------------------------------------------------------------------===//
7580 // mubuf
7581 //===----------------------------------------------------------------------===//
7582 
7583 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7584   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7585 }
7586 
7587 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7588                                    const OperandVector &Operands,
7589                                    bool IsAtomic,
7590                                    bool IsLds) {
7591   OptionalImmIndexMap OptionalIdx;
7592   unsigned FirstOperandIdx = 1;
7593   bool IsAtomicReturn = false;
7594 
7595   if (IsAtomic) {
7596     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7597       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7598       if (!Op.isCPol())
7599         continue;
7600       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7601       break;
7602     }
7603 
7604     if (!IsAtomicReturn) {
7605       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7606       if (NewOpc != -1)
7607         Inst.setOpcode(NewOpc);
7608     }
7609 
7610     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7611                       SIInstrFlags::IsAtomicRet;
7612   }
7613 
7614   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7615     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7616 
7617     // Add the register arguments
7618     if (Op.isReg()) {
7619       Op.addRegOperands(Inst, 1);
7620       // Insert a tied src for atomic return dst.
7621       // This cannot be postponed as subsequent calls to
7622       // addImmOperands rely on correct number of MC operands.
7623       if (IsAtomicReturn && i == FirstOperandIdx)
7624         Op.addRegOperands(Inst, 1);
7625       continue;
7626     }
7627 
7628     // Handle the case where soffset is an immediate
7629     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7630       Op.addImmOperands(Inst, 1);
7631       continue;
7632     }
7633 
7634     // Handle tokens like 'offen' which are sometimes hard-coded into the
7635     // asm string.  There are no MCInst operands for these.
7636     if (Op.isToken()) {
7637       continue;
7638     }
7639     assert(Op.isImm());
7640 
7641     // Handle optional arguments
7642     OptionalIdx[Op.getImmTy()] = i;
7643   }
7644 
7645   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7646   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7647 
7648   if (!IsLds) { // tfe is not legal with lds opcodes
7649     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7650   }
7651   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7652 }
7653 
7654 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7655   OptionalImmIndexMap OptionalIdx;
7656 
7657   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7658     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7659 
7660     // Add the register arguments
7661     if (Op.isReg()) {
7662       Op.addRegOperands(Inst, 1);
7663       continue;
7664     }
7665 
7666     // Handle the case where soffset is an immediate
7667     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7668       Op.addImmOperands(Inst, 1);
7669       continue;
7670     }
7671 
7672     // Handle tokens like 'offen' which are sometimes hard-coded into the
7673     // asm string.  There are no MCInst operands for these.
7674     if (Op.isToken()) {
7675       continue;
7676     }
7677     assert(Op.isImm());
7678 
7679     // Handle optional arguments
7680     OptionalIdx[Op.getImmTy()] = i;
7681   }
7682 
7683   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7684                         AMDGPUOperand::ImmTyOffset);
7685   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7686   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7687   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7688   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7689 }
7690 
7691 //===----------------------------------------------------------------------===//
7692 // mimg
7693 //===----------------------------------------------------------------------===//
7694 
7695 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7696                               bool IsAtomic) {
7697   unsigned I = 1;
7698   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7699   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7700     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7701   }
7702 
7703   if (IsAtomic) {
7704     // Add src, same as dst
7705     assert(Desc.getNumDefs() == 1);
7706     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7707   }
7708 
7709   OptionalImmIndexMap OptionalIdx;
7710 
7711   for (unsigned E = Operands.size(); I != E; ++I) {
7712     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7713 
7714     // Add the register arguments
7715     if (Op.isReg()) {
7716       Op.addRegOperands(Inst, 1);
7717     } else if (Op.isImmModifier()) {
7718       OptionalIdx[Op.getImmTy()] = I;
7719     } else if (!Op.isToken()) {
7720       llvm_unreachable("unexpected operand type");
7721     }
7722   }
7723 
7724   bool IsGFX10Plus = isGFX10Plus();
7725 
7726   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7727   if (IsGFX10Plus)
7728     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7729   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7730   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7731   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7732   if (IsGFX10Plus)
7733     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7734   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7735     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7736   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7737   if (!IsGFX10Plus)
7738     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7739   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7740 }
7741 
7742 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7743   cvtMIMG(Inst, Operands, true);
7744 }
7745 
7746 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7747   OptionalImmIndexMap OptionalIdx;
7748   bool IsAtomicReturn = false;
7749 
7750   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7751     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7752     if (!Op.isCPol())
7753       continue;
7754     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7755     break;
7756   }
7757 
7758   if (!IsAtomicReturn) {
7759     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7760     if (NewOpc != -1)
7761       Inst.setOpcode(NewOpc);
7762   }
7763 
7764   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7765                     SIInstrFlags::IsAtomicRet;
7766 
7767   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7768     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7769 
7770     // Add the register arguments
7771     if (Op.isReg()) {
7772       Op.addRegOperands(Inst, 1);
7773       if (IsAtomicReturn && i == 1)
7774         Op.addRegOperands(Inst, 1);
7775       continue;
7776     }
7777 
7778     // Handle the case where soffset is an immediate
7779     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7780       Op.addImmOperands(Inst, 1);
7781       continue;
7782     }
7783 
7784     // Handle tokens like 'offen' which are sometimes hard-coded into the
7785     // asm string.  There are no MCInst operands for these.
7786     if (Op.isToken()) {
7787       continue;
7788     }
7789     assert(Op.isImm());
7790 
7791     // Handle optional arguments
7792     OptionalIdx[Op.getImmTy()] = i;
7793   }
7794 
7795   if ((int)Inst.getNumOperands() <=
7796       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7797     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7798   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7799 }
7800 
7801 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7802                                       const OperandVector &Operands) {
7803   for (unsigned I = 1; I < Operands.size(); ++I) {
7804     auto &Operand = (AMDGPUOperand &)*Operands[I];
7805     if (Operand.isReg())
7806       Operand.addRegOperands(Inst, 1);
7807   }
7808 
7809   Inst.addOperand(MCOperand::createImm(1)); // a16
7810 }
7811 
7812 //===----------------------------------------------------------------------===//
7813 // smrd
7814 //===----------------------------------------------------------------------===//
7815 
7816 bool AMDGPUOperand::isSMRDOffset8() const {
7817   return isImm() && isUInt<8>(getImm());
7818 }
7819 
7820 bool AMDGPUOperand::isSMEMOffset() const {
7821   return isImmTy(ImmTyNone) ||
7822          isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7823 }
7824 
7825 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7826   // 32-bit literals are only supported on CI and we only want to use them
7827   // when the offset is > 8-bits.
7828   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7829 }
7830 
7831 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7832   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7833 }
7834 
7835 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7836   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7837 }
7838 
7839 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7840   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7841 }
7842 
7843 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7844   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7845 }
7846 
7847 //===----------------------------------------------------------------------===//
7848 // vop3
7849 //===----------------------------------------------------------------------===//
7850 
// Convert an omod multiplier in place: 1, 2 and 4 become 0, 1 and 2.
// Any other value is rejected and left unchanged.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
7858 
// Convert an omod divisor in place: 1 becomes 0 and 2 becomes 3.
// Any other value is rejected and left unchanged.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
7872 
// Convert a bound_ctrl value in place. Only 0 and 1 are accepted, and both
// bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl != 0 && BoundCtrl != 1)
    return false;

  BoundCtrl = 1;
  return true;
}
7883 
7884 // Note: the order in this table matches the order of operands in AsmString.
7885 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7886   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7887   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7888   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7889   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7890   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7891   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7892   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7893   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7894   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7895   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7896   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7897   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7898   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7899   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7900   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7901   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7902   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7903   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7904   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7905   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7906   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7907   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7908   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7909   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7910   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7911   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7912   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7913   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7914   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7915   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7916   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7917   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7918   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7919   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7920   {"dpp8",     AMDGPUOperand::ImmTyDPP8, false, nullptr},
7921   {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr},
7922   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7923   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7924   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7925   {"fi",   AMDGPUOperand::ImmTyDppFi, false, nullptr},
7926   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7927   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7928   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7929   {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7930   {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7931 };
7932 
7933 void AMDGPUAsmParser::onBeginOfFile() {
7934   if (!getParser().getStreamer().getTargetStreamer() ||
7935       getSTI().getTargetTriple().getArch() == Triple::r600)
7936     return;
7937 
7938   if (!getTargetStreamer().getTargetID())
7939     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7940 
7941   if (isHsaAbiVersion3AndAbove(&getSTI()))
7942     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7943 }
7944 
7945 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7946 
7947   OperandMatchResultTy res = parseOptionalOpr(Operands);
7948 
7949   // This is a hack to enable hardcoded mandatory operands which follow
7950   // optional operands.
7951   //
7952   // Current design assumes that all operands after the first optional operand
7953   // are also optional. However implementation of some instructions violates
7954   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
7955   //
7956   // To alleviate this problem, we have to (implicitly) parse extra operands
7957   // to make sure autogenerated parser of custom operands never hit hardcoded
7958   // mandatory operands.
7959 
7960   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7961     if (res != MatchOperand_Success ||
7962         isToken(AsmToken::EndOfStatement))
7963       break;
7964 
7965     trySkipToken(AsmToken::Comma);
7966     res = parseOptionalOpr(Operands);
7967   }
7968 
7969   return res;
7970 }
7971 
7972 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7973   OperandMatchResultTy res;
7974   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7975     // try to parse any optional operand here
7976     if (Op.IsBit) {
7977       res = parseNamedBit(Op.Name, Operands, Op.Type);
7978     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7979       res = parseOModOperand(Operands);
7980     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7981                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7982                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7983       res = parseSDWASel(Operands, Op.Name, Op.Type);
7984     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7985       res = parseSDWADstUnused(Operands);
7986     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7987                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7988                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7989                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7990       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7991                                         Op.ConvertResult);
7992     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7993       res = parseDim(Operands);
7994     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7995       res = parseCPol(Operands);
7996     } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) {
7997       res = parseDPP8(Operands);
7998     } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) {
7999       res = parseDPPCtrl(Operands);
8000     } else {
8001       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
8002       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
8003         res = parseOperandArrayWithPrefix("neg", Operands,
8004                                           AMDGPUOperand::ImmTyBLGP,
8005                                           nullptr);
8006       }
8007     }
8008     if (res != MatchOperand_NoMatch) {
8009       return res;
8010     }
8011   }
8012   return MatchOperand_NoMatch;
8013 }
8014 
8015 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
8016   StringRef Name = getTokenStr();
8017   if (Name == "mul") {
8018     return parseIntWithPrefix("mul", Operands,
8019                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8020   }
8021 
8022   if (Name == "div") {
8023     return parseIntWithPrefix("div", Operands,
8024                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8025   }
8026 
8027   return MatchOperand_NoMatch;
8028 }
8029 
8030 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8031 // the number of src operands present, then copies that bit into src0_modifiers.
8032 void cvtVOP3DstOpSelOnly(MCInst &Inst) {
8033   int Opc = Inst.getOpcode();
8034   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8035   if (OpSelIdx == -1)
8036     return;
8037 
8038   int SrcNum;
8039   const int Ops[] = { AMDGPU::OpName::src0,
8040                       AMDGPU::OpName::src1,
8041                       AMDGPU::OpName::src2 };
8042   for (SrcNum = 0;
8043        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
8044        ++SrcNum);
8045   assert(SrcNum > 0);
8046 
8047   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8048 
8049   if ((OpSel & (1 << SrcNum)) != 0) {
8050     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8051     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8052     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8053   }
8054 }
8055 
8056 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8057                                    const OperandVector &Operands) {
8058   cvtVOP3P(Inst, Operands);
8059   cvtVOP3DstOpSelOnly(Inst);
8060 }
8061 
8062 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8063                                    OptionalImmIndexMap &OptionalIdx) {
8064   cvtVOP3P(Inst, Operands, OptionalIdx);
8065   cvtVOP3DstOpSelOnly(Inst);
8066 }
8067 
8068 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8069       // 1. This operand is input modifiers
8070   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8071       // 2. This is not last operand
8072       && Desc.NumOperands > (OpNum + 1)
8073       // 3. Next operand is register class
8074       && Desc.OpInfo[OpNum + 1].RegClass != -1
8075       // 4. Next register is not tied to any other operand
8076       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
8077 }
8078 
8079 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8080 {
8081   OptionalImmIndexMap OptionalIdx;
8082   unsigned Opc = Inst.getOpcode();
8083 
8084   unsigned I = 1;
8085   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8086   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8087     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8088   }
8089 
8090   for (unsigned E = Operands.size(); I != E; ++I) {
8091     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8092     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8093       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8094     } else if (Op.isInterpSlot() ||
8095                Op.isInterpAttr() ||
8096                Op.isAttrChan()) {
8097       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8098     } else if (Op.isImmModifier()) {
8099       OptionalIdx[Op.getImmTy()] = I;
8100     } else {
8101       llvm_unreachable("unhandled operand type");
8102     }
8103   }
8104 
8105   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
8106     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
8107   }
8108 
8109   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8110     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8111   }
8112 
8113   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8114     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8115   }
8116 }
8117 
8118 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8119 {
8120   OptionalImmIndexMap OptionalIdx;
8121   unsigned Opc = Inst.getOpcode();
8122 
8123   unsigned I = 1;
8124   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8125   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8126     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8127   }
8128 
8129   for (unsigned E = Operands.size(); I != E; ++I) {
8130     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8131     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8132       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8133     } else if (Op.isImmModifier()) {
8134       OptionalIdx[Op.getImmTy()] = I;
8135     } else {
8136       llvm_unreachable("unhandled operand type");
8137     }
8138   }
8139 
8140   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8141 
8142   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8143   if (OpSelIdx != -1)
8144     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8145 
8146   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8147 
8148   if (OpSelIdx == -1)
8149     return;
8150 
8151   const int Ops[] = { AMDGPU::OpName::src0,
8152                       AMDGPU::OpName::src1,
8153                       AMDGPU::OpName::src2 };
8154   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8155                          AMDGPU::OpName::src1_modifiers,
8156                          AMDGPU::OpName::src2_modifiers };
8157 
8158   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8159 
8160   for (int J = 0; J < 3; ++J) {
8161     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8162     if (OpIdx == -1)
8163       break;
8164 
8165     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8166     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8167 
8168     if ((OpSel & (1 << J)) != 0)
8169       ModVal |= SISrcMods::OP_SEL_0;
8170     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8171         (OpSel & (1 << 3)) != 0)
8172       ModVal |= SISrcMods::DST_OP_SEL;
8173 
8174     Inst.getOperand(ModIdx).setImm(ModVal);
8175   }
8176 }
8177 
8178 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8179                               OptionalImmIndexMap &OptionalIdx) {
8180   unsigned Opc = Inst.getOpcode();
8181 
8182   unsigned I = 1;
8183   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8184   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8185     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8186   }
8187 
8188   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8189     // This instruction has src modifiers
8190     for (unsigned E = Operands.size(); I != E; ++I) {
8191       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8192       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8193         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8194       } else if (Op.isImmModifier()) {
8195         OptionalIdx[Op.getImmTy()] = I;
8196       } else if (Op.isRegOrImm()) {
8197         Op.addRegOrImmOperands(Inst, 1);
8198       } else {
8199         llvm_unreachable("unhandled operand type");
8200       }
8201     }
8202   } else {
8203     // No src modifiers
8204     for (unsigned E = Operands.size(); I != E; ++I) {
8205       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8206       if (Op.isMod()) {
8207         OptionalIdx[Op.getImmTy()] = I;
8208       } else {
8209         Op.addRegOrImmOperands(Inst, 1);
8210       }
8211     }
8212   }
8213 
8214   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8215     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8216   }
8217 
8218   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8219     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8220   }
8221 
8222   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8223   // it has src2 register operand that is tied to dst operand
8224   // we don't allow modifiers for this operand in assembler so src2_modifiers
8225   // should be 0.
8226   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8227       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8228       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8229       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8230       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8231       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8232       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8233       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8234       Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
8235       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8236       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8237       Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
8238       Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
8239       Opc == AMDGPU::V_FMAC_F16_e64_gfx11) {
8240     auto it = Inst.begin();
8241     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8242     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8243     ++it;
8244     // Copy the operand to ensure it's not invalidated when Inst grows.
8245     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8246   }
8247 }
8248 
8249 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8250   OptionalImmIndexMap OptionalIdx;
8251   cvtVOP3(Inst, Operands, OptionalIdx);
8252 }
8253 
8254 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8255                                OptionalImmIndexMap &OptIdx) {
8256   const int Opc = Inst.getOpcode();
8257   const MCInstrDesc &Desc = MII.get(Opc);
8258 
8259   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8260 
8261   if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8262       Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
8263     Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8264     Inst.addOperand(Inst.getOperand(0));
8265   }
8266 
8267   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8268     assert(!IsPacked);
8269     Inst.addOperand(Inst.getOperand(0));
8270   }
8271 
8272   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8273   // instruction, and then figure out where to actually put the modifiers
8274 
8275   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8276   if (OpSelIdx != -1) {
8277     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8278   }
8279 
8280   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8281   if (OpSelHiIdx != -1) {
8282     int DefaultVal = IsPacked ? -1 : 0;
8283     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8284                           DefaultVal);
8285   }
8286 
8287   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8288   if (NegLoIdx != -1) {
8289     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8290     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8291   }
8292 
8293   const int Ops[] = { AMDGPU::OpName::src0,
8294                       AMDGPU::OpName::src1,
8295                       AMDGPU::OpName::src2 };
8296   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8297                          AMDGPU::OpName::src1_modifiers,
8298                          AMDGPU::OpName::src2_modifiers };
8299 
8300   unsigned OpSel = 0;
8301   unsigned OpSelHi = 0;
8302   unsigned NegLo = 0;
8303   unsigned NegHi = 0;
8304 
8305   if (OpSelIdx != -1)
8306     OpSel = Inst.getOperand(OpSelIdx).getImm();
8307 
8308   if (OpSelHiIdx != -1)
8309     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8310 
8311   if (NegLoIdx != -1) {
8312     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8313     NegLo = Inst.getOperand(NegLoIdx).getImm();
8314     NegHi = Inst.getOperand(NegHiIdx).getImm();
8315   }
8316 
8317   for (int J = 0; J < 3; ++J) {
8318     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8319     if (OpIdx == -1)
8320       break;
8321 
8322     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8323 
8324     if (ModIdx == -1)
8325       continue;
8326 
8327     uint32_t ModVal = 0;
8328 
8329     if ((OpSel & (1 << J)) != 0)
8330       ModVal |= SISrcMods::OP_SEL_0;
8331 
8332     if ((OpSelHi & (1 << J)) != 0)
8333       ModVal |= SISrcMods::OP_SEL_1;
8334 
8335     if ((NegLo & (1 << J)) != 0)
8336       ModVal |= SISrcMods::NEG;
8337 
8338     if ((NegHi & (1 << J)) != 0)
8339       ModVal |= SISrcMods::NEG_HI;
8340 
8341     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8342   }
8343 }
8344 
8345 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8346   OptionalImmIndexMap OptIdx;
8347   cvtVOP3(Inst, Operands, OptIdx);
8348   cvtVOP3P(Inst, Operands, OptIdx);
8349 }
8350 
8351 //===----------------------------------------------------------------------===//
8352 // VOPD
8353 //===----------------------------------------------------------------------===//
8354 
8355 OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8356   if (!hasVOPD(getSTI()))
8357     return MatchOperand_NoMatch;
8358 
8359   if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8360     SMLoc S = getLoc();
8361     lex();
8362     lex();
8363     Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8364     const MCExpr *Expr;
8365     if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) {
8366       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8367       return MatchOperand_Success;
8368     }
8369     Error(S, "invalid VOPD :: usage");
8370     return MatchOperand_ParseFail;
8371   }
8372   return MatchOperand_NoMatch;
8373 }
8374 
8375 // Create VOPD MCInst operands using parsed assembler operands.
8376 // Parsed VOPD operands are ordered as follows:
8377 //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
8378 //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
8379 // If both OpX and OpY have an imm, the first imm has a different name:
8380 //   OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::'
8381 //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
8382 // MCInst operands have the following order:
8383 //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8384 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8385   auto addOp = [&](uint16_t i) { // NOLINT:function pointer
8386     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8387     if (Op.isReg()) {
8388       Op.addRegOperands(Inst, 1);
8389       return;
8390     }
8391     if (Op.isImm()) {
8392       Op.addImmOperands(Inst, 1);
8393       return;
8394     }
8395     // Handle tokens like 'offen' which are sometimes hard-coded into the
8396     // asm string.  There are no MCInst operands for these.
8397     if (Op.isToken()) {
8398       return;
8399     }
8400     llvm_unreachable("Unhandled operand type in cvtVOPD");
8401   };
8402 
8403   // Indices into MCInst.Operands
8404   const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ...
8405   const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ...
8406   const auto MinOpYImmMCIndex = 4;   // dstX, dstY, src0X, src0Y, imm, ...
8407 
8408   unsigned Opc = Inst.getOpcode();
8409   bool HasVsrc1X =
8410       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1;
8411   bool HasImmX =
8412       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
8413       (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
8414                          FmamkOpXImmMCIndex ||
8415                      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
8416                          FmaakOpXImmMCIndex));
8417 
8418   bool HasVsrc1Y =
8419       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1;
8420   bool HasImmY =
8421       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
8422       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >=
8423           MinOpYImmMCIndex + HasVsrc1X;
8424 
8425   // Indices of parsed operands relative to dst
8426   const auto DstIdx = 0;
8427   const auto Src0Idx = 1;
8428   const auto Vsrc1OrImmIdx = 2;
8429 
8430   const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X;
8431   const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo)
8432 
8433   // Offsets into parsed operands
8434   const auto OpXFirstOperandOffset = 1;
8435   const auto OpYFirstOperandOffset =
8436       OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize;
8437 
8438   // Order of addOp calls determines MC operand order
8439   addOp(OpXFirstOperandOffset + DstIdx); // vdstX
8440   addOp(OpYFirstOperandOffset + DstIdx); // vdstY
8441 
8442   addOp(OpXFirstOperandOffset + Src0Idx); // src0X
8443   if (HasImmX) {
8444     // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak
8445     addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx);
8446     addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1);
8447   } else {
8448     if (HasVsrc1X) // all except v_mov
8449       addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X
8450   }
8451 
8452   addOp(OpYFirstOperandOffset + Src0Idx); // src0Y
8453   if (HasImmY) {
8454     // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak
8455     addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx);
8456     addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1);
8457   } else {
8458     if (HasVsrc1Y) // all except v_mov
8459       addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y
8460   }
8461 }
8462 
8463 //===----------------------------------------------------------------------===//
8464 // dpp
8465 //===----------------------------------------------------------------------===//
8466 
8467 bool AMDGPUOperand::isDPP8() const {
8468   return isImmTy(ImmTyDPP8);
8469 }
8470 
8471 bool AMDGPUOperand::isDPPCtrl() const {
8472   using namespace AMDGPU::DPP;
8473 
8474   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8475   if (result) {
8476     int64_t Imm = getImm();
8477     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8478            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8479            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8480            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8481            (Imm == DppCtrl::WAVE_SHL1) ||
8482            (Imm == DppCtrl::WAVE_ROL1) ||
8483            (Imm == DppCtrl::WAVE_SHR1) ||
8484            (Imm == DppCtrl::WAVE_ROR1) ||
8485            (Imm == DppCtrl::ROW_MIRROR) ||
8486            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8487            (Imm == DppCtrl::BCAST15) ||
8488            (Imm == DppCtrl::BCAST31) ||
8489            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8490            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8491   }
8492   return false;
8493 }
8494 
8495 //===----------------------------------------------------------------------===//
8496 // mAI
8497 //===----------------------------------------------------------------------===//
8498 
8499 bool AMDGPUOperand::isBLGP() const {
8500   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8501 }
8502 
8503 bool AMDGPUOperand::isCBSZ() const {
8504   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8505 }
8506 
8507 bool AMDGPUOperand::isABID() const {
8508   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8509 }
8510 
8511 bool AMDGPUOperand::isS16Imm() const {
8512   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8513 }
8514 
8515 bool AMDGPUOperand::isU16Imm() const {
8516   return isImm() && isUInt<16>(getImm());
8517 }
8518 
8519 //===----------------------------------------------------------------------===//
8520 // dim
8521 //===----------------------------------------------------------------------===//
8522 
8523 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8524   // We want to allow "dim:1D" etc.,
8525   // but the initial 1 is tokenized as an integer.
8526   std::string Token;
8527   if (isToken(AsmToken::Integer)) {
8528     SMLoc Loc = getToken().getEndLoc();
8529     Token = std::string(getTokenStr());
8530     lex();
8531     if (getLoc() != Loc)
8532       return false;
8533   }
8534 
8535   StringRef Suffix;
8536   if (!parseId(Suffix))
8537     return false;
8538   Token += Suffix;
8539 
8540   StringRef DimId = Token;
8541   if (DimId.startswith("SQ_RSRC_IMG_"))
8542     DimId = DimId.drop_front(12);
8543 
8544   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8545   if (!DimInfo)
8546     return false;
8547 
8548   Encoding = DimInfo->Encoding;
8549   return true;
8550 }
8551 
8552 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8553   if (!isGFX10Plus())
8554     return MatchOperand_NoMatch;
8555 
8556   SMLoc S = getLoc();
8557 
8558   if (!trySkipId("dim", AsmToken::Colon))
8559     return MatchOperand_NoMatch;
8560 
8561   unsigned Encoding;
8562   SMLoc Loc = getLoc();
8563   if (!parseDimId(Encoding)) {
8564     Error(Loc, "invalid dim value");
8565     return MatchOperand_ParseFail;
8566   }
8567 
8568   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8569                                               AMDGPUOperand::ImmTyDim));
8570   return MatchOperand_Success;
8571 }
8572 
8573 //===----------------------------------------------------------------------===//
8574 // dpp
8575 //===----------------------------------------------------------------------===//
8576 
8577 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8578   SMLoc S = getLoc();
8579 
8580   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8581     return MatchOperand_NoMatch;
8582 
8583   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8584 
8585   int64_t Sels[8];
8586 
8587   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8588     return MatchOperand_ParseFail;
8589 
8590   for (size_t i = 0; i < 8; ++i) {
8591     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8592       return MatchOperand_ParseFail;
8593 
8594     SMLoc Loc = getLoc();
8595     if (getParser().parseAbsoluteExpression(Sels[i]))
8596       return MatchOperand_ParseFail;
8597     if (0 > Sels[i] || 7 < Sels[i]) {
8598       Error(Loc, "expected a 3-bit value");
8599       return MatchOperand_ParseFail;
8600     }
8601   }
8602 
8603   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8604     return MatchOperand_ParseFail;
8605 
8606   unsigned DPP8 = 0;
8607   for (size_t i = 0; i < 8; ++i)
8608     DPP8 |= (Sels[i] << (i * 3));
8609 
8610   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8611   return MatchOperand_Success;
8612 }
8613 
8614 bool
8615 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8616                                     const OperandVector &Operands) {
8617   if (Ctrl == "row_newbcast")
8618     return isGFX90A();
8619 
8620   if (Ctrl == "row_share" ||
8621       Ctrl == "row_xmask")
8622     return isGFX10Plus();
8623 
8624   if (Ctrl == "wave_shl" ||
8625       Ctrl == "wave_shr" ||
8626       Ctrl == "wave_rol" ||
8627       Ctrl == "wave_ror" ||
8628       Ctrl == "row_bcast")
8629     return isVI() || isGFX9();
8630 
8631   return Ctrl == "row_mirror" ||
8632          Ctrl == "row_half_mirror" ||
8633          Ctrl == "quad_perm" ||
8634          Ctrl == "row_shl" ||
8635          Ctrl == "row_shr" ||
8636          Ctrl == "row_ror";
8637 }
8638 
8639 int64_t
8640 AMDGPUAsmParser::parseDPPCtrlPerm() {
8641   // quad_perm:[%d,%d,%d,%d]
8642 
8643   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8644     return -1;
8645 
8646   int64_t Val = 0;
8647   for (int i = 0; i < 4; ++i) {
8648     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8649       return -1;
8650 
8651     int64_t Temp;
8652     SMLoc Loc = getLoc();
8653     if (getParser().parseAbsoluteExpression(Temp))
8654       return -1;
8655     if (Temp < 0 || Temp > 3) {
8656       Error(Loc, "expected a 2-bit value");
8657       return -1;
8658     }
8659 
8660     Val += (Temp << i * 2);
8661   }
8662 
8663   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8664     return -1;
8665 
8666   return Val;
8667 }
8668 
8669 int64_t
8670 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8671   using namespace AMDGPU::DPP;
8672 
8673   // sel:%d
8674 
8675   int64_t Val;
8676   SMLoc Loc = getLoc();
8677 
8678   if (getParser().parseAbsoluteExpression(Val))
8679     return -1;
8680 
8681   struct DppCtrlCheck {
8682     int64_t Ctrl;
8683     int Lo;
8684     int Hi;
8685   };
8686 
8687   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8688     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8689     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8690     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8691     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8692     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8693     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8694     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8695     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8696     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8697     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8698     .Default({-1, 0, 0});
8699 
8700   bool Valid;
8701   if (Check.Ctrl == -1) {
8702     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8703     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8704   } else {
8705     Valid = Check.Lo <= Val && Val <= Check.Hi;
8706     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8707   }
8708 
8709   if (!Valid) {
8710     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8711     return -1;
8712   }
8713 
8714   return Val;
8715 }
8716 
8717 OperandMatchResultTy
8718 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8719   using namespace AMDGPU::DPP;
8720 
8721   if (!isToken(AsmToken::Identifier) ||
8722       !isSupportedDPPCtrl(getTokenStr(), Operands))
8723     return MatchOperand_NoMatch;
8724 
8725   SMLoc S = getLoc();
8726   int64_t Val = -1;
8727   StringRef Ctrl;
8728 
8729   parseId(Ctrl);
8730 
8731   if (Ctrl == "row_mirror") {
8732     Val = DppCtrl::ROW_MIRROR;
8733   } else if (Ctrl == "row_half_mirror") {
8734     Val = DppCtrl::ROW_HALF_MIRROR;
8735   } else {
8736     if (skipToken(AsmToken::Colon, "expected a colon")) {
8737       if (Ctrl == "quad_perm") {
8738         Val = parseDPPCtrlPerm();
8739       } else {
8740         Val = parseDPPCtrlSel(Ctrl);
8741       }
8742     }
8743   }
8744 
8745   if (Val == -1)
8746     return MatchOperand_ParseFail;
8747 
8748   Operands.push_back(
8749     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8750   return MatchOperand_Success;
8751 }
8752 
8753 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8754   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8755 }
8756 
8757 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8758   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8759 }
8760 
8761 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8762   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8763 }
8764 
8765 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8766   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8767 }
8768 
8769 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8770   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8771 }
8772 
8773 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8774   OptionalImmIndexMap OptionalIdx;
8775   unsigned Opc = Inst.getOpcode();
8776   bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8777   unsigned I = 1;
8778   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8779   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8780     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8781   }
8782 
8783   int Fi = 0;
8784   for (unsigned E = Operands.size(); I != E; ++I) {
8785     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8786                                             MCOI::TIED_TO);
8787     if (TiedTo != -1) {
8788       assert((unsigned)TiedTo < Inst.getNumOperands());
8789       // handle tied old or src2 for MAC instructions
8790       Inst.addOperand(Inst.getOperand(TiedTo));
8791     }
8792     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8793     // Add the register arguments
8794     if (IsDPP8 && Op.isFI()) {
8795       Fi = Op.getImm();
8796     } else if (HasModifiers &&
8797                isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8798       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8799     } else if (Op.isReg()) {
8800       Op.addRegOperands(Inst, 1);
8801     } else if (Op.isImm() &&
8802                Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) {
8803       assert(!HasModifiers && "Case should be unreachable with modifiers");
8804       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8805       Op.addImmOperands(Inst, 1);
8806     } else if (Op.isImm()) {
8807       OptionalIdx[Op.getImmTy()] = I;
8808     } else {
8809       llvm_unreachable("unhandled operand type");
8810     }
8811   }
8812   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8813     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8814   }
8815   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8816     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8817   }
8818   if (Desc.TSFlags & SIInstrFlags::VOP3P)
8819     cvtVOP3P(Inst, Operands, OptionalIdx);
8820   else if (Desc.TSFlags & SIInstrFlags::VOP3)
8821     cvtVOP3OpSel(Inst, Operands, OptionalIdx);
8822   else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
8823     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8824   }
8825 
8826   if (IsDPP8) {
8827     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8828     using namespace llvm::AMDGPU::DPP;
8829     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8830   } else {
8831     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8832     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8833     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8834     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8835     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8836       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8837     }
8838   }
8839 }
8840 
8841 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8842   OptionalImmIndexMap OptionalIdx;
8843 
8844   unsigned Opc = Inst.getOpcode();
8845   bool HasModifiers =
8846       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8847   unsigned I = 1;
8848   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8849   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8850     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8851   }
8852 
8853   int Fi = 0;
8854   for (unsigned E = Operands.size(); I != E; ++I) {
8855     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8856                                             MCOI::TIED_TO);
8857     if (TiedTo != -1) {
8858       assert((unsigned)TiedTo < Inst.getNumOperands());
8859       // handle tied old or src2 for MAC instructions
8860       Inst.addOperand(Inst.getOperand(TiedTo));
8861     }
8862     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8863     // Add the register arguments
8864     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8865       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8866       // Skip it.
8867       continue;
8868     }
8869 
8870     if (IsDPP8) {
8871       if (Op.isDPP8()) {
8872         Op.addImmOperands(Inst, 1);
8873       } else if (HasModifiers &&
8874                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8875         Op.addRegWithFPInputModsOperands(Inst, 2);
8876       } else if (Op.isFI()) {
8877         Fi = Op.getImm();
8878       } else if (Op.isReg()) {
8879         Op.addRegOperands(Inst, 1);
8880       } else {
8881         llvm_unreachable("Invalid operand type");
8882       }
8883     } else {
8884       if (HasModifiers &&
8885           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8886         Op.addRegWithFPInputModsOperands(Inst, 2);
8887       } else if (Op.isReg()) {
8888         Op.addRegOperands(Inst, 1);
8889       } else if (Op.isDPPCtrl()) {
8890         Op.addImmOperands(Inst, 1);
8891       } else if (Op.isImm()) {
8892         // Handle optional arguments
8893         OptionalIdx[Op.getImmTy()] = I;
8894       } else {
8895         llvm_unreachable("Invalid operand type");
8896       }
8897     }
8898   }
8899 
8900   if (IsDPP8) {
8901     using namespace llvm::AMDGPU::DPP;
8902     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8903   } else {
8904     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8905     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8906     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8907     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8908       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8909     }
8910   }
8911 }
8912 
8913 //===----------------------------------------------------------------------===//
8914 // sdwa
8915 //===----------------------------------------------------------------------===//
8916 
8917 OperandMatchResultTy
8918 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8919                               AMDGPUOperand::ImmTy Type) {
8920   using namespace llvm::AMDGPU::SDWA;
8921 
8922   SMLoc S = getLoc();
8923   StringRef Value;
8924   OperandMatchResultTy res;
8925 
8926   SMLoc StringLoc;
8927   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8928   if (res != MatchOperand_Success) {
8929     return res;
8930   }
8931 
8932   int64_t Int;
8933   Int = StringSwitch<int64_t>(Value)
8934         .Case("BYTE_0", SdwaSel::BYTE_0)
8935         .Case("BYTE_1", SdwaSel::BYTE_1)
8936         .Case("BYTE_2", SdwaSel::BYTE_2)
8937         .Case("BYTE_3", SdwaSel::BYTE_3)
8938         .Case("WORD_0", SdwaSel::WORD_0)
8939         .Case("WORD_1", SdwaSel::WORD_1)
8940         .Case("DWORD", SdwaSel::DWORD)
8941         .Default(0xffffffff);
8942 
8943   if (Int == 0xffffffff) {
8944     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8945     return MatchOperand_ParseFail;
8946   }
8947 
8948   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8949   return MatchOperand_Success;
8950 }
8951 
8952 OperandMatchResultTy
8953 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8954   using namespace llvm::AMDGPU::SDWA;
8955 
8956   SMLoc S = getLoc();
8957   StringRef Value;
8958   OperandMatchResultTy res;
8959 
8960   SMLoc StringLoc;
8961   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8962   if (res != MatchOperand_Success) {
8963     return res;
8964   }
8965 
8966   int64_t Int;
8967   Int = StringSwitch<int64_t>(Value)
8968         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8969         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8970         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8971         .Default(0xffffffff);
8972 
8973   if (Int == 0xffffffff) {
8974     Error(StringLoc, "invalid dst_unused value");
8975     return MatchOperand_ParseFail;
8976   }
8977 
8978   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8979   return MatchOperand_Success;
8980 }
8981 
8982 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8983   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8984 }
8985 
8986 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8987   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8988 }
8989 
8990 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8991   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8992 }
8993 
8994 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8995   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8996 }
8997 
8998 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8999   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9000 }
9001 
9002 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9003                               uint64_t BasicInstType,
9004                               bool SkipDstVcc,
9005                               bool SkipSrcVcc) {
9006   using namespace llvm::AMDGPU::SDWA;
9007 
9008   OptionalImmIndexMap OptionalIdx;
9009   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9010   bool SkippedVcc = false;
9011 
9012   unsigned I = 1;
9013   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9014   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9015     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9016   }
9017 
9018   for (unsigned E = Operands.size(); I != E; ++I) {
9019     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9020     if (SkipVcc && !SkippedVcc && Op.isReg() &&
9021         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9022       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
9023       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9024       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9025       // Skip VCC only if we didn't skip it on previous iteration.
9026       // Note that src0 and src1 occupy 2 slots each because of modifiers.
9027       if (BasicInstType == SIInstrFlags::VOP2 &&
9028           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9029            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9030         SkippedVcc = true;
9031         continue;
9032       } else if (BasicInstType == SIInstrFlags::VOPC &&
9033                  Inst.getNumOperands() == 0) {
9034         SkippedVcc = true;
9035         continue;
9036       }
9037     }
9038     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9039       Op.addRegOrImmWithInputModsOperands(Inst, 2);
9040     } else if (Op.isImm()) {
9041       // Handle optional arguments
9042       OptionalIdx[Op.getImmTy()] = I;
9043     } else {
9044       llvm_unreachable("Invalid operand type");
9045     }
9046     SkippedVcc = false;
9047   }
9048 
9049   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
9050       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
9051       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
9052     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
9053     switch (BasicInstType) {
9054     case SIInstrFlags::VOP1:
9055       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
9056                                      AMDGPU::OpName::clamp) != -1) {
9057         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9058                               AMDGPUOperand::ImmTyClampSI, 0);
9059       }
9060       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
9061                                      AMDGPU::OpName::omod) != -1) {
9062         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9063                               AMDGPUOperand::ImmTyOModSI, 0);
9064       }
9065       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
9066                                      AMDGPU::OpName::dst_sel) != -1) {
9067         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9068                               AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
9069       }
9070       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
9071                                      AMDGPU::OpName::dst_unused) != -1) {
9072         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9073                               AMDGPUOperand::ImmTySdwaDstUnused,
9074                               DstUnused::UNUSED_PRESERVE);
9075       }
9076       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9077       break;
9078 
9079     case SIInstrFlags::VOP2:
9080       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9081       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
9082         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9083       }
9084       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
9085       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
9086       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9087       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
9088       break;
9089 
9090     case SIInstrFlags::VOPC:
9091       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
9092         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9093       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9094       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
9095       break;
9096 
9097     default:
9098       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9099     }
9100   }
9101 
9102   // special case v_mac_{f16, f32}:
9103   // it has src2 register operand that is tied to dst operand
9104   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9105       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
9106     auto it = Inst.begin();
9107     std::advance(
9108       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9109     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9110   }
9111 }
9112 
9113 //===----------------------------------------------------------------------===//
9114 // mAI
9115 //===----------------------------------------------------------------------===//
9116 
9117 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
9118   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
9119 }
9120 
9121 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
9122   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
9123 }
9124 
9125 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
9126   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
9127 }
9128 
9129 /// Force static initialization.
9130 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9131   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
9132   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9133 }
9134 
9135 #define GET_REGISTER_MATCHER
9136 #define GET_MATCHER_IMPLEMENTATION
9137 #define GET_MNEMONIC_SPELL_CHECKER
9138 #define GET_MNEMONIC_CHECKER
9139 #include "AMDGPUGenAsmMatcher.inc"
9140 
9141 // This function should be defined after auto-generated include so that we have
9142 // MatchClassKind enum defined
9143 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9144                                                      unsigned Kind) {
9145   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9146   // But MatchInstructionImpl() expects to meet token and fails to validate
9147   // operand. This method checks if we are given immediate operand but expect to
9148   // get corresponding token.
9149   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9150   switch (Kind) {
9151   case MCK_addr64:
9152     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9153   case MCK_gds:
9154     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9155   case MCK_lds:
9156     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9157   case MCK_idxen:
9158     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9159   case MCK_offen:
9160     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9161   case MCK_SSrcB32:
9162     // When operands have expression values, they will return true for isToken,
9163     // because it is not possible to distinguish between a token and an
9164     // expression at parse time. MatchInstructionImpl() will always try to
9165     // match an operand as a token, when isToken returns true, and when the
9166     // name of the expression is not a valid token, the match will fail,
9167     // so we need to handle it here.
9168     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9169   case MCK_SSrcF32:
9170     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9171   case MCK_SoppBrTarget:
9172     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
9173   case MCK_VReg32OrOff:
9174     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9175   case MCK_InterpSlot:
9176     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9177   case MCK_Attr:
9178     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9179   case MCK_AttrChan:
9180     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
9181   case MCK_ImmSMEMOffset:
9182     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
9183   case MCK_SReg_64:
9184   case MCK_SReg_64_XEXEC:
9185     // Null is defined as a 32-bit register but
9186     // it should also be enabled with 64-bit operands.
9187     // The following code enables it for SReg_64 operands
9188     // used as source and destination. Remaining source
9189     // operands are handled in isInlinableImm.
9190     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9191   default:
9192     return Match_InvalidOperand;
9193   }
9194 }
9195 
9196 //===----------------------------------------------------------------------===//
9197 // endpgm
9198 //===----------------------------------------------------------------------===//
9199 
9200 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
9201   SMLoc S = getLoc();
9202   int64_t Imm = 0;
9203 
9204   if (!parseExpr(Imm)) {
9205     // The operand is optional, if not present default to 0
9206     Imm = 0;
9207   }
9208 
9209   if (!isUInt<16>(Imm)) {
9210     Error(S, "expected a 16-bit value");
9211     return MatchOperand_ParseFail;
9212   }
9213 
9214   Operands.push_back(
9215       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9216   return MatchOperand_Success;
9217 }
9218 
9219 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9220 
9221 //===----------------------------------------------------------------------===//
9222 // LDSDIR
9223 //===----------------------------------------------------------------------===//
9224 
9225 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
9226   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
9227 }
9228 
9229 bool AMDGPUOperand::isWaitVDST() const {
9230   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9231 }
9232 
9233 //===----------------------------------------------------------------------===//
9234 // VINTERP
9235 //===----------------------------------------------------------------------===//
9236 
9237 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
9238   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
9239 }
9240 
9241 bool AMDGPUOperand::isWaitEXP() const {
9242   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9243 }
9244