xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (revision d30a1689f5b37e78ea189232a8b94a7011dc0dc8)
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/TargetRegistry.h"
32 #include "llvm/Support/AMDGPUMetadata.h"
33 #include "llvm/Support/AMDHSAKernelDescriptor.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/MachineValueType.h"
36 #include "llvm/Support/TargetParser.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53   enum KindTy {
54     Token,
55     Immediate,
56     Register,
57     Expression
58   } Kind;
59 
60   SMLoc StartLoc, EndLoc;
61   const AMDGPUAsmParser *AsmParser;
62 
63 public:
64   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65       : Kind(Kind_), AsmParser(AsmParser_) {}
66 
67   using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
69   struct Modifiers {
70     bool Abs = false;
71     bool Neg = false;
72     bool Sext = false;
73 
74     bool hasFPModifiers() const { return Abs || Neg; }
75     bool hasIntModifiers() const { return Sext; }
76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78     int64_t getFPModifiersOperand() const {
79       int64_t Operand = 0;
80       Operand |= Abs ? SISrcMods::ABS : 0u;
81       Operand |= Neg ? SISrcMods::NEG : 0u;
82       return Operand;
83     }
84 
85     int64_t getIntModifiersOperand() const {
86       int64_t Operand = 0;
87       Operand |= Sext ? SISrcMods::SEXT : 0u;
88       return Operand;
89     }
90 
91     int64_t getModifiersOperand() const {
92       assert(!(hasFPModifiers() && hasIntModifiers())
93            && "fp and int modifiers should not be used simultaneously");
94       if (hasFPModifiers()) {
95         return getFPModifiersOperand();
96       } else if (hasIntModifiers()) {
97         return getIntModifiersOperand();
98       } else {
99         return 0;
100       }
101     }
102 
103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104   };
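  // Illustrative sketch (not new functionality): for a source operand written
  // as "-|v0|", the parser sets Abs = true and Neg = true, so
  // getModifiersOperand() returns SISrcMods::ABS | SISrcMods::NEG, the value
  // emitted into the corresponding src*_modifiers operand of the MCInst.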
105 
106   enum ImmTy {
107     ImmTyNone,
108     ImmTyGDS,
109     ImmTyLDS,
110     ImmTyOffen,
111     ImmTyIdxen,
112     ImmTyAddr64,
113     ImmTyOffset,
114     ImmTyInstOffset,
115     ImmTyOffset0,
116     ImmTyOffset1,
117     ImmTyCPol,
118     ImmTySWZ,
119     ImmTyTFE,
120     ImmTyD16,
121     ImmTyClampSI,
122     ImmTyOModSI,
123     ImmTyDPP8,
124     ImmTyDppCtrl,
125     ImmTyDppRowMask,
126     ImmTyDppBankMask,
127     ImmTyDppBoundCtrl,
128     ImmTyDppFi,
129     ImmTySdwaDstSel,
130     ImmTySdwaSrc0Sel,
131     ImmTySdwaSrc1Sel,
132     ImmTySdwaDstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTySwizzle,
155     ImmTyGprIdxMode,
156     ImmTyHigh,
157     ImmTyBLGP,
158     ImmTyCBSZ,
159     ImmTyABID,
160     ImmTyEndpgm,
161   };
162 
163   enum ImmKindTy {
164     ImmKindTyNone,
165     ImmKindTyLiteral,
166     ImmKindTyConst,
167   };
168 
169 private:
170   struct TokOp {
171     const char *Data;
172     unsigned Length;
173   };
174 
175   struct ImmOp {
176     int64_t Val;
177     ImmTy Type;
178     bool IsFPImm;
179     mutable ImmKindTy Kind;
180     Modifiers Mods;
181   };
182 
183   struct RegOp {
184     unsigned RegNo;
185     Modifiers Mods;
186   };
187 
188   union {
189     TokOp Tok;
190     ImmOp Imm;
191     RegOp Reg;
192     const MCExpr *Expr;
193   };
194 
195 public:
196   bool isToken() const override {
197     if (Kind == Token)
198       return true;
199 
200     // When parsing operands, we can't always tell if something was meant to be
201     // a token, like 'gds', or an expression that references a global variable.
202     // In this case, we assume the string is an expression, and if we need to
203     // interpret it as a token, then we treat the symbol name as the token.
204     return isSymbolRefExpr();
205   }
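  // Example of the ambiguity described above (a sketch): in
  //   ds_add_u32 v1, v2 gds
  // the trailing "gds" may first be captured as a symbol-reference Expression;
  // isToken()/getToken() then allow the matcher to treat the symbol name as
  // the 'gds' modifier token.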
206 
207   bool isSymbolRefExpr() const {
208     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209   }
210 
211   bool isImm() const override {
212     return Kind == Immediate;
213   }
214 
215   void setImmKindNone() const {
216     assert(isImm());
217     Imm.Kind = ImmKindTyNone;
218   }
219 
220   void setImmKindLiteral() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyLiteral;
223   }
224 
225   void setImmKindConst() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyConst;
228   }
229 
230   bool IsImmKindLiteral() const {
231     return isImm() && Imm.Kind == ImmKindTyLiteral;
232   }
233 
234   bool isImmKindConst() const {
235     return isImm() && Imm.Kind == ImmKindTyConst;
236   }
237 
238   bool isInlinableImm(MVT type) const;
239   bool isLiteralImm(MVT type) const;
240 
241   bool isRegKind() const {
242     return Kind == Register;
243   }
244 
245   bool isReg() const override {
246     return isRegKind() && !hasModifiers();
247   }
248 
249   bool isRegOrInline(unsigned RCID, MVT type) const {
250     return isRegClass(RCID) || isInlinableImm(type);
251   }
252 
253   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
254     return isRegOrInline(RCID, type) || isLiteralImm(type);
255   }
256 
257   bool isRegOrImmWithInt16InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
259   }
260 
261   bool isRegOrImmWithInt32InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
263   }
264 
265   bool isRegOrImmWithInt64InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
267   }
268 
269   bool isRegOrImmWithFP16InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
271   }
272 
273   bool isRegOrImmWithFP32InputMods() const {
274     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
275   }
276 
277   bool isRegOrImmWithFP64InputMods() const {
278     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
279   }
280 
281   bool isVReg() const {
282     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
283            isRegClass(AMDGPU::VReg_64RegClassID) ||
284            isRegClass(AMDGPU::VReg_96RegClassID) ||
285            isRegClass(AMDGPU::VReg_128RegClassID) ||
286            isRegClass(AMDGPU::VReg_160RegClassID) ||
287            isRegClass(AMDGPU::VReg_192RegClassID) ||
288            isRegClass(AMDGPU::VReg_256RegClassID) ||
289            isRegClass(AMDGPU::VReg_512RegClassID) ||
290            isRegClass(AMDGPU::VReg_1024RegClassID);
291   }
292 
293   bool isVReg32() const {
294     return isRegClass(AMDGPU::VGPR_32RegClassID);
295   }
296 
297   bool isVReg32OrOff() const {
298     return isOff() || isVReg32();
299   }
300 
301   bool isNull() const {
302     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
303   }
304 
305   bool isVRegWithInputMods() const;
306 
307   bool isSDWAOperand(MVT type) const;
308   bool isSDWAFP16Operand() const;
309   bool isSDWAFP32Operand() const;
310   bool isSDWAInt16Operand() const;
311   bool isSDWAInt32Operand() const;
312 
313   bool isImmTy(ImmTy ImmT) const {
314     return isImm() && Imm.Type == ImmT;
315   }
316 
317   bool isImmModifier() const {
318     return isImm() && Imm.Type != ImmTyNone;
319   }
320 
321   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
322   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
323   bool isDMask() const { return isImmTy(ImmTyDMask); }
324   bool isDim() const { return isImmTy(ImmTyDim); }
325   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
326   bool isDA() const { return isImmTy(ImmTyDA); }
327   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
328   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
329   bool isLWE() const { return isImmTy(ImmTyLWE); }
330   bool isOff() const { return isImmTy(ImmTyOff); }
331   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
332   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
333   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
334   bool isOffen() const { return isImmTy(ImmTyOffen); }
335   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
336   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
337   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
338   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
339   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
340 
341   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
342   bool isGDS() const { return isImmTy(ImmTyGDS); }
343   bool isLDS() const { return isImmTy(ImmTyLDS); }
344   bool isCPol() const { return isImmTy(ImmTyCPol); }
345   bool isSWZ() const { return isImmTy(ImmTySWZ); }
346   bool isTFE() const { return isImmTy(ImmTyTFE); }
347   bool isD16() const { return isImmTy(ImmTyD16); }
348   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
349   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
350   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
351   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
352   bool isFI() const { return isImmTy(ImmTyDppFi); }
353   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
354   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
355   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
356   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
357   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
358   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
359   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
360   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
361   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
362   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
363   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
364   bool isHigh() const { return isImmTy(ImmTyHigh); }
365 
366   bool isMod() const {
367     return isClampSI() || isOModSI();
368   }
369 
370   bool isRegOrImm() const {
371     return isReg() || isImm();
372   }
373 
374   bool isRegClass(unsigned RCID) const;
375 
376   bool isInlineValue() const;
377 
378   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
379     return isRegOrInline(RCID, type) && !hasModifiers();
380   }
381 
382   bool isSCSrcB16() const {
383     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
384   }
385 
386   bool isSCSrcV2B16() const {
387     return isSCSrcB16();
388   }
389 
390   bool isSCSrcB32() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
392   }
393 
394   bool isSCSrcB64() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
396   }
397 
398   bool isBoolReg() const;
399 
400   bool isSCSrcF16() const {
401     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
402   }
403 
404   bool isSCSrcV2F16() const {
405     return isSCSrcF16();
406   }
407 
408   bool isSCSrcF32() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
410   }
411 
412   bool isSCSrcF64() const {
413     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
414   }
415 
416   bool isSSrcB32() const {
417     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
418   }
419 
420   bool isSSrcB16() const {
421     return isSCSrcB16() || isLiteralImm(MVT::i16);
422   }
423 
424   bool isSSrcV2B16() const {
425     llvm_unreachable("cannot happen");
426     return isSSrcB16();
427   }
428 
429   bool isSSrcB64() const {
430     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
431     // See isVSrc64().
432     return isSCSrcB64() || isLiteralImm(MVT::i64);
433   }
434 
435   bool isSSrcF32() const {
436     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
437   }
438 
439   bool isSSrcF64() const {
440     return isSCSrcB64() || isLiteralImm(MVT::f64);
441   }
442 
443   bool isSSrcF16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::f16);
445   }
446 
447   bool isSSrcV2F16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcF16();
450   }
451 
452   bool isSSrcV2FP32() const {
453     llvm_unreachable("cannot happen");
454     return isSSrcF32();
455   }
456 
457   bool isSCSrcV2FP32() const {
458     llvm_unreachable("cannot happen");
459     return isSCSrcF32();
460   }
461 
462   bool isSSrcV2INT32() const {
463     llvm_unreachable("cannot happen");
464     return isSSrcB32();
465   }
466 
467   bool isSCSrcV2INT32() const {
468     llvm_unreachable("cannot happen");
469     return isSCSrcB32();
470   }
471 
472   bool isSSrcOrLdsB32() const {
473     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
474            isLiteralImm(MVT::i32) || isExpr();
475   }
476 
477   bool isVCSrcB32() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
479   }
480 
481   bool isVCSrcB64() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
483   }
484 
485   bool isVCSrcB16() const {
486     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
487   }
488 
489   bool isVCSrcV2B16() const {
490     return isVCSrcB16();
491   }
492 
493   bool isVCSrcF32() const {
494     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
495   }
496 
497   bool isVCSrcF64() const {
498     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
499   }
500 
501   bool isVCSrcF16() const {
502     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
503   }
504 
505   bool isVCSrcV2F16() const {
506     return isVCSrcF16();
507   }
508 
509   bool isVSrcB32() const {
510     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
511   }
512 
513   bool isVSrcB64() const {
514     return isVCSrcF64() || isLiteralImm(MVT::i64);
515   }
516 
517   bool isVSrcB16() const {
518     return isVCSrcB16() || isLiteralImm(MVT::i16);
519   }
520 
521   bool isVSrcV2B16() const {
522     return isVSrcB16() || isLiteralImm(MVT::v2i16);
523   }
524 
525   bool isVCSrcV2FP32() const {
526     return isVCSrcF64();
527   }
528 
529   bool isVSrcV2FP32() const {
530     return isVSrcF64() || isLiteralImm(MVT::v2f32);
531   }
532 
533   bool isVCSrcV2INT32() const {
534     return isVCSrcB64();
535   }
536 
537   bool isVSrcV2INT32() const {
538     return isVSrcB64() || isLiteralImm(MVT::v2i32);
539   }
540 
541   bool isVSrcF32() const {
542     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
543   }
544 
545   bool isVSrcF64() const {
546     return isVCSrcF64() || isLiteralImm(MVT::f64);
547   }
548 
549   bool isVSrcF16() const {
550     return isVCSrcF16() || isLiteralImm(MVT::f16);
551   }
552 
553   bool isVSrcV2F16() const {
554     return isVSrcF16() || isLiteralImm(MVT::v2f16);
555   }
556 
557   bool isVISrcB32() const {
558     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
559   }
560 
561   bool isVISrcB16() const {
562     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
563   }
564 
565   bool isVISrcV2B16() const {
566     return isVISrcB16();
567   }
568 
569   bool isVISrcF32() const {
570     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
571   }
572 
573   bool isVISrcF16() const {
574     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
575   }
576 
577   bool isVISrcV2F16() const {
578     return isVISrcF16() || isVISrcB32();
579   }
580 
581   bool isVISrc_64B64() const {
582     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
583   }
584 
585   bool isVISrc_64F64() const {
586     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
587   }
588 
589   bool isVISrc_64V2FP32() const {
590     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
591   }
592 
593   bool isVISrc_64V2INT32() const {
594     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
595   }
596 
597   bool isVISrc_256B64() const {
598     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
599   }
600 
601   bool isVISrc_256F64() const {
602     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
603   }
604 
605   bool isVISrc_128B16() const {
606     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
607   }
608 
609   bool isVISrc_128V2B16() const {
610     return isVISrc_128B16();
611   }
612 
613   bool isVISrc_128B32() const {
614     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
615   }
616 
617   bool isVISrc_128F32() const {
618     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
619   }
620 
621   bool isVISrc_256V2FP32() const {
622     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
623   }
624 
625   bool isVISrc_256V2INT32() const {
626     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
627   }
628 
629   bool isVISrc_512B32() const {
630     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
631   }
632 
633   bool isVISrc_512B16() const {
634     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
635   }
636 
637   bool isVISrc_512V2B16() const {
638     return isVISrc_512B16();
639   }
640 
641   bool isVISrc_512F32() const {
642     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
643   }
644 
645   bool isVISrc_512F16() const {
646     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
647   }
648 
649   bool isVISrc_512V2F16() const {
650     return isVISrc_512F16() || isVISrc_512B32();
651   }
652 
653   bool isVISrc_1024B32() const {
654     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
655   }
656 
657   bool isVISrc_1024B16() const {
658     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
659   }
660 
661   bool isVISrc_1024V2B16() const {
662     return isVISrc_1024B16();
663   }
664 
665   bool isVISrc_1024F32() const {
666     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
667   }
668 
669   bool isVISrc_1024F16() const {
670     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
671   }
672 
673   bool isVISrc_1024V2F16() const {
674     return isVISrc_1024F16() || isVISrc_1024B32();
675   }
676 
677   bool isAISrcB32() const {
678     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
679   }
680 
681   bool isAISrcB16() const {
682     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
683   }
684 
685   bool isAISrcV2B16() const {
686     return isAISrcB16();
687   }
688 
689   bool isAISrcF32() const {
690     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
691   }
692 
693   bool isAISrcF16() const {
694     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
695   }
696 
697   bool isAISrcV2F16() const {
698     return isAISrcF16() || isAISrcB32();
699   }
700 
701   bool isAISrc_64B64() const {
702     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
703   }
704 
705   bool isAISrc_64F64() const {
706     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
707   }
708 
709   bool isAISrc_128B32() const {
710     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
711   }
712 
713   bool isAISrc_128B16() const {
714     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
715   }
716 
717   bool isAISrc_128V2B16() const {
718     return isAISrc_128B16();
719   }
720 
721   bool isAISrc_128F32() const {
722     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
723   }
724 
725   bool isAISrc_128F16() const {
726     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
727   }
728 
729   bool isAISrc_128V2F16() const {
730     return isAISrc_128F16() || isAISrc_128B32();
731   }
732 
733   bool isVISrc_128F16() const {
734     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
735   }
736 
737   bool isVISrc_128V2F16() const {
738     return isVISrc_128F16() || isVISrc_128B32();
739   }
740 
741   bool isAISrc_256B64() const {
742     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
743   }
744 
745   bool isAISrc_256F64() const {
746     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
747   }
748 
749   bool isAISrc_512B32() const {
750     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
751   }
752 
753   bool isAISrc_512B16() const {
754     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
755   }
756 
757   bool isAISrc_512V2B16() const {
758     return isAISrc_512B16();
759   }
760 
761   bool isAISrc_512F32() const {
762     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
763   }
764 
765   bool isAISrc_512F16() const {
766     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
767   }
768 
769   bool isAISrc_512V2F16() const {
770     return isAISrc_512F16() || isAISrc_512B32();
771   }
772 
773   bool isAISrc_1024B32() const {
774     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
775   }
776 
777   bool isAISrc_1024B16() const {
778     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
779   }
780 
781   bool isAISrc_1024V2B16() const {
782     return isAISrc_1024B16();
783   }
784 
785   bool isAISrc_1024F32() const {
786     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
787   }
788 
789   bool isAISrc_1024F16() const {
790     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
791   }
792 
793   bool isAISrc_1024V2F16() const {
794     return isAISrc_1024F16() || isAISrc_1024B32();
795   }
796 
797   bool isKImmFP32() const {
798     return isLiteralImm(MVT::f32);
799   }
800 
801   bool isKImmFP16() const {
802     return isLiteralImm(MVT::f16);
803   }
804 
805   bool isMem() const override {
806     return false;
807   }
808 
809   bool isExpr() const {
810     return Kind == Expression;
811   }
812 
813   bool isSoppBrTarget() const {
814     return isExpr() || isImm();
815   }
816 
817   bool isSWaitCnt() const;
818   bool isHwreg() const;
819   bool isSendMsg() const;
820   bool isSwizzle() const;
821   bool isSMRDOffset8() const;
822   bool isSMEMOffset() const;
823   bool isSMRDLiteralOffset() const;
824   bool isDPP8() const;
825   bool isDPPCtrl() const;
826   bool isBLGP() const;
827   bool isCBSZ() const;
828   bool isABID() const;
829   bool isGPRIdxMode() const;
830   bool isS16Imm() const;
831   bool isU16Imm() const;
832   bool isEndpgm() const;
833 
834   StringRef getExpressionAsToken() const {
835     assert(isExpr());
836     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
837     return S->getSymbol().getName();
838   }
839 
840   StringRef getToken() const {
841     assert(isToken());
842 
843     if (Kind == Expression)
844       return getExpressionAsToken();
845 
846     return StringRef(Tok.Data, Tok.Length);
847   }
848 
849   int64_t getImm() const {
850     assert(isImm());
851     return Imm.Val;
852   }
853 
854   void setImm(int64_t Val) {
855     assert(isImm());
856     Imm.Val = Val;
857   }
858 
859   ImmTy getImmTy() const {
860     assert(isImm());
861     return Imm.Type;
862   }
863 
864   unsigned getReg() const override {
865     assert(isRegKind());
866     return Reg.RegNo;
867   }
868 
869   SMLoc getStartLoc() const override {
870     return StartLoc;
871   }
872 
873   SMLoc getEndLoc() const override {
874     return EndLoc;
875   }
876 
877   SMRange getLocRange() const {
878     return SMRange(StartLoc, EndLoc);
879   }
880 
881   Modifiers getModifiers() const {
882     assert(isRegKind() || isImmTy(ImmTyNone));
883     return isRegKind() ? Reg.Mods : Imm.Mods;
884   }
885 
886   void setModifiers(Modifiers Mods) {
887     assert(isRegKind() || isImmTy(ImmTyNone));
888     if (isRegKind())
889       Reg.Mods = Mods;
890     else
891       Imm.Mods = Mods;
892   }
893 
894   bool hasModifiers() const {
895     return getModifiers().hasModifiers();
896   }
897 
898   bool hasFPModifiers() const {
899     return getModifiers().hasFPModifiers();
900   }
901 
902   bool hasIntModifiers() const {
903     return getModifiers().hasIntModifiers();
904   }
905 
906   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
907 
908   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
909 
910   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
911 
912   template <unsigned Bitwidth>
913   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
914 
915   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
916     addKImmFPOperands<16>(Inst, N);
917   }
918 
919   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
920     addKImmFPOperands<32>(Inst, N);
921   }
922 
923   void addRegOperands(MCInst &Inst, unsigned N) const;
924 
925   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
926     addRegOperands(Inst, N);
927   }
928 
929   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
930     if (isRegKind())
931       addRegOperands(Inst, N);
932     else if (isExpr())
933       Inst.addOperand(MCOperand::createExpr(Expr));
934     else
935       addImmOperands(Inst, N);
936   }
937 
938   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
939     Modifiers Mods = getModifiers();
940     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
941     if (isRegKind()) {
942       addRegOperands(Inst, N);
943     } else {
944       addImmOperands(Inst, N, false);
945     }
946   }
947 
948   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
949     assert(!hasIntModifiers());
950     addRegOrImmWithInputModsOperands(Inst, N);
951   }
952 
953   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
954     assert(!hasFPModifiers());
955     addRegOrImmWithInputModsOperands(Inst, N);
956   }
957 
958   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
959     Modifiers Mods = getModifiers();
960     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
961     assert(isRegKind());
962     addRegOperands(Inst, N);
963   }
964 
965   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
966     assert(!hasIntModifiers());
967     addRegWithInputModsOperands(Inst, N);
968   }
969 
970   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
971     assert(!hasFPModifiers());
972     addRegWithInputModsOperands(Inst, N);
973   }
974 
975   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
976     if (isImm())
977       addImmOperands(Inst, N);
978     else {
979       assert(isExpr());
980       Inst.addOperand(MCOperand::createExpr(Expr));
981     }
982   }
983 
984   static void printImmTy(raw_ostream& OS, ImmTy Type) {
985     switch (Type) {
986     case ImmTyNone: OS << "None"; break;
987     case ImmTyGDS: OS << "GDS"; break;
988     case ImmTyLDS: OS << "LDS"; break;
989     case ImmTyOffen: OS << "Offen"; break;
990     case ImmTyIdxen: OS << "Idxen"; break;
991     case ImmTyAddr64: OS << "Addr64"; break;
992     case ImmTyOffset: OS << "Offset"; break;
993     case ImmTyInstOffset: OS << "InstOffset"; break;
994     case ImmTyOffset0: OS << "Offset0"; break;
995     case ImmTyOffset1: OS << "Offset1"; break;
996     case ImmTyCPol: OS << "CPol"; break;
997     case ImmTySWZ: OS << "SWZ"; break;
998     case ImmTyTFE: OS << "TFE"; break;
999     case ImmTyD16: OS << "D16"; break;
1000     case ImmTyFORMAT: OS << "FORMAT"; break;
1001     case ImmTyClampSI: OS << "ClampSI"; break;
1002     case ImmTyOModSI: OS << "OModSI"; break;
1003     case ImmTyDPP8: OS << "DPP8"; break;
1004     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1005     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1006     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1007     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1008     case ImmTyDppFi: OS << "FI"; break;
1009     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1010     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1011     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1012     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1013     case ImmTyDMask: OS << "DMask"; break;
1014     case ImmTyDim: OS << "Dim"; break;
1015     case ImmTyUNorm: OS << "UNorm"; break;
1016     case ImmTyDA: OS << "DA"; break;
1017     case ImmTyR128A16: OS << "R128A16"; break;
1018     case ImmTyA16: OS << "A16"; break;
1019     case ImmTyLWE: OS << "LWE"; break;
1020     case ImmTyOff: OS << "Off"; break;
1021     case ImmTyExpTgt: OS << "ExpTgt"; break;
1022     case ImmTyExpCompr: OS << "ExpCompr"; break;
1023     case ImmTyExpVM: OS << "ExpVM"; break;
1024     case ImmTyHwreg: OS << "Hwreg"; break;
1025     case ImmTySendMsg: OS << "SendMsg"; break;
1026     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1027     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1028     case ImmTyAttrChan: OS << "AttrChan"; break;
1029     case ImmTyOpSel: OS << "OpSel"; break;
1030     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1031     case ImmTyNegLo: OS << "NegLo"; break;
1032     case ImmTyNegHi: OS << "NegHi"; break;
1033     case ImmTySwizzle: OS << "Swizzle"; break;
1034     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1035     case ImmTyHigh: OS << "High"; break;
1036     case ImmTyBLGP: OS << "BLGP"; break;
1037     case ImmTyCBSZ: OS << "CBSZ"; break;
1038     case ImmTyABID: OS << "ABID"; break;
1039     case ImmTyEndpgm: OS << "Endpgm"; break;
1040     }
1041   }
1042 
1043   void print(raw_ostream &OS) const override {
1044     switch (Kind) {
1045     case Register:
1046       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1047       break;
1048     case Immediate:
1049       OS << '<' << getImm();
1050       if (getImmTy() != ImmTyNone) {
1051         OS << " type: "; printImmTy(OS, getImmTy());
1052       }
1053       OS << " mods: " << Imm.Mods << '>';
1054       break;
1055     case Token:
1056       OS << '\'' << getToken() << '\'';
1057       break;
1058     case Expression:
1059       OS << "<expr " << *Expr << '>';
1060       break;
1061     }
1062   }
1063 
1064   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1065                                       int64_t Val, SMLoc Loc,
1066                                       ImmTy Type = ImmTyNone,
1067                                       bool IsFPImm = false) {
1068     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1069     Op->Imm.Val = Val;
1070     Op->Imm.IsFPImm = IsFPImm;
1071     Op->Imm.Kind = ImmKindTyNone;
1072     Op->Imm.Type = Type;
1073     Op->Imm.Mods = Modifiers();
1074     Op->StartLoc = Loc;
1075     Op->EndLoc = Loc;
1076     return Op;
1077   }
1078 
1079   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1080                                         StringRef Str, SMLoc Loc,
1081                                         bool HasExplicitEncodingSize = true) {
1082     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1083     Res->Tok.Data = Str.data();
1084     Res->Tok.Length = Str.size();
1085     Res->StartLoc = Loc;
1086     Res->EndLoc = Loc;
1087     return Res;
1088   }
1089 
1090   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1091                                       unsigned RegNo, SMLoc S,
1092                                       SMLoc E) {
1093     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1094     Op->Reg.RegNo = RegNo;
1095     Op->Reg.Mods = Modifiers();
1096     Op->StartLoc = S;
1097     Op->EndLoc = E;
1098     return Op;
1099   }
1100 
1101   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1102                                        const class MCExpr *Expr, SMLoc S) {
1103     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1104     Op->Expr = Expr;
1105     Op->StartLoc = S;
1106     Op->EndLoc = S;
1107     return Op;
1108   }
1109 };
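// Typical creation pattern used by this parser (a sketch with illustrative
// local names; the real call sites appear later in this file):
//   Operands.push_back(
//       AMDGPUOperand::CreateImm(this, Value, Loc, AMDGPUOperand::ImmTyOffset));
//   Operands.push_back(AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc));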
1110 
1111 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1112   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1113   return OS;
1114 }
1115 
1116 //===----------------------------------------------------------------------===//
1117 // AsmParser
1118 //===----------------------------------------------------------------------===//
1119 
1120 // Holds info related to the current kernel, e.g. count of SGPRs used.
1121 // A kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
1122 // next .amdgpu_hsa_kernel directive or at EOF.
1123 class KernelScopeInfo {
1124   int SgprIndexUnusedMin = -1;
1125   int VgprIndexUnusedMin = -1;
1126   MCContext *Ctx = nullptr;
1127 
1128   void usesSgprAt(int i) {
1129     if (i >= SgprIndexUnusedMin) {
1130       SgprIndexUnusedMin = ++i;
1131       if (Ctx) {
1132         MCSymbol* const Sym =
1133           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1134         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1135       }
1136     }
1137   }
1138 
1139   void usesVgprAt(int i) {
1140     if (i >= VgprIndexUnusedMin) {
1141       VgprIndexUnusedMin = ++i;
1142       if (Ctx) {
1143         MCSymbol* const Sym =
1144           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1145         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1146       }
1147     }
1148   }
1149 
1150 public:
1151   KernelScopeInfo() = default;
1152 
1153   void initialize(MCContext &Context) {
1154     Ctx = &Context;
1155     usesSgprAt(SgprIndexUnusedMin = -1);
1156     usesVgprAt(VgprIndexUnusedMin = -1);
1157   }
1158 
1159   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1160     switch (RegKind) {
1161       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1162       case IS_AGPR: // fall through
1163       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1164       default: break;
1165     }
1166   }
1167 };
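// Usage sketch for KernelScopeInfo (illustrative; the kernel name and register
// indices are made up): after
//   .amdgpu_hsa_kernel my_kernel
//   v_mov_b32 v7, s3
// register parsing reports usesRegister(IS_VGPR, 7, 1) and
// usesRegister(IS_SGPR, 3, 1), so the symbols .kernel.vgpr_count and
// .kernel.sgpr_count resolve to 8 and 4 respectively.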
1168 
1169 class AMDGPUAsmParser : public MCTargetAsmParser {
1170   MCAsmParser &Parser;
1171 
1172   // Number of extra operands parsed after the first optional operand.
1173   // This may be necessary to skip hardcoded mandatory operands.
1174   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1175 
1176   unsigned ForcedEncodingSize = 0;
1177   bool ForcedDPP = false;
1178   bool ForcedSDWA = false;
1179   KernelScopeInfo KernelScope;
1180   unsigned CPolSeen;
1181 
1182   /// @name Auto-generated Match Functions
1183   /// {
1184 
1185 #define GET_ASSEMBLER_HEADER
1186 #include "AMDGPUGenAsmMatcher.inc"
1187 
1188   /// }
1189 
1190 private:
1191   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1192   bool OutOfRangeError(SMRange Range);
1193   /// Calculate VGPR/SGPR blocks required for a given target, reserved
1194   /// registers, and user-specified NextFreeXGPR values.
1195   ///
1196   /// \param Features [in] Target features, used for bug corrections.
1197   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1198   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1199   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1200   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1201   /// descriptor field, if valid.
1202   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1203   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1204   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1205   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1206   /// \param VGPRBlocks [out] Result VGPR block count.
1207   /// \param SGPRBlocks [out] Result SGPR block count.
1208   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1209                           bool FlatScrUsed, bool XNACKUsed,
1210                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1211                           SMRange VGPRRange, unsigned NextFreeSGPR,
1212                           SMRange SGPRRange, unsigned &VGPRBlocks,
1213                           unsigned &SGPRBlocks);
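  // Rough worked example (a sketch; the granule and encoding are
  // target-dependent): with NextFreeVGPR = 41 and a 4-register VGPR
  // allocation granule, the granulated count is ceil(41 / 4) - 1 = 10,
  // which is the value placed in VGPRBlocks.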
1214   bool ParseDirectiveAMDGCNTarget();
1215   bool ParseDirectiveAMDHSAKernel();
1216   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1217   bool ParseDirectiveHSACodeObjectVersion();
1218   bool ParseDirectiveHSACodeObjectISA();
1219   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1220   bool ParseDirectiveAMDKernelCodeT();
1221   // TODO: Possibly make subtargetHasRegister const.
1222   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1223   bool ParseDirectiveAMDGPUHsaKernel();
1224 
1225   bool ParseDirectiveISAVersion();
1226   bool ParseDirectiveHSAMetadata();
1227   bool ParseDirectivePALMetadataBegin();
1228   bool ParseDirectivePALMetadata();
1229   bool ParseDirectiveAMDGPULDS();
1230 
1231   /// Common code to parse out a block of text (typically YAML) between start and
1232   /// end directives.
1233   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1234                            const char *AssemblerDirectiveEnd,
1235                            std::string &CollectString);
1236 
1237   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1238                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1239   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1240                            unsigned &RegNum, unsigned &RegWidth,
1241                            bool RestoreOnFailure = false);
1242   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1243                            unsigned &RegNum, unsigned &RegWidth,
1244                            SmallVectorImpl<AsmToken> &Tokens);
1245   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1246                            unsigned &RegWidth,
1247                            SmallVectorImpl<AsmToken> &Tokens);
1248   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1249                            unsigned &RegWidth,
1250                            SmallVectorImpl<AsmToken> &Tokens);
1251   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1252                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1253   bool ParseRegRange(unsigned& Num, unsigned& Width);
1254   unsigned getRegularReg(RegisterKind RegKind,
1255                          unsigned RegNum,
1256                          unsigned RegWidth,
1257                          SMLoc Loc);
1258 
1259   bool isRegister();
1260   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1261   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1262   void initializeGprCountSymbol(RegisterKind RegKind);
1263   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1264                              unsigned RegWidth);
1265   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1266                     bool IsAtomic, bool IsLds = false);
1267   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1268                  bool IsGdsHardcoded);
1269 
1270 public:
1271   enum AMDGPUMatchResultTy {
1272     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1273   };
1274   enum OperandMode {
1275     OperandMode_Default,
1276     OperandMode_NSA,
1277   };
1278 
1279   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1280 
1281   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1282                const MCInstrInfo &MII,
1283                const MCTargetOptions &Options)
1284       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1285     MCAsmParserExtension::Initialize(Parser);
1286 
1287     if (getFeatureBits().none()) {
1288       // Set default features.
1289       copySTI().ToggleFeature("southern-islands");
1290     }
1291 
1292     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1293 
1294     {
1295       // TODO: make those pre-defined variables read-only.
1296       // Currently there is no suitable machinery in the core llvm-mc for this.
1297       // MCSymbol::isRedefinable is intended for another purpose, and
1298       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1299       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1300       MCContext &Ctx = getContext();
1301       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1302         MCSymbol *Sym =
1303             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1304         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1305         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1307         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1308         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1309       } else {
1310         MCSymbol *Sym =
1311             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1312         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1313         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1314         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1315         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1316         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1317       }
1318       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1319         initializeGprCountSymbol(IS_VGPR);
1320         initializeGprCountSymbol(IS_SGPR);
1321       } else
1322         KernelScope.initialize(getContext());
1323     }
1324   }
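  // Example (a sketch): assembly sources can test the pre-defined symbols
  // created above, e.g.
  //   .if .amdgcn.gfx_generation_number >= 10
  //     s_waitcnt_vscnt null, 0
  //   .endif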
1325 
1326   bool hasMIMG_R128() const {
1327     return AMDGPU::hasMIMG_R128(getSTI());
1328   }
1329 
1330   bool hasPackedD16() const {
1331     return AMDGPU::hasPackedD16(getSTI());
1332   }
1333 
1334   bool hasGFX10A16() const {
1335     return AMDGPU::hasGFX10A16(getSTI());
1336   }
1337 
1338   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1339 
1340   bool isSI() const {
1341     return AMDGPU::isSI(getSTI());
1342   }
1343 
1344   bool isCI() const {
1345     return AMDGPU::isCI(getSTI());
1346   }
1347 
1348   bool isVI() const {
1349     return AMDGPU::isVI(getSTI());
1350   }
1351 
1352   bool isGFX9() const {
1353     return AMDGPU::isGFX9(getSTI());
1354   }
1355 
1356   bool isGFX90A() const {
1357     return AMDGPU::isGFX90A(getSTI());
1358   }
1359 
1360   bool isGFX9Plus() const {
1361     return AMDGPU::isGFX9Plus(getSTI());
1362   }
1363 
1364   bool isGFX10() const {
1365     return AMDGPU::isGFX10(getSTI());
1366   }
1367 
1368   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1369 
1370   bool isGFX10_BEncoding() const {
1371     return AMDGPU::isGFX10_BEncoding(getSTI());
1372   }
1373 
1374   bool hasInv2PiInlineImm() const {
1375     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1376   }
1377 
1378   bool hasFlatOffsets() const {
1379     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1380   }
1381 
1382   bool hasArchitectedFlatScratch() const {
1383     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1384   }
1385 
1386   bool hasSGPR102_SGPR103() const {
1387     return !isVI() && !isGFX9();
1388   }
1389 
1390   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1391 
1392   bool hasIntClamp() const {
1393     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1394   }
1395 
1396   AMDGPUTargetStreamer &getTargetStreamer() {
1397     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1398     return static_cast<AMDGPUTargetStreamer &>(TS);
1399   }
1400 
1401   const MCRegisterInfo *getMRI() const {
1402     // We need this const_cast because for some reason getContext() is not const
1403     // in MCAsmParser.
1404     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1405   }
1406 
1407   const MCInstrInfo *getMII() const {
1408     return &MII;
1409   }
1410 
1411   const FeatureBitset &getFeatureBits() const {
1412     return getSTI().getFeatureBits();
1413   }
1414 
1415   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1416   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1417   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1418 
1419   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1420   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1421   bool isForcedDPP() const { return ForcedDPP; }
1422   bool isForcedSDWA() const { return ForcedSDWA; }
1423   ArrayRef<unsigned> getMatchedVariants() const;
1424   StringRef getMatchedVariantName() const;
1425 
1426   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1427   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1428                      bool RestoreOnFailure);
1429   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1430   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1431                                         SMLoc &EndLoc) override;
1432   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1433   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1434                                       unsigned Kind) override;
1435   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1436                                OperandVector &Operands, MCStreamer &Out,
1437                                uint64_t &ErrorInfo,
1438                                bool MatchingInlineAsm) override;
1439   bool ParseDirective(AsmToken DirectiveID) override;
1440   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1441                                     OperandMode Mode = OperandMode_Default);
1442   StringRef parseMnemonicSuffix(StringRef Name);
1443   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1444                         SMLoc NameLoc, OperandVector &Operands) override;
1445   //bool ProcessInstruction(MCInst &Inst);
1446 
1447   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1448 
1449   OperandMatchResultTy
1450   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1451                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1452                      bool (*ConvertResult)(int64_t &) = nullptr);
1453 
1454   OperandMatchResultTy
1455   parseOperandArrayWithPrefix(const char *Prefix,
1456                               OperandVector &Operands,
1457                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1458                               bool (*ConvertResult)(int64_t&) = nullptr);
1459 
1460   OperandMatchResultTy
1461   parseNamedBit(StringRef Name, OperandVector &Operands,
1462                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1463   OperandMatchResultTy parseCPol(OperandVector &Operands);
1464   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1465                                              StringRef &Value,
1466                                              SMLoc &StringLoc);
1467 
1468   bool isModifier();
1469   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1470   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1471   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1472   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1473   bool parseSP3NegModifier();
1474   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1475   OperandMatchResultTy parseReg(OperandVector &Operands);
1476   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1477   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1478   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1479   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1480   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1481   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1482   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1483   OperandMatchResultTy parseUfmt(int64_t &Format);
1484   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1485   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1486   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1487   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1488   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1489   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1490   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1491 
1492   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1493   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1494   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1495   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1496 
1497   bool parseCnt(int64_t &IntVal);
1498   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1499   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1500 
1501 private:
1502   struct OperandInfoTy {
1503     SMLoc Loc;
1504     int64_t Id;
1505     bool IsSymbolic = false;
1506     bool IsDefined = false;
1507 
1508     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1509   };
1510 
1511   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1512   bool validateSendMsg(const OperandInfoTy &Msg,
1513                        const OperandInfoTy &Op,
1514                        const OperandInfoTy &Stream);
1515 
1516   bool parseHwregBody(OperandInfoTy &HwReg,
1517                       OperandInfoTy &Offset,
1518                       OperandInfoTy &Width);
1519   bool validateHwreg(const OperandInfoTy &HwReg,
1520                      const OperandInfoTy &Offset,
1521                      const OperandInfoTy &Width);
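  // Operand syntax handled by the sendmsg/hwreg helpers above (illustrative
  // examples of existing AMDGPU assembler syntax, not new features):
  //   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
  //   s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)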
1522 
1523   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1524   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1525 
1526   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1527                       const OperandVector &Operands) const;
1528   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1529   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1530   SMLoc getLitLoc(const OperandVector &Operands) const;
1531   SMLoc getConstLoc(const OperandVector &Operands) const;
1532 
1533   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1534   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1535   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1536   bool validateSOPLiteral(const MCInst &Inst) const;
1537   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1538   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1539   bool validateIntClampSupported(const MCInst &Inst);
1540   bool validateMIMGAtomicDMask(const MCInst &Inst);
1541   bool validateMIMGGatherDMask(const MCInst &Inst);
1542   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1543   bool validateMIMGDataSize(const MCInst &Inst);
1544   bool validateMIMGAddrSize(const MCInst &Inst);
1545   bool validateMIMGD16(const MCInst &Inst);
1546   bool validateMIMGDim(const MCInst &Inst);
1547   bool validateMIMGMSAA(const MCInst &Inst);
1548   bool validateOpSel(const MCInst &Inst);
1549   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1550   bool validateVccOperand(unsigned Reg) const;
1551   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1552   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1553   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1554   bool validateAGPRLdSt(const MCInst &Inst) const;
1555   bool validateVGPRAlign(const MCInst &Inst) const;
1556   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1557   bool validateDivScale(const MCInst &Inst);
1558   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1559                              const SMLoc &IDLoc);
1560   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1561   unsigned getConstantBusLimit(unsigned Opcode) const;
1562   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1563   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1564   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1565 
1566   bool isSupportedMnemo(StringRef Mnemo,
1567                         const FeatureBitset &FBS);
1568   bool isSupportedMnemo(StringRef Mnemo,
1569                         const FeatureBitset &FBS,
1570                         ArrayRef<unsigned> Variants);
1571   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1572 
1573   bool isId(const StringRef Id) const;
1574   bool isId(const AsmToken &Token, const StringRef Id) const;
1575   bool isToken(const AsmToken::TokenKind Kind) const;
1576   bool trySkipId(const StringRef Id);
1577   bool trySkipId(const StringRef Pref, const StringRef Id);
1578   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1579   bool trySkipToken(const AsmToken::TokenKind Kind);
1580   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1581   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1582   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1583 
1584   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1585   AsmToken::TokenKind getTokenKind() const;
1586   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1587   bool parseExpr(OperandVector &Operands);
1588   StringRef getTokenStr() const;
1589   AsmToken peekToken();
1590   AsmToken getToken() const;
1591   SMLoc getLoc() const;
1592   void lex();
1593 
1594 public:
1595   void onBeginOfFile() override;
1596 
1597   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1598   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1599 
1600   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1601   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1602   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1603   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1604   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1605   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1606 
1607   bool parseSwizzleOperand(int64_t &Op,
1608                            const unsigned MinVal,
1609                            const unsigned MaxVal,
1610                            const StringRef ErrMsg,
1611                            SMLoc &Loc);
1612   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1613                             const unsigned MinVal,
1614                             const unsigned MaxVal,
1615                             const StringRef ErrMsg);
1616   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1617   bool parseSwizzleOffset(int64_t &Imm);
1618   bool parseSwizzleMacro(int64_t &Imm);
1619   bool parseSwizzleQuadPerm(int64_t &Imm);
1620   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1621   bool parseSwizzleBroadcast(int64_t &Imm);
1622   bool parseSwizzleSwap(int64_t &Imm);
1623   bool parseSwizzleReverse(int64_t &Imm);
1624 
1625   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1626   int64_t parseGPRIdxMacro();
1627 
1628   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1629   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1630   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1631   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1632 
1633   AMDGPUOperand::Ptr defaultCPol() const;
1634 
1635   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1636   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1637   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1638   AMDGPUOperand::Ptr defaultFlatOffset() const;
1639 
1640   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1641 
1642   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1643                OptionalImmIndexMap &OptionalIdx);
1644   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1645   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1646   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1647   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1648                 OptionalImmIndexMap &OptionalIdx);
1649 
1650   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1651 
1652   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1653                bool IsAtomic = false);
1654   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1655   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1656 
1657   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1658 
1659   bool parseDimId(unsigned &Encoding);
1660   OperandMatchResultTy parseDim(OperandVector &Operands);
1661   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1662   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1663   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1664   int64_t parseDPPCtrlSel(StringRef Ctrl);
1665   int64_t parseDPPCtrlPerm();
1666   AMDGPUOperand::Ptr defaultRowMask() const;
1667   AMDGPUOperand::Ptr defaultBankMask() const;
1668   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1669   AMDGPUOperand::Ptr defaultFI() const;
1670   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1671   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1672 
1673   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1674                                     AMDGPUOperand::ImmTy Type);
1675   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1676   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1677   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1678   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1679   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1680   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1681   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1682                uint64_t BasicInstType,
1683                bool SkipDstVcc = false,
1684                bool SkipSrcVcc = false);
1685 
1686   AMDGPUOperand::Ptr defaultBLGP() const;
1687   AMDGPUOperand::Ptr defaultCBSZ() const;
1688   AMDGPUOperand::Ptr defaultABID() const;
1689 
1690   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1691   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1692 };
1693 
1694 struct OptionalOperand {
1695   const char *Name;
1696   AMDGPUOperand::ImmTy Type;
1697   bool IsBit;
1698   bool (*ConvertResult)(int64_t&);
1699 };
1700 
1701 } // end anonymous namespace
1702 
1703 // May be called with integer type with equivalent bitwidth.
1704 static const fltSemantics *getFltSemantics(unsigned Size) {
1705   switch (Size) {
1706   case 4:
1707     return &APFloat::IEEEsingle();
1708   case 8:
1709     return &APFloat::IEEEdouble();
1710   case 2:
1711     return &APFloat::IEEEhalf();
1712   default:
1713     llvm_unreachable("unsupported fp type");
1714   }
1715 }
1716 
1717 static const fltSemantics *getFltSemantics(MVT VT) {
1718   return getFltSemantics(VT.getSizeInBits() / 8);
1719 }
1720 
1721 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1722   switch (OperandType) {
1723   case AMDGPU::OPERAND_REG_IMM_INT32:
1724   case AMDGPU::OPERAND_REG_IMM_FP32:
1725   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1726   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1727   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1728   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1729   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1730   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1731   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1732   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1733   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1734   case AMDGPU::OPERAND_KIMM32:
1735     return &APFloat::IEEEsingle();
1736   case AMDGPU::OPERAND_REG_IMM_INT64:
1737   case AMDGPU::OPERAND_REG_IMM_FP64:
1738   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1739   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1740   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1741     return &APFloat::IEEEdouble();
1742   case AMDGPU::OPERAND_REG_IMM_INT16:
1743   case AMDGPU::OPERAND_REG_IMM_FP16:
1744   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1745   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1746   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1747   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1748   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1749   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1750   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1751   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1752   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1753   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1754   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1755   case AMDGPU::OPERAND_KIMM16:
1756     return &APFloat::IEEEhalf();
1757   default:
1758     llvm_unreachable("unsupported fp type");
1759   }
1760 }
1761 
1762 //===----------------------------------------------------------------------===//
1763 // Operand
1764 //===----------------------------------------------------------------------===//
1765 
1766 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1767   bool Lost;
1768 
1769   // Convert the literal to the semantics of the requested type
1770   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1771                                                APFloat::rmNearestTiesToEven,
1772                                                &Lost);
1773   // We allow precision loss but not overflow or underflow
1774   if (Status != APFloat::opOK &&
1775       Lost &&
1776       ((Status & APFloat::opOverflow)  != 0 ||
1777        (Status & APFloat::opUnderflow) != 0)) {
1778     return false;
1779   }
1780 
1781   return true;
1782 }
1783 
1784 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1785   return isUIntN(Size, Val) || isIntN(Size, Val);
1786 }
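// Illustrative sketch of the helper above: isSafeTruncation(0xFFFF, 16) and
// isSafeTruncation(-1, 16) both hold (the value fits as either a uint16 or an
// int16), while isSafeTruncation(0x1FFFF, 16) does not.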
1787 
1788 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1789   if (VT.getScalarType() == MVT::i16) {
1790     // FP inline constants are broken for i16 operands; accept only inlinable integer values.
1791     return isInlinableIntLiteral(Val);
1792   }
1793 
1794   // f16/v2f16 operands work correctly for all values.
1795   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1796 }
1797 
1798 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1799 
1800   // This is a hack to enable named inline values like
1801   // shared_base with both 32-bit and 64-bit operands.
1802   // Note that these values are defined as
1803   // 32-bit operands only.
1804   if (isInlineValue()) {
1805     return true;
1806   }
1807 
1808   if (!isImmTy(ImmTyNone)) {
1809     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1810     return false;
1811   }
1812   // TODO: We should avoid using host float here. It would be better to
1813   // check the float bit values which is what a few other places do.
1814   // We've had bot failures before due to weird NaN support on mips hosts.
1815 
1816   APInt Literal(64, Imm.Val);
1817 
1818   if (Imm.IsFPImm) { // We got fp literal token
1819     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1820       return AMDGPU::isInlinableLiteral64(Imm.Val,
1821                                           AsmParser->hasInv2PiInlineImm());
1822     }
1823 
1824     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1825     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1826       return false;
1827 
1828     if (type.getScalarSizeInBits() == 16) {
1829       return isInlineableLiteralOp16(
1830         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1831         type, AsmParser->hasInv2PiInlineImm());
1832     }
1833 
1834     // Check if single precision literal is inlinable
1835     return AMDGPU::isInlinableLiteral32(
1836       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1837       AsmParser->hasInv2PiInlineImm());
1838   }
1839 
1840   // We got int literal token.
1841   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1842     return AMDGPU::isInlinableLiteral64(Imm.Val,
1843                                         AsmParser->hasInv2PiInlineImm());
1844   }
1845 
1846   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1847     return false;
1848   }
1849 
1850   if (type.getScalarSizeInBits() == 16) {
1851     return isInlineableLiteralOp16(
1852       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1853       type, AsmParser->hasInv2PiInlineImm());
1854   }
1855 
1856   return AMDGPU::isInlinableLiteral32(
1857     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1858     AsmParser->hasInv2PiInlineImm());
1859 }
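// Rough reminder (see Utils/AMDGPUBaseInfo for the authoritative rules): the
// inlinable constants are the integers -16..64 plus a small set of fp values
// such as 0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0, and 1/(2*pi) on targets that
// report hasInv2PiInlineImm().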
1860 
1861 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1862   // Check whether this immediate can be used as a literal operand
1863   if (!isImmTy(ImmTyNone)) {
1864     return false;
1865   }
1866 
1867   if (!Imm.IsFPImm) {
1868     // We got int literal token.
1869 
1870     if (type == MVT::f64 && hasFPModifiers()) {
1871       // FP modifiers cannot be applied to int literals while preserving the same
1872       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1873       // ambiguity, reject these cases.
1874       return false;
1875     }
1876 
1877     unsigned Size = type.getSizeInBits();
1878     if (Size == 64)
1879       Size = 32;
1880 
1881     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1882     // types.
1883     return isSafeTruncation(Imm.Val, Size);
1884   }
1885 
1886   // We got fp literal token
1887   if (type == MVT::f64) { // Expected 64-bit fp operand
1888     // The low 32 bits of the literal would be set to zero, but such literals are accepted.
1889     return true;
1890   }
1891 
1892   if (type == MVT::i64) { // Expected 64-bit int operand
1893     // We don't allow fp literals in 64-bit integer instructions. It is
1894     // unclear how we should encode them.
1895     return false;
1896   }
1897 
1898   // We allow fp literals with f16x2 operands assuming that the specified
1899   // literal goes into the lower half and the upper half is zero. We also
1900   // require that the literal can be losslessly converted to f16.
1901   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1902                      (type == MVT::v2i16)? MVT::i16 :
1903                      (type == MVT::v2f32)? MVT::f32 : type;
1904 
1905   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1906   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1907 }
1908 
1909 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1910   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1911 }
1912 
1913 bool AMDGPUOperand::isVRegWithInputMods() const {
1914   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1915          // GFX90A allows DPP on 64-bit operands.
1916          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1917           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1918 }
1919 
1920 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1921   if (AsmParser->isVI())
1922     return isVReg32();
1923   else if (AsmParser->isGFX9Plus())
1924     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1925   else
1926     return false;
1927 }
1928 
1929 bool AMDGPUOperand::isSDWAFP16Operand() const {
1930   return isSDWAOperand(MVT::f16);
1931 }
1932 
1933 bool AMDGPUOperand::isSDWAFP32Operand() const {
1934   return isSDWAOperand(MVT::f32);
1935 }
1936 
1937 bool AMDGPUOperand::isSDWAInt16Operand() const {
1938   return isSDWAOperand(MVT::i16);
1939 }
1940 
1941 bool AMDGPUOperand::isSDWAInt32Operand() const {
1942   return isSDWAOperand(MVT::i32);
1943 }
1944 
1945 bool AMDGPUOperand::isBoolReg() const {
1946   auto FB = AsmParser->getFeatureBits();
1947   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1948                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1949 }
1950 
1951 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1952 {
1953   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1954   assert(Size == 2 || Size == 4 || Size == 8);
1955 
1956   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1957 
1958   if (Imm.Mods.Abs) {
1959     Val &= ~FpSignMask;
1960   }
1961   if (Imm.Mods.Neg) {
1962     Val ^= FpSignMask;
1963   }
1964 
1965   return Val;
1966 }
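// Worked example for a 32-bit operand: FpSignMask is 0x80000000, so with the
// 'abs' modifier 0xBF800000 (-1.0f) becomes 0x3F800000 (+1.0f), and the 'neg'
// modifier simply flips that sign bit back.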
1967 
1968 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1969   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1970                              Inst.getNumOperands())) {
1971     addLiteralImmOperand(Inst, Imm.Val,
1972                          ApplyModifiers &
1973                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1974   } else {
1975     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1976     Inst.addOperand(MCOperand::createImm(Imm.Val));
1977     setImmKindNone();
1978   }
1979 }
1980 
1981 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1982   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1983   auto OpNum = Inst.getNumOperands();
1984   // Check that this operand accepts literals
1985   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1986 
1987   if (ApplyModifiers) {
1988     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1989     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1990     Val = applyInputFPModifiers(Val, Size);
1991   }
1992 
1993   APInt Literal(64, Val);
1994   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1995 
1996   if (Imm.IsFPImm) { // We got fp literal token
1997     switch (OpTy) {
1998     case AMDGPU::OPERAND_REG_IMM_INT64:
1999     case AMDGPU::OPERAND_REG_IMM_FP64:
2000     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2001     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2002     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2003       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2004                                        AsmParser->hasInv2PiInlineImm())) {
2005         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2006         setImmKindConst();
2007         return;
2008       }
2009 
2010       // Non-inlineable
2011       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2012         // For fp operands we check whether the low 32 bits are zero
2013         if (Literal.getLoBits(32) != 0) {
2014           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2015           "Can't encode literal as exact 64-bit floating-point operand. "
2016           "Low 32-bits will be set to zero");
2017         }
2018 
2019         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2020         setImmKindLiteral();
2021         return;
2022       }
2023 
2024       // We don't allow fp literals in 64-bit integer instructions. It is
2025       // unclear how we should encode them. This case should be checked earlier
2026       // in predicate methods (isLiteralImm())
2027       llvm_unreachable("fp literal in 64-bit integer instruction.");
2028 
2029     case AMDGPU::OPERAND_REG_IMM_INT32:
2030     case AMDGPU::OPERAND_REG_IMM_FP32:
2031     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2032     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2033     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2034     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2035     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2036     case AMDGPU::OPERAND_REG_IMM_INT16:
2037     case AMDGPU::OPERAND_REG_IMM_FP16:
2038     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2039     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2040     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2041     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2042     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2043     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2044     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2045     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2046     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2047     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2048     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2049     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2050     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2051     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2052     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2053     case AMDGPU::OPERAND_KIMM32:
2054     case AMDGPU::OPERAND_KIMM16: {
2055       bool lost;
2056       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2057       // Convert the literal to the operand's floating-point semantics
2058       FPLiteral.convert(*getOpFltSemantics(OpTy),
2059                         APFloat::rmNearestTiesToEven, &lost);
2060       // We allow precision loss but not overflow or underflow. This should have
2061       // been checked earlier in isLiteralImm().
2062 
2063       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2064       Inst.addOperand(MCOperand::createImm(ImmVal));
2065       setImmKindLiteral();
2066       return;
2067     }
2068     default:
2069       llvm_unreachable("invalid operand size");
2070     }
2071 
2072     return;
2073   }
2074 
2075   // We got int literal token.
2076   // Only sign extend inline immediates.
2077   switch (OpTy) {
2078   case AMDGPU::OPERAND_REG_IMM_INT32:
2079   case AMDGPU::OPERAND_REG_IMM_FP32:
2080   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2081   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2082   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2083   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2084   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2085   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2086   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2087   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2088   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2089   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2090   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2091     if (isSafeTruncation(Val, 32) &&
2092         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2093                                      AsmParser->hasInv2PiInlineImm())) {
2094       Inst.addOperand(MCOperand::createImm(Val));
2095       setImmKindConst();
2096       return;
2097     }
2098 
2099     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2100     setImmKindLiteral();
2101     return;
2102 
2103   case AMDGPU::OPERAND_REG_IMM_INT64:
2104   case AMDGPU::OPERAND_REG_IMM_FP64:
2105   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2106   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2107   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2108     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2109       Inst.addOperand(MCOperand::createImm(Val));
2110       setImmKindConst();
2111       return;
2112     }
2113 
2114     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2115     setImmKindLiteral();
2116     return;
2117 
2118   case AMDGPU::OPERAND_REG_IMM_INT16:
2119   case AMDGPU::OPERAND_REG_IMM_FP16:
2120   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2121   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2122   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2123   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2124   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2125     if (isSafeTruncation(Val, 16) &&
2126         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2127                                      AsmParser->hasInv2PiInlineImm())) {
2128       Inst.addOperand(MCOperand::createImm(Val));
2129       setImmKindConst();
2130       return;
2131     }
2132 
2133     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2134     setImmKindLiteral();
2135     return;
2136 
2137   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2138   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2139   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2140   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2141     assert(isSafeTruncation(Val, 16));
2142     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2143                                         AsmParser->hasInv2PiInlineImm()));
2144 
2145     Inst.addOperand(MCOperand::createImm(Val));
2146     return;
2147   }
2148   case AMDGPU::OPERAND_KIMM32:
2149     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2150     setImmKindNone();
2151     return;
2152   case AMDGPU::OPERAND_KIMM16:
2153     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2154     setImmKindNone();
2155     return;
2156   default:
2157     llvm_unreachable("invalid operand size");
2158   }
2159 }
2160 
2161 template <unsigned Bitwidth>
2162 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2163   APInt Literal(64, Imm.Val);
2164   setImmKindNone();
2165 
2166   if (!Imm.IsFPImm) {
2167     // We got int literal token.
2168     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2169     return;
2170   }
2171 
2172   bool Lost;
2173   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2174   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2175                     APFloat::rmNearestTiesToEven, &Lost);
2176   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2177 }
2178 
2179 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2180   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2181 }
2182 
2183 static bool isInlineValue(unsigned Reg) {
2184   switch (Reg) {
2185   case AMDGPU::SRC_SHARED_BASE:
2186   case AMDGPU::SRC_SHARED_LIMIT:
2187   case AMDGPU::SRC_PRIVATE_BASE:
2188   case AMDGPU::SRC_PRIVATE_LIMIT:
2189   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2190     return true;
2191   case AMDGPU::SRC_VCCZ:
2192   case AMDGPU::SRC_EXECZ:
2193   case AMDGPU::SRC_SCC:
2194     return true;
2195   case AMDGPU::SGPR_NULL:
2196     return true;
2197   default:
2198     return false;
2199   }
2200 }
2201 
2202 bool AMDGPUOperand::isInlineValue() const {
2203   return isRegKind() && ::isInlineValue(getReg());
2204 }
2205 
2206 //===----------------------------------------------------------------------===//
2207 // AsmParser
2208 //===----------------------------------------------------------------------===//
2209 
2210 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2211   if (Is == IS_VGPR) {
2212     switch (RegWidth) {
2213       default: return -1;
2214       case 1: return AMDGPU::VGPR_32RegClassID;
2215       case 2: return AMDGPU::VReg_64RegClassID;
2216       case 3: return AMDGPU::VReg_96RegClassID;
2217       case 4: return AMDGPU::VReg_128RegClassID;
2218       case 5: return AMDGPU::VReg_160RegClassID;
2219       case 6: return AMDGPU::VReg_192RegClassID;
2220       case 7: return AMDGPU::VReg_224RegClassID;
2221       case 8: return AMDGPU::VReg_256RegClassID;
2222       case 16: return AMDGPU::VReg_512RegClassID;
2223       case 32: return AMDGPU::VReg_1024RegClassID;
2224     }
2225   } else if (Is == IS_TTMP) {
2226     switch (RegWidth) {
2227       default: return -1;
2228       case 1: return AMDGPU::TTMP_32RegClassID;
2229       case 2: return AMDGPU::TTMP_64RegClassID;
2230       case 4: return AMDGPU::TTMP_128RegClassID;
2231       case 8: return AMDGPU::TTMP_256RegClassID;
2232       case 16: return AMDGPU::TTMP_512RegClassID;
2233     }
2234   } else if (Is == IS_SGPR) {
2235     switch (RegWidth) {
2236       default: return -1;
2237       case 1: return AMDGPU::SGPR_32RegClassID;
2238       case 2: return AMDGPU::SGPR_64RegClassID;
2239       case 3: return AMDGPU::SGPR_96RegClassID;
2240       case 4: return AMDGPU::SGPR_128RegClassID;
2241       case 5: return AMDGPU::SGPR_160RegClassID;
2242       case 6: return AMDGPU::SGPR_192RegClassID;
2243       case 7: return AMDGPU::SGPR_224RegClassID;
2244       case 8: return AMDGPU::SGPR_256RegClassID;
2245       case 16: return AMDGPU::SGPR_512RegClassID;
2246     }
2247   } else if (Is == IS_AGPR) {
2248     switch (RegWidth) {
2249       default: return -1;
2250       case 1: return AMDGPU::AGPR_32RegClassID;
2251       case 2: return AMDGPU::AReg_64RegClassID;
2252       case 3: return AMDGPU::AReg_96RegClassID;
2253       case 4: return AMDGPU::AReg_128RegClassID;
2254       case 5: return AMDGPU::AReg_160RegClassID;
2255       case 6: return AMDGPU::AReg_192RegClassID;
2256       case 7: return AMDGPU::AReg_224RegClassID;
2257       case 8: return AMDGPU::AReg_256RegClassID;
2258       case 16: return AMDGPU::AReg_512RegClassID;
2259       case 32: return AMDGPU::AReg_1024RegClassID;
2260     }
2261   }
2262   return -1;
2263 }
2264 
2265 static unsigned getSpecialRegForName(StringRef RegName) {
2266   return StringSwitch<unsigned>(RegName)
2267     .Case("exec", AMDGPU::EXEC)
2268     .Case("vcc", AMDGPU::VCC)
2269     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2270     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2271     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2272     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2273     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2274     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2275     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2276     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2277     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2278     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2279     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2280     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2281     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2282     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2283     .Case("m0", AMDGPU::M0)
2284     .Case("vccz", AMDGPU::SRC_VCCZ)
2285     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2286     .Case("execz", AMDGPU::SRC_EXECZ)
2287     .Case("src_execz", AMDGPU::SRC_EXECZ)
2288     .Case("scc", AMDGPU::SRC_SCC)
2289     .Case("src_scc", AMDGPU::SRC_SCC)
2290     .Case("tba", AMDGPU::TBA)
2291     .Case("tma", AMDGPU::TMA)
2292     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2293     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2294     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2295     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2296     .Case("vcc_lo", AMDGPU::VCC_LO)
2297     .Case("vcc_hi", AMDGPU::VCC_HI)
2298     .Case("exec_lo", AMDGPU::EXEC_LO)
2299     .Case("exec_hi", AMDGPU::EXEC_HI)
2300     .Case("tma_lo", AMDGPU::TMA_LO)
2301     .Case("tma_hi", AMDGPU::TMA_HI)
2302     .Case("tba_lo", AMDGPU::TBA_LO)
2303     .Case("tba_hi", AMDGPU::TBA_HI)
2304     .Case("pc", AMDGPU::PC_REG)
2305     .Case("null", AMDGPU::SGPR_NULL)
2306     .Default(AMDGPU::NoRegister);
2307 }
2308 
2309 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2310                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2311   auto R = parseRegister();
2312   if (!R) return true;
2313   assert(R->isReg());
2314   RegNo = R->getReg();
2315   StartLoc = R->getStartLoc();
2316   EndLoc = R->getEndLoc();
2317   return false;
2318 }
2319 
2320 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2321                                     SMLoc &EndLoc) {
2322   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2323 }
2324 
2325 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2326                                                        SMLoc &StartLoc,
2327                                                        SMLoc &EndLoc) {
2328   bool Result =
2329       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2330   bool PendingErrors = getParser().hasPendingError();
2331   getParser().clearPendingErrors();
2332   if (PendingErrors)
2333     return MatchOperand_ParseFail;
2334   if (Result)
2335     return MatchOperand_NoMatch;
2336   return MatchOperand_Success;
2337 }
2338 
2339 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2340                                             RegisterKind RegKind, unsigned Reg1,
2341                                             SMLoc Loc) {
2342   switch (RegKind) {
2343   case IS_SPECIAL:
2344     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2345       Reg = AMDGPU::EXEC;
2346       RegWidth = 2;
2347       return true;
2348     }
2349     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2350       Reg = AMDGPU::FLAT_SCR;
2351       RegWidth = 2;
2352       return true;
2353     }
2354     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2355       Reg = AMDGPU::XNACK_MASK;
2356       RegWidth = 2;
2357       return true;
2358     }
2359     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2360       Reg = AMDGPU::VCC;
2361       RegWidth = 2;
2362       return true;
2363     }
2364     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2365       Reg = AMDGPU::TBA;
2366       RegWidth = 2;
2367       return true;
2368     }
2369     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2370       Reg = AMDGPU::TMA;
2371       RegWidth = 2;
2372       return true;
2373     }
2374     Error(Loc, "register does not fit in the list");
2375     return false;
2376   case IS_VGPR:
2377   case IS_SGPR:
2378   case IS_AGPR:
2379   case IS_TTMP:
2380     if (Reg1 != Reg + RegWidth) {
2381       Error(Loc, "registers in a list must have consecutive indices");
2382       return false;
2383     }
2384     RegWidth++;
2385     return true;
2386   default:
2387     llvm_unreachable("unexpected register kind");
2388   }
2389 }
2390 
2391 struct RegInfo {
2392   StringLiteral Name;
2393   RegisterKind Kind;
2394 };
2395 
2396 static constexpr RegInfo RegularRegisters[] = {
2397   {{"v"},    IS_VGPR},
2398   {{"s"},    IS_SGPR},
2399   {{"ttmp"}, IS_TTMP},
2400   {{"acc"},  IS_AGPR},
2401   {{"a"},    IS_AGPR},
2402 };
2403 
2404 static bool isRegularReg(RegisterKind Kind) {
2405   return Kind == IS_VGPR ||
2406          Kind == IS_SGPR ||
2407          Kind == IS_TTMP ||
2408          Kind == IS_AGPR;
2409 }
2410 
2411 static const RegInfo* getRegularRegInfo(StringRef Str) {
2412   for (const RegInfo &Reg : RegularRegisters)
2413     if (Str.startswith(Reg.Name))
2414       return &Reg;
2415   return nullptr;
2416 }
2417 
2418 static bool getRegNum(StringRef Str, unsigned& Num) {
2419   return !Str.getAsInteger(10, Num);
2420 }
2421 
2422 bool
2423 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2424                             const AsmToken &NextToken) const {
2425 
2426   // A list of consecutive registers: [s0,s1,s2,s3]
2427   if (Token.is(AsmToken::LBrac))
2428     return true;
2429 
2430   if (!Token.is(AsmToken::Identifier))
2431     return false;
2432 
2433   // A single register like s0 or a range of registers like s[0:1]
2434 
2435   StringRef Str = Token.getString();
2436   const RegInfo *Reg = getRegularRegInfo(Str);
2437   if (Reg) {
2438     StringRef RegName = Reg->Name;
2439     StringRef RegSuffix = Str.substr(RegName.size());
2440     if (!RegSuffix.empty()) {
2441       unsigned Num;
2442       // A single register with an index: rXX
2443       if (getRegNum(RegSuffix, Num))
2444         return true;
2445     } else {
2446       // A range of registers: r[XX:YY].
2447       if (NextToken.is(AsmToken::LBrac))
2448         return true;
2449     }
2450   }
2451 
2452   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2453 }
2454 
2455 bool
2456 AMDGPUAsmParser::isRegister()
2457 {
2458   return isRegister(getToken(), peekToken());
2459 }
2460 
2461 unsigned
2462 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2463                                unsigned RegNum,
2464                                unsigned RegWidth,
2465                                SMLoc Loc) {
2466 
2467   assert(isRegularReg(RegKind));
2468 
2469   unsigned AlignSize = 1;
2470   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2471     // SGPR and TTMP registers must be aligned.
2472     // Max required alignment is 4 dwords.
2473     AlignSize = std::min(RegWidth, 4u);
2474   }
2475 
2476   if (RegNum % AlignSize != 0) {
2477     Error(Loc, "invalid register alignment");
2478     return AMDGPU::NoRegister;
2479   }
2480 
2481   unsigned RegIdx = RegNum / AlignSize;
2482   int RCID = getRegClass(RegKind, RegWidth);
2483   if (RCID == -1) {
2484     Error(Loc, "invalid or unsupported register size");
2485     return AMDGPU::NoRegister;
2486   }
2487 
2488   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2489   const MCRegisterClass RC = TRI->getRegClass(RCID);
2490   if (RegIdx >= RC.getNumRegs()) {
2491     Error(Loc, "register index is out of range");
2492     return AMDGPU::NoRegister;
2493   }
2494 
2495   return RC.getRegister(RegIdx);
2496 }
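// Illustration of the alignment rule above: s[2:5] (RegNum = 2, RegWidth = 4)
// is rejected with "invalid register alignment" because AlignSize is 4, while
// s[4:7] maps to index 1 of the SGPR_128 class (assuming the usual ordering of
// that register class).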
2497 
2498 bool
2499 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2500   int64_t RegLo, RegHi;
2501   if (!skipToken(AsmToken::LBrac, "missing register index"))
2502     return false;
2503 
2504   SMLoc FirstIdxLoc = getLoc();
2505   SMLoc SecondIdxLoc;
2506 
2507   if (!parseExpr(RegLo))
2508     return false;
2509 
2510   if (trySkipToken(AsmToken::Colon)) {
2511     SecondIdxLoc = getLoc();
2512     if (!parseExpr(RegHi))
2513       return false;
2514   } else {
2515     RegHi = RegLo;
2516   }
2517 
2518   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2519     return false;
2520 
2521   if (!isUInt<32>(RegLo)) {
2522     Error(FirstIdxLoc, "invalid register index");
2523     return false;
2524   }
2525 
2526   if (!isUInt<32>(RegHi)) {
2527     Error(SecondIdxLoc, "invalid register index");
2528     return false;
2529   }
2530 
2531   if (RegLo > RegHi) {
2532     Error(FirstIdxLoc, "first register index should not exceed second index");
2533     return false;
2534   }
2535 
2536   Num = static_cast<unsigned>(RegLo);
2537   Width = (RegHi - RegLo) + 1;
2538   return true;
2539 }
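// Example inputs for the range parser above: "[0:3]" yields Num = 0 and
// Width = 4, while "[5]" (no colon) yields Num = 5 and Width = 1.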
2540 
2541 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2542                                           unsigned &RegNum, unsigned &RegWidth,
2543                                           SmallVectorImpl<AsmToken> &Tokens) {
2544   assert(isToken(AsmToken::Identifier));
2545   unsigned Reg = getSpecialRegForName(getTokenStr());
2546   if (Reg) {
2547     RegNum = 0;
2548     RegWidth = 1;
2549     RegKind = IS_SPECIAL;
2550     Tokens.push_back(getToken());
2551     lex(); // skip register name
2552   }
2553   return Reg;
2554 }
2555 
2556 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2557                                           unsigned &RegNum, unsigned &RegWidth,
2558                                           SmallVectorImpl<AsmToken> &Tokens) {
2559   assert(isToken(AsmToken::Identifier));
2560   StringRef RegName = getTokenStr();
2561   auto Loc = getLoc();
2562 
2563   const RegInfo *RI = getRegularRegInfo(RegName);
2564   if (!RI) {
2565     Error(Loc, "invalid register name");
2566     return AMDGPU::NoRegister;
2567   }
2568 
2569   Tokens.push_back(getToken());
2570   lex(); // skip register name
2571 
2572   RegKind = RI->Kind;
2573   StringRef RegSuffix = RegName.substr(RI->Name.size());
2574   if (!RegSuffix.empty()) {
2575     // Single 32-bit register: vXX.
2576     if (!getRegNum(RegSuffix, RegNum)) {
2577       Error(Loc, "invalid register index");
2578       return AMDGPU::NoRegister;
2579     }
2580     RegWidth = 1;
2581   } else {
2582     // Range of registers: v[XX:YY]. ":YY" is optional.
2583     if (!ParseRegRange(RegNum, RegWidth))
2584       return AMDGPU::NoRegister;
2585   }
2586 
2587   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2588 }
2589 
2590 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2591                                        unsigned &RegWidth,
2592                                        SmallVectorImpl<AsmToken> &Tokens) {
2593   unsigned Reg = AMDGPU::NoRegister;
2594   auto ListLoc = getLoc();
2595 
2596   if (!skipToken(AsmToken::LBrac,
2597                  "expected a register or a list of registers")) {
2598     return AMDGPU::NoRegister;
2599   }
2600 
2601   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2602 
2603   auto Loc = getLoc();
2604   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2605     return AMDGPU::NoRegister;
2606   if (RegWidth != 1) {
2607     Error(Loc, "expected a single 32-bit register");
2608     return AMDGPU::NoRegister;
2609   }
2610 
2611   for (; trySkipToken(AsmToken::Comma); ) {
2612     RegisterKind NextRegKind;
2613     unsigned NextReg, NextRegNum, NextRegWidth;
2614     Loc = getLoc();
2615 
2616     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2617                              NextRegNum, NextRegWidth,
2618                              Tokens)) {
2619       return AMDGPU::NoRegister;
2620     }
2621     if (NextRegWidth != 1) {
2622       Error(Loc, "expected a single 32-bit register");
2623       return AMDGPU::NoRegister;
2624     }
2625     if (NextRegKind != RegKind) {
2626       Error(Loc, "registers in a list must be of the same kind");
2627       return AMDGPU::NoRegister;
2628     }
2629     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2630       return AMDGPU::NoRegister;
2631   }
2632 
2633   if (!skipToken(AsmToken::RBrac,
2634                  "expected a comma or a closing square bracket")) {
2635     return AMDGPU::NoRegister;
2636   }
2637 
2638   if (isRegularReg(RegKind))
2639     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2640 
2641   return Reg;
2642 }
2643 
2644 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2645                                           unsigned &RegNum, unsigned &RegWidth,
2646                                           SmallVectorImpl<AsmToken> &Tokens) {
2647   auto Loc = getLoc();
2648   Reg = AMDGPU::NoRegister;
2649 
2650   if (isToken(AsmToken::Identifier)) {
2651     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2652     if (Reg == AMDGPU::NoRegister)
2653       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2654   } else {
2655     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2656   }
2657 
2658   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2659   if (Reg == AMDGPU::NoRegister) {
2660     assert(Parser.hasPendingError());
2661     return false;
2662   }
2663 
2664   if (!subtargetHasRegister(*TRI, Reg)) {
2665     if (Reg == AMDGPU::SGPR_NULL) {
2666       Error(Loc, "'null' operand is not supported on this GPU");
2667     } else {
2668       Error(Loc, "register not available on this GPU");
2669     }
2670     return false;
2671   }
2672 
2673   return true;
2674 }
2675 
2676 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2677                                           unsigned &RegNum, unsigned &RegWidth,
2678                                           bool RestoreOnFailure /*=false*/) {
2679   Reg = AMDGPU::NoRegister;
2680 
2681   SmallVector<AsmToken, 1> Tokens;
2682   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2683     if (RestoreOnFailure) {
2684       while (!Tokens.empty()) {
2685         getLexer().UnLex(Tokens.pop_back_val());
2686       }
2687     }
2688     return true;
2689   }
2690   return false;
2691 }
2692 
2693 Optional<StringRef>
2694 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2695   switch (RegKind) {
2696   case IS_VGPR:
2697     return StringRef(".amdgcn.next_free_vgpr");
2698   case IS_SGPR:
2699     return StringRef(".amdgcn.next_free_sgpr");
2700   default:
2701     return None;
2702   }
2703 }
2704 
2705 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2706   auto SymbolName = getGprCountSymbolName(RegKind);
2707   assert(SymbolName && "initializing invalid register kind");
2708   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2709   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2710 }
2711 
2712 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2713                                             unsigned DwordRegIndex,
2714                                             unsigned RegWidth) {
2715   // Symbols are only defined for GCN targets
2716   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2717     return true;
2718 
2719   auto SymbolName = getGprCountSymbolName(RegKind);
2720   if (!SymbolName)
2721     return true;
2722   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2723 
2724   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2725   int64_t OldCount;
2726 
2727   if (!Sym->isVariable())
2728     return !Error(getLoc(),
2729                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2730   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2731     return !Error(
2732         getLoc(),
2733         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2734 
2735   if (OldCount <= NewMax)
2736     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2737 
2738   return true;
2739 }
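// For example, after parsing v5 (DwordRegIndex = 5, RegWidth = 1) the
// .amdgcn.next_free_vgpr symbol is bumped to 6 unless it was already larger.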
2740 
2741 std::unique_ptr<AMDGPUOperand>
2742 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2743   const auto &Tok = getToken();
2744   SMLoc StartLoc = Tok.getLoc();
2745   SMLoc EndLoc = Tok.getEndLoc();
2746   RegisterKind RegKind;
2747   unsigned Reg, RegNum, RegWidth;
2748 
2749   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2750     return nullptr;
2751   }
2752   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2753     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2754       return nullptr;
2755   } else
2756     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2757   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2758 }
2759 
2760 OperandMatchResultTy
2761 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2762   // TODO: add syntactic sugar for 1/(2*PI)
2763 
2764   assert(!isRegister());
2765   assert(!isModifier());
2766 
2767   const auto& Tok = getToken();
2768   const auto& NextTok = peekToken();
2769   bool IsReal = Tok.is(AsmToken::Real);
2770   SMLoc S = getLoc();
2771   bool Negate = false;
2772 
2773   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2774     lex();
2775     IsReal = true;
2776     Negate = true;
2777   }
2778 
2779   if (IsReal) {
2780     // Floating-point expressions are not supported.
2781     // Only floating-point literals with an optional
2782     // sign are allowed.
2783 
2784     StringRef Num = getTokenStr();
2785     lex();
2786 
2787     APFloat RealVal(APFloat::IEEEdouble());
2788     auto roundMode = APFloat::rmNearestTiesToEven;
2789     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2790       return MatchOperand_ParseFail;
2791     }
2792     if (Negate)
2793       RealVal.changeSign();
2794 
2795     Operands.push_back(
2796       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2797                                AMDGPUOperand::ImmTyNone, true));
2798 
2799     return MatchOperand_Success;
2800 
2801   } else {
2802     int64_t IntVal;
2803     const MCExpr *Expr;
2804     SMLoc S = getLoc();
2805 
2806     if (HasSP3AbsModifier) {
2807       // This is a workaround for handling expressions
2808       // as arguments of the SP3 'abs' modifier, for example:
2809       //     |1.0|
2810       //     |-1|
2811       //     |1+x|
2812       // This syntax is not compatible with the syntax of standard
2813       // MC expressions (due to the trailing '|').
2814       SMLoc EndLoc;
2815       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2816         return MatchOperand_ParseFail;
2817     } else {
2818       if (Parser.parseExpression(Expr))
2819         return MatchOperand_ParseFail;
2820     }
2821 
2822     if (Expr->evaluateAsAbsolute(IntVal)) {
2823       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2824     } else {
2825       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2826     }
2827 
2828     return MatchOperand_Success;
2829   }
2830 
2831   return MatchOperand_NoMatch;
2832 }
2833 
2834 OperandMatchResultTy
2835 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2836   if (!isRegister())
2837     return MatchOperand_NoMatch;
2838 
2839   if (auto R = parseRegister()) {
2840     assert(R->isReg());
2841     Operands.push_back(std::move(R));
2842     return MatchOperand_Success;
2843   }
2844   return MatchOperand_ParseFail;
2845 }
2846 
2847 OperandMatchResultTy
2848 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2849   auto res = parseReg(Operands);
2850   if (res != MatchOperand_NoMatch) {
2851     return res;
2852   } else if (isModifier()) {
2853     return MatchOperand_NoMatch;
2854   } else {
2855     return parseImm(Operands, HasSP3AbsMod);
2856   }
2857 }
2858 
2859 bool
2860 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2861   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2862     const auto &str = Token.getString();
2863     return str == "abs" || str == "neg" || str == "sext";
2864   }
2865   return false;
2866 }
2867 
2868 bool
2869 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2870   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2871 }
2872 
2873 bool
2874 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2875   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2876 }
2877 
2878 bool
2879 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2880   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2881 }
2882 
2883 // Check if this is an operand modifier or an opcode modifier
2884 // which may look like an expression but is not. We should
2885 // avoid parsing these modifiers as expressions. Currently
2886 // recognized sequences are:
2887 //   |...|
2888 //   abs(...)
2889 //   neg(...)
2890 //   sext(...)
2891 //   -reg
2892 //   -|...|
2893 //   -abs(...)
2894 //   name:...
2895 // Note that simple opcode modifiers like 'gds' may be parsed as
2896 // expressions; this is a special case. See getExpressionAsToken.
2897 //
2898 bool
2899 AMDGPUAsmParser::isModifier() {
2900 
2901   AsmToken Tok = getToken();
2902   AsmToken NextToken[2];
2903   peekTokens(NextToken);
2904 
2905   return isOperandModifier(Tok, NextToken[0]) ||
2906          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2907          isOpcodeModifierWithVal(Tok, NextToken[0]);
2908 }
2909 
2910 // Check if the current token is an SP3 'neg' modifier.
2911 // Currently this modifier is allowed in the following contexts:
2912 //
2913 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2914 // 2. Before an 'abs' modifier: -abs(...)
2915 // 3. Before an SP3 'abs' modifier: -|...|
2916 //
2917 // In all other cases "-" is handled as a part
2918 // of an expression that follows the sign.
2919 //
2920 // Note: When "-" is followed by an integer literal N,
2921 // it is interpreted as integer negation rather than a
2922 // floating-point NEG modifier applied to N.
2923 // Besides being counter-intuitive, such use of the floating-point
2924 // NEG modifier would have resulted in a different meaning
2925 // of integer literals used with VOP1/2/C and VOP3,
2926 // for example:
2927 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2928 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2929 // Negative fp literals with a preceding "-" are
2930 // handled likewise for uniformity.
2931 //
2932 bool
2933 AMDGPUAsmParser::parseSP3NegModifier() {
2934 
2935   AsmToken NextToken[2];
2936   peekTokens(NextToken);
2937 
2938   if (isToken(AsmToken::Minus) &&
2939       (isRegister(NextToken[0], NextToken[1]) ||
2940        NextToken[0].is(AsmToken::Pipe) ||
2941        isId(NextToken[0], "abs"))) {
2942     lex();
2943     return true;
2944   }
2945 
2946   return false;
2947 }
2948 
2949 OperandMatchResultTy
2950 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2951                                               bool AllowImm) {
2952   bool Neg, SP3Neg;
2953   bool Abs, SP3Abs;
2954   SMLoc Loc;
2955 
2956   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2957   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2958     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2959     return MatchOperand_ParseFail;
2960   }
2961 
2962   SP3Neg = parseSP3NegModifier();
2963 
2964   Loc = getLoc();
2965   Neg = trySkipId("neg");
2966   if (Neg && SP3Neg) {
2967     Error(Loc, "expected register or immediate");
2968     return MatchOperand_ParseFail;
2969   }
2970   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2971     return MatchOperand_ParseFail;
2972 
2973   Abs = trySkipId("abs");
2974   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2975     return MatchOperand_ParseFail;
2976 
2977   Loc = getLoc();
2978   SP3Abs = trySkipToken(AsmToken::Pipe);
2979   if (Abs && SP3Abs) {
2980     Error(Loc, "expected register or immediate");
2981     return MatchOperand_ParseFail;
2982   }
2983 
2984   OperandMatchResultTy Res;
2985   if (AllowImm) {
2986     Res = parseRegOrImm(Operands, SP3Abs);
2987   } else {
2988     Res = parseReg(Operands);
2989   }
2990   if (Res != MatchOperand_Success) {
2991     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2992   }
2993 
2994   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2995     return MatchOperand_ParseFail;
2996   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2997     return MatchOperand_ParseFail;
2998   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2999     return MatchOperand_ParseFail;
3000 
3001   AMDGPUOperand::Modifiers Mods;
3002   Mods.Abs = Abs || SP3Abs;
3003   Mods.Neg = Neg || SP3Neg;
3004 
3005   if (Mods.hasFPModifiers()) {
3006     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3007     if (Op.isExpr()) {
3008       Error(Op.getStartLoc(), "expected an absolute expression");
3009       return MatchOperand_ParseFail;
3010     }
3011     Op.setModifiers(Mods);
3012   }
3013   return MatchOperand_Success;
3014 }
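// Accepted spellings include, for example, "abs(v1)", "neg(v1)", "-v1", "|v1|"
// and "-|v1|"; "--1" is rejected above in favor of "neg(-1)".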
3015 
3016 OperandMatchResultTy
3017 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3018                                                bool AllowImm) {
3019   bool Sext = trySkipId("sext");
3020   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3021     return MatchOperand_ParseFail;
3022 
3023   OperandMatchResultTy Res;
3024   if (AllowImm) {
3025     Res = parseRegOrImm(Operands);
3026   } else {
3027     Res = parseReg(Operands);
3028   }
3029   if (Res != MatchOperand_Success) {
3030     return Sext? MatchOperand_ParseFail : Res;
3031   }
3032 
3033   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3034     return MatchOperand_ParseFail;
3035 
3036   AMDGPUOperand::Modifiers Mods;
3037   Mods.Sext = Sext;
3038 
3039   if (Mods.hasIntModifiers()) {
3040     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3041     if (Op.isExpr()) {
3042       Error(Op.getStartLoc(), "expected an absolute expression");
3043       return MatchOperand_ParseFail;
3044     }
3045     Op.setModifiers(Mods);
3046   }
3047 
3048   return MatchOperand_Success;
3049 }
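// For example, "sext(v1)" parses v1 with the integer sign-extension modifier
// set on the resulting operand.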
3050 
3051 OperandMatchResultTy
3052 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3053   return parseRegOrImmWithFPInputMods(Operands, false);
3054 }
3055 
3056 OperandMatchResultTy
3057 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3058   return parseRegOrImmWithIntInputMods(Operands, false);
3059 }
3060 
3061 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3062   auto Loc = getLoc();
3063   if (trySkipId("off")) {
3064     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3065                                                 AMDGPUOperand::ImmTyOff, false));
3066     return MatchOperand_Success;
3067   }
3068 
3069   if (!isRegister())
3070     return MatchOperand_NoMatch;
3071 
3072   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3073   if (Reg) {
3074     Operands.push_back(std::move(Reg));
3075     return MatchOperand_Success;
3076   }
3077 
3078   return MatchOperand_ParseFail;
3079 
3080 }
3081 
3082 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3083   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3084 
3085   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3086       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3087       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3088       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3089     return Match_InvalidOperand;
3090 
3091   if ((TSFlags & SIInstrFlags::VOP3) &&
3092       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3093       getForcedEncodingSize() != 64)
3094     return Match_PreferE32;
3095 
3096   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3097       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3098     // v_mac_f32/16 allow only dst_sel == DWORD.
3099     auto OpNum =
3100         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3101     const auto &Op = Inst.getOperand(OpNum);
3102     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3103       return Match_InvalidOperand;
3104     }
3105   }
3106 
3107   return Match_Success;
3108 }
3109 
3110 static ArrayRef<unsigned> getAllVariants() {
3111   static const unsigned Variants[] = {
3112     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3113     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3114   };
3115 
3116   return makeArrayRef(Variants);
3117 }
3118 
3119 // What asm variants we should check
3120 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3121   if (getForcedEncodingSize() == 32) {
3122     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3123     return makeArrayRef(Variants);
3124   }
3125 
3126   if (isForcedVOP3()) {
3127     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3128     return makeArrayRef(Variants);
3129   }
3130 
3131   if (isForcedSDWA()) {
3132     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3133                                         AMDGPUAsmVariants::SDWA9};
3134     return makeArrayRef(Variants);
3135   }
3136 
3137   if (isForcedDPP()) {
3138     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3139     return makeArrayRef(Variants);
3140   }
3141 
3142   return getAllVariants();
3143 }
3144 
3145 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3146   if (getForcedEncodingSize() == 32)
3147     return "e32";
3148 
3149   if (isForcedVOP3())
3150     return "e64";
3151 
3152   if (isForcedSDWA())
3153     return "sdwa";
3154 
3155   if (isForcedDPP())
3156     return "dpp";
3157 
3158   return "";
3159 }
3160 
3161 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3162   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3163   const unsigned Num = Desc.getNumImplicitUses();
3164   for (unsigned i = 0; i < Num; ++i) {
3165     unsigned Reg = Desc.ImplicitUses[i];
3166     switch (Reg) {
3167     case AMDGPU::FLAT_SCR:
3168     case AMDGPU::VCC:
3169     case AMDGPU::VCC_LO:
3170     case AMDGPU::VCC_HI:
3171     case AMDGPU::M0:
3172       return Reg;
3173     default:
3174       break;
3175     }
3176   }
3177   return AMDGPU::NoRegister;
3178 }
3179 
3180 // NB: This code is correct only when used to check constant
3181 // bus limitations because GFX7 supports no f16 inline constants.
3182 // Note that there are no cases when a GFX7 opcode violates
3183 // constant bus limitations due to the use of an f16 constant.
3184 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3185                                        unsigned OpIdx) const {
3186   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3187 
3188   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3189     return false;
3190   }
3191 
3192   const MCOperand &MO = Inst.getOperand(OpIdx);
3193 
3194   int64_t Val = MO.getImm();
3195   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3196 
3197   switch (OpSize) { // expected operand size
3198   case 8:
3199     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3200   case 4:
3201     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3202   case 2: {
3203     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3204     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3205         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3206         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3207       return AMDGPU::isInlinableIntLiteral(Val);
3208 
3209     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3210         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3211         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3212       return AMDGPU::isInlinableIntLiteralV216(Val);
3213 
3214     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3215         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3216         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3217       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3218 
3219     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3220   }
3221   default:
3222     llvm_unreachable("invalid operand size");
3223   }
3224 }
3225 
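// Number of scalar values (SGPRs and/or literal constants) an instruction
// may read over the constant bus: one on pre-GFX10 targets, two on GFX10+,
// except for the 64-bit shifts listed below which still accept only one.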
3226 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3227   if (!isGFX10Plus())
3228     return 1;
3229 
3230   switch (Opcode) {
3231   // 64-bit shift instructions can use only one scalar value input
3232   case AMDGPU::V_LSHLREV_B64_e64:
3233   case AMDGPU::V_LSHLREV_B64_gfx10:
3234   case AMDGPU::V_LSHRREV_B64_e64:
3235   case AMDGPU::V_LSHRREV_B64_gfx10:
3236   case AMDGPU::V_ASHRREV_I64_e64:
3237   case AMDGPU::V_ASHRREV_I64_gfx10:
3238   case AMDGPU::V_LSHL_B64_e64:
3239   case AMDGPU::V_LSHR_B64_e64:
3240   case AMDGPU::V_ASHR_I64_e64:
3241     return 1;
3242   default:
3243     return 2;
3244   }
3245 }
3246 
3247 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3248   const MCOperand &MO = Inst.getOperand(OpIdx);
3249   if (MO.isImm()) {
3250     return !isInlineConstant(Inst, OpIdx);
3251   } else if (MO.isReg()) {
3252     auto Reg = MO.getReg();
3253     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3254     auto PReg = mc2PseudoReg(Reg);
3255     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3256   } else {
3257     return true;
3258   }
3259 }
3260 
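// Count constant bus reads (SGPRs and literals) of the source operands and
// verify the per-instruction limit. For example, assuming a pre-GFX10
// target, "v_add_f32_e64 v0, s1, s2" would be rejected because it reads two
// different SGPRs while the limit is one; the same instruction is accepted
// on GFX10+ where the limit is two.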
3261 bool
3262 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3263                                                 const OperandVector &Operands) {
3264   const unsigned Opcode = Inst.getOpcode();
3265   const MCInstrDesc &Desc = MII.get(Opcode);
3266   unsigned LastSGPR = AMDGPU::NoRegister;
3267   unsigned ConstantBusUseCount = 0;
3268   unsigned NumLiterals = 0;
3269   unsigned LiteralSize;
3270 
3271   if (Desc.TSFlags &
3272       (SIInstrFlags::VOPC |
3273        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3274        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3275        SIInstrFlags::SDWA)) {
3276     // Check special imm operands (used by madmk, etc)
3277     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3278       ++NumLiterals;
3279       LiteralSize = 4;
3280     }
3281 
3282     SmallDenseSet<unsigned> SGPRsUsed;
3283     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3284     if (SGPRUsed != AMDGPU::NoRegister) {
3285       SGPRsUsed.insert(SGPRUsed);
3286       ++ConstantBusUseCount;
3287     }
3288 
3289     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3290     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3291     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3292 
3293     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3294 
3295     for (int OpIdx : OpIndices) {
3296       if (OpIdx == -1) break;
3297 
3298       const MCOperand &MO = Inst.getOperand(OpIdx);
3299       if (usesConstantBus(Inst, OpIdx)) {
3300         if (MO.isReg()) {
3301           LastSGPR = mc2PseudoReg(MO.getReg());
3302           // Pairs of registers with a partial intersection like these
3303           //   s0, s[0:1]
3304           //   flat_scratch_lo, flat_scratch
3305           //   flat_scratch_lo, flat_scratch_hi
3306           // are theoretically valid but they are disabled anyway.
3307           // Note that this code mimics SIInstrInfo::verifyInstruction
3308           if (!SGPRsUsed.count(LastSGPR)) {
3309             SGPRsUsed.insert(LastSGPR);
3310             ++ConstantBusUseCount;
3311           }
3312         } else { // Expression or a literal
3313 
3314           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3315             continue; // special operand like VINTERP attr_chan
3316 
3317           // An instruction may use only one literal.
3318           // This has been validated on the previous step.
3319           // See validateVOPLiteral.
3320           // This literal may be used as more than one operand.
3321           // If all these operands are of the same size,
3322           // this literal counts as one scalar value.
3323           // Otherwise it counts as 2 scalar values.
3324           // See "GFX10 Shader Programming", section 3.6.2.3.
3325 
3326           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3327           if (Size < 4) Size = 4;
3328 
3329           if (NumLiterals == 0) {
3330             NumLiterals = 1;
3331             LiteralSize = Size;
3332           } else if (LiteralSize != Size) {
3333             NumLiterals = 2;
3334           }
3335         }
3336       }
3337     }
3338   }
3339   ConstantBusUseCount += NumLiterals;
3340 
3341   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3342     return true;
3343 
3344   SMLoc LitLoc = getLitLoc(Operands);
3345   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3346   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3347   Error(Loc, "invalid operand (violates constant bus restrictions)");
3348   return false;
3349 }
3350 
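// Instructions whose vdst operand carries an earlyclobber constraint (e.g.
// the v_qsad/v_mqsad family) must not have the destination overlap any
// source register, because the destination is written before all sources
// have been consumed.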
3351 bool
3352 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3353                                                  const OperandVector &Operands) {
3354   const unsigned Opcode = Inst.getOpcode();
3355   const MCInstrDesc &Desc = MII.get(Opcode);
3356 
3357   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3358   if (DstIdx == -1 ||
3359       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3360     return true;
3361   }
3362 
3363   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3364 
3365   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3366   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3367   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3368 
3369   assert(DstIdx != -1);
3370   const MCOperand &Dst = Inst.getOperand(DstIdx);
3371   assert(Dst.isReg());
3372   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3373 
3374   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3375 
3376   for (int SrcIdx : SrcIndices) {
3377     if (SrcIdx == -1) break;
3378     const MCOperand &Src = Inst.getOperand(SrcIdx);
3379     if (Src.isReg()) {
3380       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3381       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3382         Error(getRegLoc(SrcReg, Operands),
3383           "destination must be different than all sources");
3384         return false;
3385       }
3386     }
3387   }
3388 
3389   return true;
3390 }
3391 
3392 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3393 
3394   const unsigned Opc = Inst.getOpcode();
3395   const MCInstrDesc &Desc = MII.get(Opc);
3396 
3397   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3398     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3399     assert(ClampIdx != -1);
3400     return Inst.getOperand(ClampIdx).getImm() == 0;
3401   }
3402 
3403   return true;
3404 }
3405 
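// The size of vdata must match the number of enabled dmask bits (always 4
// for gather4) plus one extra dword if tfe is set; with packed d16 the
// dword count is halved, rounded up. E.g. an image_load with dmask:0x7
// needs a 3-register vdata, or 4 registers if tfe is also set.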
3406 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3407 
3408   const unsigned Opc = Inst.getOpcode();
3409   const MCInstrDesc &Desc = MII.get(Opc);
3410 
3411   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3412     return true;
3413 
3414   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3415   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3416   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3417 
3418   assert(VDataIdx != -1);
3419 
3420   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3421     return true;
3422 
3423   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3424   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3425   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3426   if (DMask == 0)
3427     DMask = 1;
3428 
3429   unsigned DataSize =
3430     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3431   if (hasPackedD16()) {
3432     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3433     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3434       DataSize = (DataSize + 1) / 2;
3435   }
3436 
3437   return (VDataSize / 4) == DataSize + TFESize;
3438 }
3439 
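// On GFX10+ the size of vaddr must match the address count implied by dim,
// a16 and g16. NSA encodings use one register operand per address; non-NSA
// encodings use a contiguous tuple whose size is rounded up, and an 8-dword
// tuple is also accepted where only 5-7 dwords are needed (see below).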
3440 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3441   const unsigned Opc = Inst.getOpcode();
3442   const MCInstrDesc &Desc = MII.get(Opc);
3443 
3444   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3445     return true;
3446 
3447   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3448 
3449   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3450       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3451   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3452   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3453   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3454   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3455 
3456   assert(VAddr0Idx != -1);
3457   assert(SrsrcIdx != -1);
3458   assert(SrsrcIdx > VAddr0Idx);
3459 
3460   if (DimIdx == -1)
3461     return true; // intersect_ray
3462 
3463   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3464   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3465   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3466   unsigned ActualAddrSize =
3467       IsNSA ? SrsrcIdx - VAddr0Idx
3468             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3469   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3470 
3471   unsigned ExpectedAddrSize =
3472       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3473 
3474   if (!IsNSA) {
3475     if (ExpectedAddrSize > 8)
3476       ExpectedAddrSize = 16;
3477 
3478     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3479     // This provides backward compatibility for assembly created
3480     // before 160b/192b/224b types were directly supported.
3481     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3482       return true;
3483   }
3484 
3485   return ActualAddrSize == ExpectedAddrSize;
3486 }
3487 
3488 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3489 
3490   const unsigned Opc = Inst.getOpcode();
3491   const MCInstrDesc &Desc = MII.get(Opc);
3492 
3493   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3494     return true;
3495   if (!Desc.mayLoad() || !Desc.mayStore())
3496     return true; // Not atomic
3497 
3498   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3499   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3500 
3501   // This is an incomplete check because image_atomic_cmpswap
3502   // may only use 0x3 and 0xf while other atomic operations
3503   // may use 0x1 and 0x3. However these limitations are
3504   // verified when we check that dmask matches dst size.
3505   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3506 }
3507 
3508 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3509 
3510   const unsigned Opc = Inst.getOpcode();
3511   const MCInstrDesc &Desc = MII.get(Opc);
3512 
3513   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3514     return true;
3515 
3516   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3517   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3518 
3519   // GATHER4 instructions use dmask in a different fashion compared to
3520   // other MIMG instructions. The only useful DMASK values are
3521   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3522   // (red,red,red,red) etc.) The ISA document doesn't mention
3523   // this.
3524   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3525 }
3526 
3527 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3528   const unsigned Opc = Inst.getOpcode();
3529   const MCInstrDesc &Desc = MII.get(Opc);
3530 
3531   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3532     return true;
3533 
3534   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3535   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3536       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3537 
3538   if (!BaseOpcode->MSAA)
3539     return true;
3540 
3541   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3542   assert(DimIdx != -1);
3543 
3544   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3545   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3546 
3547   return DimInfo->MSAA;
3548 }
3549 
3550 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3551 {
3552   switch (Opcode) {
3553   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3554   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3555   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3556     return true;
3557   default:
3558     return false;
3559   }
3560 }
3561 
3562 // movrels* opcodes should only allow VGPRs as src0.
3563 // This is specified in .td description for vop1/vop3,
3564 // but sdwa is handled differently. See isSDWAOperand.
3565 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3566                                       const OperandVector &Operands) {
3567 
3568   const unsigned Opc = Inst.getOpcode();
3569   const MCInstrDesc &Desc = MII.get(Opc);
3570 
3571   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3572     return true;
3573 
3574   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3575   assert(Src0Idx != -1);
3576 
3577   SMLoc ErrLoc;
3578   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3579   if (Src0.isReg()) {
3580     auto Reg = mc2PseudoReg(Src0.getReg());
3581     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3582     if (!isSGPR(Reg, TRI))
3583       return true;
3584     ErrLoc = getRegLoc(Reg, Operands);
3585   } else {
3586     ErrLoc = getConstLoc(Operands);
3587   }
3588 
3589   Error(ErrLoc, "source operand must be a VGPR");
3590   return false;
3591 }
3592 
3593 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3594                                           const OperandVector &Operands) {
3595 
3596   const unsigned Opc = Inst.getOpcode();
3597 
3598   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3599     return true;
3600 
3601   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3602   assert(Src0Idx != -1);
3603 
3604   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3605   if (!Src0.isReg())
3606     return true;
3607 
3608   auto Reg = mc2PseudoReg(Src0.getReg());
3609   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3610   if (isSGPR(Reg, TRI)) {
3611     Error(getRegLoc(Reg, Operands),
3612           "source operand must be either a VGPR or an inline constant");
3613     return false;
3614   }
3615 
3616   return true;
3617 }
3618 
3619 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3620                                    const OperandVector &Operands) {
3621   const unsigned Opc = Inst.getOpcode();
3622   const MCInstrDesc &Desc = MII.get(Opc);
3623 
3624   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3625     return true;
3626 
3627   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3628   if (Src2Idx == -1)
3629     return true;
3630 
3631   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3632   if (!Src2.isReg())
3633     return true;
3634 
3635   MCRegister Src2Reg = Src2.getReg();
3636   MCRegister DstReg = Inst.getOperand(0).getReg();
3637   if (Src2Reg == DstReg)
3638     return true;
3639 
3640   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3641   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3642     return true;
3643 
3644   if (isRegIntersect(Src2Reg, DstReg, TRI)) {
3645     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3646           "source 2 operand must not partially overlap with dst");
3647     return false;
3648   }
3649 
3650   return true;
3651 }
3652 
3653 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3654   switch (Inst.getOpcode()) {
3655   default:
3656     return true;
3657   case V_DIV_SCALE_F32_gfx6_gfx7:
3658   case V_DIV_SCALE_F32_vi:
3659   case V_DIV_SCALE_F32_gfx10:
3660   case V_DIV_SCALE_F64_gfx6_gfx7:
3661   case V_DIV_SCALE_F64_vi:
3662   case V_DIV_SCALE_F64_gfx10:
3663     break;
3664   }
3665 
3666   // TODO: Check that src0 = src1 or src2.
3667 
3668   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3669                     AMDGPU::OpName::src1_modifiers,
3670                     AMDGPU::OpName::src2_modifiers}) {
3671     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3672             .getImm() &
3673         SISrcMods::ABS) {
3674       return false;
3675     }
3676   }
3677 
3678   return true;
3679 }
3680 
3681 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3682 
3683   const unsigned Opc = Inst.getOpcode();
3684   const MCInstrDesc &Desc = MII.get(Opc);
3685 
3686   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3687     return true;
3688 
3689   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3690   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3691     if (isCI() || isSI())
3692       return false;
3693   }
3694 
3695   return true;
3696 }
3697 
3698 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3699   const unsigned Opc = Inst.getOpcode();
3700   const MCInstrDesc &Desc = MII.get(Opc);
3701 
3702   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3703     return true;
3704 
3705   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3706   if (DimIdx < 0)
3707     return true;
3708 
3709   long Imm = Inst.getOperand(DimIdx).getImm();
3710   if (Imm < 0 || Imm >= 8)
3711     return false;
3712 
3713   return true;
3714 }
3715 
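// "rev" opcodes take their first two operands in reversed order, e.g.
// v_subrev_f32 computes src1 - src0 and v_lshlrev_b32 shifts src1 left by
// src0. They are listed here so that validateLdsDirect can reject
// lds_direct, which is only readable as src0, for these opcodes.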
3716 static bool IsRevOpcode(const unsigned Opcode)
3717 {
3718   switch (Opcode) {
3719   case AMDGPU::V_SUBREV_F32_e32:
3720   case AMDGPU::V_SUBREV_F32_e64:
3721   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3722   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3723   case AMDGPU::V_SUBREV_F32_e32_vi:
3724   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3725   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3726   case AMDGPU::V_SUBREV_F32_e64_vi:
3727 
3728   case AMDGPU::V_SUBREV_CO_U32_e32:
3729   case AMDGPU::V_SUBREV_CO_U32_e64:
3730   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3731   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3732 
3733   case AMDGPU::V_SUBBREV_U32_e32:
3734   case AMDGPU::V_SUBBREV_U32_e64:
3735   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3736   case AMDGPU::V_SUBBREV_U32_e32_vi:
3737   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3738   case AMDGPU::V_SUBBREV_U32_e64_vi:
3739 
3740   case AMDGPU::V_SUBREV_U32_e32:
3741   case AMDGPU::V_SUBREV_U32_e64:
3742   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3743   case AMDGPU::V_SUBREV_U32_e32_vi:
3744   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3745   case AMDGPU::V_SUBREV_U32_e64_vi:
3746 
3747   case AMDGPU::V_SUBREV_F16_e32:
3748   case AMDGPU::V_SUBREV_F16_e64:
3749   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3750   case AMDGPU::V_SUBREV_F16_e32_vi:
3751   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3752   case AMDGPU::V_SUBREV_F16_e64_vi:
3753 
3754   case AMDGPU::V_SUBREV_U16_e32:
3755   case AMDGPU::V_SUBREV_U16_e64:
3756   case AMDGPU::V_SUBREV_U16_e32_vi:
3757   case AMDGPU::V_SUBREV_U16_e64_vi:
3758 
3759   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3760   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3761   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3762 
3763   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3764   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3765 
3766   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3767   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3768 
3769   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3770   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3771 
3772   case AMDGPU::V_LSHRREV_B32_e32:
3773   case AMDGPU::V_LSHRREV_B32_e64:
3774   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3775   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3776   case AMDGPU::V_LSHRREV_B32_e32_vi:
3777   case AMDGPU::V_LSHRREV_B32_e64_vi:
3778   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3779   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3780 
3781   case AMDGPU::V_ASHRREV_I32_e32:
3782   case AMDGPU::V_ASHRREV_I32_e64:
3783   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3784   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3785   case AMDGPU::V_ASHRREV_I32_e32_vi:
3786   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3787   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3788   case AMDGPU::V_ASHRREV_I32_e64_vi:
3789 
3790   case AMDGPU::V_LSHLREV_B32_e32:
3791   case AMDGPU::V_LSHLREV_B32_e64:
3792   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3793   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3794   case AMDGPU::V_LSHLREV_B32_e32_vi:
3795   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3796   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3797   case AMDGPU::V_LSHLREV_B32_e64_vi:
3798 
3799   case AMDGPU::V_LSHLREV_B16_e32:
3800   case AMDGPU::V_LSHLREV_B16_e64:
3801   case AMDGPU::V_LSHLREV_B16_e32_vi:
3802   case AMDGPU::V_LSHLREV_B16_e64_vi:
3803   case AMDGPU::V_LSHLREV_B16_gfx10:
3804 
3805   case AMDGPU::V_LSHRREV_B16_e32:
3806   case AMDGPU::V_LSHRREV_B16_e64:
3807   case AMDGPU::V_LSHRREV_B16_e32_vi:
3808   case AMDGPU::V_LSHRREV_B16_e64_vi:
3809   case AMDGPU::V_LSHRREV_B16_gfx10:
3810 
3811   case AMDGPU::V_ASHRREV_I16_e32:
3812   case AMDGPU::V_ASHRREV_I16_e64:
3813   case AMDGPU::V_ASHRREV_I16_e32_vi:
3814   case AMDGPU::V_ASHRREV_I16_e64_vi:
3815   case AMDGPU::V_ASHRREV_I16_gfx10:
3816 
3817   case AMDGPU::V_LSHLREV_B64_e64:
3818   case AMDGPU::V_LSHLREV_B64_gfx10:
3819   case AMDGPU::V_LSHLREV_B64_vi:
3820 
3821   case AMDGPU::V_LSHRREV_B64_e64:
3822   case AMDGPU::V_LSHRREV_B64_gfx10:
3823   case AMDGPU::V_LSHRREV_B64_vi:
3824 
3825   case AMDGPU::V_ASHRREV_I64_e64:
3826   case AMDGPU::V_ASHRREV_I64_gfx10:
3827   case AMDGPU::V_ASHRREV_I64_vi:
3828 
3829   case AMDGPU::V_PK_LSHLREV_B16:
3830   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3831   case AMDGPU::V_PK_LSHLREV_B16_vi:
3832 
3833   case AMDGPU::V_PK_LSHRREV_B16:
3834   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3835   case AMDGPU::V_PK_LSHRREV_B16_vi:
3836   case AMDGPU::V_PK_ASHRREV_I16:
3837   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3838   case AMDGPU::V_PK_ASHRREV_I16_vi:
3839     return true;
3840   default:
3841     return false;
3842   }
3843 }
3844 
3845 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3846 
3847   using namespace SIInstrFlags;
3848   const unsigned Opcode = Inst.getOpcode();
3849   const MCInstrDesc &Desc = MII.get(Opcode);
3850 
3851   // lds_direct register is defined so that it can be used
3852   // with 9-bit operands only. Ignore encodings which do not accept these.
3853   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3854   if ((Desc.TSFlags & Enc) == 0)
3855     return None;
3856 
3857   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3858     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3859     if (SrcIdx == -1)
3860       break;
3861     const auto &Src = Inst.getOperand(SrcIdx);
3862     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3863 
3864       if (isGFX90A())
3865         return StringRef("lds_direct is not supported on this GPU");
3866 
3867       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3868         return StringRef("lds_direct cannot be used with this instruction");
3869 
3870       if (SrcName != OpName::src0)
3871         return StringRef("lds_direct may be used as src0 only");
3872     }
3873   }
3874 
3875   return None;
3876 }
3877 
3878 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3879   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3880     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3881     if (Op.isFlatOffset())
3882       return Op.getStartLoc();
3883   }
3884   return getLoc();
3885 }
3886 
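// Validate the FLAT immediate offset: global/scratch segment instructions
// take a signed offset, plain flat addressing takes an unsigned one, and
// targets without flat offset support require the field to be zero. The
// allowed width comes from getNumFlatOffsetBits() for the current subtarget.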
3887 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3888                                          const OperandVector &Operands) {
3889   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3890   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3891     return true;
3892 
3893   auto Opcode = Inst.getOpcode();
3894   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3895   assert(OpNum != -1);
3896 
3897   const auto &Op = Inst.getOperand(OpNum);
3898   if (!hasFlatOffsets() && Op.getImm() != 0) {
3899     Error(getFlatOffsetLoc(Operands),
3900           "flat offset modifier is not supported on this GPU");
3901     return false;
3902   }
3903 
3904   // For FLAT segment the offset must be positive;
3905   // MSB is ignored and forced to zero.
3906   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3907     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3908     if (!isIntN(OffsetSize, Op.getImm())) {
3909       Error(getFlatOffsetLoc(Operands),
3910             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3911       return false;
3912     }
3913   } else {
3914     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3915     if (!isUIntN(OffsetSize, Op.getImm())) {
3916       Error(getFlatOffsetLoc(Operands),
3917             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3918       return false;
3919     }
3920   }
3921 
3922   return true;
3923 }
3924 
3925 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3926   // Start with second operand because SMEM Offset cannot be dst or src0.
3927   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3928     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3929     if (Op.isSMEMOffset())
3930       return Op.getStartLoc();
3931   }
3932   return getLoc();
3933 }
3934 
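// SMEM offsets are limited to a 20-bit unsigned value on VI and for buffer
// forms, or a 21-bit signed value on GFX9+; SI/CI offsets are not checked
// here (see the early return below).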
3935 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3936                                          const OperandVector &Operands) {
3937   if (isCI() || isSI())
3938     return true;
3939 
3940   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3941   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3942     return true;
3943 
3944   auto Opcode = Inst.getOpcode();
3945   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3946   if (OpNum == -1)
3947     return true;
3948 
3949   const auto &Op = Inst.getOperand(OpNum);
3950   if (!Op.isImm())
3951     return true;
3952 
3953   uint64_t Offset = Op.getImm();
3954   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3955   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3956       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3957     return true;
3958 
3959   Error(getSMEMOffsetLoc(Operands),
3960         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3961                                "expected a 21-bit signed offset");
3962 
3963   return false;
3964 }
3965 
3966 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3967   unsigned Opcode = Inst.getOpcode();
3968   const MCInstrDesc &Desc = MII.get(Opcode);
3969   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3970     return true;
3971 
3972   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3973   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3974 
3975   const int OpIndices[] = { Src0Idx, Src1Idx };
3976 
3977   unsigned NumExprs = 0;
3978   unsigned NumLiterals = 0;
3979   uint32_t LiteralValue;
3980 
3981   for (int OpIdx : OpIndices) {
3982     if (OpIdx == -1) break;
3983 
3984     const MCOperand &MO = Inst.getOperand(OpIdx);
3985     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3986     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3987       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3988         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3989         if (NumLiterals == 0 || LiteralValue != Value) {
3990           LiteralValue = Value;
3991           ++NumLiterals;
3992         }
3993       } else if (MO.isExpr()) {
3994         ++NumExprs;
3995       }
3996     }
3997   }
3998 
3999   return NumLiterals + NumExprs <= 1;
4000 }
4001 
4002 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4003   const unsigned Opc = Inst.getOpcode();
4004   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4005       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4006     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4007     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4008 
4009     if (OpSel & ~3)
4010       return false;
4011   }
4012   return true;
4013 }
4014 
4015 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4016                                   const OperandVector &Operands) {
4017   const unsigned Opc = Inst.getOpcode();
4018   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4019   if (DppCtrlIdx < 0)
4020     return true;
4021   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4022 
4023   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4024     // DPP64 is supported for row_newbcast only.
4025     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4026     if (Src0Idx >= 0 &&
4027         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4028       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4029       Error(S, "64 bit dpp only supports row_newbcast");
4030       return false;
4031     }
4032   }
4033 
4034   return true;
4035 }
4036 
4037 // Check if VCC register matches wavefront size
4038 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4039   auto FB = getFeatureBits();
4040   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4041     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4042 }
4043 
4044 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
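// For example, "v_fma_f32 v0, v1, 2.0, 0x1234" uses the inline constant 2.0
// plus a single 32-bit literal; the literal is accepted only when
// FeatureVOP3Literal (GFX10+) is present, and at most one unique literal
// value may appear across all source operands.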
4045 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4046                                          const OperandVector &Operands) {
4047   unsigned Opcode = Inst.getOpcode();
4048   const MCInstrDesc &Desc = MII.get(Opcode);
4049   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4050   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4051       ImmIdx == -1)
4052     return true;
4053 
4054   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4055   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4056   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4057 
4058   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4059 
4060   unsigned NumExprs = 0;
4061   unsigned NumLiterals = 0;
4062   uint32_t LiteralValue;
4063 
4064   for (int OpIdx : OpIndices) {
4065     if (OpIdx == -1)
4066       continue;
4067 
4068     const MCOperand &MO = Inst.getOperand(OpIdx);
4069     if (!MO.isImm() && !MO.isExpr())
4070       continue;
4071     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4072       continue;
4073 
4074     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4075         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4076       Error(getConstLoc(Operands),
4077             "inline constants are not allowed for this operand");
4078       return false;
4079     }
4080 
4081     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4082       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4083       if (NumLiterals == 0 || LiteralValue != Value) {
4084         LiteralValue = Value;
4085         ++NumLiterals;
4086       }
4087     } else if (MO.isExpr()) {
4088       ++NumExprs;
4089     }
4090   }
4091   NumLiterals += NumExprs;
4092 
4093   if (!NumLiterals)
4094     return true;
4095 
4096   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4097     Error(getLitLoc(Operands), "literal operands are not supported");
4098     return false;
4099   }
4100 
4101   if (NumLiterals > 1) {
4102     Error(getLitLoc(Operands), "only one literal operand is allowed");
4103     return false;
4104   }
4105 
4106   return true;
4107 }
4108 
4109 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4110 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4111                          const MCRegisterInfo *MRI) {
4112   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4113   if (OpIdx < 0)
4114     return -1;
4115 
4116   const MCOperand &Op = Inst.getOperand(OpIdx);
4117   if (!Op.isReg())
4118     return -1;
4119 
4120   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4121   auto Reg = Sub ? Sub : Op.getReg();
4122   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4123   return AGPR32.contains(Reg) ? 1 : 0;
4124 }
4125 
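// On gfx90a the data and destination operands of FLAT/MUBUF/MTBUF/MIMG/DS
// memory instructions must be uniformly VGPRs or uniformly AGPRs; on other
// targets the AGPR forms of these loads and stores are rejected.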
4126 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4127   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4128   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4129                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4130                   SIInstrFlags::DS)) == 0)
4131     return true;
4132 
4133   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4134                                                       : AMDGPU::OpName::vdata;
4135 
4136   const MCRegisterInfo *MRI = getMRI();
4137   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4138   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4139 
4140   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4141     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4142     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4143       return false;
4144   }
4145 
4146   auto FB = getFeatureBits();
4147   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4148     if (DataAreg < 0 || DstAreg < 0)
4149       return true;
4150     return DstAreg == DataAreg;
4151   }
4152 
4153   return DstAreg < 1 && DataAreg < 1;
4154 }
4155 
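// gfx90a requires VGPR and AGPR tuples to start at an even register, e.g.
// v[2:3] is a valid 64-bit operand while v[1:2] is not.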
4156 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4157   auto FB = getFeatureBits();
4158   if (!FB[AMDGPU::FeatureGFX90AInsts])
4159     return true;
4160 
4161   const MCRegisterInfo *MRI = getMRI();
4162   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4163   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4164   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4165     const MCOperand &Op = Inst.getOperand(I);
4166     if (!Op.isReg())
4167       continue;
4168 
4169     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4170     if (!Sub)
4171       continue;
4172 
4173     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4174       return false;
4175     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4176       return false;
4177   }
4178 
4179   return true;
4180 }
4181 
4182 // gfx90a has an undocumented limitation:
4183 // DS_GWS opcodes must use even aligned registers.
4184 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4185                                   const OperandVector &Operands) {
4186   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4187     return true;
4188 
4189   int Opc = Inst.getOpcode();
4190   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4191       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4192     return true;
4193 
4194   const MCRegisterInfo *MRI = getMRI();
4195   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4196   int Data0Pos =
4197       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4198   assert(Data0Pos != -1);
4199   auto Reg = Inst.getOperand(Data0Pos).getReg();
4200   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4201   if (RegIdx & 1) {
4202     SMLoc RegLoc = getRegLoc(Reg, Operands);
4203     Error(RegLoc, "vgpr must be even aligned");
4204     return false;
4205   }
4206 
4207   return true;
4208 }
4209 
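// Validate the cache policy bits (glc/slc/dlc/scc): SMRD accepts only glc
// and dlc, scc is unavailable on gfx90a, returning atomics (other than
// MIMG) must set glc, and non-returning atomics must not.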
4210 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4211                                             const OperandVector &Operands,
4212                                             const SMLoc &IDLoc) {
4213   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4214                                            AMDGPU::OpName::cpol);
4215   if (CPolPos == -1)
4216     return true;
4217 
4218   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4219 
4220   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4221   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4222       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4223     Error(IDLoc, "invalid cache policy for SMRD instruction");
4224     return false;
4225   }
4226 
4227   if (isGFX90A() && (CPol & CPol::SCC)) {
4228     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4229     StringRef CStr(S.getPointer());
4230     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4231     Error(S, "scc is not supported on this GPU");
4232     return false;
4233   }
4234 
4235   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4236     return true;
4237 
4238   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4239     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4240       Error(IDLoc, "instruction must use glc");
4241       return false;
4242     }
4243   } else {
4244     if (CPol & CPol::GLC) {
4245       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4246       StringRef CStr(S.getPointer());
4247       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4248       Error(S, "instruction must not use glc");
4249       return false;
4250     }
4251   }
4252 
4253   return true;
4254 }
4255 
4256 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4257                                           const SMLoc &IDLoc,
4258                                           const OperandVector &Operands) {
4259   if (auto ErrMsg = validateLdsDirect(Inst)) {
4260     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4261     return false;
4262   }
4263   if (!validateSOPLiteral(Inst)) {
4264     Error(getLitLoc(Operands),
4265       "only one literal operand is allowed");
4266     return false;
4267   }
4268   if (!validateVOPLiteral(Inst, Operands)) {
4269     return false;
4270   }
4271   if (!validateConstantBusLimitations(Inst, Operands)) {
4272     return false;
4273   }
4274   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4275     return false;
4276   }
4277   if (!validateIntClampSupported(Inst)) {
4278     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4279       "integer clamping is not supported on this GPU");
4280     return false;
4281   }
4282   if (!validateOpSel(Inst)) {
4283     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4284       "invalid op_sel operand");
4285     return false;
4286   }
4287   if (!validateDPP(Inst, Operands)) {
4288     return false;
4289   }
4290   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4291   if (!validateMIMGD16(Inst)) {
4292     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4293       "d16 modifier is not supported on this GPU");
4294     return false;
4295   }
4296   if (!validateMIMGDim(Inst)) {
4297     Error(IDLoc, "dim modifier is required on this GPU");
4298     return false;
4299   }
4300   if (!validateMIMGMSAA(Inst)) {
4301     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4302           "invalid dim; must be MSAA type");
4303     return false;
4304   }
4305   if (!validateMIMGDataSize(Inst)) {
4306     Error(IDLoc,
4307       "image data size does not match dmask and tfe");
4308     return false;
4309   }
4310   if (!validateMIMGAddrSize(Inst)) {
4311     Error(IDLoc,
4312       "image address size does not match dim and a16");
4313     return false;
4314   }
4315   if (!validateMIMGAtomicDMask(Inst)) {
4316     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4317       "invalid atomic image dmask");
4318     return false;
4319   }
4320   if (!validateMIMGGatherDMask(Inst)) {
4321     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4322       "invalid image_gather dmask: only one bit must be set");
4323     return false;
4324   }
4325   if (!validateMovrels(Inst, Operands)) {
4326     return false;
4327   }
4328   if (!validateFlatOffset(Inst, Operands)) {
4329     return false;
4330   }
4331   if (!validateSMEMOffset(Inst, Operands)) {
4332     return false;
4333   }
4334   if (!validateMAIAccWrite(Inst, Operands)) {
4335     return false;
4336   }
4337   if (!validateMFMA(Inst, Operands)) {
4338     return false;
4339   }
4340   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4341     return false;
4342   }
4343 
4344   if (!validateAGPRLdSt(Inst)) {
4345     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4346     ? "invalid register class: data and dst should be all VGPR or AGPR"
4347     : "invalid register class: agpr loads and stores not supported on this GPU"
4348     );
4349     return false;
4350   }
4351   if (!validateVGPRAlign(Inst)) {
4352     Error(IDLoc,
4353       "invalid register class: vgpr tuples must be 64 bit aligned");
4354     return false;
4355   }
4356   if (!validateGWS(Inst, Operands)) {
4357     return false;
4358   }
4359 
4360   if (!validateDivScale(Inst)) {
4361     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4362     return false;
4363   }
4367 
4368   return true;
4369 }
4370 
4371 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4372                                             const FeatureBitset &FBS,
4373                                             unsigned VariantID = 0);
4374 
4375 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4376                                 const FeatureBitset &AvailableFeatures,
4377                                 unsigned VariantID);
4378 
4379 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4380                                        const FeatureBitset &FBS) {
4381   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4382 }
4383 
4384 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4385                                        const FeatureBitset &FBS,
4386                                        ArrayRef<unsigned> Variants) {
4387   for (auto Variant : Variants) {
4388     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4389       return true;
4390   }
4391 
4392   return false;
4393 }
4394 
4395 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4396                                                   const SMLoc &IDLoc) {
4397   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4398 
4399   // Check if requested instruction variant is supported.
4400   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4401     return false;
4402 
4403   // This instruction is not supported.
4404   // Clear any other pending errors because they are no longer relevant.
4405   getParser().clearPendingErrors();
4406 
4407   // Requested instruction variant is not supported.
4408   // Check if any other variants are supported.
4409   StringRef VariantName = getMatchedVariantName();
4410   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4411     return Error(IDLoc,
4412                  Twine(VariantName,
4413                        " variant of this instruction is not supported"));
4414   }
4415 
4416   // Finally check if this instruction is supported on any other GPU.
4417   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4418     return Error(IDLoc, "instruction not supported on this GPU");
4419   }
4420 
4421   // Instruction not supported on any GPU. Probably a typo.
4422   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4423   return Error(IDLoc, "invalid instruction" + Suggestion);
4424 }
4425 
4426 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4427                                               OperandVector &Operands,
4428                                               MCStreamer &Out,
4429                                               uint64_t &ErrorInfo,
4430                                               bool MatchingInlineAsm) {
4431   MCInst Inst;
4432   unsigned Result = Match_Success;
4433   for (auto Variant : getMatchedVariants()) {
4434     uint64_t EI;
4435     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4436                                   Variant);
4437     // We order match statuses from least to most specific. We use most specific
4438     // status as resulting
4439     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4440     if ((R == Match_Success) ||
4441         (R == Match_PreferE32) ||
4442         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4443         (R == Match_InvalidOperand && Result != Match_MissingFeature
4444                                    && Result != Match_PreferE32) ||
4445         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4446                                    && Result != Match_MissingFeature
4447                                    && Result != Match_PreferE32)) {
4448       Result = R;
4449       ErrorInfo = EI;
4450     }
4451     if (R == Match_Success)
4452       break;
4453   }
4454 
4455   if (Result == Match_Success) {
4456     if (!validateInstruction(Inst, IDLoc, Operands)) {
4457       return true;
4458     }
4459     Inst.setLoc(IDLoc);
4460     Out.emitInstruction(Inst, getSTI());
4461     return false;
4462   }
4463 
4464   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4465   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4466     return true;
4467   }
4468 
4469   switch (Result) {
4470   default: break;
4471   case Match_MissingFeature:
4472     // It has been verified that the specified instruction
4473     // mnemonic is valid. A match was found but it requires
4474     // features which are not supported on this GPU.
4475     return Error(IDLoc, "operands are not valid for this GPU or mode");
4476 
4477   case Match_InvalidOperand: {
4478     SMLoc ErrorLoc = IDLoc;
4479     if (ErrorInfo != ~0ULL) {
4480       if (ErrorInfo >= Operands.size()) {
4481         return Error(IDLoc, "too few operands for instruction");
4482       }
4483       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4484       if (ErrorLoc == SMLoc())
4485         ErrorLoc = IDLoc;
4486     }
4487     return Error(ErrorLoc, "invalid operand for instruction");
4488   }
4489 
4490   case Match_PreferE32:
4491     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4492                         "should be encoded as e32");
4493   case Match_MnemonicFail:
4494     llvm_unreachable("Invalid instructions should have been handled already");
4495   }
4496   llvm_unreachable("Implement any new match types added!");
4497 }
4498 
4499 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4500   int64_t Tmp = -1;
4501   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4502     return true;
4503   }
4504   if (getParser().parseAbsoluteExpression(Tmp)) {
4505     return true;
4506   }
4507   Ret = static_cast<uint32_t>(Tmp);
4508   return false;
4509 }
4510 
4511 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4512                                                uint32_t &Minor) {
4513   if (ParseAsAbsoluteExpression(Major))
4514     return TokError("invalid major version");
4515 
4516   if (!trySkipToken(AsmToken::Comma))
4517     return TokError("minor version number required, comma expected");
4518 
4519   if (ParseAsAbsoluteExpression(Minor))
4520     return TokError("invalid minor version");
4521 
4522   return false;
4523 }
4524 
4525 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4526   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4527     return TokError("directive only supported for amdgcn architecture");
4528 
4529   std::string TargetIDDirective;
4530   SMLoc TargetStart = getTok().getLoc();
4531   if (getParser().parseEscapedString(TargetIDDirective))
4532     return true;
4533 
4534   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4535   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4536     return getParser().Error(TargetRange.Start,
4537         (Twine(".amdgcn_target directive's target id ") +
4538          Twine(TargetIDDirective) +
4539          Twine(" does not match the specified target id ") +
4540          Twine(getTargetStreamer().getTargetID()->toString())).str());
4541 
4542   return false;
4543 }
4544 
4545 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4546   return Error(Range.Start, "value out of range", Range);
4547 }
4548 
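// Convert the next-free VGPR/SGPR counts from the .amdhsa_ directives into
// the block-granularity encodings stored in the kernel descriptor. Extra
// SGPRs for VCC, flat_scratch and XNACK are added on pre-GFX10 targets;
// GFX10+ always reports zero SGPR blocks.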
4549 bool AMDGPUAsmParser::calculateGPRBlocks(
4550     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4551     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4552     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4553     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4554   // TODO(scott.linder): These calculations are duplicated from
4555   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4556   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4557 
4558   unsigned NumVGPRs = NextFreeVGPR;
4559   unsigned NumSGPRs = NextFreeSGPR;
4560 
4561   if (Version.Major >= 10)
4562     NumSGPRs = 0;
4563   else {
4564     unsigned MaxAddressableNumSGPRs =
4565         IsaInfo::getAddressableNumSGPRs(&getSTI());
4566 
4567     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4568         NumSGPRs > MaxAddressableNumSGPRs)
4569       return OutOfRangeError(SGPRRange);
4570 
4571     NumSGPRs +=
4572         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4573 
4574     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4575         NumSGPRs > MaxAddressableNumSGPRs)
4576       return OutOfRangeError(SGPRRange);
4577 
4578     if (Features.test(FeatureSGPRInitBug))
4579       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4580   }
4581 
4582   VGPRBlocks =
4583       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4584   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4585 
4586   return false;
4587 }
4588 
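// Parses a .amdhsa_kernel block terminated by .end_amdhsa_kernel, e.g.
// (illustrative):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// Each .amdhsa_ directive may appear at most once and takes an absolute
// expression; fields not set explicitly keep the defaults from
// getDefaultAmdhsaKernelDescriptor().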
4589 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4590   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4591     return TokError("directive only supported for amdgcn architecture");
4592 
4593   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4594     return TokError("directive only supported for amdhsa OS");
4595 
4596   StringRef KernelName;
4597   if (getParser().parseIdentifier(KernelName))
4598     return true;
4599 
4600   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4601 
4602   StringSet<> Seen;
4603 
4604   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4605 
4606   SMRange VGPRRange;
4607   uint64_t NextFreeVGPR = 0;
4608   uint64_t AccumOffset = 0;
4609   SMRange SGPRRange;
4610   uint64_t NextFreeSGPR = 0;
4611 
4612   // Count the number of user SGPRs implied from the enabled feature bits.
4613   unsigned ImpliedUserSGPRCount = 0;
4614 
4615   // Track if the asm explicitly contains the directive for the user SGPR
4616   // count.
4617   Optional<unsigned> ExplicitUserSGPRCount;
4618   bool ReserveVCC = true;
4619   bool ReserveFlatScr = true;
4620   Optional<bool> EnableWavefrontSize32;
4621 
4622   while (true) {
4623     while (trySkipToken(AsmToken::EndOfStatement));
4624 
4625     StringRef ID;
4626     SMRange IDRange = getTok().getLocRange();
4627     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4628       return true;
4629 
4630     if (ID == ".end_amdhsa_kernel")
4631       break;
4632 
4633     if (Seen.find(ID) != Seen.end())
4634       return TokError(".amdhsa_ directives cannot be repeated");
4635     Seen.insert(ID);
4636 
4637     SMLoc ValStart = getLoc();
4638     int64_t IVal;
4639     if (getParser().parseAbsoluteExpression(IVal))
4640       return true;
4641     SMLoc ValEnd = getLoc();
4642     SMRange ValRange = SMRange(ValStart, ValEnd);
4643 
4644     if (IVal < 0)
4645       return OutOfRangeError(ValRange);
4646 
4647     uint64_t Val = IVal;
4648 
4649 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4650   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4651     return OutOfRangeError(RANGE);                                             \
4652   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4653 
4654     if (ID == ".amdhsa_group_segment_fixed_size") {
4655       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4656         return OutOfRangeError(ValRange);
4657       KD.group_segment_fixed_size = Val;
4658     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4659       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4660         return OutOfRangeError(ValRange);
4661       KD.private_segment_fixed_size = Val;
4662     } else if (ID == ".amdhsa_kernarg_size") {
4663       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4664         return OutOfRangeError(ValRange);
4665       KD.kernarg_size = Val;
4666     } else if (ID == ".amdhsa_user_sgpr_count") {
4667       ExplicitUserSGPRCount = Val;
4668     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4669       if (hasArchitectedFlatScratch())
4670         return Error(IDRange.Start,
4671                      "directive is not supported with architected flat scratch",
4672                      IDRange);
4673       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4674                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4675                        Val, ValRange);
4676       if (Val)
4677         ImpliedUserSGPRCount += 4;
4678     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4679       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4680                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4681                        ValRange);
4682       if (Val)
4683         ImpliedUserSGPRCount += 2;
4684     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4685       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4686                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4687                        ValRange);
4688       if (Val)
4689         ImpliedUserSGPRCount += 2;
4690     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4691       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4692                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4693                        Val, ValRange);
4694       if (Val)
4695         ImpliedUserSGPRCount += 2;
4696     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4697       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4698                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4699                        ValRange);
4700       if (Val)
4701         ImpliedUserSGPRCount += 2;
4702     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4703       if (hasArchitectedFlatScratch())
4704         return Error(IDRange.Start,
4705                      "directive is not supported with architected flat scratch",
4706                      IDRange);
4707       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4708                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4709                        ValRange);
4710       if (Val)
4711         ImpliedUserSGPRCount += 2;
4712     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4713       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4714                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4715                        Val, ValRange);
4716       if (Val)
4717         ImpliedUserSGPRCount += 1;
4718     } else if (ID == ".amdhsa_wavefront_size32") {
4719       if (IVersion.Major < 10)
4720         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4721       EnableWavefrontSize32 = Val;
4722       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4723                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4724                        Val, ValRange);
4725     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4726       if (hasArchitectedFlatScratch())
4727         return Error(IDRange.Start,
4728                      "directive is not supported with architected flat scratch",
4729                      IDRange);
4730       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4731                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4732     } else if (ID == ".amdhsa_enable_private_segment") {
4733       if (!hasArchitectedFlatScratch())
4734         return Error(
4735             IDRange.Start,
4736             "directive is not supported without architected flat scratch",
4737             IDRange);
4738       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4739                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4740     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4741       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4742                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4743                        ValRange);
4744     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4745       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4746                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4747                        ValRange);
4748     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4749       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4750                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4751                        ValRange);
4752     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4753       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4754                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4755                        ValRange);
4756     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4757       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4758                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4759                        ValRange);
4760     } else if (ID == ".amdhsa_next_free_vgpr") {
4761       VGPRRange = ValRange;
4762       NextFreeVGPR = Val;
4763     } else if (ID == ".amdhsa_next_free_sgpr") {
4764       SGPRRange = ValRange;
4765       NextFreeSGPR = Val;
4766     } else if (ID == ".amdhsa_accum_offset") {
4767       if (!isGFX90A())
4768         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4769       AccumOffset = Val;
4770     } else if (ID == ".amdhsa_reserve_vcc") {
4771       if (!isUInt<1>(Val))
4772         return OutOfRangeError(ValRange);
4773       ReserveVCC = Val;
4774     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4775       if (IVersion.Major < 7)
4776         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4777       if (hasArchitectedFlatScratch())
4778         return Error(IDRange.Start,
4779                      "directive is not supported with architected flat scratch",
4780                      IDRange);
4781       if (!isUInt<1>(Val))
4782         return OutOfRangeError(ValRange);
4783       ReserveFlatScr = Val;
4784     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4785       if (IVersion.Major < 8)
4786         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4787       if (!isUInt<1>(Val))
4788         return OutOfRangeError(ValRange);
4789       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4790         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4791                                  IDRange);
4792     } else if (ID == ".amdhsa_float_round_mode_32") {
4793       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4794                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4795     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4796       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4797                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4798     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4799       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4800                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4801     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4802       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4803                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4804                        ValRange);
4805     } else if (ID == ".amdhsa_dx10_clamp") {
4806       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4807                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4808     } else if (ID == ".amdhsa_ieee_mode") {
4809       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4810                        Val, ValRange);
4811     } else if (ID == ".amdhsa_fp16_overflow") {
4812       if (IVersion.Major < 9)
4813         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4814       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4815                        ValRange);
4816     } else if (ID == ".amdhsa_tg_split") {
4817       if (!isGFX90A())
4818         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4819       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4820                        ValRange);
4821     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4822       if (IVersion.Major < 10)
4823         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4824       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4825                        ValRange);
4826     } else if (ID == ".amdhsa_memory_ordered") {
4827       if (IVersion.Major < 10)
4828         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4829       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4830                        ValRange);
4831     } else if (ID == ".amdhsa_forward_progress") {
4832       if (IVersion.Major < 10)
4833         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4834       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4835                        ValRange);
4836     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4837       PARSE_BITS_ENTRY(
4838           KD.compute_pgm_rsrc2,
4839           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4840           ValRange);
4841     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4842       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4843                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4844                        Val, ValRange);
4845     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4846       PARSE_BITS_ENTRY(
4847           KD.compute_pgm_rsrc2,
4848           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4849           ValRange);
4850     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4851       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4852                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4853                        Val, ValRange);
4854     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4855       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4856                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4857                        Val, ValRange);
4858     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4859       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4860                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4861                        Val, ValRange);
4862     } else if (ID == ".amdhsa_exception_int_div_zero") {
4863       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4864                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4865                        Val, ValRange);
4866     } else {
4867       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4868     }
4869 
4870 #undef PARSE_BITS_ENTRY
4871   }
4872 
4873   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4874     return TokError(".amdhsa_next_free_vgpr directive is required");
4875 
4876   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4877     return TokError(".amdhsa_next_free_sgpr directive is required");
4878 
4879   unsigned VGPRBlocks;
4880   unsigned SGPRBlocks;
4881   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4882                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4883                          EnableWavefrontSize32, NextFreeVGPR,
4884                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4885                          SGPRBlocks))
4886     return true;
4887 
4888   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4889           VGPRBlocks))
4890     return OutOfRangeError(VGPRRange);
4891   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4892                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4893 
4894   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4895           SGPRBlocks))
4896     return OutOfRangeError(SGPRRange);
4897   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4898                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4899                   SGPRBlocks);
4900 
4901   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
4902     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
4903                     "enabled user SGPRs");
4904 
4905   unsigned UserSGPRCount =
4906       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
4907 
4908   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4909     return TokError("too many user SGPRs enabled");
4910   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4911                   UserSGPRCount);
4912 
4913   if (isGFX90A()) {
4914     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4915       return TokError(".amdhsa_accum_offset directive is required");
4916     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4917       return TokError("accum_offset should be in range [4..256] in "
4918                       "increments of 4");
4919     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4920       return TokError("accum_offset exceeds total VGPR allocation");
4921     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4922                     (AccumOffset / 4 - 1));
4923   }
4924 
4925   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4926       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4927       ReserveFlatScr);
4928   return false;
4929 }
4930 
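/// ParseDirectiveHSACodeObjectVersion
///  ::= .hsa_code_object_version major, minor
/// (Sketch of the expected form; the version pair itself is parsed by
/// ParseDirectiveMajorMinor.)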
4931 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4932   uint32_t Major;
4933   uint32_t Minor;
4934 
4935   if (ParseDirectiveMajorMinor(Major, Minor))
4936     return true;
4937 
4938   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4939   return false;
4940 }
4941 
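/// ParseDirectiveHSACodeObjectISA
///  ::= .hsa_code_object_isa [major, minor, stepping, "vendor", "arch"]
/// With no operands, the ISA version of the targeted GPU is emitted instead.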
4942 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4943   uint32_t Major;
4944   uint32_t Minor;
4945   uint32_t Stepping;
4946   StringRef VendorName;
4947   StringRef ArchName;
4948 
4949   // If this directive has no arguments, then use the ISA version for the
4950   // targeted GPU.
4951   if (isToken(AsmToken::EndOfStatement)) {
4952     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4953     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4954                                                         ISA.Stepping,
4955                                                         "AMD", "AMDGPU");
4956     return false;
4957   }
4958 
4959   if (ParseDirectiveMajorMinor(Major, Minor))
4960     return true;
4961 
4962   if (!trySkipToken(AsmToken::Comma))
4963     return TokError("stepping version number required, comma expected");
4964 
4965   if (ParseAsAbsoluteExpression(Stepping))
4966     return TokError("invalid stepping version");
4967 
4968   if (!trySkipToken(AsmToken::Comma))
4969     return TokError("vendor name required, comma expected");
4970 
4971   if (!parseString(VendorName, "invalid vendor name"))
4972     return true;
4973 
4974   if (!trySkipToken(AsmToken::Comma))
4975     return TokError("arch name required, comma expected");
4976 
4977   if (!parseString(ArchName, "invalid arch name"))
4978     return true;
4979 
4980   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4981                                                       VendorName, ArchName);
4982   return false;
4983 }
4984 
4985 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4986                                                amd_kernel_code_t &Header) {
4987   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4988   // assembly for backwards compatibility.
4989   if (ID == "max_scratch_backing_memory_byte_size") {
4990     Parser.eatToEndOfStatement();
4991     return false;
4992   }
4993 
4994   SmallString<40> ErrStr;
4995   raw_svector_ostream Err(ErrStr);
4996   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4997     return TokError(Err.str());
4998   }
4999   Lex();
5000 
5001   if (ID == "enable_wavefront_size32") {
5002     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5003       if (!isGFX10Plus())
5004         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5005       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5006         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5007     } else {
5008       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5009         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5010     }
5011   }
5012 
5013   if (ID == "wavefront_size") {
5014     if (Header.wavefront_size == 5) {
5015       if (!isGFX10Plus())
5016         return TokError("wavefront_size=5 is only allowed on GFX10+");
5017       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5018         return TokError("wavefront_size=5 requires +WavefrontSize32");
5019     } else if (Header.wavefront_size == 6) {
5020       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5021         return TokError("wavefront_size=6 requires +WavefrontSize64");
5022     }
5023   }
5024 
5025   if (ID == "enable_wgp_mode") {
5026     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5027         !isGFX10Plus())
5028       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5029   }
5030 
5031   if (ID == "enable_mem_ordered") {
5032     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5033         !isGFX10Plus())
5034       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5035   }
5036 
5037   if (ID == "enable_fwd_progress") {
5038     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5039         !isGFX10Plus())
5040       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5041   }
5042 
5043   return false;
5044 }
5045 
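/// ParseDirectiveAMDKernelCodeT
///  ::= .amd_kernel_code_t <key = value statements> .end_amd_kernel_code_t
/// Fields not mentioned keep the defaults set by initDefaultAMDKernelCodeT.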
5046 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5047   amd_kernel_code_t Header;
5048   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5049 
5050   while (true) {
5051     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5052     // will set the current token to EndOfStatement.
5053     while(trySkipToken(AsmToken::EndOfStatement));
5054 
5055     StringRef ID;
5056     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5057       return true;
5058 
5059     if (ID == ".end_amd_kernel_code_t")
5060       break;
5061 
5062     if (ParseAMDKernelCodeTValue(ID, Header))
5063       return true;
5064   }
5065 
5066   getTargetStreamer().EmitAMDKernelCodeT(Header);
5067 
5068   return false;
5069 }
5070 
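/// ParseDirectiveAMDGPUHsaKernel
///  ::= .amdgpu_hsa_kernel <symbol name>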
5071 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5072   StringRef KernelName;
5073   if (!parseId(KernelName, "expected symbol name"))
5074     return true;
5075 
5076   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5077                                            ELF::STT_AMDGPU_HSA_KERNEL);
5078 
5079   KernelScope.initialize(getContext());
5080   return false;
5081 }
5082 
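/// ParseDirectiveISAVersion
///  ::= .amd_amdgpu_isa "<target-id string>"
/// The quoted string must match the target id derived from the subtarget
/// options, otherwise an error is reported.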
5083 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5084   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5085     return Error(getLoc(),
5086                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5087                  "architectures");
5088   }
5089 
5090   auto TargetIDDirective = getLexer().getTok().getStringContents();
5091   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5092     return Error(getParser().getTok().getLoc(), "target id must match options");
5093 
5094   getTargetStreamer().EmitISAVersion();
5095   Lex();
5096 
5097   return false;
5098 }
5099 
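/// Parse the HSA metadata block: free-form text (typically YAML or a MsgPack
/// document) collected between the ABI-version-specific begin/end assembler
/// directives and forwarded to the target streamer.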
5100 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5101   const char *AssemblerDirectiveBegin;
5102   const char *AssemblerDirectiveEnd;
5103   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5104       isHsaAbiVersion3AndAbove(&getSTI())
5105           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5106                             HSAMD::V3::AssemblerDirectiveEnd)
5107           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5108                             HSAMD::AssemblerDirectiveEnd);
5109 
5110   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5111     return Error(getLoc(),
5112                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5113                  "not available on non-amdhsa OSes")).str());
5114   }
5115 
5116   std::string HSAMetadataString;
5117   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5118                           HSAMetadataString))
5119     return true;
5120 
5121   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5122     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5123       return Error(getLoc(), "invalid HSA metadata");
5124   } else {
5125     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5126       return Error(getLoc(), "invalid HSA metadata");
5127   }
5128 
5129   return false;
5130 }
5131 
5132 /// Common code to parse out a block of text (typically YAML) between start and
5133 /// end directives.
5134 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5135                                           const char *AssemblerDirectiveEnd,
5136                                           std::string &CollectString) {
5137 
5138   raw_string_ostream CollectStream(CollectString);
5139 
5140   getLexer().setSkipSpace(false);
5141 
5142   bool FoundEnd = false;
5143   while (!isToken(AsmToken::Eof)) {
5144     while (isToken(AsmToken::Space)) {
5145       CollectStream << getTokenStr();
5146       Lex();
5147     }
5148 
5149     if (trySkipId(AssemblerDirectiveEnd)) {
5150       FoundEnd = true;
5151       break;
5152     }
5153 
5154     CollectStream << Parser.parseStringToEndOfStatement()
5155                   << getContext().getAsmInfo()->getSeparatorString();
5156 
5157     Parser.eatToEndOfStatement();
5158   }
5159 
5160   getLexer().setSkipSpace(true);
5161 
5162   if (isToken(AsmToken::Eof) && !FoundEnd) {
5163     return TokError(Twine("expected directive ") +
5164                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5165   }
5166 
5167   CollectStream.flush();
5168   return false;
5169 }
5170 
5171 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5172 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5173   std::string String;
5174   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5175                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5176     return true;
5177 
5178   auto PALMetadata = getTargetStreamer().getPALMetadata();
5179   if (!PALMetadata->setFromString(String))
5180     return Error(getLoc(), "invalid PAL metadata");
5181   return false;
5182 }
5183 
5184 /// Parse the assembler directive for old linear-format PAL metadata.
5185 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5186   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5187     return Error(getLoc(),
5188                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5189                  "not available on non-amdpal OSes")).str());
5190   }
5191 
5192   auto PALMetadata = getTargetStreamer().getPALMetadata();
5193   PALMetadata->setLegacy();
5194   for (;;) {
5195     uint32_t Key, Value;
5196     if (ParseAsAbsoluteExpression(Key)) {
5197       return TokError(Twine("invalid value in ") +
5198                       Twine(PALMD::AssemblerDirective));
5199     }
5200     if (!trySkipToken(AsmToken::Comma)) {
5201       return TokError(Twine("expected an even number of values in ") +
5202                       Twine(PALMD::AssemblerDirective));
5203     }
5204     if (ParseAsAbsoluteExpression(Value)) {
5205       return TokError(Twine("invalid value in ") +
5206                       Twine(PALMD::AssemblerDirective));
5207     }
5208     PALMetadata->setRegister(Key, Value);
5209     if (!trySkipToken(AsmToken::Comma))
5210       break;
5211   }
5212   return false;
5213 }
5214 
5215 /// ParseDirectiveAMDGPULDS
5216 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5217 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5218   if (getParser().checkForValidSection())
5219     return true;
5220 
5221   StringRef Name;
5222   SMLoc NameLoc = getLoc();
5223   if (getParser().parseIdentifier(Name))
5224     return TokError("expected identifier in directive");
5225 
5226   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5227   if (parseToken(AsmToken::Comma, "expected ','"))
5228     return true;
5229 
5230   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5231 
5232   int64_t Size;
5233   SMLoc SizeLoc = getLoc();
5234   if (getParser().parseAbsoluteExpression(Size))
5235     return true;
5236   if (Size < 0)
5237     return Error(SizeLoc, "size must be non-negative");
5238   if (Size > LocalMemorySize)
5239     return Error(SizeLoc, "size is too large");
5240 
5241   int64_t Alignment = 4;
5242   if (trySkipToken(AsmToken::Comma)) {
5243     SMLoc AlignLoc = getLoc();
5244     if (getParser().parseAbsoluteExpression(Alignment))
5245       return true;
5246     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5247       return Error(AlignLoc, "alignment must be a power of two");
5248 
5249     // Alignment larger than the size of LDS is possible in theory, as long
5250     // as the linker manages to place the symbol at address 0, but we do want
5251     // to make sure the alignment fits nicely into a 32-bit integer.
5252     if (Alignment >= 1u << 31)
5253       return Error(AlignLoc, "alignment is too large");
5254   }
5255 
5256   if (parseToken(AsmToken::EndOfStatement,
5257                  "unexpected token in '.amdgpu_lds' directive"))
5258     return true;
5259 
5260   Symbol->redefineIfPossible();
5261   if (!Symbol->isUndefined())
5262     return Error(NameLoc, "invalid symbol redefinition");
5263 
5264   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5265   return false;
5266 }
5267 
5268 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5269   StringRef IDVal = DirectiveID.getString();
5270 
5271   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5272     if (IDVal == ".amdhsa_kernel")
5273      return ParseDirectiveAMDHSAKernel();
5274 
5275     // TODO: Restructure/combine with PAL metadata directive.
5276     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5277       return ParseDirectiveHSAMetadata();
5278   } else {
5279     if (IDVal == ".hsa_code_object_version")
5280       return ParseDirectiveHSACodeObjectVersion();
5281 
5282     if (IDVal == ".hsa_code_object_isa")
5283       return ParseDirectiveHSACodeObjectISA();
5284 
5285     if (IDVal == ".amd_kernel_code_t")
5286       return ParseDirectiveAMDKernelCodeT();
5287 
5288     if (IDVal == ".amdgpu_hsa_kernel")
5289       return ParseDirectiveAMDGPUHsaKernel();
5290 
5291     if (IDVal == ".amd_amdgpu_isa")
5292       return ParseDirectiveISAVersion();
5293 
5294     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5295       return ParseDirectiveHSAMetadata();
5296   }
5297 
5298   if (IDVal == ".amdgcn_target")
5299     return ParseDirectiveAMDGCNTarget();
5300 
5301   if (IDVal == ".amdgpu_lds")
5302     return ParseDirectiveAMDGPULDS();
5303 
5304   if (IDVal == PALMD::AssemblerDirectiveBegin)
5305     return ParseDirectivePALMetadataBegin();
5306 
5307   if (IDVal == PALMD::AssemblerDirective)
5308     return ParseDirectivePALMetadata();
5309 
5310   return true;
5311 }
5312 
5313 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5314                                            unsigned RegNo) {
5315 
5316   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5317        R.isValid(); ++R) {
5318     if (*R == RegNo)
5319       return isGFX9Plus();
5320   }
5321 
5322   // GFX10 has 2 more SGPRs, 104 and 105.
5323   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5324        R.isValid(); ++R) {
5325     if (*R == RegNo)
5326       return hasSGPR104_SGPR105();
5327   }
5328 
5329   switch (RegNo) {
5330   case AMDGPU::SRC_SHARED_BASE:
5331   case AMDGPU::SRC_SHARED_LIMIT:
5332   case AMDGPU::SRC_PRIVATE_BASE:
5333   case AMDGPU::SRC_PRIVATE_LIMIT:
5334   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5335     return isGFX9Plus();
5336   case AMDGPU::TBA:
5337   case AMDGPU::TBA_LO:
5338   case AMDGPU::TBA_HI:
5339   case AMDGPU::TMA:
5340   case AMDGPU::TMA_LO:
5341   case AMDGPU::TMA_HI:
5342     return !isGFX9Plus();
5343   case AMDGPU::XNACK_MASK:
5344   case AMDGPU::XNACK_MASK_LO:
5345   case AMDGPU::XNACK_MASK_HI:
5346     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5347   case AMDGPU::SGPR_NULL:
5348     return isGFX10Plus();
5349   default:
5350     break;
5351   }
5352 
5353   if (isCI())
5354     return true;
5355 
5356   if (isSI() || isGFX10Plus()) {
5357     // No flat_scr on SI.
5358     // On GFX10 flat scratch is not a valid register operand and can only be
5359     // accessed with s_setreg/s_getreg.
5360     switch (RegNo) {
5361     case AMDGPU::FLAT_SCR:
5362     case AMDGPU::FLAT_SCR_LO:
5363     case AMDGPU::FLAT_SCR_HI:
5364       return false;
5365     default:
5366       return true;
5367     }
5368   }
5369 
5370   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5371   // SI/CI have.
5372   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5373        R.isValid(); ++R) {
5374     if (*R == RegNo)
5375       return hasSGPR102_SGPR103();
5376   }
5377 
5378   return true;
5379 }
5380 
5381 OperandMatchResultTy
5382 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5383                               OperandMode Mode) {
5384   // Try to parse with a custom parser
5385   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5386 
5387   // If we successfully parsed the operand or if there was an error parsing,
5388   // we are done.
5389   //
5390   // If we are parsing after we reach EndOfStatement then this means we
5391   // are appending default values to the Operands list.  This is only done
5392   // by a custom parser, so we shouldn't continue on to the generic parsing.
5393   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5394       isToken(AsmToken::EndOfStatement))
5395     return ResTy;
5396 
5397   SMLoc RBraceLoc;
5398   SMLoc LBraceLoc = getLoc();
5399   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5400     unsigned Prefix = Operands.size();
5401 
5402     for (;;) {
5403       auto Loc = getLoc();
5404       ResTy = parseReg(Operands);
5405       if (ResTy == MatchOperand_NoMatch)
5406         Error(Loc, "expected a register");
5407       if (ResTy != MatchOperand_Success)
5408         return MatchOperand_ParseFail;
5409 
5410       RBraceLoc = getLoc();
5411       if (trySkipToken(AsmToken::RBrac))
5412         break;
5413 
5414       if (!skipToken(AsmToken::Comma,
5415                      "expected a comma or a closing square bracket")) {
5416         return MatchOperand_ParseFail;
5417       }
5418     }
5419 
5420     if (Operands.size() - Prefix > 1) {
5421       Operands.insert(Operands.begin() + Prefix,
5422                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5423       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5424     }
5425 
5426     return MatchOperand_Success;
5427   }
5428 
5429   return parseRegOrImm(Operands);
5430 }
5431 
5432 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5433   // Clear any forced encodings from the previous instruction.
5434   setForcedEncodingSize(0);
5435   setForcedDPP(false);
5436   setForcedSDWA(false);
5437 
5438   if (Name.endswith("_e64")) {
5439     setForcedEncodingSize(64);
5440     return Name.substr(0, Name.size() - 4);
5441   } else if (Name.endswith("_e32")) {
5442     setForcedEncodingSize(32);
5443     return Name.substr(0, Name.size() - 4);
5444   } else if (Name.endswith("_dpp")) {
5445     setForcedDPP(true);
5446     return Name.substr(0, Name.size() - 4);
5447   } else if (Name.endswith("_sdwa")) {
5448     setForcedSDWA(true);
5449     return Name.substr(0, Name.size() - 5);
5450   }
5451   return Name;
5452 }
5453 
5454 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5455                                        StringRef Name,
5456                                        SMLoc NameLoc, OperandVector &Operands) {
5457   // Add the instruction mnemonic
5458   Name = parseMnemonicSuffix(Name);
5459   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5460 
5461   bool IsMIMG = Name.startswith("image_");
5462 
5463   while (!trySkipToken(AsmToken::EndOfStatement)) {
5464     OperandMode Mode = OperandMode_Default;
5465     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5466       Mode = OperandMode_NSA;
5467     CPolSeen = 0;
5468     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5469 
5470     if (Res != MatchOperand_Success) {
5471       checkUnsupportedInstruction(Name, NameLoc);
5472       if (!Parser.hasPendingError()) {
5473         // FIXME: use real operand location rather than the current location.
5474         StringRef Msg =
5475           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5476                                             "not a valid operand.";
5477         Error(getLoc(), Msg);
5478       }
5479       while (!trySkipToken(AsmToken::EndOfStatement)) {
5480         lex();
5481       }
5482       return true;
5483     }
5484 
5485     // Eat the comma or space if there is one.
5486     trySkipToken(AsmToken::Comma);
5487   }
5488 
5489   return false;
5490 }
5491 
5492 //===----------------------------------------------------------------------===//
5493 // Utility functions
5494 //===----------------------------------------------------------------------===//
5495 
5496 OperandMatchResultTy
5497 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5498 
5499   if (!trySkipId(Prefix, AsmToken::Colon))
5500     return MatchOperand_NoMatch;
5501 
5502   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5503 }
5504 
5505 OperandMatchResultTy
5506 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5507                                     AMDGPUOperand::ImmTy ImmTy,
5508                                     bool (*ConvertResult)(int64_t&)) {
5509   SMLoc S = getLoc();
5510   int64_t Value = 0;
5511 
5512   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5513   if (Res != MatchOperand_Success)
5514     return Res;
5515 
5516   if (ConvertResult && !ConvertResult(Value)) {
5517     Error(S, "invalid " + StringRef(Prefix) + " value.");
5518   }
5519 
5520   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5521   return MatchOperand_Success;
5522 }
5523 
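// Parses a prefixed list of 0/1 flags packed into a bitmask, roughly of the
// form "prefix:[0,1,1,0]" (op_sel-style operands; the example is illustrative).
// At most MaxSize elements are accepted.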
5524 OperandMatchResultTy
5525 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5526                                              OperandVector &Operands,
5527                                              AMDGPUOperand::ImmTy ImmTy,
5528                                              bool (*ConvertResult)(int64_t&)) {
5529   SMLoc S = getLoc();
5530   if (!trySkipId(Prefix, AsmToken::Colon))
5531     return MatchOperand_NoMatch;
5532 
5533   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5534     return MatchOperand_ParseFail;
5535 
5536   unsigned Val = 0;
5537   const unsigned MaxSize = 4;
5538 
5539   // FIXME: How to verify the number of elements matches the number of src
5540   // operands?
5541   for (int I = 0; ; ++I) {
5542     int64_t Op;
5543     SMLoc Loc = getLoc();
5544     if (!parseExpr(Op))
5545       return MatchOperand_ParseFail;
5546 
5547     if (Op != 0 && Op != 1) {
5548       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5549       return MatchOperand_ParseFail;
5550     }
5551 
5552     Val |= (Op << I);
5553 
5554     if (trySkipToken(AsmToken::RBrac))
5555       break;
5556 
5557     if (I + 1 == MaxSize) {
5558       Error(getLoc(), "expected a closing square bracket");
5559       return MatchOperand_ParseFail;
5560     }
5561 
5562     if (!skipToken(AsmToken::Comma, "expected a comma"))
5563       return MatchOperand_ParseFail;
5564   }
5565 
5566   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5567   return MatchOperand_Success;
5568 }
5569 
5570 OperandMatchResultTy
5571 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5572                                AMDGPUOperand::ImmTy ImmTy) {
5573   int64_t Bit;
5574   SMLoc S = getLoc();
5575 
5576   if (trySkipId(Name)) {
5577     Bit = 1;
5578   } else if (trySkipId("no", Name)) {
5579     Bit = 0;
5580   } else {
5581     return MatchOperand_NoMatch;
5582   }
5583 
5584   if (Name == "r128" && !hasMIMG_R128()) {
5585     Error(S, "r128 modifier is not supported on this GPU");
5586     return MatchOperand_ParseFail;
5587   }
5588   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5589     Error(S, "a16 modifier is not supported on this GPU");
5590     return MatchOperand_ParseFail;
5591   }
5592 
5593   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5594     ImmTy = AMDGPUOperand::ImmTyR128A16;
5595 
5596   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5597   return MatchOperand_Success;
5598 }
5599 
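// Parse cache policy modifiers: glc, slc, dlc, scc and their "no"-prefixed
// negations (e.g. noglc). Each call consumes a single modifier; repeating a
// modifier on the same instruction is rejected via CPolSeen.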
5600 OperandMatchResultTy
5601 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5602   unsigned CPolOn = 0;
5603   unsigned CPolOff = 0;
5604   SMLoc S = getLoc();
5605 
5606   if (trySkipId("glc"))
5607     CPolOn = AMDGPU::CPol::GLC;
5608   else if (trySkipId("noglc"))
5609     CPolOff = AMDGPU::CPol::GLC;
5610   else if (trySkipId("slc"))
5611     CPolOn = AMDGPU::CPol::SLC;
5612   else if (trySkipId("noslc"))
5613     CPolOff = AMDGPU::CPol::SLC;
5614   else if (trySkipId("dlc"))
5615     CPolOn = AMDGPU::CPol::DLC;
5616   else if (trySkipId("nodlc"))
5617     CPolOff = AMDGPU::CPol::DLC;
5618   else if (trySkipId("scc"))
5619     CPolOn = AMDGPU::CPol::SCC;
5620   else if (trySkipId("noscc"))
5621     CPolOff = AMDGPU::CPol::SCC;
5622   else
5623     return MatchOperand_NoMatch;
5624 
5625   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5626     Error(S, "dlc modifier is not supported on this GPU");
5627     return MatchOperand_ParseFail;
5628   }
5629 
5630   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5631     Error(S, "scc modifier is not supported on this GPU");
5632     return MatchOperand_ParseFail;
5633   }
5634 
5635   if (CPolSeen & (CPolOn | CPolOff)) {
5636     Error(S, "duplicate cache policy modifier");
5637     return MatchOperand_ParseFail;
5638   }
5639 
5640   CPolSeen |= (CPolOn | CPolOff);
5641 
5642   for (unsigned I = 1; I != Operands.size(); ++I) {
5643     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5644     if (Op.isCPol()) {
5645       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5646       return MatchOperand_Success;
5647     }
5648   }
5649 
5650   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5651                                               AMDGPUOperand::ImmTyCPol));
5652 
5653   return MatchOperand_Success;
5654 }
5655 
5656 static void addOptionalImmOperand(
5657   MCInst& Inst, const OperandVector& Operands,
5658   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5659   AMDGPUOperand::ImmTy ImmT,
5660   int64_t Default = 0) {
5661   auto i = OptionalIdx.find(ImmT);
5662   if (i != OptionalIdx.end()) {
5663     unsigned Idx = i->second;
5664     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5665   } else {
5666     Inst.addOperand(MCOperand::createImm(Default));
5667   }
5668 }
5669 
5670 OperandMatchResultTy
5671 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5672                                        StringRef &Value,
5673                                        SMLoc &StringLoc) {
5674   if (!trySkipId(Prefix, AsmToken::Colon))
5675     return MatchOperand_NoMatch;
5676 
5677   StringLoc = getLoc();
5678   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5679                                                   : MatchOperand_ParseFail;
5680 }
5681 
5682 //===----------------------------------------------------------------------===//
5683 // MTBUF format
5684 //===----------------------------------------------------------------------===//
5685 
5686 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5687                                   int64_t MaxVal,
5688                                   int64_t &Fmt) {
5689   int64_t Val;
5690   SMLoc Loc = getLoc();
5691 
5692   auto Res = parseIntWithPrefix(Pref, Val);
5693   if (Res == MatchOperand_ParseFail)
5694     return false;
5695   if (Res == MatchOperand_NoMatch)
5696     return true;
5697 
5698   if (Val < 0 || Val > MaxVal) {
5699     Error(Loc, Twine("out of range ", StringRef(Pref)));
5700     return false;
5701   }
5702 
5703   Fmt = Val;
5704   return true;
5705 }
5706 
5707 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5708 // values to live in a joint format operand in the MCInst encoding.
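// A rough sketch of the accepted form: "dfmt:1, nfmt:7" -- either order, each
// part optional, with an optional separating comma.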
5709 OperandMatchResultTy
5710 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5711   using namespace llvm::AMDGPU::MTBUFFormat;
5712 
5713   int64_t Dfmt = DFMT_UNDEF;
5714   int64_t Nfmt = NFMT_UNDEF;
5715 
5716   // dfmt and nfmt can appear in either order, and each is optional.
5717   for (int I = 0; I < 2; ++I) {
5718     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5719       return MatchOperand_ParseFail;
5720 
5721     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5722       return MatchOperand_ParseFail;
5723     }
5724     // Skip optional comma between dfmt/nfmt
5725     // but guard against 2 commas following each other.
5726     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5727         !peekToken().is(AsmToken::Comma)) {
5728       trySkipToken(AsmToken::Comma);
5729     }
5730   }
5731 
5732   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5733     return MatchOperand_NoMatch;
5734 
5735   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5736   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5737 
5738   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5739   return MatchOperand_Success;
5740 }
5741 
5742 OperandMatchResultTy
5743 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5744   using namespace llvm::AMDGPU::MTBUFFormat;
5745 
5746   int64_t Fmt = UFMT_UNDEF;
5747 
5748   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5749     return MatchOperand_ParseFail;
5750 
5751   if (Fmt == UFMT_UNDEF)
5752     return MatchOperand_NoMatch;
5753 
5754   Format = Fmt;
5755   return MatchOperand_Success;
5756 }
5757 
5758 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5759                                     int64_t &Nfmt,
5760                                     StringRef FormatStr,
5761                                     SMLoc Loc) {
5762   using namespace llvm::AMDGPU::MTBUFFormat;
5763   int64_t Format;
5764 
5765   Format = getDfmt(FormatStr);
5766   if (Format != DFMT_UNDEF) {
5767     Dfmt = Format;
5768     return true;
5769   }
5770 
5771   Format = getNfmt(FormatStr, getSTI());
5772   if (Format != NFMT_UNDEF) {
5773     Nfmt = Format;
5774     return true;
5775   }
5776 
5777   Error(Loc, "unsupported format");
5778   return false;
5779 }
5780 
5781 OperandMatchResultTy
5782 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5783                                           SMLoc FormatLoc,
5784                                           int64_t &Format) {
5785   using namespace llvm::AMDGPU::MTBUFFormat;
5786 
5787   int64_t Dfmt = DFMT_UNDEF;
5788   int64_t Nfmt = NFMT_UNDEF;
5789   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5790     return MatchOperand_ParseFail;
5791 
5792   if (trySkipToken(AsmToken::Comma)) {
5793     StringRef Str;
5794     SMLoc Loc = getLoc();
5795     if (!parseId(Str, "expected a format string") ||
5796         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5797       return MatchOperand_ParseFail;
5798     }
5799     if (Dfmt == DFMT_UNDEF) {
5800       Error(Loc, "duplicate numeric format");
5801       return MatchOperand_ParseFail;
5802     } else if (Nfmt == NFMT_UNDEF) {
5803       Error(Loc, "duplicate data format");
5804       return MatchOperand_ParseFail;
5805     }
5806   }
5807 
5808   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5809   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5810 
5811   if (isGFX10Plus()) {
5812     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5813     if (Ufmt == UFMT_UNDEF) {
5814       Error(FormatLoc, "unsupported format");
5815       return MatchOperand_ParseFail;
5816     }
5817     Format = Ufmt;
5818   } else {
5819     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5820   }
5821 
5822   return MatchOperand_Success;
5823 }
5824 
5825 OperandMatchResultTy
5826 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5827                                             SMLoc Loc,
5828                                             int64_t &Format) {
5829   using namespace llvm::AMDGPU::MTBUFFormat;
5830 
5831   auto Id = getUnifiedFormat(FormatStr);
5832   if (Id == UFMT_UNDEF)
5833     return MatchOperand_NoMatch;
5834 
5835   if (!isGFX10Plus()) {
5836     Error(Loc, "unified format is not supported on this GPU");
5837     return MatchOperand_ParseFail;
5838   }
5839 
5840   Format = Id;
5841   return MatchOperand_Success;
5842 }
5843 
5844 OperandMatchResultTy
5845 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5846   using namespace llvm::AMDGPU::MTBUFFormat;
5847   SMLoc Loc = getLoc();
5848 
5849   if (!parseExpr(Format))
5850     return MatchOperand_ParseFail;
5851   if (!isValidFormatEncoding(Format, getSTI())) {
5852     Error(Loc, "out of range format");
5853     return MatchOperand_ParseFail;
5854   }
5855 
5856   return MatchOperand_Success;
5857 }
5858 
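// Parse "format:" given either symbolically or numerically, e.g. (illustrative):
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]  (split form, pre-GFX10)
//   format:[BUF_FMT_32_FLOAT]                          (unified form, GFX10+)
//   format:22                                          (raw encoding)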
5859 OperandMatchResultTy
5860 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5861   using namespace llvm::AMDGPU::MTBUFFormat;
5862 
5863   if (!trySkipId("format", AsmToken::Colon))
5864     return MatchOperand_NoMatch;
5865 
5866   if (trySkipToken(AsmToken::LBrac)) {
5867     StringRef FormatStr;
5868     SMLoc Loc = getLoc();
5869     if (!parseId(FormatStr, "expected a format string"))
5870       return MatchOperand_ParseFail;
5871 
5872     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5873     if (Res == MatchOperand_NoMatch)
5874       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5875     if (Res != MatchOperand_Success)
5876       return Res;
5877 
5878     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5879       return MatchOperand_ParseFail;
5880 
5881     return MatchOperand_Success;
5882   }
5883 
5884   return parseNumericFormat(Format);
5885 }
5886 
5887 OperandMatchResultTy
5888 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5889   using namespace llvm::AMDGPU::MTBUFFormat;
5890 
5891   int64_t Format = getDefaultFormatEncoding(getSTI());
5892   OperandMatchResultTy Res;
5893   SMLoc Loc = getLoc();
5894 
5895   // Parse legacy format syntax.
5896   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5897   if (Res == MatchOperand_ParseFail)
5898     return Res;
5899 
5900   bool FormatFound = (Res == MatchOperand_Success);
5901 
5902   Operands.push_back(
5903     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5904 
5905   if (FormatFound)
5906     trySkipToken(AsmToken::Comma);
5907 
5908   if (isToken(AsmToken::EndOfStatement)) {
5909     // We are expecting an soffset operand,
5910     // but let the matcher handle the error.
5911     return MatchOperand_Success;
5912   }
5913 
5914   // Parse soffset.
5915   Res = parseRegOrImm(Operands);
5916   if (Res != MatchOperand_Success)
5917     return Res;
5918 
5919   trySkipToken(AsmToken::Comma);
5920 
5921   if (!FormatFound) {
5922     Res = parseSymbolicOrNumericFormat(Format);
5923     if (Res == MatchOperand_ParseFail)
5924       return Res;
5925     if (Res == MatchOperand_Success) {
5926       auto Size = Operands.size();
5927       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5928       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5929       Op.setImm(Format);
5930     }
5931     return MatchOperand_Success;
5932   }
5933 
5934   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5935     Error(getLoc(), "duplicate format");
5936     return MatchOperand_ParseFail;
5937   }
5938   return MatchOperand_Success;
5939 }
5940 
5941 //===----------------------------------------------------------------------===//
5942 // ds
5943 //===----------------------------------------------------------------------===//
5944 
5945 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5946                                     const OperandVector &Operands) {
5947   OptionalImmIndexMap OptionalIdx;
5948 
5949   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5950     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5951 
5952     // Add the register arguments
5953     if (Op.isReg()) {
5954       Op.addRegOperands(Inst, 1);
5955       continue;
5956     }
5957 
5958     // Handle optional arguments
5959     OptionalIdx[Op.getImmTy()] = i;
5960   }
5961 
5962   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5963   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5964   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5965 
5966   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5967 }
5968 
5969 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5970                                 bool IsGdsHardcoded) {
5971   OptionalImmIndexMap OptionalIdx;
5972 
5973   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5974     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5975 
5976     // Add the register arguments
5977     if (Op.isReg()) {
5978       Op.addRegOperands(Inst, 1);
5979       continue;
5980     }
5981 
5982     if (Op.isToken() && Op.getToken() == "gds") {
5983       IsGdsHardcoded = true;
5984       continue;
5985     }
5986 
5987     // Handle optional arguments
5988     OptionalIdx[Op.getImmTy()] = i;
5989   }
5990 
5991   AMDGPUOperand::ImmTy OffsetType =
5992     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5993      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5994      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5995                                                       AMDGPUOperand::ImmTyOffset;
5996 
5997   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5998 
5999   if (!IsGdsHardcoded) {
6000     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6001   }
6002   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6003 }
6004 
6005 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6006   OptionalImmIndexMap OptionalIdx;
6007 
6008   unsigned OperandIdx[4];
6009   unsigned EnMask = 0;
6010   int SrcIdx = 0;
6011 
6012   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6013     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6014 
6015     // Add the register arguments
6016     if (Op.isReg()) {
6017       assert(SrcIdx < 4);
6018       OperandIdx[SrcIdx] = Inst.size();
6019       Op.addRegOperands(Inst, 1);
6020       ++SrcIdx;
6021       continue;
6022     }
6023 
6024     if (Op.isOff()) {
6025       assert(SrcIdx < 4);
6026       OperandIdx[SrcIdx] = Inst.size();
6027       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6028       ++SrcIdx;
6029       continue;
6030     }
6031 
6032     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6033       Op.addImmOperands(Inst, 1);
6034       continue;
6035     }
6036 
6037     if (Op.isToken() && Op.getToken() == "done")
6038       continue;
6039 
6040     // Handle optional arguments
6041     OptionalIdx[Op.getImmTy()] = i;
6042   }
6043 
6044   assert(SrcIdx == 4);
6045 
6046   bool Compr = false;
6047   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6048     Compr = true;
6049     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6050     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6051     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6052   }
6053 
6054   for (auto i = 0; i < SrcIdx; ++i) {
6055     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6056       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6057     }
6058   }
6059 
6060   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6061   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6062 
6063   Inst.addOperand(MCOperand::createImm(EnMask));
6064 }
6065 
6066 //===----------------------------------------------------------------------===//
6067 // s_waitcnt
6068 //===----------------------------------------------------------------------===//
6069 
6070 static bool
6071 encodeCnt(
6072   const AMDGPU::IsaVersion ISA,
6073   int64_t &IntVal,
6074   int64_t CntVal,
6075   bool Saturate,
6076   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6077   unsigned (*decode)(const IsaVersion &Version, unsigned))
6078 {
6079   bool Failed = false;
6080 
6081   IntVal = encode(ISA, IntVal, CntVal);
6082   if (CntVal != decode(ISA, IntVal)) {
6083     if (Saturate) {
6084       IntVal = encode(ISA, IntVal, -1);
6085     } else {
6086       Failed = true;
6087     }
6088   }
6089   return Failed;
6090 }
6091 
6092 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6093 
6094   SMLoc CntLoc = getLoc();
6095   StringRef CntName = getTokenStr();
6096 
6097   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6098       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6099     return false;
6100 
6101   int64_t CntVal;
6102   SMLoc ValLoc = getLoc();
6103   if (!parseExpr(CntVal))
6104     return false;
6105 
6106   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6107 
6108   bool Failed = true;
6109   bool Sat = CntName.endswith("_sat");
6110 
6111   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6112     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6113   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6114     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6115   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6116     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6117   } else {
6118     Error(CntLoc, "invalid counter name " + CntName);
6119     return false;
6120   }
6121 
6122   if (Failed) {
6123     Error(ValLoc, "too large value for " + CntName);
6124     return false;
6125   }
6126 
6127   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6128     return false;
6129 
6130   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6131     if (isToken(AsmToken::EndOfStatement)) {
6132       Error(getLoc(), "expected a counter name");
6133       return false;
6134     }
6135   }
6136 
6137   return true;
6138 }
6139 
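// Parse the s_waitcnt operand: either a list of named counters such as
// "vmcnt(0) expcnt(0) lgkmcnt(0)" (optionally separated by '&' or ','), or a
// single absolute expression giving the raw encoding.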
6140 OperandMatchResultTy
6141 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6142   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6143   int64_t Waitcnt = getWaitcntBitMask(ISA);
6144   SMLoc S = getLoc();
6145 
6146   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6147     while (!isToken(AsmToken::EndOfStatement)) {
6148       if (!parseCnt(Waitcnt))
6149         return MatchOperand_ParseFail;
6150     }
6151   } else {
6152     if (!parseExpr(Waitcnt))
6153       return MatchOperand_ParseFail;
6154   }
6155 
6156   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6157   return MatchOperand_Success;
6158 }
6159 
6160 bool
6161 AMDGPUOperand::isSWaitCnt() const {
6162   return isImm();
6163 }
6164 
6165 //===----------------------------------------------------------------------===//
6166 // hwreg
6167 //===----------------------------------------------------------------------===//
6168 
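// Illustrative syntax (example only, not from the original source): the hwreg
// operand accepts either a raw immediate or the macro form
//   hwreg(<name or id> [, <bit offset>, <bit width>])
// e.g. s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)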
6169 bool
6170 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6171                                 OperandInfoTy &Offset,
6172                                 OperandInfoTy &Width) {
6173   using namespace llvm::AMDGPU::Hwreg;
6174 
6175   // The register may be specified by name or using a numeric code
6176   HwReg.Loc = getLoc();
6177   if (isToken(AsmToken::Identifier) &&
6178       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6179     HwReg.IsSymbolic = true;
6180     lex(); // skip register name
6181   } else if (!parseExpr(HwReg.Id, "a register name")) {
6182     return false;
6183   }
6184 
6185   if (trySkipToken(AsmToken::RParen))
6186     return true;
6187 
6188   // parse optional params
6189   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6190     return false;
6191 
6192   Offset.Loc = getLoc();
6193   if (!parseExpr(Offset.Id))
6194     return false;
6195 
6196   if (!skipToken(AsmToken::Comma, "expected a comma"))
6197     return false;
6198 
6199   Width.Loc = getLoc();
6200   return parseExpr(Width.Id) &&
6201          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6202 }
6203 
6204 bool
6205 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6206                                const OperandInfoTy &Offset,
6207                                const OperandInfoTy &Width) {
6208 
6209   using namespace llvm::AMDGPU::Hwreg;
6210 
6211   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6212     Error(HwReg.Loc,
6213           "specified hardware register is not supported on this GPU");
6214     return false;
6215   }
6216   if (!isValidHwreg(HwReg.Id)) {
6217     Error(HwReg.Loc,
6218           "invalid code of hardware register: only 6-bit values are legal");
6219     return false;
6220   }
6221   if (!isValidHwregOffset(Offset.Id)) {
6222     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6223     return false;
6224   }
6225   if (!isValidHwregWidth(Width.Id)) {
6226     Error(Width.Loc,
6227           "invalid bitfield width: only values from 1 to 32 are legal");
6228     return false;
6229   }
6230   return true;
6231 }
6232 
6233 OperandMatchResultTy
6234 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6235   using namespace llvm::AMDGPU::Hwreg;
6236 
6237   int64_t ImmVal = 0;
6238   SMLoc Loc = getLoc();
6239 
6240   if (trySkipId("hwreg", AsmToken::LParen)) {
6241     OperandInfoTy HwReg(ID_UNKNOWN_);
6242     OperandInfoTy Offset(OFFSET_DEFAULT_);
6243     OperandInfoTy Width(WIDTH_DEFAULT_);
6244     if (parseHwregBody(HwReg, Offset, Width) &&
6245         validateHwreg(HwReg, Offset, Width)) {
6246       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6247     } else {
6248       return MatchOperand_ParseFail;
6249     }
6250   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6251     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6252       Error(Loc, "invalid immediate: only 16-bit values are legal");
6253       return MatchOperand_ParseFail;
6254     }
6255   } else {
6256     return MatchOperand_ParseFail;
6257   }
6258 
6259   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6260   return MatchOperand_Success;
6261 }
6262 
6263 bool AMDGPUOperand::isHwreg() const {
6264   return isImmTy(ImmTyHwreg);
6265 }
6266 
6267 //===----------------------------------------------------------------------===//
6268 // sendmsg
6269 //===----------------------------------------------------------------------===//
6270 
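// Illustrative syntax (examples only, not from the original source): the
// sendmsg operand accepts either a raw immediate or the macro form
//   sendmsg(<msg> [, <operation> [, <stream>]])
// e.g. s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//      s_sendmsg sendmsg(MSG_INTERRUPT)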
6271 bool
6272 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6273                                   OperandInfoTy &Op,
6274                                   OperandInfoTy &Stream) {
6275   using namespace llvm::AMDGPU::SendMsg;
6276 
6277   Msg.Loc = getLoc();
6278   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6279     Msg.IsSymbolic = true;
6280     lex(); // skip message name
6281   } else if (!parseExpr(Msg.Id, "a message name")) {
6282     return false;
6283   }
6284 
6285   if (trySkipToken(AsmToken::Comma)) {
6286     Op.IsDefined = true;
6287     Op.Loc = getLoc();
6288     if (isToken(AsmToken::Identifier) &&
6289         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6290       lex(); // skip operation name
6291     } else if (!parseExpr(Op.Id, "an operation name")) {
6292       return false;
6293     }
6294 
6295     if (trySkipToken(AsmToken::Comma)) {
6296       Stream.IsDefined = true;
6297       Stream.Loc = getLoc();
6298       if (!parseExpr(Stream.Id))
6299         return false;
6300     }
6301   }
6302 
6303   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6304 }
6305 
6306 bool
6307 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6308                                  const OperandInfoTy &Op,
6309                                  const OperandInfoTy &Stream) {
6310   using namespace llvm::AMDGPU::SendMsg;
6311 
6312   // Validation strictness depends on whether the message is specified
6313   // in a symbolic or in a numeric form. In the latter case,
6314   // only the encoding possibility is checked.
6315   bool Strict = Msg.IsSymbolic;
6316 
6317   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6318     Error(Msg.Loc, "invalid message id");
6319     return false;
6320   }
6321   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6322     if (Op.IsDefined) {
6323       Error(Op.Loc, "message does not support operations");
6324     } else {
6325       Error(Msg.Loc, "missing message operation");
6326     }
6327     return false;
6328   }
6329   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6330     Error(Op.Loc, "invalid operation id");
6331     return false;
6332   }
6333   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6334     Error(Stream.Loc, "message operation does not support streams");
6335     return false;
6336   }
6337   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6338     Error(Stream.Loc, "invalid message stream id");
6339     return false;
6340   }
6341   return true;
6342 }
6343 
6344 OperandMatchResultTy
6345 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6346   using namespace llvm::AMDGPU::SendMsg;
6347 
6348   int64_t ImmVal = 0;
6349   SMLoc Loc = getLoc();
6350 
6351   if (trySkipId("sendmsg", AsmToken::LParen)) {
6352     OperandInfoTy Msg(ID_UNKNOWN_);
6353     OperandInfoTy Op(OP_NONE_);
6354     OperandInfoTy Stream(STREAM_ID_NONE_);
6355     if (parseSendMsgBody(Msg, Op, Stream) &&
6356         validateSendMsg(Msg, Op, Stream)) {
6357       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6358     } else {
6359       return MatchOperand_ParseFail;
6360     }
6361   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6362     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6363       Error(Loc, "invalid immediate: only 16-bit values are legal");
6364       return MatchOperand_ParseFail;
6365     }
6366   } else {
6367     return MatchOperand_ParseFail;
6368   }
6369 
6370   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6371   return MatchOperand_Success;
6372 }
6373 
6374 bool AMDGPUOperand::isSendMsg() const {
6375   return isImmTy(ImmTySendMsg);
6376 }
6377 
6378 //===----------------------------------------------------------------------===//
6379 // v_interp
6380 //===----------------------------------------------------------------------===//
6381 
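// Illustrative syntax (example only, not from the original source):
// interpolation operands are written as a slot name (p10, p20 or p0) and an
// attribute with a channel suffix, e.g.
//   v_interp_p1_f32 v0, v1, attr0.x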
6382 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6383   StringRef Str;
6384   SMLoc S = getLoc();
6385 
6386   if (!parseId(Str))
6387     return MatchOperand_NoMatch;
6388 
6389   int Slot = StringSwitch<int>(Str)
6390     .Case("p10", 0)
6391     .Case("p20", 1)
6392     .Case("p0", 2)
6393     .Default(-1);
6394 
6395   if (Slot == -1) {
6396     Error(S, "invalid interpolation slot");
6397     return MatchOperand_ParseFail;
6398   }
6399 
6400   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6401                                               AMDGPUOperand::ImmTyInterpSlot));
6402   return MatchOperand_Success;
6403 }
6404 
6405 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6406   StringRef Str;
6407   SMLoc S = getLoc();
6408 
6409   if (!parseId(Str))
6410     return MatchOperand_NoMatch;
6411 
6412   if (!Str.startswith("attr")) {
6413     Error(S, "invalid interpolation attribute");
6414     return MatchOperand_ParseFail;
6415   }
6416 
6417   StringRef Chan = Str.take_back(2);
6418   int AttrChan = StringSwitch<int>(Chan)
6419     .Case(".x", 0)
6420     .Case(".y", 1)
6421     .Case(".z", 2)
6422     .Case(".w", 3)
6423     .Default(-1);
6424   if (AttrChan == -1) {
6425     Error(S, "invalid or missing interpolation attribute channel");
6426     return MatchOperand_ParseFail;
6427   }
6428 
6429   Str = Str.drop_back(2).drop_front(4);
6430 
6431   uint8_t Attr;
6432   if (Str.getAsInteger(10, Attr)) {
6433     Error(S, "invalid or missing interpolation attribute number");
6434     return MatchOperand_ParseFail;
6435   }
6436 
6437   if (Attr > 63) {
6438     Error(S, "out of bounds interpolation attribute number");
6439     return MatchOperand_ParseFail;
6440   }
6441 
6442   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6443 
6444   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6445                                               AMDGPUOperand::ImmTyInterpAttr));
6446   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6447                                               AMDGPUOperand::ImmTyAttrChan));
6448   return MatchOperand_Success;
6449 }
6450 
6451 //===----------------------------------------------------------------------===//
6452 // exp
6453 //===----------------------------------------------------------------------===//
6454 
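// Illustrative syntax (example only, not from the original source): exp
// targets are symbolic names such as mrt0..mrt7, mrtz, null, pos0..pos3 and
// param0..param31, e.g.
//   exp mrt0 v0, v1, v2, v3 done vm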
6455 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6456   using namespace llvm::AMDGPU::Exp;
6457 
6458   StringRef Str;
6459   SMLoc S = getLoc();
6460 
6461   if (!parseId(Str))
6462     return MatchOperand_NoMatch;
6463 
6464   unsigned Id = getTgtId(Str);
6465   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6466     Error(S, (Id == ET_INVALID) ?
6467                 "invalid exp target" :
6468                 "exp target is not supported on this GPU");
6469     return MatchOperand_ParseFail;
6470   }
6471 
6472   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6473                                               AMDGPUOperand::ImmTyExpTgt));
6474   return MatchOperand_Success;
6475 }
6476 
6477 //===----------------------------------------------------------------------===//
6478 // parser helpers
6479 //===----------------------------------------------------------------------===//
6480 
6481 bool
6482 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6483   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6484 }
6485 
6486 bool
6487 AMDGPUAsmParser::isId(const StringRef Id) const {
6488   return isId(getToken(), Id);
6489 }
6490 
6491 bool
6492 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6493   return getTokenKind() == Kind;
6494 }
6495 
6496 bool
6497 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6498   if (isId(Id)) {
6499     lex();
6500     return true;
6501   }
6502   return false;
6503 }
6504 
6505 bool
6506 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6507   if (isToken(AsmToken::Identifier)) {
6508     StringRef Tok = getTokenStr();
6509     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6510       lex();
6511       return true;
6512     }
6513   }
6514   return false;
6515 }
6516 
6517 bool
6518 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6519   if (isId(Id) && peekToken().is(Kind)) {
6520     lex();
6521     lex();
6522     return true;
6523   }
6524   return false;
6525 }
6526 
6527 bool
6528 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6529   if (isToken(Kind)) {
6530     lex();
6531     return true;
6532   }
6533   return false;
6534 }
6535 
6536 bool
6537 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6538                            const StringRef ErrMsg) {
6539   if (!trySkipToken(Kind)) {
6540     Error(getLoc(), ErrMsg);
6541     return false;
6542   }
6543   return true;
6544 }
6545 
6546 bool
6547 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6548   SMLoc S = getLoc();
6549 
6550   const MCExpr *Expr;
6551   if (Parser.parseExpression(Expr))
6552     return false;
6553 
6554   if (Expr->evaluateAsAbsolute(Imm))
6555     return true;
6556 
6557   if (Expected.empty()) {
6558     Error(S, "expected absolute expression");
6559   } else {
6560     Error(S, Twine("expected ", Expected) +
6561              Twine(" or an absolute expression"));
6562   }
6563   return false;
6564 }
6565 
6566 bool
6567 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6568   SMLoc S = getLoc();
6569 
6570   const MCExpr *Expr;
6571   if (Parser.parseExpression(Expr))
6572     return false;
6573 
6574   int64_t IntVal;
6575   if (Expr->evaluateAsAbsolute(IntVal)) {
6576     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6577   } else {
6578     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6579   }
6580   return true;
6581 }
6582 
6583 bool
6584 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6585   if (isToken(AsmToken::String)) {
6586     Val = getToken().getStringContents();
6587     lex();
6588     return true;
6589   } else {
6590     Error(getLoc(), ErrMsg);
6591     return false;
6592   }
6593 }
6594 
6595 bool
6596 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6597   if (isToken(AsmToken::Identifier)) {
6598     Val = getTokenStr();
6599     lex();
6600     return true;
6601   } else {
6602     if (!ErrMsg.empty())
6603       Error(getLoc(), ErrMsg);
6604     return false;
6605   }
6606 }
6607 
6608 AsmToken
6609 AMDGPUAsmParser::getToken() const {
6610   return Parser.getTok();
6611 }
6612 
6613 AsmToken
6614 AMDGPUAsmParser::peekToken() {
6615   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6616 }
6617 
6618 void
6619 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6620   auto TokCount = getLexer().peekTokens(Tokens);
6621 
6622   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6623     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6624 }
6625 
6626 AsmToken::TokenKind
6627 AMDGPUAsmParser::getTokenKind() const {
6628   return getLexer().getKind();
6629 }
6630 
6631 SMLoc
6632 AMDGPUAsmParser::getLoc() const {
6633   return getToken().getLoc();
6634 }
6635 
6636 StringRef
6637 AMDGPUAsmParser::getTokenStr() const {
6638   return getToken().getString();
6639 }
6640 
6641 void
6642 AMDGPUAsmParser::lex() {
6643   Parser.Lex();
6644 }
6645 
6646 SMLoc
6647 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6648                                const OperandVector &Operands) const {
6649   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6650     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6651     if (Test(Op))
6652       return Op.getStartLoc();
6653   }
6654   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6655 }
6656 
6657 SMLoc
6658 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6659                            const OperandVector &Operands) const {
6660   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6661   return getOperandLoc(Test, Operands);
6662 }
6663 
6664 SMLoc
6665 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6666                            const OperandVector &Operands) const {
6667   auto Test = [=](const AMDGPUOperand& Op) {
6668     return Op.isRegKind() && Op.getReg() == Reg;
6669   };
6670   return getOperandLoc(Test, Operands);
6671 }
6672 
6673 SMLoc
6674 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6675   auto Test = [](const AMDGPUOperand& Op) {
6676     return Op.IsImmKindLiteral() || Op.isExpr();
6677   };
6678   return getOperandLoc(Test, Operands);
6679 }
6680 
6681 SMLoc
6682 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6683   auto Test = [](const AMDGPUOperand& Op) {
6684     return Op.isImmKindConst();
6685   };
6686   return getOperandLoc(Test, Operands);
6687 }
6688 
6689 //===----------------------------------------------------------------------===//
6690 // swizzle
6691 //===----------------------------------------------------------------------===//
6692 
6693 LLVM_READNONE
6694 static unsigned
6695 encodeBitmaskPerm(const unsigned AndMask,
6696                   const unsigned OrMask,
6697                   const unsigned XorMask) {
6698   using namespace llvm::AMDGPU::Swizzle;
6699 
6700   return BITMASK_PERM_ENC |
6701          (AndMask << BITMASK_AND_SHIFT) |
6702          (OrMask  << BITMASK_OR_SHIFT)  |
6703          (XorMask << BITMASK_XOR_SHIFT);
6704 }
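// Added note (hedged interpretation, not in the original source): with
// BITMASK_PERM each lane reads from lane ((lane_id & AndMask) | OrMask) ^
// XorMask within its group of 32 lanes, which is why the broadcast, swap and
// reverse modes below are all expressed through encodeBitmaskPerm().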
6705 
6706 bool
6707 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6708                                      const unsigned MinVal,
6709                                      const unsigned MaxVal,
6710                                      const StringRef ErrMsg,
6711                                      SMLoc &Loc) {
6712   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6713     return false;
6714   }
6715   Loc = getLoc();
6716   if (!parseExpr(Op)) {
6717     return false;
6718   }
6719   if (Op < MinVal || Op > MaxVal) {
6720     Error(Loc, ErrMsg);
6721     return false;
6722   }
6723 
6724   return true;
6725 }
6726 
6727 bool
6728 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6729                                       const unsigned MinVal,
6730                                       const unsigned MaxVal,
6731                                       const StringRef ErrMsg) {
6732   SMLoc Loc;
6733   for (unsigned i = 0; i < OpNum; ++i) {
6734     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6735       return false;
6736   }
6737 
6738   return true;
6739 }
6740 
6741 bool
6742 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6743   using namespace llvm::AMDGPU::Swizzle;
6744 
6745   int64_t Lane[LANE_NUM];
6746   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6747                            "expected a 2-bit lane id")) {
6748     Imm = QUAD_PERM_ENC;
6749     for (unsigned I = 0; I < LANE_NUM; ++I) {
6750       Imm |= Lane[I] << (LANE_SHIFT * I);
6751     }
6752     return true;
6753   }
6754   return false;
6755 }
6756 
6757 bool
6758 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6759   using namespace llvm::AMDGPU::Swizzle;
6760 
6761   SMLoc Loc;
6762   int64_t GroupSize;
6763   int64_t LaneIdx;
6764 
6765   if (!parseSwizzleOperand(GroupSize,
6766                            2, 32,
6767                            "group size must be in the interval [2,32]",
6768                            Loc)) {
6769     return false;
6770   }
6771   if (!isPowerOf2_64(GroupSize)) {
6772     Error(Loc, "group size must be a power of two");
6773     return false;
6774   }
6775   if (parseSwizzleOperand(LaneIdx,
6776                           0, GroupSize - 1,
6777                           "lane id must be in the interval [0,group size - 1]",
6778                           Loc)) {
6779     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6780     return true;
6781   }
6782   return false;
6783 }
6784 
6785 bool
6786 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6787   using namespace llvm::AMDGPU::Swizzle;
6788 
6789   SMLoc Loc;
6790   int64_t GroupSize;
6791 
6792   if (!parseSwizzleOperand(GroupSize,
6793                            2, 32,
6794                            "group size must be in the interval [2,32]",
6795                            Loc)) {
6796     return false;
6797   }
6798   if (!isPowerOf2_64(GroupSize)) {
6799     Error(Loc, "group size must be a power of two");
6800     return false;
6801   }
6802 
6803   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6804   return true;
6805 }
6806 
6807 bool
6808 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6809   using namespace llvm::AMDGPU::Swizzle;
6810 
6811   SMLoc Loc;
6812   int64_t GroupSize;
6813 
6814   if (!parseSwizzleOperand(GroupSize,
6815                            1, 16,
6816                            "group size must be in the interval [1,16]",
6817                            Loc)) {
6818     return false;
6819   }
6820   if (!isPowerOf2_64(GroupSize)) {
6821     Error(Loc, "group size must be a power of two");
6822     return false;
6823   }
6824 
6825   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6826   return true;
6827 }
6828 
6829 bool
6830 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6831   using namespace llvm::AMDGPU::Swizzle;
6832 
6833   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6834     return false;
6835   }
6836 
6837   StringRef Ctl;
6838   SMLoc StrLoc = getLoc();
6839   if (!parseString(Ctl)) {
6840     return false;
6841   }
6842   if (Ctl.size() != BITMASK_WIDTH) {
6843     Error(StrLoc, "expected a 5-character mask");
6844     return false;
6845   }
6846 
6847   unsigned AndMask = 0;
6848   unsigned OrMask = 0;
6849   unsigned XorMask = 0;
6850 
6851   for (size_t i = 0; i < Ctl.size(); ++i) {
6852     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6853     switch(Ctl[i]) {
6854     default:
6855       Error(StrLoc, "invalid mask");
6856       return false;
6857     case '0':
6858       break;
6859     case '1':
6860       OrMask |= Mask;
6861       break;
6862     case 'p':
6863       AndMask |= Mask;
6864       break;
6865     case 'i':
6866       AndMask |= Mask;
6867       XorMask |= Mask;
6868       break;
6869     }
6870   }
6871 
6872   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6873   return true;
6874 }
6875 
6876 bool
6877 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6878 
6879   SMLoc OffsetLoc = getLoc();
6880 
6881   if (!parseExpr(Imm, "a swizzle macro")) {
6882     return false;
6883   }
6884   if (!isUInt<16>(Imm)) {
6885     Error(OffsetLoc, "expected a 16-bit offset");
6886     return false;
6887   }
6888   return true;
6889 }
6890 
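// Illustrative syntax (examples only, not from the original source) for the
// swizzle macro parsed below:
//   offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   offset:swizzle(BITMASK_PERM, "01pi0")
//   offset:swizzle(BROADCAST, 8, 0)
//   offset:swizzle(SWAP, 4)
//   offset:swizzle(REVERSE, 8)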
6891 bool
6892 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6893   using namespace llvm::AMDGPU::Swizzle;
6894 
6895   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6896 
6897     SMLoc ModeLoc = getLoc();
6898     bool Ok = false;
6899 
6900     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6901       Ok = parseSwizzleQuadPerm(Imm);
6902     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6903       Ok = parseSwizzleBitmaskPerm(Imm);
6904     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6905       Ok = parseSwizzleBroadcast(Imm);
6906     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6907       Ok = parseSwizzleSwap(Imm);
6908     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6909       Ok = parseSwizzleReverse(Imm);
6910     } else {
6911       Error(ModeLoc, "expected a swizzle mode");
6912     }
6913 
6914     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6915   }
6916 
6917   return false;
6918 }
6919 
6920 OperandMatchResultTy
6921 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6922   SMLoc S = getLoc();
6923   int64_t Imm = 0;
6924 
6925   if (trySkipId("offset")) {
6926 
6927     bool Ok = false;
6928     if (skipToken(AsmToken::Colon, "expected a colon")) {
6929       if (trySkipId("swizzle")) {
6930         Ok = parseSwizzleMacro(Imm);
6931       } else {
6932         Ok = parseSwizzleOffset(Imm);
6933       }
6934     }
6935 
6936     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6937 
6938     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6939   } else {
6940     // Swizzle "offset" operand is optional.
6941     // If it is omitted, try parsing other optional operands.
6942     return parseOptionalOpr(Operands);
6943   }
6944 }
6945 
6946 bool
6947 AMDGPUOperand::isSwizzle() const {
6948   return isImmTy(ImmTySwizzle);
6949 }
6950 
6951 //===----------------------------------------------------------------------===//
6952 // VGPR Index Mode
6953 //===----------------------------------------------------------------------===//
6954 
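// Illustrative syntax (example only, not from the original source): the
// gpr_idx macro lists one or more of the modes SRC0, SRC1, SRC2 and DST, e.g.
//   s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)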
6955 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6956 
6957   using namespace llvm::AMDGPU::VGPRIndexMode;
6958 
6959   if (trySkipToken(AsmToken::RParen)) {
6960     return OFF;
6961   }
6962 
6963   int64_t Imm = 0;
6964 
6965   while (true) {
6966     unsigned Mode = 0;
6967     SMLoc S = getLoc();
6968 
6969     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6970       if (trySkipId(IdSymbolic[ModeId])) {
6971         Mode = 1 << ModeId;
6972         break;
6973       }
6974     }
6975 
6976     if (Mode == 0) {
6977       Error(S, (Imm == 0)?
6978                "expected a VGPR index mode or a closing parenthesis" :
6979                "expected a VGPR index mode");
6980       return UNDEF;
6981     }
6982 
6983     if (Imm & Mode) {
6984       Error(S, "duplicate VGPR index mode");
6985       return UNDEF;
6986     }
6987     Imm |= Mode;
6988 
6989     if (trySkipToken(AsmToken::RParen))
6990       break;
6991     if (!skipToken(AsmToken::Comma,
6992                    "expected a comma or a closing parenthesis"))
6993       return UNDEF;
6994   }
6995 
6996   return Imm;
6997 }
6998 
6999 OperandMatchResultTy
7000 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7001 
7002   using namespace llvm::AMDGPU::VGPRIndexMode;
7003 
7004   int64_t Imm = 0;
7005   SMLoc S = getLoc();
7006 
7007   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7008     Imm = parseGPRIdxMacro();
7009     if (Imm == UNDEF)
7010       return MatchOperand_ParseFail;
7011   } else {
7012     if (getParser().parseAbsoluteExpression(Imm))
7013       return MatchOperand_ParseFail;
7014     if (Imm < 0 || !isUInt<4>(Imm)) {
7015       Error(S, "invalid immediate: only 4-bit values are legal");
7016       return MatchOperand_ParseFail;
7017     }
7018   }
7019 
7020   Operands.push_back(
7021       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7022   return MatchOperand_Success;
7023 }
7024 
7025 bool AMDGPUOperand::isGPRIdxMode() const {
7026   return isImmTy(ImmTyGprIdxMode);
7027 }
7028 
7029 //===----------------------------------------------------------------------===//
7030 // sopp branch targets
7031 //===----------------------------------------------------------------------===//
7032 
7033 OperandMatchResultTy
7034 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7035 
7036   // Make sure we are not parsing something
7037   // that looks like a label or an expression but is not.
7038   // This will improve error messages.
7039   if (isRegister() || isModifier())
7040     return MatchOperand_NoMatch;
7041 
7042   if (!parseExpr(Operands))
7043     return MatchOperand_ParseFail;
7044 
7045   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7046   assert(Opr.isImm() || Opr.isExpr());
7047   SMLoc Loc = Opr.getStartLoc();
7048 
7049   // Currently we do not support arbitrary expressions as branch targets.
7050   // Only labels and absolute expressions are accepted.
7051   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7052     Error(Loc, "expected an absolute expression or a label");
7053   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7054     Error(Loc, "expected a 16-bit signed jump offset");
7055   }
7056 
7057   return MatchOperand_Success;
7058 }
7059 
7060 //===----------------------------------------------------------------------===//
7061 // Boolean holding registers
7062 //===----------------------------------------------------------------------===//
7063 
7064 OperandMatchResultTy
7065 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7066   return parseReg(Operands);
7067 }
7068 
7069 //===----------------------------------------------------------------------===//
7070 // mubuf
7071 //===----------------------------------------------------------------------===//
7072 
7073 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7074   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7075 }
7076 
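// Illustrative example (not from the original source): a typical MUBUF
// instruction handled by cvtMubufImpl looks like
//   buffer_load_dword v1, v2, s[4:7], s1 offen offset:16 glc slc
// i.e. register operands first, followed by optional modifiers that are
// recorded in OptionalIdx below.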
7077 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7078                                    const OperandVector &Operands,
7079                                    bool IsAtomic,
7080                                    bool IsLds) {
7081   bool IsLdsOpcode = IsLds;
7082   bool HasLdsModifier = false;
7083   OptionalImmIndexMap OptionalIdx;
7084   unsigned FirstOperandIdx = 1;
7085   bool IsAtomicReturn = false;
7086 
7087   if (IsAtomic) {
7088     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7089       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7090       if (!Op.isCPol())
7091         continue;
7092       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7093       break;
7094     }
7095 
7096     if (!IsAtomicReturn) {
7097       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7098       if (NewOpc != -1)
7099         Inst.setOpcode(NewOpc);
7100     }
7101 
7102     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7103                       SIInstrFlags::IsAtomicRet;
7104   }
7105 
7106   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7107     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7108 
7109     // Add the register arguments
7110     if (Op.isReg()) {
7111       Op.addRegOperands(Inst, 1);
7112       // Insert a tied src for atomic return dst.
7113       // This cannot be postponed as subsequent calls to
7114       // addImmOperands rely on the correct number of MC operands.
7115       if (IsAtomicReturn && i == FirstOperandIdx)
7116         Op.addRegOperands(Inst, 1);
7117       continue;
7118     }
7119 
7120     // Handle the case where soffset is an immediate
7121     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7122       Op.addImmOperands(Inst, 1);
7123       continue;
7124     }
7125 
7126     HasLdsModifier |= Op.isLDS();
7127 
7128     // Handle tokens like 'offen' which are sometimes hard-coded into the
7129     // asm string.  There are no MCInst operands for these.
7130     if (Op.isToken()) {
7131       continue;
7132     }
7133     assert(Op.isImm());
7134 
7135     // Handle optional arguments
7136     OptionalIdx[Op.getImmTy()] = i;
7137   }
7138 
7139   // This is a workaround for an llvm quirk which may result in an
7140   // incorrect instruction selection. Lds and non-lds versions of
7141   // MUBUF instructions are identical except that lds versions
7142   // have a mandatory 'lds' modifier. However, this modifier follows the
7143   // optional modifiers, and the llvm asm matcher regards this 'lds'
7144   // modifier as an optional one. As a result, an lds version
7145   // of the opcode may be selected even if it has no 'lds' modifier.
7146   if (IsLdsOpcode && !HasLdsModifier) {
7147     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7148     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7149       Inst.setOpcode(NoLdsOpcode);
7150       IsLdsOpcode = false;
7151     }
7152   }
7153 
7154   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7155   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7156 
7157   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7158     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7159   }
7160   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7161 }
7162 
7163 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7164   OptionalImmIndexMap OptionalIdx;
7165 
7166   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7167     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7168 
7169     // Add the register arguments
7170     if (Op.isReg()) {
7171       Op.addRegOperands(Inst, 1);
7172       continue;
7173     }
7174 
7175     // Handle the case where soffset is an immediate
7176     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7177       Op.addImmOperands(Inst, 1);
7178       continue;
7179     }
7180 
7181     // Handle tokens like 'offen' which are sometimes hard-coded into the
7182     // asm string.  There are no MCInst operands for these.
7183     if (Op.isToken()) {
7184       continue;
7185     }
7186     assert(Op.isImm());
7187 
7188     // Handle optional arguments
7189     OptionalIdx[Op.getImmTy()] = i;
7190   }
7191 
7192   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7193                         AMDGPUOperand::ImmTyOffset);
7194   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7195   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7196   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7197   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7198 }
7199 
7200 //===----------------------------------------------------------------------===//
7201 // mimg
7202 //===----------------------------------------------------------------------===//
7203 
7204 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7205                               bool IsAtomic) {
7206   unsigned I = 1;
7207   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7208   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7209     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7210   }
7211 
7212   if (IsAtomic) {
7213     // Add src, same as dst
7214     assert(Desc.getNumDefs() == 1);
7215     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7216   }
7217 
7218   OptionalImmIndexMap OptionalIdx;
7219 
7220   for (unsigned E = Operands.size(); I != E; ++I) {
7221     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7222 
7223     // Add the register arguments
7224     if (Op.isReg()) {
7225       Op.addRegOperands(Inst, 1);
7226     } else if (Op.isImmModifier()) {
7227       OptionalIdx[Op.getImmTy()] = I;
7228     } else if (!Op.isToken()) {
7229       llvm_unreachable("unexpected operand type");
7230     }
7231   }
7232 
7233   bool IsGFX10Plus = isGFX10Plus();
7234 
7235   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7236   if (IsGFX10Plus)
7237     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7238   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7239   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7240   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7241   if (IsGFX10Plus)
7242     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7243   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7244     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7245   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7246   if (!IsGFX10Plus)
7247     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7248   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7249 }
7250 
7251 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7252   cvtMIMG(Inst, Operands, true);
7253 }
7254 
7255 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7256   OptionalImmIndexMap OptionalIdx;
7257   bool IsAtomicReturn = false;
7258 
7259   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7260     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7261     if (!Op.isCPol())
7262       continue;
7263     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7264     break;
7265   }
7266 
7267   if (!IsAtomicReturn) {
7268     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7269     if (NewOpc != -1)
7270       Inst.setOpcode(NewOpc);
7271   }
7272 
7273   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7274                     SIInstrFlags::IsAtomicRet;
7275 
7276   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7277     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7278 
7279     // Add the register arguments
7280     if (Op.isReg()) {
7281       Op.addRegOperands(Inst, 1);
7282       if (IsAtomicReturn && i == 1)
7283         Op.addRegOperands(Inst, 1);
7284       continue;
7285     }
7286 
7287     // Handle the case where soffset is an immediate
7288     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7289       Op.addImmOperands(Inst, 1);
7290       continue;
7291     }
7292 
7293     // Handle tokens like 'offen' which are sometimes hard-coded into the
7294     // asm string.  There are no MCInst operands for these.
7295     if (Op.isToken()) {
7296       continue;
7297     }
7298     assert(Op.isImm());
7299 
7300     // Handle optional arguments
7301     OptionalIdx[Op.getImmTy()] = i;
7302   }
7303 
7304   if ((int)Inst.getNumOperands() <=
7305       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7306     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7307   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7308 }
7309 
7310 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7311                                       const OperandVector &Operands) {
7312   for (unsigned I = 1; I < Operands.size(); ++I) {
7313     auto &Operand = (AMDGPUOperand &)*Operands[I];
7314     if (Operand.isReg())
7315       Operand.addRegOperands(Inst, 1);
7316   }
7317 
7318   Inst.addOperand(MCOperand::createImm(1)); // a16
7319 }
7320 
7321 //===----------------------------------------------------------------------===//
7322 // smrd
7323 //===----------------------------------------------------------------------===//
7324 
7325 bool AMDGPUOperand::isSMRDOffset8() const {
7326   return isImm() && isUInt<8>(getImm());
7327 }
7328 
7329 bool AMDGPUOperand::isSMEMOffset() const {
7330   return isImm(); // Offset range is checked later by validator.
7331 }
7332 
7333 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7334   // 32-bit literals are only supported on CI and we only want to use them
7335   // when the offset is > 8 bits.
7336   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7337 }
7338 
7339 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7340   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7341 }
7342 
7343 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7344   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7345 }
7346 
7347 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7348   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7349 }
7350 
7351 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7352   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7353 }
7354 
7355 //===----------------------------------------------------------------------===//
7356 // vop3
7357 //===----------------------------------------------------------------------===//
7358 
7359 static bool ConvertOmodMul(int64_t &Mul) {
7360   if (Mul != 1 && Mul != 2 && Mul != 4)
7361     return false;
7362 
7363   Mul >>= 1;
7364   return true;
7365 }
7366 
7367 static bool ConvertOmodDiv(int64_t &Div) {
7368   if (Div == 1) {
7369     Div = 0;
7370     return true;
7371   }
7372 
7373   if (Div == 2) {
7374     Div = 3;
7375     return true;
7376   }
7377 
7378   return false;
7379 }
7380 
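// Added note (illustrative, not in the original source): the omod output
// modifier is written as mul:2, mul:4 or div:2; ConvertOmodMul/ConvertOmodDiv
// above map these to the 2-bit OMOD encoding (1, 2 and 3 respectively), e.g.
//   v_mul_f32 v0, v1, v2 mul:2 clamp
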
7381 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7382 // This is intentional and ensures compatibility with sp3.
7383 // See bug 35397 for details.
7384 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7385   if (BoundCtrl == 0 || BoundCtrl == 1) {
7386     BoundCtrl = 1;
7387     return true;
7388   }
7389   return false;
7390 }
7391 
7392 // Note: the order in this table matches the order of operands in AsmString.
7393 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7394   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7395   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7396   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7397   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7398   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7399   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7400   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7401   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7402   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7403   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7404   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7405   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7406   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7407   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7408   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7409   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7410   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7411   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7412   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7413   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7414   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7415   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7416   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7417   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7418   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7419   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7420   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7421   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7422   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7423   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7424   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7425   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7426   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7427   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7428   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7429   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7430   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7431   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7432   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7433   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7434   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7435 };
7436 
7437 void AMDGPUAsmParser::onBeginOfFile() {
7438   if (!getParser().getStreamer().getTargetStreamer() ||
7439       getSTI().getTargetTriple().getArch() == Triple::r600)
7440     return;
7441 
7442   if (!getTargetStreamer().getTargetID())
7443     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7444 
7445   if (isHsaAbiVersion3AndAbove(&getSTI()))
7446     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7447 }
7448 
7449 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7450 
7451   OperandMatchResultTy res = parseOptionalOpr(Operands);
7452 
7453   // This is a hack to enable hardcoded mandatory operands which follow
7454   // optional operands.
7455   //
7456   // The current design assumes that all operands after the first optional
7457   // operand are also optional. However, the implementation of some instructions
7458   // violates this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7459   //
7460   // To alleviate this problem, we have to (implicitly) parse extra operands
7461   // to make sure the autogenerated parser of custom operands never hits
7462   // hardcoded mandatory operands.
7463 
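  // Hypothetical example of the problem described above (added for clarity):
  // in
  //   flat_atomic_swap v0, v[1:2], v3 offset:16 glc
  // the trailing 'glc' is hardcoded in the asm string yet follows the optional
  // 'offset', which is why the lookahead loop below keeps parsing.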
7464   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7465     if (res != MatchOperand_Success ||
7466         isToken(AsmToken::EndOfStatement))
7467       break;
7468 
7469     trySkipToken(AsmToken::Comma);
7470     res = parseOptionalOpr(Operands);
7471   }
7472 
7473   return res;
7474 }
7475 
7476 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7477   OperandMatchResultTy res;
7478   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7479     // try to parse any optional operand here
7480     if (Op.IsBit) {
7481       res = parseNamedBit(Op.Name, Operands, Op.Type);
7482     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7483       res = parseOModOperand(Operands);
7484     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7485                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7486                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7487       res = parseSDWASel(Operands, Op.Name, Op.Type);
7488     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7489       res = parseSDWADstUnused(Operands);
7490     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7491                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7492                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7493                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7494       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7495                                         Op.ConvertResult);
7496     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7497       res = parseDim(Operands);
7498     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7499       res = parseCPol(Operands);
7500     } else {
7501       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7502     }
7503     if (res != MatchOperand_NoMatch) {
7504       return res;
7505     }
7506   }
7507   return MatchOperand_NoMatch;
7508 }
7509 
7510 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7511   StringRef Name = getTokenStr();
7512   if (Name == "mul") {
7513     return parseIntWithPrefix("mul", Operands,
7514                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7515   }
7516 
7517   if (Name == "div") {
7518     return parseIntWithPrefix("div", Operands,
7519                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7520   }
7521 
7522   return MatchOperand_NoMatch;
7523 }
7524 
7525 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7526   cvtVOP3P(Inst, Operands);
7527 
7528   int Opc = Inst.getOpcode();
7529 
7530   int SrcNum;
7531   const int Ops[] = { AMDGPU::OpName::src0,
7532                       AMDGPU::OpName::src1,
7533                       AMDGPU::OpName::src2 };
7534   for (SrcNum = 0;
7535        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7536        ++SrcNum);
7537   assert(SrcNum > 0);
7538 
7539   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7540   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7541 
7542   if ((OpSel & (1 << SrcNum)) != 0) {
7543     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7544     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7545     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7546   }
7547 }
7548 
7549 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7550       // 1. This operand is the input modifiers
7551   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7552       // 2. This is not the last operand
7553       && Desc.NumOperands > (OpNum + 1)
7554       // 3. The next operand is a register class
7555       && Desc.OpInfo[OpNum + 1].RegClass != -1
7556       // 4. The next register is not tied to any other operand
7557       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7558 }
7559 
7560 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7561 {
7562   OptionalImmIndexMap OptionalIdx;
7563   unsigned Opc = Inst.getOpcode();
7564 
7565   unsigned I = 1;
7566   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7567   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7568     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7569   }
7570 
7571   for (unsigned E = Operands.size(); I != E; ++I) {
7572     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7573     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7574       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7575     } else if (Op.isInterpSlot() ||
7576                Op.isInterpAttr() ||
7577                Op.isAttrChan()) {
7578       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7579     } else if (Op.isImmModifier()) {
7580       OptionalIdx[Op.getImmTy()] = I;
7581     } else {
7582       llvm_unreachable("unhandled operand type");
7583     }
7584   }
7585 
7586   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7587     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7588   }
7589 
7590   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7591     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7592   }
7593 
7594   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7595     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7596   }
7597 }
7598 
7599 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7600                               OptionalImmIndexMap &OptionalIdx) {
7601   unsigned Opc = Inst.getOpcode();
7602 
7603   unsigned I = 1;
7604   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7605   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7606     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7607   }
7608 
7609   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7610     // This instruction has src modifiers
7611     for (unsigned E = Operands.size(); I != E; ++I) {
7612       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7613       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7614         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7615       } else if (Op.isImmModifier()) {
7616         OptionalIdx[Op.getImmTy()] = I;
7617       } else if (Op.isRegOrImm()) {
7618         Op.addRegOrImmOperands(Inst, 1);
7619       } else {
7620         llvm_unreachable("unhandled operand type");
7621       }
7622     }
7623   } else {
7624     // No src modifiers
7625     for (unsigned E = Operands.size(); I != E; ++I) {
7626       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7627       if (Op.isMod()) {
7628         OptionalIdx[Op.getImmTy()] = I;
7629       } else {
7630         Op.addRegOrImmOperands(Inst, 1);
7631       }
7632     }
7633   }
7634 
7635   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7636     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7637   }
7638 
7639   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7640     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7641   }
7642 
7643   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7644   // they have a src2 register operand that is tied to the dst operand.
7645   // We don't allow modifiers for this operand in the assembler, so
7646   // src2_modifiers should be 0.
7647   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7648       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7649       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7650       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7651       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7652       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7653       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7654       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7655       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7656       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7657       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7658     auto it = Inst.begin();
7659     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7660     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7661     ++it;
7662     // Copy the operand to ensure it's not invalidated when Inst grows.
7663     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7664   }
7665 }
7666 
7667 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7668   OptionalImmIndexMap OptionalIdx;
7669   cvtVOP3(Inst, Operands, OptionalIdx);
7670 }
7671 
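// Illustrative syntax (example only, not from the original source): VOP3P
// packed-math modifiers handled below are written per 16-bit half, e.g.
//   v_pk_add_f16 v0, v1, v2 op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[1,0]
// and are folded into the per-source src*_modifiers operands.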
7672 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7673                                OptionalImmIndexMap &OptIdx) {
7674   const int Opc = Inst.getOpcode();
7675   const MCInstrDesc &Desc = MII.get(Opc);
7676 
7677   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7678 
7679   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7680     assert(!IsPacked);
7681     Inst.addOperand(Inst.getOperand(0));
7682   }
7683 
7684   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
7685   // instruction, and then figure out where to actually put the modifiers.
7686 
7687   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7688   if (OpSelIdx != -1) {
7689     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7690   }
7691 
7692   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7693   if (OpSelHiIdx != -1) {
7694     int DefaultVal = IsPacked ? -1 : 0;
7695     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7696                           DefaultVal);
7697   }
7698 
7699   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7700   if (NegLoIdx != -1) {
7701     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7702     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7703   }
7704 
7705   const int Ops[] = { AMDGPU::OpName::src0,
7706                       AMDGPU::OpName::src1,
7707                       AMDGPU::OpName::src2 };
7708   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7709                          AMDGPU::OpName::src1_modifiers,
7710                          AMDGPU::OpName::src2_modifiers };
7711 
7712   unsigned OpSel = 0;
7713   unsigned OpSelHi = 0;
7714   unsigned NegLo = 0;
7715   unsigned NegHi = 0;
7716 
7717   if (OpSelIdx != -1)
7718     OpSel = Inst.getOperand(OpSelIdx).getImm();
7719 
7720   if (OpSelHiIdx != -1)
7721     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7722 
7723   if (NegLoIdx != -1) {
7724     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7725     NegLo = Inst.getOperand(NegLoIdx).getImm();
7726     NegHi = Inst.getOperand(NegHiIdx).getImm();
7727   }
7728 
7729   for (int J = 0; J < 3; ++J) {
7730     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7731     if (OpIdx == -1)
7732       break;
7733 
7734     uint32_t ModVal = 0;
7735 
7736     if ((OpSel & (1 << J)) != 0)
7737       ModVal |= SISrcMods::OP_SEL_0;
7738 
7739     if ((OpSelHi & (1 << J)) != 0)
7740       ModVal |= SISrcMods::OP_SEL_1;
7741 
7742     if ((NegLo & (1 << J)) != 0)
7743       ModVal |= SISrcMods::NEG;
7744 
7745     if ((NegHi & (1 << J)) != 0)
7746       ModVal |= SISrcMods::NEG_HI;
7747 
7748     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7749 
7750     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7751   }
7752 }
7753 
7754 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7755   OptionalImmIndexMap OptIdx;
7756   cvtVOP3(Inst, Operands, OptIdx);
7757   cvtVOP3P(Inst, Operands, OptIdx);
7758 }
7759 
7760 //===----------------------------------------------------------------------===//
7761 // dpp
7762 //===----------------------------------------------------------------------===//
7763 
7764 bool AMDGPUOperand::isDPP8() const {
7765   return isImmTy(ImmTyDPP8);
7766 }
7767 
7768 bool AMDGPUOperand::isDPPCtrl() const {
7769   using namespace AMDGPU::DPP;
7770 
7771   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7772   if (result) {
7773     int64_t Imm = getImm();
7774     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7775            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7776            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7777            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7778            (Imm == DppCtrl::WAVE_SHL1) ||
7779            (Imm == DppCtrl::WAVE_ROL1) ||
7780            (Imm == DppCtrl::WAVE_SHR1) ||
7781            (Imm == DppCtrl::WAVE_ROR1) ||
7782            (Imm == DppCtrl::ROW_MIRROR) ||
7783            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7784            (Imm == DppCtrl::BCAST15) ||
7785            (Imm == DppCtrl::BCAST31) ||
7786            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7787            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7788   }
7789   return false;
7790 }
7791 
7792 //===----------------------------------------------------------------------===//
7793 // mAI
7794 //===----------------------------------------------------------------------===//
7795 
7796 bool AMDGPUOperand::isBLGP() const {
7797   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7798 }
7799 
7800 bool AMDGPUOperand::isCBSZ() const {
7801   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7802 }
7803 
7804 bool AMDGPUOperand::isABID() const {
7805   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7806 }
7807 
7808 bool AMDGPUOperand::isS16Imm() const {
7809   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7810 }
7811 
7812 bool AMDGPUOperand::isU16Imm() const {
7813   return isImm() && isUInt<16>(getImm());
7814 }
7815 
7816 //===----------------------------------------------------------------------===//
7817 // dim
7818 //===----------------------------------------------------------------------===//
7819 
7820 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7821   // We want to allow "dim:1D" etc.,
7822   // but the initial 1 is tokenized as an integer.
7823   std::string Token;
7824   if (isToken(AsmToken::Integer)) {
7825     SMLoc Loc = getToken().getEndLoc();
7826     Token = std::string(getTokenStr());
7827     lex();
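         // The identifier suffix must immediately follow the integer (no
         // intervening whitespace), otherwise this is not a dim value.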
7828     if (getLoc() != Loc)
7829       return false;
7830   }
7831 
7832   StringRef Suffix;
7833   if (!parseId(Suffix))
7834     return false;
7835   Token += Suffix;
7836 
7837   StringRef DimId = Token;
7838   if (DimId.startswith("SQ_RSRC_IMG_"))
7839     DimId = DimId.drop_front(12);
7840 
7841   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7842   if (!DimInfo)
7843     return false;
7844 
7845   Encoding = DimInfo->Encoding;
7846   return true;
7847 }
7848 
7849 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7850   if (!isGFX10Plus())
7851     return MatchOperand_NoMatch;
7852 
7853   SMLoc S = getLoc();
7854 
7855   if (!trySkipId("dim", AsmToken::Colon))
7856     return MatchOperand_NoMatch;
7857 
7858   unsigned Encoding;
7859   SMLoc Loc = getLoc();
7860   if (!parseDimId(Encoding)) {
7861     Error(Loc, "invalid dim value");
7862     return MatchOperand_ParseFail;
7863   }
7864 
7865   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7866                                               AMDGPUOperand::ImmTyDim));
7867   return MatchOperand_Success;
7868 }
7869 
7870 //===----------------------------------------------------------------------===//
7871 // dpp
7872 //===----------------------------------------------------------------------===//
7873 
7874 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7875   SMLoc S = getLoc();
7876 
7877   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7878     return MatchOperand_NoMatch;
7879 
7880   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7881 
7882   int64_t Sels[8];
7883 
7884   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7885     return MatchOperand_ParseFail;
7886 
7887   for (size_t i = 0; i < 8; ++i) {
7888     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7889       return MatchOperand_ParseFail;
7890 
7891     SMLoc Loc = getLoc();
7892     if (getParser().parseAbsoluteExpression(Sels[i]))
7893       return MatchOperand_ParseFail;
7894     if (0 > Sels[i] || 7 < Sels[i]) {
7895       Error(Loc, "expected a 3-bit value");
7896       return MatchOperand_ParseFail;
7897     }
7898   }
7899 
7900   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7901     return MatchOperand_ParseFail;
7902 
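       // Pack the eight 3-bit lane selects into a single immediate.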
7903   unsigned DPP8 = 0;
7904   for (size_t i = 0; i < 8; ++i)
7905     DPP8 |= (Sels[i] << (i * 3));
7906 
7907   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7908   return MatchOperand_Success;
7909 }
7910 
7911 bool
7912 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7913                                     const OperandVector &Operands) {
7914   if (Ctrl == "row_newbcast")
7915     return isGFX90A();
7916 
7917   if (Ctrl == "row_share" ||
7918       Ctrl == "row_xmask")
7919     return isGFX10Plus();
7920 
7921   if (Ctrl == "wave_shl" ||
7922       Ctrl == "wave_shr" ||
7923       Ctrl == "wave_rol" ||
7924       Ctrl == "wave_ror" ||
7925       Ctrl == "row_bcast")
7926     return isVI() || isGFX9();
7927 
7928   return Ctrl == "row_mirror" ||
7929          Ctrl == "row_half_mirror" ||
7930          Ctrl == "quad_perm" ||
7931          Ctrl == "row_shl" ||
7932          Ctrl == "row_shr" ||
7933          Ctrl == "row_ror";
7934 }
7935 
7936 int64_t
7937 AMDGPUAsmParser::parseDPPCtrlPerm() {
7938   // quad_perm:[%d,%d,%d,%d]
7939 
7940   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7941     return -1;
7942 
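       // Pack the four 2-bit lane selects into an 8-bit quad_perm value.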
7943   int64_t Val = 0;
7944   for (int i = 0; i < 4; ++i) {
7945     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7946       return -1;
7947 
7948     int64_t Temp;
7949     SMLoc Loc = getLoc();
7950     if (getParser().parseAbsoluteExpression(Temp))
7951       return -1;
7952     if (Temp < 0 || Temp > 3) {
7953       Error(Loc, "expected a 2-bit value");
7954       return -1;
7955     }
7956 
7957     Val += (Temp << (i * 2));
7958   }
7959 
7960   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7961     return -1;
7962 
7963   return Val;
7964 }
7965 
7966 int64_t
7967 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7968   using namespace AMDGPU::DPP;
7969 
7970   // sel:%d
7971 
7972   int64_t Val;
7973   SMLoc Loc = getLoc();
7974 
7975   if (getParser().parseAbsoluteExpression(Val))
7976     return -1;
7977 
7978   struct DppCtrlCheck {
7979     int64_t Ctrl;
7980     int Lo;
7981     int Hi;
7982   };
7983 
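       // Map the control name to its base encoding and to the range of values
       // accepted for its numeric suffix.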
7984   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7985     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7986     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7987     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7988     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7989     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7990     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7991     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7992     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7993     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7994     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7995     .Default({-1, 0, 0});
7996 
7997   bool Valid;
7998   if (Check.Ctrl == -1) {
7999     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8000     Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8001   } else {
8002     Valid = Check.Lo <= Val && Val <= Check.Hi;
8003     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8004   }
8005 
8006   if (!Valid) {
8007     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8008     return -1;
8009   }
8010 
8011   return Val;
8012 }
8013 
8014 OperandMatchResultTy
8015 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8016   using namespace AMDGPU::DPP;
8017 
8018   if (!isToken(AsmToken::Identifier) ||
8019       !isSupportedDPPCtrl(getTokenStr(), Operands))
8020     return MatchOperand_NoMatch;
8021 
8022   SMLoc S = getLoc();
8023   int64_t Val = -1;
8024   StringRef Ctrl;
8025 
8026   parseId(Ctrl);
8027 
8028   if (Ctrl == "row_mirror") {
8029     Val = DppCtrl::ROW_MIRROR;
8030   } else if (Ctrl == "row_half_mirror") {
8031     Val = DppCtrl::ROW_HALF_MIRROR;
8032   } else {
8033     if (skipToken(AsmToken::Colon, "expected a colon")) {
8034       if (Ctrl == "quad_perm") {
8035         Val = parseDPPCtrlPerm();
8036       } else {
8037         Val = parseDPPCtrlSel(Ctrl);
8038       }
8039     }
8040   }
8041 
8042   if (Val == -1)
8043     return MatchOperand_ParseFail;
8044 
8045   Operands.push_back(
8046     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8047   return MatchOperand_Success;
8048 }
8049 
8050 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8051   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8052 }
8053 
8054 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8055   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8056 }
8057 
8058 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8059   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8060 }
8061 
8062 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8063   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8064 }
8065 
8066 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8067   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8068 }
8069 
8070 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8071   OptionalImmIndexMap OptionalIdx;
8072 
8073   unsigned Opc = Inst.getOpcode();
8074   bool HasModifiers =
8075       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8076   unsigned I = 1;
8077   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8078   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8079     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8080   }
8081 
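       // A dpp8 fi: value is remembered here and appended as the final
       // immediate operand after the loop.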
8082   int Fi = 0;
8083   for (unsigned E = Operands.size(); I != E; ++I) {
8084     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8085                                             MCOI::TIED_TO);
8086     if (TiedTo != -1) {
8087       assert((unsigned)TiedTo < Inst.getNumOperands());
8088       // Handle the tied 'old' or src2 operand for MAC instructions.
8089       Inst.addOperand(Inst.getOperand(TiedTo));
8090     }
8091     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8092     // Add the register arguments
8093     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8094       // VOP2b DPP forms (v_add_u32, v_sub_u32, ...) take an explicit "vcc"
8095       // token; skip it.
8096       continue;
8097     }
8098 
8099     if (IsDPP8) {
8100       if (Op.isDPP8()) {
8101         Op.addImmOperands(Inst, 1);
8102       } else if (HasModifiers &&
8103                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8104         Op.addRegWithFPInputModsOperands(Inst, 2);
8105       } else if (Op.isFI()) {
8106         Fi = Op.getImm();
8107       } else if (Op.isReg()) {
8108         Op.addRegOperands(Inst, 1);
8109       } else {
8110         llvm_unreachable("Invalid operand type");
8111       }
8112     } else {
8113       if (HasModifiers &&
8114           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8115         Op.addRegWithFPInputModsOperands(Inst, 2);
8116       } else if (Op.isReg()) {
8117         Op.addRegOperands(Inst, 1);
8118       } else if (Op.isDPPCtrl()) {
8119         Op.addImmOperands(Inst, 1);
8120       } else if (Op.isImm()) {
8121         // Handle optional arguments
8122         OptionalIdx[Op.getImmTy()] = I;
8123       } else {
8124         llvm_unreachable("Invalid operand type");
8125       }
8126     }
8127   }
8128 
8129   if (IsDPP8) {
8130     using namespace llvm::AMDGPU::DPP;
8131     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8132   } else {
8133     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8134     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8135     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8136     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8137       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8138     }
8139   }
8140 }
8141 
8142 //===----------------------------------------------------------------------===//
8143 // sdwa
8144 //===----------------------------------------------------------------------===//
8145 
8146 OperandMatchResultTy
8147 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8148                               AMDGPUOperand::ImmTy Type) {
8149   using namespace llvm::AMDGPU::SDWA;
8150 
8151   SMLoc S = getLoc();
8152   StringRef Value;
8153   OperandMatchResultTy res;
8154 
8155   SMLoc StringLoc;
8156   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8157   if (res != MatchOperand_Success) {
8158     return res;
8159   }
8160 
8161   int64_t Int;
8162   Int = StringSwitch<int64_t>(Value)
8163         .Case("BYTE_0", SdwaSel::BYTE_0)
8164         .Case("BYTE_1", SdwaSel::BYTE_1)
8165         .Case("BYTE_2", SdwaSel::BYTE_2)
8166         .Case("BYTE_3", SdwaSel::BYTE_3)
8167         .Case("WORD_0", SdwaSel::WORD_0)
8168         .Case("WORD_1", SdwaSel::WORD_1)
8169         .Case("DWORD", SdwaSel::DWORD)
8170         .Default(0xffffffff);
8171 
8172   if (Int == 0xffffffff) {
8173     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8174     return MatchOperand_ParseFail;
8175   }
8176 
8177   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8178   return MatchOperand_Success;
8179 }
8180 
8181 OperandMatchResultTy
8182 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8183   using namespace llvm::AMDGPU::SDWA;
8184 
8185   SMLoc S = getLoc();
8186   StringRef Value;
8187   OperandMatchResultTy res;
8188 
8189   SMLoc StringLoc;
8190   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8191   if (res != MatchOperand_Success) {
8192     return res;
8193   }
8194 
8195   int64_t Int;
8196   Int = StringSwitch<int64_t>(Value)
8197         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8198         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8199         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8200         .Default(0xffffffff);
8201 
8202   if (Int == 0xffffffff) {
8203     Error(StringLoc, "invalid dst_unused value");
8204     return MatchOperand_ParseFail;
8205   }
8206 
8207   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8208   return MatchOperand_Success;
8209 }
8210 
8211 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8212   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8213 }
8214 
8215 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8216   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8217 }
8218 
8219 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8220   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8221 }
8222 
8223 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8224   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8225 }
8226 
8227 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8228   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8229 }
8230 
8231 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8232                               uint64_t BasicInstType,
8233                               bool SkipDstVcc,
8234                               bool SkipSrcVcc) {
8235   using namespace llvm::AMDGPU::SDWA;
8236 
8237   OptionalImmIndexMap OptionalIdx;
8238   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8239   bool SkippedVcc = false;
8240 
8241   unsigned I = 1;
8242   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8243   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8244     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8245   }
8246 
8247   for (unsigned E = Operands.size(); I != E; ++I) {
8248     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8249     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8250         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8251       // VOP2b SDWA forms (v_add_u32, v_sub_u32, ...) use the "vcc" token as dst.
8252       // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8253       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8254       // Skip VCC only if we didn't skip it on previous iteration.
8255       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8256       if (BasicInstType == SIInstrFlags::VOP2 &&
8257           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8258            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8259         SkippedVcc = true;
8260         continue;
8261       } else if (BasicInstType == SIInstrFlags::VOPC &&
8262                  Inst.getNumOperands() == 0) {
8263         SkippedVcc = true;
8264         continue;
8265       }
8266     }
8267     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8268       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8269     } else if (Op.isImm()) {
8270       // Handle optional arguments
8271       OptionalIdx[Op.getImmTy()] = I;
8272     } else {
8273       llvm_unreachable("Invalid operand type");
8274     }
8275     SkippedVcc = false;
8276   }
8277 
8278   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8279       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8280       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8281     // V_NOP_sdwa (vi/gfx9/gfx10) has no optional SDWA arguments.
8282     switch (BasicInstType) {
8283     case SIInstrFlags::VOP1:
8284       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8285       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8286         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8287       }
8288       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8289       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8290       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8291       break;
8292 
8293     case SIInstrFlags::VOP2:
8294       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8295       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8296         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8297       }
8298       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8299       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8300       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8301       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8302       break;
8303 
8304     case SIInstrFlags::VOPC:
8305       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8306         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8307       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8308       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8309       break;
8310 
8311     default:
8312       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8313     }
8314   }
8315 
8316   // special case v_mac_{f16, f32}:
8317   // it has src2 register operand that is tied to dst operand
8318   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8319       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8320     auto it = Inst.begin();
8321     std::advance(
8322       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8323     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8324   }
8325 }
8326 
8327 //===----------------------------------------------------------------------===//
8328 // mAI
8329 //===----------------------------------------------------------------------===//
8330 
8331 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8332   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8333 }
8334 
8335 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8336   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8337 }
8338 
8339 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8340   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8341 }
8342 
8343 /// Force static initialization.
8344 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8345   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8346   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8347 }
8348 
8349 #define GET_REGISTER_MATCHER
8350 #define GET_MATCHER_IMPLEMENTATION
8351 #define GET_MNEMONIC_SPELL_CHECKER
8352 #define GET_MNEMONIC_CHECKER
8353 #include "AMDGPUGenAsmMatcher.inc"
8354 
8355 // This function must be defined after the auto-generated include so that
8356 // the MatchClassKind enum is defined.
8357 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8358                                                      unsigned Kind) {
8359   // Tokens like "glc" are parsed as immediate operands in ParseOperand(),
8360   // but MatchInstructionImpl() expects a token there and fails to validate
8361   // the operand. This method checks whether we were given an immediate
8362   // operand where the corresponding token was expected.
8363   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8364   switch (Kind) {
8365   case MCK_addr64:
8366     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8367   case MCK_gds:
8368     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8369   case MCK_lds:
8370     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8371   case MCK_idxen:
8372     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8373   case MCK_offen:
8374     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8375   case MCK_SSrcB32:
8376     // When operands have expression values, they will return true for isToken,
8377     // because it is not possible to distinguish between a token and an
8378     // expression at parse time. MatchInstructionImpl() will always try to
8379     // match an operand as a token, when isToken returns true, and when the
8380     // name of the expression is not a valid token, the match will fail,
8381     // so we need to handle it here.
8382     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8383   case MCK_SSrcF32:
8384     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8385   case MCK_SoppBrTarget:
8386     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8387   case MCK_VReg32OrOff:
8388     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8389   case MCK_InterpSlot:
8390     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8391   case MCK_Attr:
8392     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8393   case MCK_AttrChan:
8394     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8395   case MCK_ImmSMEMOffset:
8396     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8397   case MCK_SReg_64:
8398   case MCK_SReg_64_XEXEC:
8399     // Null is defined as a 32-bit register but
8400     // it should also be enabled with 64-bit operands.
8401     // The following code enables it for SReg_64 operands
8402     // used as source and destination. Remaining source
8403     // operands are handled in isInlinableImm.
8404     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8405   default:
8406     return Match_InvalidOperand;
8407   }
8408 }
8409 
8410 //===----------------------------------------------------------------------===//
8411 // endpgm
8412 //===----------------------------------------------------------------------===//
8413 
8414 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8415   SMLoc S = getLoc();
8416   int64_t Imm = 0;
8417 
8418   if (!parseExpr(Imm)) {
8419     // The operand is optional; if not present, default to 0.
8420     Imm = 0;
8421   }
8422 
8423   if (!isUInt<16>(Imm)) {
8424     Error(S, "expected a 16-bit value");
8425     return MatchOperand_ParseFail;
8426   }
8427 
8428   Operands.push_back(
8429       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8430   return MatchOperand_Success;
8431 }
8432 
8433 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8434