xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/Support/AMDGPUMetadata.h"
32 #include "llvm/Support/AMDHSAKernelDescriptor.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/MachineValueType.h"
35 #include "llvm/Support/TargetParser.h"
36 #include "llvm/Support/TargetRegistry.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
/// Register categories; IS_UNKNOWN is the zero value.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53   enum KindTy {
54     Token,
55     Immediate,
56     Register,
57     Expression
58   } Kind;
59 
60   SMLoc StartLoc, EndLoc;
61   const AMDGPUAsmParser *AsmParser;
62 
63 public:
64   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
66 
67   using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
69   struct Modifiers {
70     bool Abs = false;
71     bool Neg = false;
72     bool Sext = false;
73 
74     bool hasFPModifiers() const { return Abs || Neg; }
75     bool hasIntModifiers() const { return Sext; }
76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78     int64_t getFPModifiersOperand() const {
79       int64_t Operand = 0;
80       Operand |= Abs ? SISrcMods::ABS : 0u;
81       Operand |= Neg ? SISrcMods::NEG : 0u;
82       return Operand;
83     }
84 
85     int64_t getIntModifiersOperand() const {
86       int64_t Operand = 0;
87       Operand |= Sext ? SISrcMods::SEXT : 0u;
88       return Operand;
89     }
90 
91     int64_t getModifiersOperand() const {
92       assert(!(hasFPModifiers() && hasIntModifiers())
93            && "fp and int modifiers should not be used simultaneously");
94       if (hasFPModifiers()) {
95         return getFPModifiersOperand();
96       } else if (hasIntModifiers()) {
97         return getIntModifiersOperand();
98       } else {
99         return 0;
100       }
101     }
102 
103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104   };
105 
106   enum ImmTy {
107     ImmTyNone,
108     ImmTyGDS,
109     ImmTyLDS,
110     ImmTyOffen,
111     ImmTyIdxen,
112     ImmTyAddr64,
113     ImmTyOffset,
114     ImmTyInstOffset,
115     ImmTyOffset0,
116     ImmTyOffset1,
117     ImmTyCPol,
118     ImmTySWZ,
119     ImmTyTFE,
120     ImmTyD16,
121     ImmTyClampSI,
122     ImmTyOModSI,
123     ImmTyDPP8,
124     ImmTyDppCtrl,
125     ImmTyDppRowMask,
126     ImmTyDppBankMask,
127     ImmTyDppBoundCtrl,
128     ImmTyDppFi,
129     ImmTySdwaDstSel,
130     ImmTySdwaSrc0Sel,
131     ImmTySdwaSrc1Sel,
132     ImmTySdwaDstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTySwizzle,
155     ImmTyGprIdxMode,
156     ImmTyHigh,
157     ImmTyBLGP,
158     ImmTyCBSZ,
159     ImmTyABID,
160     ImmTyEndpgm,
161   };
162 
163   enum ImmKindTy {
164     ImmKindTyNone,
165     ImmKindTyLiteral,
166     ImmKindTyConst,
167   };
168 
169 private:
170   struct TokOp {
171     const char *Data;
172     unsigned Length;
173   };
174 
175   struct ImmOp {
176     int64_t Val;
177     ImmTy Type;
178     bool IsFPImm;
179     mutable ImmKindTy Kind;
180     Modifiers Mods;
181   };
182 
183   struct RegOp {
184     unsigned RegNo;
185     Modifiers Mods;
186   };
187 
188   union {
189     TokOp Tok;
190     ImmOp Imm;
191     RegOp Reg;
192     const MCExpr *Expr;
193   };
194 
195 public:
196   bool isToken() const override {
197     if (Kind == Token)
198       return true;
199 
200     // When parsing operands, we can't always tell if something was meant to be
201     // a token, like 'gds', or an expression that references a global variable.
202     // In this case, we assume the string is an expression, and if we need to
203     // interpret is a token, then we treat the symbol name as the token.
204     return isSymbolRefExpr();
205   }
206 
207   bool isSymbolRefExpr() const {
208     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209   }
210 
211   bool isImm() const override {
212     return Kind == Immediate;
213   }
214 
215   void setImmKindNone() const {
216     assert(isImm());
217     Imm.Kind = ImmKindTyNone;
218   }
219 
220   void setImmKindLiteral() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyLiteral;
223   }
224 
225   void setImmKindConst() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyConst;
228   }
229 
230   bool IsImmKindLiteral() const {
231     return isImm() && Imm.Kind == ImmKindTyLiteral;
232   }
233 
234   bool isImmKindConst() const {
235     return isImm() && Imm.Kind == ImmKindTyConst;
236   }
237 
238   bool isInlinableImm(MVT type) const;
239   bool isLiteralImm(MVT type) const;
240 
241   bool isRegKind() const {
242     return Kind == Register;
243   }
244 
245   bool isReg() const override {
246     return isRegKind() && !hasModifiers();
247   }
248 
249   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251   }
252 
253   bool isRegOrImmWithInt16InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255   }
256 
257   bool isRegOrImmWithInt32InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259   }
260 
261   bool isRegOrImmWithInt64InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263   }
264 
265   bool isRegOrImmWithFP16InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267   }
268 
269   bool isRegOrImmWithFP32InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271   }
272 
273   bool isRegOrImmWithFP64InputMods() const {
274     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275   }
276 
277   bool isVReg() const {
278     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279            isRegClass(AMDGPU::VReg_64RegClassID) ||
280            isRegClass(AMDGPU::VReg_96RegClassID) ||
281            isRegClass(AMDGPU::VReg_128RegClassID) ||
282            isRegClass(AMDGPU::VReg_160RegClassID) ||
283            isRegClass(AMDGPU::VReg_192RegClassID) ||
284            isRegClass(AMDGPU::VReg_256RegClassID) ||
285            isRegClass(AMDGPU::VReg_512RegClassID) ||
286            isRegClass(AMDGPU::VReg_1024RegClassID);
287   }
288 
289   bool isVReg32() const {
290     return isRegClass(AMDGPU::VGPR_32RegClassID);
291   }
292 
293   bool isVReg32OrOff() const {
294     return isOff() || isVReg32();
295   }
296 
297   bool isNull() const {
298     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299   }
300 
301   bool isVRegWithInputMods() const;
302 
303   bool isSDWAOperand(MVT type) const;
304   bool isSDWAFP16Operand() const;
305   bool isSDWAFP32Operand() const;
306   bool isSDWAInt16Operand() const;
307   bool isSDWAInt32Operand() const;
308 
309   bool isImmTy(ImmTy ImmT) const {
310     return isImm() && Imm.Type == ImmT;
311   }
312 
313   bool isImmModifier() const {
314     return isImm() && Imm.Type != ImmTyNone;
315   }
316 
317   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
318   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
319   bool isDMask() const { return isImmTy(ImmTyDMask); }
320   bool isDim() const { return isImmTy(ImmTyDim); }
321   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
322   bool isDA() const { return isImmTy(ImmTyDA); }
323   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
324   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
325   bool isLWE() const { return isImmTy(ImmTyLWE); }
326   bool isOff() const { return isImmTy(ImmTyOff); }
327   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
328   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
329   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
330   bool isOffen() const { return isImmTy(ImmTyOffen); }
331   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
332   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
333   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
334   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
335   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336 
337   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
338   bool isGDS() const { return isImmTy(ImmTyGDS); }
339   bool isLDS() const { return isImmTy(ImmTyLDS); }
340   bool isCPol() const { return isImmTy(ImmTyCPol); }
341   bool isSWZ() const { return isImmTy(ImmTySWZ); }
342   bool isTFE() const { return isImmTy(ImmTyTFE); }
343   bool isD16() const { return isImmTy(ImmTyD16); }
344   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
345   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
346   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
347   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
348   bool isFI() const { return isImmTy(ImmTyDppFi); }
349   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
350   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
351   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
352   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
353   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
354   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
355   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
356   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
357   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
358   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
359   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
360   bool isHigh() const { return isImmTy(ImmTyHigh); }
361 
362   bool isMod() const {
363     return isClampSI() || isOModSI();
364   }
365 
366   bool isRegOrImm() const {
367     return isReg() || isImm();
368   }
369 
370   bool isRegClass(unsigned RCID) const;
371 
372   bool isInlineValue() const;
373 
374   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376   }
377 
378   bool isSCSrcB16() const {
379     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380   }
381 
382   bool isSCSrcV2B16() const {
383     return isSCSrcB16();
384   }
385 
386   bool isSCSrcB32() const {
387     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388   }
389 
390   bool isSCSrcB64() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392   }
393 
394   bool isBoolReg() const;
395 
396   bool isSCSrcF16() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398   }
399 
400   bool isSCSrcV2F16() const {
401     return isSCSrcF16();
402   }
403 
404   bool isSCSrcF32() const {
405     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406   }
407 
408   bool isSCSrcF64() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410   }
411 
412   bool isSSrcB32() const {
413     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414   }
415 
416   bool isSSrcB16() const {
417     return isSCSrcB16() || isLiteralImm(MVT::i16);
418   }
419 
420   bool isSSrcV2B16() const {
421     llvm_unreachable("cannot happen");
422     return isSSrcB16();
423   }
424 
425   bool isSSrcB64() const {
426     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427     // See isVSrc64().
428     return isSCSrcB64() || isLiteralImm(MVT::i64);
429   }
430 
431   bool isSSrcF32() const {
432     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433   }
434 
435   bool isSSrcF64() const {
436     return isSCSrcB64() || isLiteralImm(MVT::f64);
437   }
438 
439   bool isSSrcF16() const {
440     return isSCSrcB16() || isLiteralImm(MVT::f16);
441   }
442 
443   bool isSSrcV2F16() const {
444     llvm_unreachable("cannot happen");
445     return isSSrcF16();
446   }
447 
448   bool isSSrcV2FP32() const {
449     llvm_unreachable("cannot happen");
450     return isSSrcF32();
451   }
452 
453   bool isSCSrcV2FP32() const {
454     llvm_unreachable("cannot happen");
455     return isSCSrcF32();
456   }
457 
458   bool isSSrcV2INT32() const {
459     llvm_unreachable("cannot happen");
460     return isSSrcB32();
461   }
462 
463   bool isSCSrcV2INT32() const {
464     llvm_unreachable("cannot happen");
465     return isSCSrcB32();
466   }
467 
468   bool isSSrcOrLdsB32() const {
469     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470            isLiteralImm(MVT::i32) || isExpr();
471   }
472 
473   bool isVCSrcB32() const {
474     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475   }
476 
477   bool isVCSrcB64() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479   }
480 
481   bool isVCSrcB16() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483   }
484 
485   bool isVCSrcV2B16() const {
486     return isVCSrcB16();
487   }
488 
489   bool isVCSrcF32() const {
490     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491   }
492 
493   bool isVCSrcF64() const {
494     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495   }
496 
497   bool isVCSrcF16() const {
498     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499   }
500 
501   bool isVCSrcV2F16() const {
502     return isVCSrcF16();
503   }
504 
505   bool isVSrcB32() const {
506     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507   }
508 
509   bool isVSrcB64() const {
510     return isVCSrcF64() || isLiteralImm(MVT::i64);
511   }
512 
513   bool isVSrcB16() const {
514     return isVCSrcB16() || isLiteralImm(MVT::i16);
515   }
516 
517   bool isVSrcV2B16() const {
518     return isVSrcB16() || isLiteralImm(MVT::v2i16);
519   }
520 
521   bool isVCSrcV2FP32() const {
522     return isVCSrcF64();
523   }
524 
525   bool isVSrcV2FP32() const {
526     return isVSrcF64() || isLiteralImm(MVT::v2f32);
527   }
528 
529   bool isVCSrcV2INT32() const {
530     return isVCSrcB64();
531   }
532 
533   bool isVSrcV2INT32() const {
534     return isVSrcB64() || isLiteralImm(MVT::v2i32);
535   }
536 
537   bool isVSrcF32() const {
538     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539   }
540 
541   bool isVSrcF64() const {
542     return isVCSrcF64() || isLiteralImm(MVT::f64);
543   }
544 
545   bool isVSrcF16() const {
546     return isVCSrcF16() || isLiteralImm(MVT::f16);
547   }
548 
549   bool isVSrcV2F16() const {
550     return isVSrcF16() || isLiteralImm(MVT::v2f16);
551   }
552 
553   bool isVISrcB32() const {
554     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555   }
556 
557   bool isVISrcB16() const {
558     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559   }
560 
561   bool isVISrcV2B16() const {
562     return isVISrcB16();
563   }
564 
565   bool isVISrcF32() const {
566     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567   }
568 
569   bool isVISrcF16() const {
570     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571   }
572 
573   bool isVISrcV2F16() const {
574     return isVISrcF16() || isVISrcB32();
575   }
576 
577   bool isVISrc_64B64() const {
578     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579   }
580 
581   bool isVISrc_64F64() const {
582     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583   }
584 
585   bool isVISrc_64V2FP32() const {
586     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587   }
588 
589   bool isVISrc_64V2INT32() const {
590     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591   }
592 
593   bool isVISrc_256B64() const {
594     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595   }
596 
597   bool isVISrc_256F64() const {
598     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599   }
600 
601   bool isVISrc_128B16() const {
602     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603   }
604 
605   bool isVISrc_128V2B16() const {
606     return isVISrc_128B16();
607   }
608 
609   bool isVISrc_128B32() const {
610     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611   }
612 
613   bool isVISrc_128F32() const {
614     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615   }
616 
617   bool isVISrc_256V2FP32() const {
618     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619   }
620 
621   bool isVISrc_256V2INT32() const {
622     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623   }
624 
625   bool isVISrc_512B32() const {
626     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627   }
628 
629   bool isVISrc_512B16() const {
630     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631   }
632 
633   bool isVISrc_512V2B16() const {
634     return isVISrc_512B16();
635   }
636 
637   bool isVISrc_512F32() const {
638     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639   }
640 
641   bool isVISrc_512F16() const {
642     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643   }
644 
645   bool isVISrc_512V2F16() const {
646     return isVISrc_512F16() || isVISrc_512B32();
647   }
648 
649   bool isVISrc_1024B32() const {
650     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651   }
652 
653   bool isVISrc_1024B16() const {
654     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655   }
656 
657   bool isVISrc_1024V2B16() const {
658     return isVISrc_1024B16();
659   }
660 
661   bool isVISrc_1024F32() const {
662     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663   }
664 
665   bool isVISrc_1024F16() const {
666     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667   }
668 
669   bool isVISrc_1024V2F16() const {
670     return isVISrc_1024F16() || isVISrc_1024B32();
671   }
672 
673   bool isAISrcB32() const {
674     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675   }
676 
677   bool isAISrcB16() const {
678     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679   }
680 
681   bool isAISrcV2B16() const {
682     return isAISrcB16();
683   }
684 
685   bool isAISrcF32() const {
686     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687   }
688 
689   bool isAISrcF16() const {
690     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691   }
692 
693   bool isAISrcV2F16() const {
694     return isAISrcF16() || isAISrcB32();
695   }
696 
697   bool isAISrc_64B64() const {
698     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699   }
700 
701   bool isAISrc_64F64() const {
702     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703   }
704 
705   bool isAISrc_128B32() const {
706     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707   }
708 
709   bool isAISrc_128B16() const {
710     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711   }
712 
713   bool isAISrc_128V2B16() const {
714     return isAISrc_128B16();
715   }
716 
717   bool isAISrc_128F32() const {
718     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719   }
720 
721   bool isAISrc_128F16() const {
722     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723   }
724 
725   bool isAISrc_128V2F16() const {
726     return isAISrc_128F16() || isAISrc_128B32();
727   }
728 
729   bool isVISrc_128F16() const {
730     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731   }
732 
733   bool isVISrc_128V2F16() const {
734     return isVISrc_128F16() || isVISrc_128B32();
735   }
736 
737   bool isAISrc_256B64() const {
738     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739   }
740 
741   bool isAISrc_256F64() const {
742     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743   }
744 
745   bool isAISrc_512B32() const {
746     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747   }
748 
749   bool isAISrc_512B16() const {
750     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751   }
752 
753   bool isAISrc_512V2B16() const {
754     return isAISrc_512B16();
755   }
756 
757   bool isAISrc_512F32() const {
758     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759   }
760 
761   bool isAISrc_512F16() const {
762     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763   }
764 
765   bool isAISrc_512V2F16() const {
766     return isAISrc_512F16() || isAISrc_512B32();
767   }
768 
769   bool isAISrc_1024B32() const {
770     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771   }
772 
773   bool isAISrc_1024B16() const {
774     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775   }
776 
777   bool isAISrc_1024V2B16() const {
778     return isAISrc_1024B16();
779   }
780 
781   bool isAISrc_1024F32() const {
782     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783   }
784 
785   bool isAISrc_1024F16() const {
786     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787   }
788 
789   bool isAISrc_1024V2F16() const {
790     return isAISrc_1024F16() || isAISrc_1024B32();
791   }
792 
793   bool isKImmFP32() const {
794     return isLiteralImm(MVT::f32);
795   }
796 
797   bool isKImmFP16() const {
798     return isLiteralImm(MVT::f16);
799   }
800 
801   bool isMem() const override {
802     return false;
803   }
804 
805   bool isExpr() const {
806     return Kind == Expression;
807   }
808 
809   bool isSoppBrTarget() const {
810     return isExpr() || isImm();
811   }
812 
813   bool isSWaitCnt() const;
814   bool isHwreg() const;
815   bool isSendMsg() const;
816   bool isSwizzle() const;
817   bool isSMRDOffset8() const;
818   bool isSMEMOffset() const;
819   bool isSMRDLiteralOffset() const;
820   bool isDPP8() const;
821   bool isDPPCtrl() const;
822   bool isBLGP() const;
823   bool isCBSZ() const;
824   bool isABID() const;
825   bool isGPRIdxMode() const;
826   bool isS16Imm() const;
827   bool isU16Imm() const;
828   bool isEndpgm() const;
829 
830   StringRef getExpressionAsToken() const {
831     assert(isExpr());
832     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833     return S->getSymbol().getName();
834   }
835 
836   StringRef getToken() const {
837     assert(isToken());
838 
839     if (Kind == Expression)
840       return getExpressionAsToken();
841 
842     return StringRef(Tok.Data, Tok.Length);
843   }
844 
845   int64_t getImm() const {
846     assert(isImm());
847     return Imm.Val;
848   }
849 
850   void setImm(int64_t Val) {
851     assert(isImm());
852     Imm.Val = Val;
853   }
854 
855   ImmTy getImmTy() const {
856     assert(isImm());
857     return Imm.Type;
858   }
859 
860   unsigned getReg() const override {
861     assert(isRegKind());
862     return Reg.RegNo;
863   }
864 
865   SMLoc getStartLoc() const override {
866     return StartLoc;
867   }
868 
869   SMLoc getEndLoc() const override {
870     return EndLoc;
871   }
872 
873   SMRange getLocRange() const {
874     return SMRange(StartLoc, EndLoc);
875   }
876 
877   Modifiers getModifiers() const {
878     assert(isRegKind() || isImmTy(ImmTyNone));
879     return isRegKind() ? Reg.Mods : Imm.Mods;
880   }
881 
882   void setModifiers(Modifiers Mods) {
883     assert(isRegKind() || isImmTy(ImmTyNone));
884     if (isRegKind())
885       Reg.Mods = Mods;
886     else
887       Imm.Mods = Mods;
888   }
889 
890   bool hasModifiers() const {
891     return getModifiers().hasModifiers();
892   }
893 
894   bool hasFPModifiers() const {
895     return getModifiers().hasFPModifiers();
896   }
897 
898   bool hasIntModifiers() const {
899     return getModifiers().hasIntModifiers();
900   }
901 
902   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903 
904   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905 
906   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907 
908   template <unsigned Bitwidth>
909   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910 
911   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912     addKImmFPOperands<16>(Inst, N);
913   }
914 
915   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916     addKImmFPOperands<32>(Inst, N);
917   }
918 
919   void addRegOperands(MCInst &Inst, unsigned N) const;
920 
921   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922     addRegOperands(Inst, N);
923   }
924 
925   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926     if (isRegKind())
927       addRegOperands(Inst, N);
928     else if (isExpr())
929       Inst.addOperand(MCOperand::createExpr(Expr));
930     else
931       addImmOperands(Inst, N);
932   }
933 
934   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
935     Modifiers Mods = getModifiers();
936     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937     if (isRegKind()) {
938       addRegOperands(Inst, N);
939     } else {
940       addImmOperands(Inst, N, false);
941     }
942   }
943 
944   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945     assert(!hasIntModifiers());
946     addRegOrImmWithInputModsOperands(Inst, N);
947   }
948 
949   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950     assert(!hasFPModifiers());
951     addRegOrImmWithInputModsOperands(Inst, N);
952   }
953 
954   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955     Modifiers Mods = getModifiers();
956     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957     assert(isRegKind());
958     addRegOperands(Inst, N);
959   }
960 
961   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962     assert(!hasIntModifiers());
963     addRegWithInputModsOperands(Inst, N);
964   }
965 
966   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967     assert(!hasFPModifiers());
968     addRegWithInputModsOperands(Inst, N);
969   }
970 
971   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972     if (isImm())
973       addImmOperands(Inst, N);
974     else {
975       assert(isExpr());
976       Inst.addOperand(MCOperand::createExpr(Expr));
977     }
978   }
979 
980   static void printImmTy(raw_ostream& OS, ImmTy Type) {
981     switch (Type) {
982     case ImmTyNone: OS << "None"; break;
983     case ImmTyGDS: OS << "GDS"; break;
984     case ImmTyLDS: OS << "LDS"; break;
985     case ImmTyOffen: OS << "Offen"; break;
986     case ImmTyIdxen: OS << "Idxen"; break;
987     case ImmTyAddr64: OS << "Addr64"; break;
988     case ImmTyOffset: OS << "Offset"; break;
989     case ImmTyInstOffset: OS << "InstOffset"; break;
990     case ImmTyOffset0: OS << "Offset0"; break;
991     case ImmTyOffset1: OS << "Offset1"; break;
992     case ImmTyCPol: OS << "CPol"; break;
993     case ImmTySWZ: OS << "SWZ"; break;
994     case ImmTyTFE: OS << "TFE"; break;
995     case ImmTyD16: OS << "D16"; break;
996     case ImmTyFORMAT: OS << "FORMAT"; break;
997     case ImmTyClampSI: OS << "ClampSI"; break;
998     case ImmTyOModSI: OS << "OModSI"; break;
999     case ImmTyDPP8: OS << "DPP8"; break;
1000     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004     case ImmTyDppFi: OS << "FI"; break;
1005     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009     case ImmTyDMask: OS << "DMask"; break;
1010     case ImmTyDim: OS << "Dim"; break;
1011     case ImmTyUNorm: OS << "UNorm"; break;
1012     case ImmTyDA: OS << "DA"; break;
1013     case ImmTyR128A16: OS << "R128A16"; break;
1014     case ImmTyA16: OS << "A16"; break;
1015     case ImmTyLWE: OS << "LWE"; break;
1016     case ImmTyOff: OS << "Off"; break;
1017     case ImmTyExpTgt: OS << "ExpTgt"; break;
1018     case ImmTyExpCompr: OS << "ExpCompr"; break;
1019     case ImmTyExpVM: OS << "ExpVM"; break;
1020     case ImmTyHwreg: OS << "Hwreg"; break;
1021     case ImmTySendMsg: OS << "SendMsg"; break;
1022     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024     case ImmTyAttrChan: OS << "AttrChan"; break;
1025     case ImmTyOpSel: OS << "OpSel"; break;
1026     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027     case ImmTyNegLo: OS << "NegLo"; break;
1028     case ImmTyNegHi: OS << "NegHi"; break;
1029     case ImmTySwizzle: OS << "Swizzle"; break;
1030     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031     case ImmTyHigh: OS << "High"; break;
1032     case ImmTyBLGP: OS << "BLGP"; break;
1033     case ImmTyCBSZ: OS << "CBSZ"; break;
1034     case ImmTyABID: OS << "ABID"; break;
1035     case ImmTyEndpgm: OS << "Endpgm"; break;
1036     }
1037   }
1038 
1039   void print(raw_ostream &OS) const override {
1040     switch (Kind) {
1041     case Register:
1042       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043       break;
1044     case Immediate:
1045       OS << '<' << getImm();
1046       if (getImmTy() != ImmTyNone) {
1047         OS << " type: "; printImmTy(OS, getImmTy());
1048       }
1049       OS << " mods: " << Imm.Mods << '>';
1050       break;
1051     case Token:
1052       OS << '\'' << getToken() << '\'';
1053       break;
1054     case Expression:
1055       OS << "<expr " << *Expr << '>';
1056       break;
1057     }
1058   }
1059 
1060   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061                                       int64_t Val, SMLoc Loc,
1062                                       ImmTy Type = ImmTyNone,
1063                                       bool IsFPImm = false) {
1064     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065     Op->Imm.Val = Val;
1066     Op->Imm.IsFPImm = IsFPImm;
1067     Op->Imm.Kind = ImmKindTyNone;
1068     Op->Imm.Type = Type;
1069     Op->Imm.Mods = Modifiers();
1070     Op->StartLoc = Loc;
1071     Op->EndLoc = Loc;
1072     return Op;
1073   }
1074 
1075   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076                                         StringRef Str, SMLoc Loc,
1077                                         bool HasExplicitEncodingSize = true) {
1078     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079     Res->Tok.Data = Str.data();
1080     Res->Tok.Length = Str.size();
1081     Res->StartLoc = Loc;
1082     Res->EndLoc = Loc;
1083     return Res;
1084   }
1085 
1086   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087                                       unsigned RegNo, SMLoc S,
1088                                       SMLoc E) {
1089     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090     Op->Reg.RegNo = RegNo;
1091     Op->Reg.Mods = Modifiers();
1092     Op->StartLoc = S;
1093     Op->EndLoc = E;
1094     return Op;
1095   }
1096 
1097   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098                                        const class MCExpr *Expr, SMLoc S) {
1099     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100     Op->Expr = Expr;
1101     Op->StartLoc = S;
1102     Op->EndLoc = S;
1103     return Op;
1104   }
1105 };
1106 
1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1109   return OS;
1110 }
1111 
1112 //===----------------------------------------------------------------------===//
1113 // AsmParser
1114 //===----------------------------------------------------------------------===//
1115 
1116 // Holds info related to the current kernel, e.g. count of SGPRs used.
1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1118 // .amdgpu_hsa_kernel or at EOF.
1119 class KernelScopeInfo {
1120   int SgprIndexUnusedMin = -1;
1121   int VgprIndexUnusedMin = -1;
1122   MCContext *Ctx = nullptr;
1123 
1124   void usesSgprAt(int i) {
1125     if (i >= SgprIndexUnusedMin) {
1126       SgprIndexUnusedMin = ++i;
1127       if (Ctx) {
1128         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130       }
1131     }
1132   }
1133 
1134   void usesVgprAt(int i) {
1135     if (i >= VgprIndexUnusedMin) {
1136       VgprIndexUnusedMin = ++i;
1137       if (Ctx) {
1138         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140       }
1141     }
1142   }
1143 
1144 public:
1145   KernelScopeInfo() = default;
1146 
1147   void initialize(MCContext &Context) {
1148     Ctx = &Context;
1149     usesSgprAt(SgprIndexUnusedMin = -1);
1150     usesVgprAt(VgprIndexUnusedMin = -1);
1151   }
1152 
1153   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154     switch (RegKind) {
1155       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156       case IS_AGPR: // fall through
1157       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158       default: break;
1159     }
1160   }
1161 };
1162 
1163 class AMDGPUAsmParser : public MCTargetAsmParser {
1164   MCAsmParser &Parser;
1165 
1166   // Number of extra operands parsed after the first optional operand.
1167   // This may be necessary to skip hardcoded mandatory operands.
1168   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169 
1170   unsigned ForcedEncodingSize = 0;
1171   bool ForcedDPP = false;
1172   bool ForcedSDWA = false;
1173   KernelScopeInfo KernelScope;
1174   unsigned CPolSeen;
1175 
1176   /// @name Auto-generated Match Functions
1177   /// {
1178 
1179 #define GET_ASSEMBLER_HEADER
1180 #include "AMDGPUGenAsmMatcher.inc"
1181 
1182   /// }
1183 
1184 private:
1185   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186   bool OutOfRangeError(SMRange Range);
1187   /// Calculate VGPR/SGPR blocks required for given target, reserved
1188   /// registers, and user-specified NextFreeXGPR values.
1189   ///
1190   /// \param Features [in] Target features, used for bug corrections.
1191   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195   /// descriptor field, if valid.
1196   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200   /// \param VGPRBlocks [out] Result VGPR block count.
1201   /// \param SGPRBlocks [out] Result SGPR block count.
1202   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203                           bool FlatScrUsed, bool XNACKUsed,
1204                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205                           SMRange VGPRRange, unsigned NextFreeSGPR,
1206                           SMRange SGPRRange, unsigned &VGPRBlocks,
1207                           unsigned &SGPRBlocks);
1208   bool ParseDirectiveAMDGCNTarget();
1209   bool ParseDirectiveAMDHSAKernel();
1210   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211   bool ParseDirectiveHSACodeObjectVersion();
1212   bool ParseDirectiveHSACodeObjectISA();
1213   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214   bool ParseDirectiveAMDKernelCodeT();
1215   // TODO: Possibly make subtargetHasRegister const.
1216   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217   bool ParseDirectiveAMDGPUHsaKernel();
1218 
1219   bool ParseDirectiveISAVersion();
1220   bool ParseDirectiveHSAMetadata();
1221   bool ParseDirectivePALMetadataBegin();
1222   bool ParseDirectivePALMetadata();
1223   bool ParseDirectiveAMDGPULDS();
1224 
1225   /// Common code to parse out a block of text (typically YAML) between start and
1226   /// end directives.
1227   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228                            const char *AssemblerDirectiveEnd,
1229                            std::string &CollectString);
1230 
1231   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234                            unsigned &RegNum, unsigned &RegWidth,
1235                            bool RestoreOnFailure = false);
1236   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237                            unsigned &RegNum, unsigned &RegWidth,
1238                            SmallVectorImpl<AsmToken> &Tokens);
1239   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240                            unsigned &RegWidth,
1241                            SmallVectorImpl<AsmToken> &Tokens);
1242   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243                            unsigned &RegWidth,
1244                            SmallVectorImpl<AsmToken> &Tokens);
1245   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247   bool ParseRegRange(unsigned& Num, unsigned& Width);
1248   unsigned getRegularReg(RegisterKind RegKind,
1249                          unsigned RegNum,
1250                          unsigned RegWidth,
1251                          SMLoc Loc);
1252 
1253   bool isRegister();
1254   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256   void initializeGprCountSymbol(RegisterKind RegKind);
1257   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258                              unsigned RegWidth);
1259   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260                     bool IsAtomic, bool IsLds = false);
1261   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262                  bool IsGdsHardcoded);
1263 
1264 public:
1265   enum AMDGPUMatchResultTy {
1266     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267   };
1268   enum OperandMode {
1269     OperandMode_Default,
1270     OperandMode_NSA,
1271   };
1272 
1273   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274 
1275   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276                const MCInstrInfo &MII,
1277                const MCTargetOptions &Options)
1278       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279     MCAsmParserExtension::Initialize(Parser);
1280 
1281     if (getFeatureBits().none()) {
1282       // Set default features.
1283       copySTI().ToggleFeature("southern-islands");
1284     }
1285 
1286     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287 
1288     {
1289       // TODO: make those pre-defined variables read-only.
1290       // Currently there is none suitable machinery in the core llvm-mc for this.
1291       // MCSymbol::isRedefinable is intended for another purpose, and
1292       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1293       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294       MCContext &Ctx = getContext();
1295       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296         MCSymbol *Sym =
1297             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303       } else {
1304         MCSymbol *Sym =
1305             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311       }
1312       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313         initializeGprCountSymbol(IS_VGPR);
1314         initializeGprCountSymbol(IS_SGPR);
1315       } else
1316         KernelScope.initialize(getContext());
1317     }
1318   }
1319 
1320   bool hasMIMG_R128() const {
1321     return AMDGPU::hasMIMG_R128(getSTI());
1322   }
1323 
1324   bool hasPackedD16() const {
1325     return AMDGPU::hasPackedD16(getSTI());
1326   }
1327 
1328   bool hasGFX10A16() const {
1329     return AMDGPU::hasGFX10A16(getSTI());
1330   }
1331 
1332   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1333 
1334   bool isSI() const {
1335     return AMDGPU::isSI(getSTI());
1336   }
1337 
1338   bool isCI() const {
1339     return AMDGPU::isCI(getSTI());
1340   }
1341 
1342   bool isVI() const {
1343     return AMDGPU::isVI(getSTI());
1344   }
1345 
1346   bool isGFX9() const {
1347     return AMDGPU::isGFX9(getSTI());
1348   }
1349 
1350   bool isGFX90A() const {
1351     return AMDGPU::isGFX90A(getSTI());
1352   }
1353 
1354   bool isGFX9Plus() const {
1355     return AMDGPU::isGFX9Plus(getSTI());
1356   }
1357 
1358   bool isGFX10() const {
1359     return AMDGPU::isGFX10(getSTI());
1360   }
1361 
1362   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1363 
1364   bool isGFX10_BEncoding() const {
1365     return AMDGPU::isGFX10_BEncoding(getSTI());
1366   }
1367 
1368   bool hasInv2PiInlineImm() const {
1369     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1370   }
1371 
1372   bool hasFlatOffsets() const {
1373     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1374   }
1375 
1376   bool hasArchitectedFlatScratch() const {
1377     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1378   }
1379 
1380   bool hasSGPR102_SGPR103() const {
1381     return !isVI() && !isGFX9();
1382   }
1383 
1384   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1385 
1386   bool hasIntClamp() const {
1387     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1388   }
1389 
1390   AMDGPUTargetStreamer &getTargetStreamer() {
1391     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1392     return static_cast<AMDGPUTargetStreamer &>(TS);
1393   }
1394 
1395   const MCRegisterInfo *getMRI() const {
1396     // We need this const_cast because for some reason getContext() is not const
1397     // in MCAsmParser.
1398     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1399   }
1400 
1401   const MCInstrInfo *getMII() const {
1402     return &MII;
1403   }
1404 
1405   const FeatureBitset &getFeatureBits() const {
1406     return getSTI().getFeatureBits();
1407   }
1408 
1409   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1410   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1411   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1412 
1413   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1414   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1415   bool isForcedDPP() const { return ForcedDPP; }
1416   bool isForcedSDWA() const { return ForcedSDWA; }
1417   ArrayRef<unsigned> getMatchedVariants() const;
1418   StringRef getMatchedVariantName() const;
1419 
1420   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1421   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1422                      bool RestoreOnFailure);
1423   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1424   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1425                                         SMLoc &EndLoc) override;
1426   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1427   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1428                                       unsigned Kind) override;
1429   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1430                                OperandVector &Operands, MCStreamer &Out,
1431                                uint64_t &ErrorInfo,
1432                                bool MatchingInlineAsm) override;
1433   bool ParseDirective(AsmToken DirectiveID) override;
1434   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1435                                     OperandMode Mode = OperandMode_Default);
1436   StringRef parseMnemonicSuffix(StringRef Name);
1437   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1438                         SMLoc NameLoc, OperandVector &Operands) override;
1439   //bool ProcessInstruction(MCInst &Inst);
1440 
1441   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1442 
1443   OperandMatchResultTy
1444   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1445                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446                      bool (*ConvertResult)(int64_t &) = nullptr);
1447 
1448   OperandMatchResultTy
1449   parseOperandArrayWithPrefix(const char *Prefix,
1450                               OperandVector &Operands,
1451                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1452                               bool (*ConvertResult)(int64_t&) = nullptr);
1453 
1454   OperandMatchResultTy
1455   parseNamedBit(StringRef Name, OperandVector &Operands,
1456                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1457   OperandMatchResultTy parseCPol(OperandVector &Operands);
1458   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1459                                              StringRef &Value,
1460                                              SMLoc &StringLoc);
1461 
1462   bool isModifier();
1463   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1464   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1465   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1466   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1467   bool parseSP3NegModifier();
1468   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1469   OperandMatchResultTy parseReg(OperandVector &Operands);
1470   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1471   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1472   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1473   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1474   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1475   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1476   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1477   OperandMatchResultTy parseUfmt(int64_t &Format);
1478   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1479   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1480   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1481   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1482   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1483   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1484   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1485 
1486   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1487   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1488   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1489   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1490 
1491   bool parseCnt(int64_t &IntVal);
1492   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1493   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1494 
1495 private:
1496   struct OperandInfoTy {
1497     SMLoc Loc;
1498     int64_t Id;
1499     bool IsSymbolic = false;
1500     bool IsDefined = false;
1501 
1502     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1503   };
1504 
1505   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1506   bool validateSendMsg(const OperandInfoTy &Msg,
1507                        const OperandInfoTy &Op,
1508                        const OperandInfoTy &Stream);
1509 
1510   bool parseHwregBody(OperandInfoTy &HwReg,
1511                       OperandInfoTy &Offset,
1512                       OperandInfoTy &Width);
1513   bool validateHwreg(const OperandInfoTy &HwReg,
1514                      const OperandInfoTy &Offset,
1515                      const OperandInfoTy &Width);
1516 
1517   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1518   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1519 
1520   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1521                       const OperandVector &Operands) const;
1522   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1523   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1524   SMLoc getLitLoc(const OperandVector &Operands) const;
1525   SMLoc getConstLoc(const OperandVector &Operands) const;
1526 
1527   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1528   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1529   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1530   bool validateSOPLiteral(const MCInst &Inst) const;
1531   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1532   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1533   bool validateIntClampSupported(const MCInst &Inst);
1534   bool validateMIMGAtomicDMask(const MCInst &Inst);
1535   bool validateMIMGGatherDMask(const MCInst &Inst);
1536   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1537   bool validateMIMGDataSize(const MCInst &Inst);
1538   bool validateMIMGAddrSize(const MCInst &Inst);
1539   bool validateMIMGD16(const MCInst &Inst);
1540   bool validateMIMGDim(const MCInst &Inst);
1541   bool validateMIMGMSAA(const MCInst &Inst);
1542   bool validateOpSel(const MCInst &Inst);
1543   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1544   bool validateVccOperand(unsigned Reg) const;
1545   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1546   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1547   bool validateAGPRLdSt(const MCInst &Inst) const;
1548   bool validateVGPRAlign(const MCInst &Inst) const;
1549   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1550   bool validateDivScale(const MCInst &Inst);
1551   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1552                              const SMLoc &IDLoc);
1553   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1554   unsigned getConstantBusLimit(unsigned Opcode) const;
1555   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1556   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1557   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1558 
1559   bool isSupportedMnemo(StringRef Mnemo,
1560                         const FeatureBitset &FBS);
1561   bool isSupportedMnemo(StringRef Mnemo,
1562                         const FeatureBitset &FBS,
1563                         ArrayRef<unsigned> Variants);
1564   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1565 
1566   bool isId(const StringRef Id) const;
1567   bool isId(const AsmToken &Token, const StringRef Id) const;
1568   bool isToken(const AsmToken::TokenKind Kind) const;
1569   bool trySkipId(const StringRef Id);
1570   bool trySkipId(const StringRef Pref, const StringRef Id);
1571   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1572   bool trySkipToken(const AsmToken::TokenKind Kind);
1573   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1574   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1575   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1576 
1577   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1578   AsmToken::TokenKind getTokenKind() const;
1579   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1580   bool parseExpr(OperandVector &Operands);
1581   StringRef getTokenStr() const;
1582   AsmToken peekToken();
1583   AsmToken getToken() const;
1584   SMLoc getLoc() const;
1585   void lex();
1586 
1587 public:
1588   void onBeginOfFile() override;
1589 
1590   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1591   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1592 
1593   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1594   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1595   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1596   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1597   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1598   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1599 
1600   bool parseSwizzleOperand(int64_t &Op,
1601                            const unsigned MinVal,
1602                            const unsigned MaxVal,
1603                            const StringRef ErrMsg,
1604                            SMLoc &Loc);
1605   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1606                             const unsigned MinVal,
1607                             const unsigned MaxVal,
1608                             const StringRef ErrMsg);
1609   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1610   bool parseSwizzleOffset(int64_t &Imm);
1611   bool parseSwizzleMacro(int64_t &Imm);
1612   bool parseSwizzleQuadPerm(int64_t &Imm);
1613   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1614   bool parseSwizzleBroadcast(int64_t &Imm);
1615   bool parseSwizzleSwap(int64_t &Imm);
1616   bool parseSwizzleReverse(int64_t &Imm);
1617 
1618   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1619   int64_t parseGPRIdxMacro();
1620 
1621   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1622   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1623   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1624   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1625 
1626   AMDGPUOperand::Ptr defaultCPol() const;
1627 
1628   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1629   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1630   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1631   AMDGPUOperand::Ptr defaultFlatOffset() const;
1632 
1633   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1634 
1635   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1636                OptionalImmIndexMap &OptionalIdx);
1637   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1638   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1639   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1640   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1641                 OptionalImmIndexMap &OptionalIdx);
1642 
1643   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1644 
1645   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1646                bool IsAtomic = false);
1647   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1648   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1649 
1650   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1651 
1652   bool parseDimId(unsigned &Encoding);
1653   OperandMatchResultTy parseDim(OperandVector &Operands);
1654   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1655   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1656   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1657   int64_t parseDPPCtrlSel(StringRef Ctrl);
1658   int64_t parseDPPCtrlPerm();
1659   AMDGPUOperand::Ptr defaultRowMask() const;
1660   AMDGPUOperand::Ptr defaultBankMask() const;
1661   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1662   AMDGPUOperand::Ptr defaultFI() const;
1663   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1664   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1665 
1666   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1667                                     AMDGPUOperand::ImmTy Type);
1668   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1669   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1670   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1671   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1672   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1673   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1674   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1675                uint64_t BasicInstType,
1676                bool SkipDstVcc = false,
1677                bool SkipSrcVcc = false);
1678 
1679   AMDGPUOperand::Ptr defaultBLGP() const;
1680   AMDGPUOperand::Ptr defaultCBSZ() const;
1681   AMDGPUOperand::Ptr defaultABID() const;
1682 
1683   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1684   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1685 };
1686 
1687 struct OptionalOperand {
1688   const char *Name;
1689   AMDGPUOperand::ImmTy Type;
1690   bool IsBit;
1691   bool (*ConvertResult)(int64_t&);
1692 };
1693 
1694 } // end anonymous namespace
1695 
1696 // May be called with integer type with equivalent bitwidth.
1697 static const fltSemantics *getFltSemantics(unsigned Size) {
1698   switch (Size) {
1699   case 4:
1700     return &APFloat::IEEEsingle();
1701   case 8:
1702     return &APFloat::IEEEdouble();
1703   case 2:
1704     return &APFloat::IEEEhalf();
1705   default:
1706     llvm_unreachable("unsupported fp type");
1707   }
1708 }
1709 
1710 static const fltSemantics *getFltSemantics(MVT VT) {
1711   return getFltSemantics(VT.getSizeInBits() / 8);
1712 }
1713 
1714 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1715   switch (OperandType) {
1716   case AMDGPU::OPERAND_REG_IMM_INT32:
1717   case AMDGPU::OPERAND_REG_IMM_FP32:
1718   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1719   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1720   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1721   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1722   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1723   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1724   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1725   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1726     return &APFloat::IEEEsingle();
1727   case AMDGPU::OPERAND_REG_IMM_INT64:
1728   case AMDGPU::OPERAND_REG_IMM_FP64:
1729   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1730   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1731   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1732     return &APFloat::IEEEdouble();
1733   case AMDGPU::OPERAND_REG_IMM_INT16:
1734   case AMDGPU::OPERAND_REG_IMM_FP16:
1735   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1736   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1737   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1738   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1739   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1740   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1741   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1742   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1743   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1744   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1745     return &APFloat::IEEEhalf();
1746   default:
1747     llvm_unreachable("unsupported fp type");
1748   }
1749 }
1750 
1751 //===----------------------------------------------------------------------===//
1752 // Operand
1753 //===----------------------------------------------------------------------===//
1754 
1755 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1756   bool Lost;
1757 
1758   // Convert literal to single precision
1759   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1760                                                APFloat::rmNearestTiesToEven,
1761                                                &Lost);
1762   // We allow precision lost but not overflow or underflow
1763   if (Status != APFloat::opOK &&
1764       Lost &&
1765       ((Status & APFloat::opOverflow)  != 0 ||
1766        (Status & APFloat::opUnderflow) != 0)) {
1767     return false;
1768   }
1769 
1770   return true;
1771 }
1772 
1773 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1774   return isUIntN(Size, Val) || isIntN(Size, Val);
1775 }
1776 
1777 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1778   if (VT.getScalarType() == MVT::i16) {
1779     // FP immediate values are broken.
1780     return isInlinableIntLiteral(Val);
1781   }
1782 
1783   // f16/v2f16 operands work correctly for all values.
1784   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1785 }
1786 
1787 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1788 
1789   // This is a hack to enable named inline values like
1790   // shared_base with both 32-bit and 64-bit operands.
1791   // Note that these values are defined as
1792   // 32-bit operands only.
1793   if (isInlineValue()) {
1794     return true;
1795   }
1796 
1797   if (!isImmTy(ImmTyNone)) {
1798     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1799     return false;
1800   }
1801   // TODO: We should avoid using host float here. It would be better to
1802   // check the float bit values which is what a few other places do.
1803   // We've had bot failures before due to weird NaN support on mips hosts.
1804 
1805   APInt Literal(64, Imm.Val);
1806 
1807   if (Imm.IsFPImm) { // We got fp literal token
1808     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1809       return AMDGPU::isInlinableLiteral64(Imm.Val,
1810                                           AsmParser->hasInv2PiInlineImm());
1811     }
1812 
1813     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1814     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1815       return false;
1816 
1817     if (type.getScalarSizeInBits() == 16) {
1818       return isInlineableLiteralOp16(
1819         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1820         type, AsmParser->hasInv2PiInlineImm());
1821     }
1822 
1823     // Check if single precision literal is inlinable
1824     return AMDGPU::isInlinableLiteral32(
1825       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1826       AsmParser->hasInv2PiInlineImm());
1827   }
1828 
1829   // We got int literal token.
1830   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1831     return AMDGPU::isInlinableLiteral64(Imm.Val,
1832                                         AsmParser->hasInv2PiInlineImm());
1833   }
1834 
1835   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1836     return false;
1837   }
1838 
1839   if (type.getScalarSizeInBits() == 16) {
1840     return isInlineableLiteralOp16(
1841       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1842       type, AsmParser->hasInv2PiInlineImm());
1843   }
1844 
1845   return AMDGPU::isInlinableLiteral32(
1846     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1847     AsmParser->hasInv2PiInlineImm());
1848 }
1849 
1850 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1851   // Check that this immediate can be added as literal
1852   if (!isImmTy(ImmTyNone)) {
1853     return false;
1854   }
1855 
1856   if (!Imm.IsFPImm) {
1857     // We got int literal token.
1858 
1859     if (type == MVT::f64 && hasFPModifiers()) {
1860       // Cannot apply fp modifiers to int literals preserving the same semantics
1861       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1862       // disable these cases.
1863       return false;
1864     }
1865 
1866     unsigned Size = type.getSizeInBits();
1867     if (Size == 64)
1868       Size = 32;
1869 
1870     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1871     // types.
1872     return isSafeTruncation(Imm.Val, Size);
1873   }
1874 
1875   // We got fp literal token
1876   if (type == MVT::f64) { // Expected 64-bit fp operand
1877     // We would set low 64-bits of literal to zeroes but we accept this literals
1878     return true;
1879   }
1880 
1881   if (type == MVT::i64) { // Expected 64-bit int operand
1882     // We don't allow fp literals in 64-bit integer instructions. It is
1883     // unclear how we should encode them.
1884     return false;
1885   }
1886 
1887   // We allow fp literals with f16x2 operands assuming that the specified
1888   // literal goes into the lower half and the upper half is zero. We also
1889   // require that the literal may be losslesly converted to f16.
1890   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1891                      (type == MVT::v2i16)? MVT::i16 :
1892                      (type == MVT::v2f32)? MVT::f32 : type;
1893 
1894   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1895   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1896 }
1897 
1898 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1899   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1900 }
1901 
1902 bool AMDGPUOperand::isVRegWithInputMods() const {
1903   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1904          // GFX90A allows DPP on 64-bit operands.
1905          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1906           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1907 }
1908 
1909 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1910   if (AsmParser->isVI())
1911     return isVReg32();
1912   else if (AsmParser->isGFX9Plus())
1913     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1914   else
1915     return false;
1916 }
1917 
1918 bool AMDGPUOperand::isSDWAFP16Operand() const {
1919   return isSDWAOperand(MVT::f16);
1920 }
1921 
1922 bool AMDGPUOperand::isSDWAFP32Operand() const {
1923   return isSDWAOperand(MVT::f32);
1924 }
1925 
1926 bool AMDGPUOperand::isSDWAInt16Operand() const {
1927   return isSDWAOperand(MVT::i16);
1928 }
1929 
1930 bool AMDGPUOperand::isSDWAInt32Operand() const {
1931   return isSDWAOperand(MVT::i32);
1932 }
1933 
1934 bool AMDGPUOperand::isBoolReg() const {
1935   auto FB = AsmParser->getFeatureBits();
1936   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1937                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1938 }
1939 
1940 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1941 {
1942   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1943   assert(Size == 2 || Size == 4 || Size == 8);
1944 
1945   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1946 
1947   if (Imm.Mods.Abs) {
1948     Val &= ~FpSignMask;
1949   }
1950   if (Imm.Mods.Neg) {
1951     Val ^= FpSignMask;
1952   }
1953 
1954   return Val;
1955 }
1956 
1957 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1958   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1959                              Inst.getNumOperands())) {
1960     addLiteralImmOperand(Inst, Imm.Val,
1961                          ApplyModifiers &
1962                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1963   } else {
1964     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1965     Inst.addOperand(MCOperand::createImm(Imm.Val));
1966     setImmKindNone();
1967   }
1968 }
1969 
1970 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1971   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1972   auto OpNum = Inst.getNumOperands();
1973   // Check that this operand accepts literals
1974   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1975 
1976   if (ApplyModifiers) {
1977     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1978     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1979     Val = applyInputFPModifiers(Val, Size);
1980   }
1981 
1982   APInt Literal(64, Val);
1983   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1984 
1985   if (Imm.IsFPImm) { // We got fp literal token
1986     switch (OpTy) {
1987     case AMDGPU::OPERAND_REG_IMM_INT64:
1988     case AMDGPU::OPERAND_REG_IMM_FP64:
1989     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1990     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1991     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1992       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1993                                        AsmParser->hasInv2PiInlineImm())) {
1994         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1995         setImmKindConst();
1996         return;
1997       }
1998 
1999       // Non-inlineable
2000       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2001         // For fp operands we check if low 32 bits are zeros
2002         if (Literal.getLoBits(32) != 0) {
2003           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2004           "Can't encode literal as exact 64-bit floating-point operand. "
2005           "Low 32-bits will be set to zero");
2006         }
2007 
2008         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2009         setImmKindLiteral();
2010         return;
2011       }
2012 
2013       // We don't allow fp literals in 64-bit integer instructions. It is
2014       // unclear how we should encode them. This case should be checked earlier
2015       // in predicate methods (isLiteralImm())
2016       llvm_unreachable("fp literal in 64-bit integer instruction.");
2017 
2018     case AMDGPU::OPERAND_REG_IMM_INT32:
2019     case AMDGPU::OPERAND_REG_IMM_FP32:
2020     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2021     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2022     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2023     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2024     case AMDGPU::OPERAND_REG_IMM_INT16:
2025     case AMDGPU::OPERAND_REG_IMM_FP16:
2026     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2027     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2028     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2029     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2030     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2031     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2032     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2033     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2034     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2035     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2036     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2037     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2038     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2039     case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2040       bool lost;
2041       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2042       // Convert literal to single precision
2043       FPLiteral.convert(*getOpFltSemantics(OpTy),
2044                         APFloat::rmNearestTiesToEven, &lost);
2045       // We allow precision lost but not overflow or underflow. This should be
2046       // checked earlier in isLiteralImm()
2047 
2048       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2049       Inst.addOperand(MCOperand::createImm(ImmVal));
2050       setImmKindLiteral();
2051       return;
2052     }
2053     default:
2054       llvm_unreachable("invalid operand size");
2055     }
2056 
2057     return;
2058   }
2059 
2060   // We got int literal token.
2061   // Only sign extend inline immediates.
2062   switch (OpTy) {
2063   case AMDGPU::OPERAND_REG_IMM_INT32:
2064   case AMDGPU::OPERAND_REG_IMM_FP32:
2065   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2066   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2067   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2068   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2069   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2070   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2071   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2072   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2073   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2074   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2075     if (isSafeTruncation(Val, 32) &&
2076         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2077                                      AsmParser->hasInv2PiInlineImm())) {
2078       Inst.addOperand(MCOperand::createImm(Val));
2079       setImmKindConst();
2080       return;
2081     }
2082 
2083     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2084     setImmKindLiteral();
2085     return;
2086 
2087   case AMDGPU::OPERAND_REG_IMM_INT64:
2088   case AMDGPU::OPERAND_REG_IMM_FP64:
2089   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2090   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2091   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2092     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2093       Inst.addOperand(MCOperand::createImm(Val));
2094       setImmKindConst();
2095       return;
2096     }
2097 
2098     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2099     setImmKindLiteral();
2100     return;
2101 
2102   case AMDGPU::OPERAND_REG_IMM_INT16:
2103   case AMDGPU::OPERAND_REG_IMM_FP16:
2104   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2105   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2106   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2107   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2108     if (isSafeTruncation(Val, 16) &&
2109         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2110                                      AsmParser->hasInv2PiInlineImm())) {
2111       Inst.addOperand(MCOperand::createImm(Val));
2112       setImmKindConst();
2113       return;
2114     }
2115 
2116     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2117     setImmKindLiteral();
2118     return;
2119 
2120   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2121   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2122   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2123   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2124     assert(isSafeTruncation(Val, 16));
2125     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2126                                         AsmParser->hasInv2PiInlineImm()));
2127 
2128     Inst.addOperand(MCOperand::createImm(Val));
2129     return;
2130   }
2131   default:
2132     llvm_unreachable("invalid operand size");
2133   }
2134 }
2135 
2136 template <unsigned Bitwidth>
2137 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2138   APInt Literal(64, Imm.Val);
2139   setImmKindNone();
2140 
2141   if (!Imm.IsFPImm) {
2142     // We got int literal token.
2143     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2144     return;
2145   }
2146 
2147   bool Lost;
2148   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2149   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2150                     APFloat::rmNearestTiesToEven, &Lost);
2151   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2152 }
2153 
2154 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2155   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2156 }
2157 
2158 static bool isInlineValue(unsigned Reg) {
2159   switch (Reg) {
2160   case AMDGPU::SRC_SHARED_BASE:
2161   case AMDGPU::SRC_SHARED_LIMIT:
2162   case AMDGPU::SRC_PRIVATE_BASE:
2163   case AMDGPU::SRC_PRIVATE_LIMIT:
2164   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2165     return true;
2166   case AMDGPU::SRC_VCCZ:
2167   case AMDGPU::SRC_EXECZ:
2168   case AMDGPU::SRC_SCC:
2169     return true;
2170   case AMDGPU::SGPR_NULL:
2171     return true;
2172   default:
2173     return false;
2174   }
2175 }
2176 
2177 bool AMDGPUOperand::isInlineValue() const {
2178   return isRegKind() && ::isInlineValue(getReg());
2179 }
2180 
2181 //===----------------------------------------------------------------------===//
2182 // AsmParser
2183 //===----------------------------------------------------------------------===//
2184 
2185 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2186   if (Is == IS_VGPR) {
2187     switch (RegWidth) {
2188       default: return -1;
2189       case 1: return AMDGPU::VGPR_32RegClassID;
2190       case 2: return AMDGPU::VReg_64RegClassID;
2191       case 3: return AMDGPU::VReg_96RegClassID;
2192       case 4: return AMDGPU::VReg_128RegClassID;
2193       case 5: return AMDGPU::VReg_160RegClassID;
2194       case 6: return AMDGPU::VReg_192RegClassID;
2195       case 7: return AMDGPU::VReg_224RegClassID;
2196       case 8: return AMDGPU::VReg_256RegClassID;
2197       case 16: return AMDGPU::VReg_512RegClassID;
2198       case 32: return AMDGPU::VReg_1024RegClassID;
2199     }
2200   } else if (Is == IS_TTMP) {
2201     switch (RegWidth) {
2202       default: return -1;
2203       case 1: return AMDGPU::TTMP_32RegClassID;
2204       case 2: return AMDGPU::TTMP_64RegClassID;
2205       case 4: return AMDGPU::TTMP_128RegClassID;
2206       case 8: return AMDGPU::TTMP_256RegClassID;
2207       case 16: return AMDGPU::TTMP_512RegClassID;
2208     }
2209   } else if (Is == IS_SGPR) {
2210     switch (RegWidth) {
2211       default: return -1;
2212       case 1: return AMDGPU::SGPR_32RegClassID;
2213       case 2: return AMDGPU::SGPR_64RegClassID;
2214       case 3: return AMDGPU::SGPR_96RegClassID;
2215       case 4: return AMDGPU::SGPR_128RegClassID;
2216       case 5: return AMDGPU::SGPR_160RegClassID;
2217       case 6: return AMDGPU::SGPR_192RegClassID;
2218       case 7: return AMDGPU::SGPR_224RegClassID;
2219       case 8: return AMDGPU::SGPR_256RegClassID;
2220       case 16: return AMDGPU::SGPR_512RegClassID;
2221     }
2222   } else if (Is == IS_AGPR) {
2223     switch (RegWidth) {
2224       default: return -1;
2225       case 1: return AMDGPU::AGPR_32RegClassID;
2226       case 2: return AMDGPU::AReg_64RegClassID;
2227       case 3: return AMDGPU::AReg_96RegClassID;
2228       case 4: return AMDGPU::AReg_128RegClassID;
2229       case 5: return AMDGPU::AReg_160RegClassID;
2230       case 6: return AMDGPU::AReg_192RegClassID;
2231       case 7: return AMDGPU::AReg_224RegClassID;
2232       case 8: return AMDGPU::AReg_256RegClassID;
2233       case 16: return AMDGPU::AReg_512RegClassID;
2234       case 32: return AMDGPU::AReg_1024RegClassID;
2235     }
2236   }
2237   return -1;
2238 }
2239 
2240 static unsigned getSpecialRegForName(StringRef RegName) {
2241   return StringSwitch<unsigned>(RegName)
2242     .Case("exec", AMDGPU::EXEC)
2243     .Case("vcc", AMDGPU::VCC)
2244     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2245     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2246     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2247     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2248     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2249     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2250     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2251     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2252     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2253     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2254     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2255     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2256     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2257     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2258     .Case("m0", AMDGPU::M0)
2259     .Case("vccz", AMDGPU::SRC_VCCZ)
2260     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2261     .Case("execz", AMDGPU::SRC_EXECZ)
2262     .Case("src_execz", AMDGPU::SRC_EXECZ)
2263     .Case("scc", AMDGPU::SRC_SCC)
2264     .Case("src_scc", AMDGPU::SRC_SCC)
2265     .Case("tba", AMDGPU::TBA)
2266     .Case("tma", AMDGPU::TMA)
2267     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2268     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2269     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2270     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2271     .Case("vcc_lo", AMDGPU::VCC_LO)
2272     .Case("vcc_hi", AMDGPU::VCC_HI)
2273     .Case("exec_lo", AMDGPU::EXEC_LO)
2274     .Case("exec_hi", AMDGPU::EXEC_HI)
2275     .Case("tma_lo", AMDGPU::TMA_LO)
2276     .Case("tma_hi", AMDGPU::TMA_HI)
2277     .Case("tba_lo", AMDGPU::TBA_LO)
2278     .Case("tba_hi", AMDGPU::TBA_HI)
2279     .Case("pc", AMDGPU::PC_REG)
2280     .Case("null", AMDGPU::SGPR_NULL)
2281     .Default(AMDGPU::NoRegister);
2282 }
2283 
2284 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2285                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2286   auto R = parseRegister();
2287   if (!R) return true;
2288   assert(R->isReg());
2289   RegNo = R->getReg();
2290   StartLoc = R->getStartLoc();
2291   EndLoc = R->getEndLoc();
2292   return false;
2293 }
2294 
2295 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2296                                     SMLoc &EndLoc) {
2297   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2298 }
2299 
2300 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2301                                                        SMLoc &StartLoc,
2302                                                        SMLoc &EndLoc) {
2303   bool Result =
2304       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2305   bool PendingErrors = getParser().hasPendingError();
2306   getParser().clearPendingErrors();
2307   if (PendingErrors)
2308     return MatchOperand_ParseFail;
2309   if (Result)
2310     return MatchOperand_NoMatch;
2311   return MatchOperand_Success;
2312 }
2313 
2314 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2315                                             RegisterKind RegKind, unsigned Reg1,
2316                                             SMLoc Loc) {
2317   switch (RegKind) {
2318   case IS_SPECIAL:
2319     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2320       Reg = AMDGPU::EXEC;
2321       RegWidth = 2;
2322       return true;
2323     }
2324     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2325       Reg = AMDGPU::FLAT_SCR;
2326       RegWidth = 2;
2327       return true;
2328     }
2329     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2330       Reg = AMDGPU::XNACK_MASK;
2331       RegWidth = 2;
2332       return true;
2333     }
2334     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2335       Reg = AMDGPU::VCC;
2336       RegWidth = 2;
2337       return true;
2338     }
2339     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2340       Reg = AMDGPU::TBA;
2341       RegWidth = 2;
2342       return true;
2343     }
2344     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2345       Reg = AMDGPU::TMA;
2346       RegWidth = 2;
2347       return true;
2348     }
2349     Error(Loc, "register does not fit in the list");
2350     return false;
2351   case IS_VGPR:
2352   case IS_SGPR:
2353   case IS_AGPR:
2354   case IS_TTMP:
2355     if (Reg1 != Reg + RegWidth) {
2356       Error(Loc, "registers in a list must have consecutive indices");
2357       return false;
2358     }
2359     RegWidth++;
2360     return true;
2361   default:
2362     llvm_unreachable("unexpected register kind");
2363   }
2364 }
2365 
2366 struct RegInfo {
2367   StringLiteral Name;
2368   RegisterKind Kind;
2369 };
2370 
2371 static constexpr RegInfo RegularRegisters[] = {
2372   {{"v"},    IS_VGPR},
2373   {{"s"},    IS_SGPR},
2374   {{"ttmp"}, IS_TTMP},
2375   {{"acc"},  IS_AGPR},
2376   {{"a"},    IS_AGPR},
2377 };
2378 
2379 static bool isRegularReg(RegisterKind Kind) {
2380   return Kind == IS_VGPR ||
2381          Kind == IS_SGPR ||
2382          Kind == IS_TTMP ||
2383          Kind == IS_AGPR;
2384 }
2385 
2386 static const RegInfo* getRegularRegInfo(StringRef Str) {
2387   for (const RegInfo &Reg : RegularRegisters)
2388     if (Str.startswith(Reg.Name))
2389       return &Reg;
2390   return nullptr;
2391 }
2392 
2393 static bool getRegNum(StringRef Str, unsigned& Num) {
2394   return !Str.getAsInteger(10, Num);
2395 }
2396 
2397 bool
2398 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2399                             const AsmToken &NextToken) const {
2400 
2401   // A list of consecutive registers: [s0,s1,s2,s3]
2402   if (Token.is(AsmToken::LBrac))
2403     return true;
2404 
2405   if (!Token.is(AsmToken::Identifier))
2406     return false;
2407 
2408   // A single register like s0 or a range of registers like s[0:1]
2409 
2410   StringRef Str = Token.getString();
2411   const RegInfo *Reg = getRegularRegInfo(Str);
2412   if (Reg) {
2413     StringRef RegName = Reg->Name;
2414     StringRef RegSuffix = Str.substr(RegName.size());
2415     if (!RegSuffix.empty()) {
2416       unsigned Num;
2417       // A single register with an index: rXX
2418       if (getRegNum(RegSuffix, Num))
2419         return true;
2420     } else {
2421       // A range of registers: r[XX:YY].
2422       if (NextToken.is(AsmToken::LBrac))
2423         return true;
2424     }
2425   }
2426 
2427   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2428 }
2429 
2430 bool
2431 AMDGPUAsmParser::isRegister()
2432 {
2433   return isRegister(getToken(), peekToken());
2434 }
2435 
2436 unsigned
2437 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2438                                unsigned RegNum,
2439                                unsigned RegWidth,
2440                                SMLoc Loc) {
2441 
2442   assert(isRegularReg(RegKind));
2443 
2444   unsigned AlignSize = 1;
2445   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2446     // SGPR and TTMP registers must be aligned.
2447     // Max required alignment is 4 dwords.
2448     AlignSize = std::min(RegWidth, 4u);
2449   }
2450 
2451   if (RegNum % AlignSize != 0) {
2452     Error(Loc, "invalid register alignment");
2453     return AMDGPU::NoRegister;
2454   }
2455 
2456   unsigned RegIdx = RegNum / AlignSize;
2457   int RCID = getRegClass(RegKind, RegWidth);
2458   if (RCID == -1) {
2459     Error(Loc, "invalid or unsupported register size");
2460     return AMDGPU::NoRegister;
2461   }
2462 
2463   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2464   const MCRegisterClass RC = TRI->getRegClass(RCID);
2465   if (RegIdx >= RC.getNumRegs()) {
2466     Error(Loc, "register index is out of range");
2467     return AMDGPU::NoRegister;
2468   }
2469 
2470   return RC.getRegister(RegIdx);
2471 }
2472 
2473 bool
2474 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2475   int64_t RegLo, RegHi;
2476   if (!skipToken(AsmToken::LBrac, "missing register index"))
2477     return false;
2478 
2479   SMLoc FirstIdxLoc = getLoc();
2480   SMLoc SecondIdxLoc;
2481 
2482   if (!parseExpr(RegLo))
2483     return false;
2484 
2485   if (trySkipToken(AsmToken::Colon)) {
2486     SecondIdxLoc = getLoc();
2487     if (!parseExpr(RegHi))
2488       return false;
2489   } else {
2490     RegHi = RegLo;
2491   }
2492 
2493   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2494     return false;
2495 
2496   if (!isUInt<32>(RegLo)) {
2497     Error(FirstIdxLoc, "invalid register index");
2498     return false;
2499   }
2500 
2501   if (!isUInt<32>(RegHi)) {
2502     Error(SecondIdxLoc, "invalid register index");
2503     return false;
2504   }
2505 
2506   if (RegLo > RegHi) {
2507     Error(FirstIdxLoc, "first register index should not exceed second index");
2508     return false;
2509   }
2510 
2511   Num = static_cast<unsigned>(RegLo);
2512   Width = (RegHi - RegLo) + 1;
2513   return true;
2514 }
2515 
2516 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2517                                           unsigned &RegNum, unsigned &RegWidth,
2518                                           SmallVectorImpl<AsmToken> &Tokens) {
2519   assert(isToken(AsmToken::Identifier));
2520   unsigned Reg = getSpecialRegForName(getTokenStr());
2521   if (Reg) {
2522     RegNum = 0;
2523     RegWidth = 1;
2524     RegKind = IS_SPECIAL;
2525     Tokens.push_back(getToken());
2526     lex(); // skip register name
2527   }
2528   return Reg;
2529 }
2530 
2531 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2532                                           unsigned &RegNum, unsigned &RegWidth,
2533                                           SmallVectorImpl<AsmToken> &Tokens) {
2534   assert(isToken(AsmToken::Identifier));
2535   StringRef RegName = getTokenStr();
2536   auto Loc = getLoc();
2537 
2538   const RegInfo *RI = getRegularRegInfo(RegName);
2539   if (!RI) {
2540     Error(Loc, "invalid register name");
2541     return AMDGPU::NoRegister;
2542   }
2543 
2544   Tokens.push_back(getToken());
2545   lex(); // skip register name
2546 
2547   RegKind = RI->Kind;
2548   StringRef RegSuffix = RegName.substr(RI->Name.size());
2549   if (!RegSuffix.empty()) {
2550     // Single 32-bit register: vXX.
2551     if (!getRegNum(RegSuffix, RegNum)) {
2552       Error(Loc, "invalid register index");
2553       return AMDGPU::NoRegister;
2554     }
2555     RegWidth = 1;
2556   } else {
2557     // Range of registers: v[XX:YY]. ":YY" is optional.
2558     if (!ParseRegRange(RegNum, RegWidth))
2559       return AMDGPU::NoRegister;
2560   }
2561 
2562   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2563 }
2564 
2565 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2566                                        unsigned &RegWidth,
2567                                        SmallVectorImpl<AsmToken> &Tokens) {
2568   unsigned Reg = AMDGPU::NoRegister;
2569   auto ListLoc = getLoc();
2570 
2571   if (!skipToken(AsmToken::LBrac,
2572                  "expected a register or a list of registers")) {
2573     return AMDGPU::NoRegister;
2574   }
2575 
2576   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2577 
2578   auto Loc = getLoc();
2579   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2580     return AMDGPU::NoRegister;
2581   if (RegWidth != 1) {
2582     Error(Loc, "expected a single 32-bit register");
2583     return AMDGPU::NoRegister;
2584   }
2585 
2586   for (; trySkipToken(AsmToken::Comma); ) {
2587     RegisterKind NextRegKind;
2588     unsigned NextReg, NextRegNum, NextRegWidth;
2589     Loc = getLoc();
2590 
2591     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2592                              NextRegNum, NextRegWidth,
2593                              Tokens)) {
2594       return AMDGPU::NoRegister;
2595     }
2596     if (NextRegWidth != 1) {
2597       Error(Loc, "expected a single 32-bit register");
2598       return AMDGPU::NoRegister;
2599     }
2600     if (NextRegKind != RegKind) {
2601       Error(Loc, "registers in a list must be of the same kind");
2602       return AMDGPU::NoRegister;
2603     }
2604     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2605       return AMDGPU::NoRegister;
2606   }
2607 
2608   if (!skipToken(AsmToken::RBrac,
2609                  "expected a comma or a closing square bracket")) {
2610     return AMDGPU::NoRegister;
2611   }
2612 
2613   if (isRegularReg(RegKind))
2614     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2615 
2616   return Reg;
2617 }
2618 
2619 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2620                                           unsigned &RegNum, unsigned &RegWidth,
2621                                           SmallVectorImpl<AsmToken> &Tokens) {
2622   auto Loc = getLoc();
2623   Reg = AMDGPU::NoRegister;
2624 
2625   if (isToken(AsmToken::Identifier)) {
2626     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2627     if (Reg == AMDGPU::NoRegister)
2628       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2629   } else {
2630     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2631   }
2632 
2633   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2634   if (Reg == AMDGPU::NoRegister) {
2635     assert(Parser.hasPendingError());
2636     return false;
2637   }
2638 
2639   if (!subtargetHasRegister(*TRI, Reg)) {
2640     if (Reg == AMDGPU::SGPR_NULL) {
2641       Error(Loc, "'null' operand is not supported on this GPU");
2642     } else {
2643       Error(Loc, "register not available on this GPU");
2644     }
2645     return false;
2646   }
2647 
2648   return true;
2649 }
2650 
2651 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2652                                           unsigned &RegNum, unsigned &RegWidth,
2653                                           bool RestoreOnFailure /*=false*/) {
2654   Reg = AMDGPU::NoRegister;
2655 
2656   SmallVector<AsmToken, 1> Tokens;
2657   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2658     if (RestoreOnFailure) {
2659       while (!Tokens.empty()) {
2660         getLexer().UnLex(Tokens.pop_back_val());
2661       }
2662     }
2663     return true;
2664   }
2665   return false;
2666 }
2667 
2668 Optional<StringRef>
2669 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2670   switch (RegKind) {
2671   case IS_VGPR:
2672     return StringRef(".amdgcn.next_free_vgpr");
2673   case IS_SGPR:
2674     return StringRef(".amdgcn.next_free_sgpr");
2675   default:
2676     return None;
2677   }
2678 }
2679 
2680 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2681   auto SymbolName = getGprCountSymbolName(RegKind);
2682   assert(SymbolName && "initializing invalid register kind");
2683   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2684   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2685 }
2686 
2687 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2688                                             unsigned DwordRegIndex,
2689                                             unsigned RegWidth) {
2690   // Symbols are only defined for GCN targets
2691   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2692     return true;
2693 
2694   auto SymbolName = getGprCountSymbolName(RegKind);
2695   if (!SymbolName)
2696     return true;
2697   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2698 
2699   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2700   int64_t OldCount;
2701 
2702   if (!Sym->isVariable())
2703     return !Error(getLoc(),
2704                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2705   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2706     return !Error(
2707         getLoc(),
2708         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2709 
2710   if (OldCount <= NewMax)
2711     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2712 
2713   return true;
2714 }
2715 
2716 std::unique_ptr<AMDGPUOperand>
2717 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2718   const auto &Tok = getToken();
2719   SMLoc StartLoc = Tok.getLoc();
2720   SMLoc EndLoc = Tok.getEndLoc();
2721   RegisterKind RegKind;
2722   unsigned Reg, RegNum, RegWidth;
2723 
2724   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2725     return nullptr;
2726   }
2727   if (isHsaAbiVersion3Or4(&getSTI())) {
2728     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2729       return nullptr;
2730   } else
2731     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2732   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2733 }
2734 
2735 OperandMatchResultTy
2736 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2737   // TODO: add syntactic sugar for 1/(2*PI)
2738 
2739   assert(!isRegister());
2740   assert(!isModifier());
2741 
2742   const auto& Tok = getToken();
2743   const auto& NextTok = peekToken();
2744   bool IsReal = Tok.is(AsmToken::Real);
2745   SMLoc S = getLoc();
2746   bool Negate = false;
2747 
2748   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2749     lex();
2750     IsReal = true;
2751     Negate = true;
2752   }
2753 
2754   if (IsReal) {
2755     // Floating-point expressions are not supported.
2756     // Can only allow floating-point literals with an
2757     // optional sign.
2758 
2759     StringRef Num = getTokenStr();
2760     lex();
2761 
2762     APFloat RealVal(APFloat::IEEEdouble());
2763     auto roundMode = APFloat::rmNearestTiesToEven;
2764     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2765       return MatchOperand_ParseFail;
2766     }
2767     if (Negate)
2768       RealVal.changeSign();
2769 
2770     Operands.push_back(
2771       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2772                                AMDGPUOperand::ImmTyNone, true));
2773 
2774     return MatchOperand_Success;
2775 
2776   } else {
2777     int64_t IntVal;
2778     const MCExpr *Expr;
2779     SMLoc S = getLoc();
2780 
2781     if (HasSP3AbsModifier) {
2782       // This is a workaround for handling expressions
2783       // as arguments of SP3 'abs' modifier, for example:
2784       //     |1.0|
2785       //     |-1|
2786       //     |1+x|
2787       // This syntax is not compatible with syntax of standard
2788       // MC expressions (due to the trailing '|').
2789       SMLoc EndLoc;
2790       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2791         return MatchOperand_ParseFail;
2792     } else {
2793       if (Parser.parseExpression(Expr))
2794         return MatchOperand_ParseFail;
2795     }
2796 
2797     if (Expr->evaluateAsAbsolute(IntVal)) {
2798       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2799     } else {
2800       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2801     }
2802 
2803     return MatchOperand_Success;
2804   }
2805 
2806   return MatchOperand_NoMatch;
2807 }
2808 
2809 OperandMatchResultTy
2810 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2811   if (!isRegister())
2812     return MatchOperand_NoMatch;
2813 
2814   if (auto R = parseRegister()) {
2815     assert(R->isReg());
2816     Operands.push_back(std::move(R));
2817     return MatchOperand_Success;
2818   }
2819   return MatchOperand_ParseFail;
2820 }
2821 
2822 OperandMatchResultTy
2823 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2824   auto res = parseReg(Operands);
2825   if (res != MatchOperand_NoMatch) {
2826     return res;
2827   } else if (isModifier()) {
2828     return MatchOperand_NoMatch;
2829   } else {
2830     return parseImm(Operands, HasSP3AbsMod);
2831   }
2832 }
2833 
2834 bool
2835 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2836   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2837     const auto &str = Token.getString();
2838     return str == "abs" || str == "neg" || str == "sext";
2839   }
2840   return false;
2841 }
2842 
2843 bool
2844 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2845   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2846 }
2847 
2848 bool
2849 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2850   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2851 }
2852 
2853 bool
2854 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2855   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2856 }
2857 
2858 // Check if this is an operand modifier or an opcode modifier
2859 // which may look like an expression but it is not. We should
2860 // avoid parsing these modifiers as expressions. Currently
2861 // recognized sequences are:
2862 //   |...|
2863 //   abs(...)
2864 //   neg(...)
2865 //   sext(...)
2866 //   -reg
2867 //   -|...|
2868 //   -abs(...)
2869 //   name:...
2870 // Note that simple opcode modifiers like 'gds' may be parsed as
2871 // expressions; this is a special case. See getExpressionAsToken.
2872 //
2873 bool
2874 AMDGPUAsmParser::isModifier() {
2875 
2876   AsmToken Tok = getToken();
2877   AsmToken NextToken[2];
2878   peekTokens(NextToken);
2879 
2880   return isOperandModifier(Tok, NextToken[0]) ||
2881          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2882          isOpcodeModifierWithVal(Tok, NextToken[0]);
2883 }
2884 
2885 // Check if the current token is an SP3 'neg' modifier.
2886 // Currently this modifier is allowed in the following context:
2887 //
2888 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2889 // 2. Before an 'abs' modifier: -abs(...)
2890 // 3. Before an SP3 'abs' modifier: -|...|
2891 //
2892 // In all other cases "-" is handled as a part
2893 // of an expression that follows the sign.
2894 //
2895 // Note: When "-" is followed by an integer literal,
2896 // this is interpreted as integer negation rather
2897 // than a floating-point NEG modifier applied to N.
2898 // Beside being contr-intuitive, such use of floating-point
2899 // NEG modifier would have resulted in different meaning
2900 // of integer literals used with VOP1/2/C and VOP3,
2901 // for example:
2902 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2903 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2904 // Negative fp literals with preceding "-" are
2905 // handled likewise for unifomtity
2906 //
2907 bool
2908 AMDGPUAsmParser::parseSP3NegModifier() {
2909 
2910   AsmToken NextToken[2];
2911   peekTokens(NextToken);
2912 
2913   if (isToken(AsmToken::Minus) &&
2914       (isRegister(NextToken[0], NextToken[1]) ||
2915        NextToken[0].is(AsmToken::Pipe) ||
2916        isId(NextToken[0], "abs"))) {
2917     lex();
2918     return true;
2919   }
2920 
2921   return false;
2922 }
2923 
2924 OperandMatchResultTy
2925 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2926                                               bool AllowImm) {
2927   bool Neg, SP3Neg;
2928   bool Abs, SP3Abs;
2929   SMLoc Loc;
2930 
2931   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2932   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2933     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2934     return MatchOperand_ParseFail;
2935   }
2936 
2937   SP3Neg = parseSP3NegModifier();
2938 
2939   Loc = getLoc();
2940   Neg = trySkipId("neg");
2941   if (Neg && SP3Neg) {
2942     Error(Loc, "expected register or immediate");
2943     return MatchOperand_ParseFail;
2944   }
2945   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2946     return MatchOperand_ParseFail;
2947 
2948   Abs = trySkipId("abs");
2949   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2950     return MatchOperand_ParseFail;
2951 
2952   Loc = getLoc();
2953   SP3Abs = trySkipToken(AsmToken::Pipe);
2954   if (Abs && SP3Abs) {
2955     Error(Loc, "expected register or immediate");
2956     return MatchOperand_ParseFail;
2957   }
2958 
2959   OperandMatchResultTy Res;
2960   if (AllowImm) {
2961     Res = parseRegOrImm(Operands, SP3Abs);
2962   } else {
2963     Res = parseReg(Operands);
2964   }
2965   if (Res != MatchOperand_Success) {
2966     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2967   }
2968 
2969   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2970     return MatchOperand_ParseFail;
2971   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2972     return MatchOperand_ParseFail;
2973   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2974     return MatchOperand_ParseFail;
2975 
2976   AMDGPUOperand::Modifiers Mods;
2977   Mods.Abs = Abs || SP3Abs;
2978   Mods.Neg = Neg || SP3Neg;
2979 
2980   if (Mods.hasFPModifiers()) {
2981     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2982     if (Op.isExpr()) {
2983       Error(Op.getStartLoc(), "expected an absolute expression");
2984       return MatchOperand_ParseFail;
2985     }
2986     Op.setModifiers(Mods);
2987   }
2988   return MatchOperand_Success;
2989 }
2990 
2991 OperandMatchResultTy
2992 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2993                                                bool AllowImm) {
2994   bool Sext = trySkipId("sext");
2995   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2996     return MatchOperand_ParseFail;
2997 
2998   OperandMatchResultTy Res;
2999   if (AllowImm) {
3000     Res = parseRegOrImm(Operands);
3001   } else {
3002     Res = parseReg(Operands);
3003   }
3004   if (Res != MatchOperand_Success) {
3005     return Sext? MatchOperand_ParseFail : Res;
3006   }
3007 
3008   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3009     return MatchOperand_ParseFail;
3010 
3011   AMDGPUOperand::Modifiers Mods;
3012   Mods.Sext = Sext;
3013 
3014   if (Mods.hasIntModifiers()) {
3015     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3016     if (Op.isExpr()) {
3017       Error(Op.getStartLoc(), "expected an absolute expression");
3018       return MatchOperand_ParseFail;
3019     }
3020     Op.setModifiers(Mods);
3021   }
3022 
3023   return MatchOperand_Success;
3024 }
3025 
3026 OperandMatchResultTy
3027 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3028   return parseRegOrImmWithFPInputMods(Operands, false);
3029 }
3030 
3031 OperandMatchResultTy
3032 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3033   return parseRegOrImmWithIntInputMods(Operands, false);
3034 }
3035 
3036 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3037   auto Loc = getLoc();
3038   if (trySkipId("off")) {
3039     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3040                                                 AMDGPUOperand::ImmTyOff, false));
3041     return MatchOperand_Success;
3042   }
3043 
3044   if (!isRegister())
3045     return MatchOperand_NoMatch;
3046 
3047   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3048   if (Reg) {
3049     Operands.push_back(std::move(Reg));
3050     return MatchOperand_Success;
3051   }
3052 
3053   return MatchOperand_ParseFail;
3054 
3055 }
3056 
3057 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3058   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3059 
3060   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3061       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3062       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3063       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3064     return Match_InvalidOperand;
3065 
3066   if ((TSFlags & SIInstrFlags::VOP3) &&
3067       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3068       getForcedEncodingSize() != 64)
3069     return Match_PreferE32;
3070 
3071   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3072       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3073     // v_mac_f32/16 allow only dst_sel == DWORD;
3074     auto OpNum =
3075         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3076     const auto &Op = Inst.getOperand(OpNum);
3077     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3078       return Match_InvalidOperand;
3079     }
3080   }
3081 
3082   return Match_Success;
3083 }
3084 
3085 static ArrayRef<unsigned> getAllVariants() {
3086   static const unsigned Variants[] = {
3087     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3088     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3089   };
3090 
3091   return makeArrayRef(Variants);
3092 }
3093 
3094 // What asm variants we should check
3095 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3096   if (getForcedEncodingSize() == 32) {
3097     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3098     return makeArrayRef(Variants);
3099   }
3100 
3101   if (isForcedVOP3()) {
3102     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3103     return makeArrayRef(Variants);
3104   }
3105 
3106   if (isForcedSDWA()) {
3107     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3108                                         AMDGPUAsmVariants::SDWA9};
3109     return makeArrayRef(Variants);
3110   }
3111 
3112   if (isForcedDPP()) {
3113     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3114     return makeArrayRef(Variants);
3115   }
3116 
3117   return getAllVariants();
3118 }
3119 
3120 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3121   if (getForcedEncodingSize() == 32)
3122     return "e32";
3123 
3124   if (isForcedVOP3())
3125     return "e64";
3126 
3127   if (isForcedSDWA())
3128     return "sdwa";
3129 
3130   if (isForcedDPP())
3131     return "dpp";
3132 
3133   return "";
3134 }
3135 
3136 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3137   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3138   const unsigned Num = Desc.getNumImplicitUses();
3139   for (unsigned i = 0; i < Num; ++i) {
3140     unsigned Reg = Desc.ImplicitUses[i];
3141     switch (Reg) {
3142     case AMDGPU::FLAT_SCR:
3143     case AMDGPU::VCC:
3144     case AMDGPU::VCC_LO:
3145     case AMDGPU::VCC_HI:
3146     case AMDGPU::M0:
3147       return Reg;
3148     default:
3149       break;
3150     }
3151   }
3152   return AMDGPU::NoRegister;
3153 }
3154 
3155 // NB: This code is correct only when used to check constant
3156 // bus limitations because GFX7 support no f16 inline constants.
3157 // Note that there are no cases when a GFX7 opcode violates
3158 // constant bus limitations due to the use of an f16 constant.
3159 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3160                                        unsigned OpIdx) const {
3161   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3162 
3163   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3164     return false;
3165   }
3166 
3167   const MCOperand &MO = Inst.getOperand(OpIdx);
3168 
3169   int64_t Val = MO.getImm();
3170   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3171 
3172   switch (OpSize) { // expected operand size
3173   case 8:
3174     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3175   case 4:
3176     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3177   case 2: {
3178     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3179     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3180         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3181         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3182       return AMDGPU::isInlinableIntLiteral(Val);
3183 
3184     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3185         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3186         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3187       return AMDGPU::isInlinableIntLiteralV216(Val);
3188 
3189     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3190         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3191         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3192       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3193 
3194     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3195   }
3196   default:
3197     llvm_unreachable("invalid operand size");
3198   }
3199 }
3200 
3201 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3202   if (!isGFX10Plus())
3203     return 1;
3204 
3205   switch (Opcode) {
3206   // 64-bit shift instructions can use only one scalar value input
3207   case AMDGPU::V_LSHLREV_B64_e64:
3208   case AMDGPU::V_LSHLREV_B64_gfx10:
3209   case AMDGPU::V_LSHRREV_B64_e64:
3210   case AMDGPU::V_LSHRREV_B64_gfx10:
3211   case AMDGPU::V_ASHRREV_I64_e64:
3212   case AMDGPU::V_ASHRREV_I64_gfx10:
3213   case AMDGPU::V_LSHL_B64_e64:
3214   case AMDGPU::V_LSHR_B64_e64:
3215   case AMDGPU::V_ASHR_I64_e64:
3216     return 1;
3217   default:
3218     return 2;
3219   }
3220 }
3221 
3222 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3223   const MCOperand &MO = Inst.getOperand(OpIdx);
3224   if (MO.isImm()) {
3225     return !isInlineConstant(Inst, OpIdx);
3226   } else if (MO.isReg()) {
3227     auto Reg = MO.getReg();
3228     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3229     auto PReg = mc2PseudoReg(Reg);
3230     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3231   } else {
3232     return true;
3233   }
3234 }
3235 
3236 bool
3237 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3238                                                 const OperandVector &Operands) {
3239   const unsigned Opcode = Inst.getOpcode();
3240   const MCInstrDesc &Desc = MII.get(Opcode);
3241   unsigned LastSGPR = AMDGPU::NoRegister;
3242   unsigned ConstantBusUseCount = 0;
3243   unsigned NumLiterals = 0;
3244   unsigned LiteralSize;
3245 
3246   if (Desc.TSFlags &
3247       (SIInstrFlags::VOPC |
3248        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3249        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3250        SIInstrFlags::SDWA)) {
3251     // Check special imm operands (used by madmk, etc)
3252     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3253       ++ConstantBusUseCount;
3254     }
3255 
3256     SmallDenseSet<unsigned> SGPRsUsed;
3257     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3258     if (SGPRUsed != AMDGPU::NoRegister) {
3259       SGPRsUsed.insert(SGPRUsed);
3260       ++ConstantBusUseCount;
3261     }
3262 
3263     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3264     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3265     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3266 
3267     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3268 
3269     for (int OpIdx : OpIndices) {
3270       if (OpIdx == -1) break;
3271 
3272       const MCOperand &MO = Inst.getOperand(OpIdx);
3273       if (usesConstantBus(Inst, OpIdx)) {
3274         if (MO.isReg()) {
3275           LastSGPR = mc2PseudoReg(MO.getReg());
3276           // Pairs of registers with a partial intersections like these
3277           //   s0, s[0:1]
3278           //   flat_scratch_lo, flat_scratch
3279           //   flat_scratch_lo, flat_scratch_hi
3280           // are theoretically valid but they are disabled anyway.
3281           // Note that this code mimics SIInstrInfo::verifyInstruction
3282           if (!SGPRsUsed.count(LastSGPR)) {
3283             SGPRsUsed.insert(LastSGPR);
3284             ++ConstantBusUseCount;
3285           }
3286         } else { // Expression or a literal
3287 
3288           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3289             continue; // special operand like VINTERP attr_chan
3290 
3291           // An instruction may use only one literal.
3292           // This has been validated on the previous step.
3293           // See validateVOP3Literal.
3294           // This literal may be used as more than one operand.
3295           // If all these operands are of the same size,
3296           // this literal counts as one scalar value.
3297           // Otherwise it counts as 2 scalar values.
3298           // See "GFX10 Shader Programming", section 3.6.2.3.
3299 
3300           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3301           if (Size < 4) Size = 4;
3302 
3303           if (NumLiterals == 0) {
3304             NumLiterals = 1;
3305             LiteralSize = Size;
3306           } else if (LiteralSize != Size) {
3307             NumLiterals = 2;
3308           }
3309         }
3310       }
3311     }
3312   }
3313   ConstantBusUseCount += NumLiterals;
3314 
3315   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3316     return true;
3317 
3318   SMLoc LitLoc = getLitLoc(Operands);
3319   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3320   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3321   Error(Loc, "invalid operand (violates constant bus restrictions)");
3322   return false;
3323 }
3324 
3325 bool
3326 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3327                                                  const OperandVector &Operands) {
3328   const unsigned Opcode = Inst.getOpcode();
3329   const MCInstrDesc &Desc = MII.get(Opcode);
3330 
3331   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3332   if (DstIdx == -1 ||
3333       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3334     return true;
3335   }
3336 
3337   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3338 
3339   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3340   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3341   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3342 
3343   assert(DstIdx != -1);
3344   const MCOperand &Dst = Inst.getOperand(DstIdx);
3345   assert(Dst.isReg());
3346   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3347 
3348   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3349 
3350   for (int SrcIdx : SrcIndices) {
3351     if (SrcIdx == -1) break;
3352     const MCOperand &Src = Inst.getOperand(SrcIdx);
3353     if (Src.isReg()) {
3354       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3355       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3356         Error(getRegLoc(SrcReg, Operands),
3357           "destination must be different than all sources");
3358         return false;
3359       }
3360     }
3361   }
3362 
3363   return true;
3364 }
3365 
3366 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3367 
3368   const unsigned Opc = Inst.getOpcode();
3369   const MCInstrDesc &Desc = MII.get(Opc);
3370 
3371   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3372     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3373     assert(ClampIdx != -1);
3374     return Inst.getOperand(ClampIdx).getImm() == 0;
3375   }
3376 
3377   return true;
3378 }
3379 
3380 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3381 
3382   const unsigned Opc = Inst.getOpcode();
3383   const MCInstrDesc &Desc = MII.get(Opc);
3384 
3385   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3386     return true;
3387 
3388   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3389   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3390   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3391 
3392   assert(VDataIdx != -1);
3393 
3394   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3395     return true;
3396 
3397   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3398   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3399   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3400   if (DMask == 0)
3401     DMask = 1;
3402 
3403   unsigned DataSize =
3404     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3405   if (hasPackedD16()) {
3406     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3407     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3408       DataSize = (DataSize + 1) / 2;
3409   }
3410 
3411   return (VDataSize / 4) == DataSize + TFESize;
3412 }
3413 
3414 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3415   const unsigned Opc = Inst.getOpcode();
3416   const MCInstrDesc &Desc = MII.get(Opc);
3417 
3418   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3419     return true;
3420 
3421   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3422 
3423   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3424       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3425   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3426   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3427   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3428   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3429 
3430   assert(VAddr0Idx != -1);
3431   assert(SrsrcIdx != -1);
3432   assert(SrsrcIdx > VAddr0Idx);
3433 
3434   if (DimIdx == -1)
3435     return true; // intersect_ray
3436 
3437   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3438   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3439   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3440   unsigned ActualAddrSize =
3441       IsNSA ? SrsrcIdx - VAddr0Idx
3442             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3443   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3444 
3445   unsigned ExpectedAddrSize =
3446       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3447 
3448   if (!IsNSA) {
3449     if (ExpectedAddrSize > 8)
3450       ExpectedAddrSize = 16;
3451 
3452     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3453     // This provides backward compatibility for assembly created
3454     // before 160b/192b/224b types were directly supported.
3455     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3456       return true;
3457   }
3458 
3459   return ActualAddrSize == ExpectedAddrSize;
3460 }
3461 
3462 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3463 
3464   const unsigned Opc = Inst.getOpcode();
3465   const MCInstrDesc &Desc = MII.get(Opc);
3466 
3467   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3468     return true;
3469   if (!Desc.mayLoad() || !Desc.mayStore())
3470     return true; // Not atomic
3471 
3472   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3473   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3474 
3475   // This is an incomplete check because image_atomic_cmpswap
3476   // may only use 0x3 and 0xf while other atomic operations
3477   // may use 0x1 and 0x3. However these limitations are
3478   // verified when we check that dmask matches dst size.
3479   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3480 }
3481 
3482 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3483 
3484   const unsigned Opc = Inst.getOpcode();
3485   const MCInstrDesc &Desc = MII.get(Opc);
3486 
3487   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3488     return true;
3489 
3490   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3491   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3492 
3493   // GATHER4 instructions use dmask in a different fashion compared to
3494   // other MIMG instructions. The only useful DMASK values are
3495   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3496   // (red,red,red,red) etc.) The ISA document doesn't mention
3497   // this.
3498   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3499 }
3500 
3501 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3502   const unsigned Opc = Inst.getOpcode();
3503   const MCInstrDesc &Desc = MII.get(Opc);
3504 
3505   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3506     return true;
3507 
3508   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3509   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3510       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3511 
3512   if (!BaseOpcode->MSAA)
3513     return true;
3514 
3515   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3516   assert(DimIdx != -1);
3517 
3518   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3519   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3520 
3521   return DimInfo->MSAA;
3522 }
3523 
3524 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3525 {
3526   switch (Opcode) {
3527   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3528   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3529   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3530     return true;
3531   default:
3532     return false;
3533   }
3534 }
3535 
3536 // movrels* opcodes should only allow VGPRS as src0.
3537 // This is specified in .td description for vop1/vop3,
3538 // but sdwa is handled differently. See isSDWAOperand.
3539 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3540                                       const OperandVector &Operands) {
3541 
3542   const unsigned Opc = Inst.getOpcode();
3543   const MCInstrDesc &Desc = MII.get(Opc);
3544 
3545   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3546     return true;
3547 
3548   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3549   assert(Src0Idx != -1);
3550 
3551   SMLoc ErrLoc;
3552   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3553   if (Src0.isReg()) {
3554     auto Reg = mc2PseudoReg(Src0.getReg());
3555     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3556     if (!isSGPR(Reg, TRI))
3557       return true;
3558     ErrLoc = getRegLoc(Reg, Operands);
3559   } else {
3560     ErrLoc = getConstLoc(Operands);
3561   }
3562 
3563   Error(ErrLoc, "source operand must be a VGPR");
3564   return false;
3565 }
3566 
3567 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3568                                           const OperandVector &Operands) {
3569 
3570   const unsigned Opc = Inst.getOpcode();
3571 
3572   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3573     return true;
3574 
3575   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3576   assert(Src0Idx != -1);
3577 
3578   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3579   if (!Src0.isReg())
3580     return true;
3581 
3582   auto Reg = mc2PseudoReg(Src0.getReg());
3583   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3584   if (isSGPR(Reg, TRI)) {
3585     Error(getRegLoc(Reg, Operands),
3586           "source operand must be either a VGPR or an inline constant");
3587     return false;
3588   }
3589 
3590   return true;
3591 }
3592 
3593 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3594   switch (Inst.getOpcode()) {
3595   default:
3596     return true;
3597   case V_DIV_SCALE_F32_gfx6_gfx7:
3598   case V_DIV_SCALE_F32_vi:
3599   case V_DIV_SCALE_F32_gfx10:
3600   case V_DIV_SCALE_F64_gfx6_gfx7:
3601   case V_DIV_SCALE_F64_vi:
3602   case V_DIV_SCALE_F64_gfx10:
3603     break;
3604   }
3605 
3606   // TODO: Check that src0 = src1 or src2.
3607 
3608   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3609                     AMDGPU::OpName::src2_modifiers,
3610                     AMDGPU::OpName::src2_modifiers}) {
3611     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3612             .getImm() &
3613         SISrcMods::ABS) {
3614       return false;
3615     }
3616   }
3617 
3618   return true;
3619 }
3620 
3621 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3622 
3623   const unsigned Opc = Inst.getOpcode();
3624   const MCInstrDesc &Desc = MII.get(Opc);
3625 
3626   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3627     return true;
3628 
3629   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3630   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3631     if (isCI() || isSI())
3632       return false;
3633   }
3634 
3635   return true;
3636 }
3637 
3638 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3639   const unsigned Opc = Inst.getOpcode();
3640   const MCInstrDesc &Desc = MII.get(Opc);
3641 
3642   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3643     return true;
3644 
3645   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3646   if (DimIdx < 0)
3647     return true;
3648 
3649   long Imm = Inst.getOperand(DimIdx).getImm();
3650   if (Imm < 0 || Imm >= 8)
3651     return false;
3652 
3653   return true;
3654 }
3655 
3656 static bool IsRevOpcode(const unsigned Opcode)
3657 {
3658   switch (Opcode) {
3659   case AMDGPU::V_SUBREV_F32_e32:
3660   case AMDGPU::V_SUBREV_F32_e64:
3661   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3662   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3663   case AMDGPU::V_SUBREV_F32_e32_vi:
3664   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3665   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3666   case AMDGPU::V_SUBREV_F32_e64_vi:
3667 
3668   case AMDGPU::V_SUBREV_CO_U32_e32:
3669   case AMDGPU::V_SUBREV_CO_U32_e64:
3670   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3671   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3672 
3673   case AMDGPU::V_SUBBREV_U32_e32:
3674   case AMDGPU::V_SUBBREV_U32_e64:
3675   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3676   case AMDGPU::V_SUBBREV_U32_e32_vi:
3677   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3678   case AMDGPU::V_SUBBREV_U32_e64_vi:
3679 
3680   case AMDGPU::V_SUBREV_U32_e32:
3681   case AMDGPU::V_SUBREV_U32_e64:
3682   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3683   case AMDGPU::V_SUBREV_U32_e32_vi:
3684   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3685   case AMDGPU::V_SUBREV_U32_e64_vi:
3686 
3687   case AMDGPU::V_SUBREV_F16_e32:
3688   case AMDGPU::V_SUBREV_F16_e64:
3689   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3690   case AMDGPU::V_SUBREV_F16_e32_vi:
3691   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3692   case AMDGPU::V_SUBREV_F16_e64_vi:
3693 
3694   case AMDGPU::V_SUBREV_U16_e32:
3695   case AMDGPU::V_SUBREV_U16_e64:
3696   case AMDGPU::V_SUBREV_U16_e32_vi:
3697   case AMDGPU::V_SUBREV_U16_e64_vi:
3698 
3699   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3700   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3701   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3702 
3703   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3704   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3705 
3706   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3707   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3708 
3709   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3710   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3711 
3712   case AMDGPU::V_LSHRREV_B32_e32:
3713   case AMDGPU::V_LSHRREV_B32_e64:
3714   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3715   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3716   case AMDGPU::V_LSHRREV_B32_e32_vi:
3717   case AMDGPU::V_LSHRREV_B32_e64_vi:
3718   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3719   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3720 
3721   case AMDGPU::V_ASHRREV_I32_e32:
3722   case AMDGPU::V_ASHRREV_I32_e64:
3723   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3724   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3725   case AMDGPU::V_ASHRREV_I32_e32_vi:
3726   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3727   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3728   case AMDGPU::V_ASHRREV_I32_e64_vi:
3729 
3730   case AMDGPU::V_LSHLREV_B32_e32:
3731   case AMDGPU::V_LSHLREV_B32_e64:
3732   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3733   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3734   case AMDGPU::V_LSHLREV_B32_e32_vi:
3735   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3736   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3737   case AMDGPU::V_LSHLREV_B32_e64_vi:
3738 
3739   case AMDGPU::V_LSHLREV_B16_e32:
3740   case AMDGPU::V_LSHLREV_B16_e64:
3741   case AMDGPU::V_LSHLREV_B16_e32_vi:
3742   case AMDGPU::V_LSHLREV_B16_e64_vi:
3743   case AMDGPU::V_LSHLREV_B16_gfx10:
3744 
3745   case AMDGPU::V_LSHRREV_B16_e32:
3746   case AMDGPU::V_LSHRREV_B16_e64:
3747   case AMDGPU::V_LSHRREV_B16_e32_vi:
3748   case AMDGPU::V_LSHRREV_B16_e64_vi:
3749   case AMDGPU::V_LSHRREV_B16_gfx10:
3750 
3751   case AMDGPU::V_ASHRREV_I16_e32:
3752   case AMDGPU::V_ASHRREV_I16_e64:
3753   case AMDGPU::V_ASHRREV_I16_e32_vi:
3754   case AMDGPU::V_ASHRREV_I16_e64_vi:
3755   case AMDGPU::V_ASHRREV_I16_gfx10:
3756 
3757   case AMDGPU::V_LSHLREV_B64_e64:
3758   case AMDGPU::V_LSHLREV_B64_gfx10:
3759   case AMDGPU::V_LSHLREV_B64_vi:
3760 
3761   case AMDGPU::V_LSHRREV_B64_e64:
3762   case AMDGPU::V_LSHRREV_B64_gfx10:
3763   case AMDGPU::V_LSHRREV_B64_vi:
3764 
3765   case AMDGPU::V_ASHRREV_I64_e64:
3766   case AMDGPU::V_ASHRREV_I64_gfx10:
3767   case AMDGPU::V_ASHRREV_I64_vi:
3768 
3769   case AMDGPU::V_PK_LSHLREV_B16:
3770   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3771   case AMDGPU::V_PK_LSHLREV_B16_vi:
3772 
3773   case AMDGPU::V_PK_LSHRREV_B16:
3774   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3775   case AMDGPU::V_PK_LSHRREV_B16_vi:
3776   case AMDGPU::V_PK_ASHRREV_I16:
3777   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3778   case AMDGPU::V_PK_ASHRREV_I16_vi:
3779     return true;
3780   default:
3781     return false;
3782   }
3783 }
3784 
3785 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3786 
3787   using namespace SIInstrFlags;
3788   const unsigned Opcode = Inst.getOpcode();
3789   const MCInstrDesc &Desc = MII.get(Opcode);
3790 
3791   // lds_direct register is defined so that it can be used
3792   // with 9-bit operands only. Ignore encodings which do not accept these.
3793   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3794   if ((Desc.TSFlags & Enc) == 0)
3795     return None;
3796 
3797   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3798     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3799     if (SrcIdx == -1)
3800       break;
3801     const auto &Src = Inst.getOperand(SrcIdx);
3802     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3803 
3804       if (isGFX90A())
3805         return StringRef("lds_direct is not supported on this GPU");
3806 
3807       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3808         return StringRef("lds_direct cannot be used with this instruction");
3809 
3810       if (SrcName != OpName::src0)
3811         return StringRef("lds_direct may be used as src0 only");
3812     }
3813   }
3814 
3815   return None;
3816 }
3817 
3818 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3819   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3820     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3821     if (Op.isFlatOffset())
3822       return Op.getStartLoc();
3823   }
3824   return getLoc();
3825 }
3826 
3827 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3828                                          const OperandVector &Operands) {
3829   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3830   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3831     return true;
3832 
3833   auto Opcode = Inst.getOpcode();
3834   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3835   assert(OpNum != -1);
3836 
3837   const auto &Op = Inst.getOperand(OpNum);
3838   if (!hasFlatOffsets() && Op.getImm() != 0) {
3839     Error(getFlatOffsetLoc(Operands),
3840           "flat offset modifier is not supported on this GPU");
3841     return false;
3842   }
3843 
3844   // For FLAT segment the offset must be positive;
3845   // MSB is ignored and forced to zero.
3846   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3847     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3848     if (!isIntN(OffsetSize, Op.getImm())) {
3849       Error(getFlatOffsetLoc(Operands),
3850             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3851       return false;
3852     }
3853   } else {
3854     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3855     if (!isUIntN(OffsetSize, Op.getImm())) {
3856       Error(getFlatOffsetLoc(Operands),
3857             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3858       return false;
3859     }
3860   }
3861 
3862   return true;
3863 }
3864 
3865 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3866   // Start with second operand because SMEM Offset cannot be dst or src0.
3867   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3868     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3869     if (Op.isSMEMOffset())
3870       return Op.getStartLoc();
3871   }
3872   return getLoc();
3873 }
3874 
3875 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3876                                          const OperandVector &Operands) {
3877   if (isCI() || isSI())
3878     return true;
3879 
3880   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3881   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3882     return true;
3883 
3884   auto Opcode = Inst.getOpcode();
3885   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3886   if (OpNum == -1)
3887     return true;
3888 
3889   const auto &Op = Inst.getOperand(OpNum);
3890   if (!Op.isImm())
3891     return true;
3892 
3893   uint64_t Offset = Op.getImm();
3894   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3895   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3896       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3897     return true;
3898 
3899   Error(getSMEMOffsetLoc(Operands),
3900         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3901                                "expected a 21-bit signed offset");
3902 
3903   return false;
3904 }
3905 
3906 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3907   unsigned Opcode = Inst.getOpcode();
3908   const MCInstrDesc &Desc = MII.get(Opcode);
3909   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3910     return true;
3911 
3912   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3913   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3914 
3915   const int OpIndices[] = { Src0Idx, Src1Idx };
3916 
3917   unsigned NumExprs = 0;
3918   unsigned NumLiterals = 0;
3919   uint32_t LiteralValue;
3920 
3921   for (int OpIdx : OpIndices) {
3922     if (OpIdx == -1) break;
3923 
3924     const MCOperand &MO = Inst.getOperand(OpIdx);
3925     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3926     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3927       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3928         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3929         if (NumLiterals == 0 || LiteralValue != Value) {
3930           LiteralValue = Value;
3931           ++NumLiterals;
3932         }
3933       } else if (MO.isExpr()) {
3934         ++NumExprs;
3935       }
3936     }
3937   }
3938 
3939   return NumLiterals + NumExprs <= 1;
3940 }
3941 
3942 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3943   const unsigned Opc = Inst.getOpcode();
3944   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3945       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3946     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3947     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3948 
3949     if (OpSel & ~3)
3950       return false;
3951   }
3952   return true;
3953 }
3954 
3955 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3956                                   const OperandVector &Operands) {
3957   const unsigned Opc = Inst.getOpcode();
3958   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3959   if (DppCtrlIdx < 0)
3960     return true;
3961   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3962 
3963   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3964     // DPP64 is supported for row_newbcast only.
3965     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3966     if (Src0Idx >= 0 &&
3967         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3968       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3969       Error(S, "64 bit dpp only supports row_newbcast");
3970       return false;
3971     }
3972   }
3973 
3974   return true;
3975 }
3976 
3977 // Check if VCC register matches wavefront size
3978 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3979   auto FB = getFeatureBits();
3980   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3981     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3982 }
3983 
3984 // VOP3 literal is only allowed in GFX10+ and only one can be used
3985 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3986                                           const OperandVector &Operands) {
3987   unsigned Opcode = Inst.getOpcode();
3988   const MCInstrDesc &Desc = MII.get(Opcode);
3989   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3990     return true;
3991 
3992   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3993   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3994   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3995 
3996   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3997 
3998   unsigned NumExprs = 0;
3999   unsigned NumLiterals = 0;
4000   uint32_t LiteralValue;
4001 
4002   for (int OpIdx : OpIndices) {
4003     if (OpIdx == -1) break;
4004 
4005     const MCOperand &MO = Inst.getOperand(OpIdx);
4006     if (!MO.isImm() && !MO.isExpr())
4007       continue;
4008     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4009       continue;
4010 
4011     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4012         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4013       Error(getConstLoc(Operands),
4014             "inline constants are not allowed for this operand");
4015       return false;
4016     }
4017 
4018     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4019       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4020       if (NumLiterals == 0 || LiteralValue != Value) {
4021         LiteralValue = Value;
4022         ++NumLiterals;
4023       }
4024     } else if (MO.isExpr()) {
4025       ++NumExprs;
4026     }
4027   }
4028   NumLiterals += NumExprs;
4029 
4030   if (!NumLiterals)
4031     return true;
4032 
4033   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4034     Error(getLitLoc(Operands), "literal operands are not supported");
4035     return false;
4036   }
4037 
4038   if (NumLiterals > 1) {
4039     Error(getLitLoc(Operands), "only one literal operand is allowed");
4040     return false;
4041   }
4042 
4043   return true;
4044 }
4045 
4046 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4047 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4048                          const MCRegisterInfo *MRI) {
4049   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4050   if (OpIdx < 0)
4051     return -1;
4052 
4053   const MCOperand &Op = Inst.getOperand(OpIdx);
4054   if (!Op.isReg())
4055     return -1;
4056 
4057   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4058   auto Reg = Sub ? Sub : Op.getReg();
4059   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4060   return AGPR32.contains(Reg) ? 1 : 0;
4061 }
4062 
4063 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4064   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4065   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4066                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4067                   SIInstrFlags::DS)) == 0)
4068     return true;
4069 
4070   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4071                                                       : AMDGPU::OpName::vdata;
4072 
4073   const MCRegisterInfo *MRI = getMRI();
4074   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4075   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4076 
4077   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4078     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4079     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4080       return false;
4081   }
4082 
4083   auto FB = getFeatureBits();
4084   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4085     if (DataAreg < 0 || DstAreg < 0)
4086       return true;
4087     return DstAreg == DataAreg;
4088   }
4089 
4090   return DstAreg < 1 && DataAreg < 1;
4091 }
4092 
4093 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4094   auto FB = getFeatureBits();
4095   if (!FB[AMDGPU::FeatureGFX90AInsts])
4096     return true;
4097 
4098   const MCRegisterInfo *MRI = getMRI();
4099   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4100   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4101   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4102     const MCOperand &Op = Inst.getOperand(I);
4103     if (!Op.isReg())
4104       continue;
4105 
4106     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4107     if (!Sub)
4108       continue;
4109 
4110     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4111       return false;
4112     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4113       return false;
4114   }
4115 
4116   return true;
4117 }
4118 
4119 // gfx90a has an undocumented limitation:
4120 // DS_GWS opcodes must use even aligned registers.
4121 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4122                                   const OperandVector &Operands) {
4123   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4124     return true;
4125 
4126   int Opc = Inst.getOpcode();
4127   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4128       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4129     return true;
4130 
4131   const MCRegisterInfo *MRI = getMRI();
4132   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4133   int Data0Pos =
4134       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4135   assert(Data0Pos != -1);
4136   auto Reg = Inst.getOperand(Data0Pos).getReg();
4137   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4138   if (RegIdx & 1) {
4139     SMLoc RegLoc = getRegLoc(Reg, Operands);
4140     Error(RegLoc, "vgpr must be even aligned");
4141     return false;
4142   }
4143 
4144   return true;
4145 }
4146 
4147 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4148                                             const OperandVector &Operands,
4149                                             const SMLoc &IDLoc) {
4150   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4151                                            AMDGPU::OpName::cpol);
4152   if (CPolPos == -1)
4153     return true;
4154 
4155   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4156 
4157   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4158   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4159       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4160     Error(IDLoc, "invalid cache policy for SMRD instruction");
4161     return false;
4162   }
4163 
4164   if (isGFX90A() && (CPol & CPol::SCC)) {
4165     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4166     StringRef CStr(S.getPointer());
4167     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4168     Error(S, "scc is not supported on this GPU");
4169     return false;
4170   }
4171 
4172   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4173     return true;
4174 
4175   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4176     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4177       Error(IDLoc, "instruction must use glc");
4178       return false;
4179     }
4180   } else {
4181     if (CPol & CPol::GLC) {
4182       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4183       StringRef CStr(S.getPointer());
4184       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4185       Error(S, "instruction must not use glc");
4186       return false;
4187     }
4188   }
4189 
4190   return true;
4191 }
4192 
4193 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4194                                           const SMLoc &IDLoc,
4195                                           const OperandVector &Operands) {
4196   if (auto ErrMsg = validateLdsDirect(Inst)) {
4197     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4198     return false;
4199   }
4200   if (!validateSOPLiteral(Inst)) {
4201     Error(getLitLoc(Operands),
4202       "only one literal operand is allowed");
4203     return false;
4204   }
4205   if (!validateVOP3Literal(Inst, Operands)) {
4206     return false;
4207   }
4208   if (!validateConstantBusLimitations(Inst, Operands)) {
4209     return false;
4210   }
4211   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4212     return false;
4213   }
4214   if (!validateIntClampSupported(Inst)) {
4215     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4216       "integer clamping is not supported on this GPU");
4217     return false;
4218   }
4219   if (!validateOpSel(Inst)) {
4220     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4221       "invalid op_sel operand");
4222     return false;
4223   }
4224   if (!validateDPP(Inst, Operands)) {
4225     return false;
4226   }
4227   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4228   if (!validateMIMGD16(Inst)) {
4229     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4230       "d16 modifier is not supported on this GPU");
4231     return false;
4232   }
4233   if (!validateMIMGDim(Inst)) {
4234     Error(IDLoc, "dim modifier is required on this GPU");
4235     return false;
4236   }
4237   if (!validateMIMGMSAA(Inst)) {
4238     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4239           "invalid dim; must be MSAA type");
4240     return false;
4241   }
4242   if (!validateMIMGDataSize(Inst)) {
4243     Error(IDLoc,
4244       "image data size does not match dmask and tfe");
4245     return false;
4246   }
4247   if (!validateMIMGAddrSize(Inst)) {
4248     Error(IDLoc,
4249       "image address size does not match dim and a16");
4250     return false;
4251   }
4252   if (!validateMIMGAtomicDMask(Inst)) {
4253     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4254       "invalid atomic image dmask");
4255     return false;
4256   }
4257   if (!validateMIMGGatherDMask(Inst)) {
4258     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4259       "invalid image_gather dmask: only one bit must be set");
4260     return false;
4261   }
4262   if (!validateMovrels(Inst, Operands)) {
4263     return false;
4264   }
4265   if (!validateFlatOffset(Inst, Operands)) {
4266     return false;
4267   }
4268   if (!validateSMEMOffset(Inst, Operands)) {
4269     return false;
4270   }
4271   if (!validateMAIAccWrite(Inst, Operands)) {
4272     return false;
4273   }
4274   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4275     return false;
4276   }
4277 
4278   if (!validateAGPRLdSt(Inst)) {
4279     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4280     ? "invalid register class: data and dst should be all VGPR or AGPR"
4281     : "invalid register class: agpr loads and stores not supported on this GPU"
4282     );
4283     return false;
4284   }
4285   if (!validateVGPRAlign(Inst)) {
4286     Error(IDLoc,
4287       "invalid register class: vgpr tuples must be 64 bit aligned");
4288     return false;
4289   }
4290   if (!validateGWS(Inst, Operands)) {
4291     return false;
4292   }
4293 
4294   if (!validateDivScale(Inst)) {
4295     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4296     return false;
4297   }
4298   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4299     return false;
4300   }
4301 
4302   return true;
4303 }
4304 
4305 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4306                                             const FeatureBitset &FBS,
4307                                             unsigned VariantID = 0);
4308 
4309 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4310                                 const FeatureBitset &AvailableFeatures,
4311                                 unsigned VariantID);
4312 
4313 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4314                                        const FeatureBitset &FBS) {
4315   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4316 }
4317 
4318 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4319                                        const FeatureBitset &FBS,
4320                                        ArrayRef<unsigned> Variants) {
4321   for (auto Variant : Variants) {
4322     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4323       return true;
4324   }
4325 
4326   return false;
4327 }
4328 
4329 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4330                                                   const SMLoc &IDLoc) {
4331   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4332 
4333   // Check if requested instruction variant is supported.
4334   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4335     return false;
4336 
4337   // This instruction is not supported.
4338   // Clear any other pending errors because they are no longer relevant.
4339   getParser().clearPendingErrors();
4340 
4341   // Requested instruction variant is not supported.
4342   // Check if any other variants are supported.
4343   StringRef VariantName = getMatchedVariantName();
4344   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4345     return Error(IDLoc,
4346                  Twine(VariantName,
4347                        " variant of this instruction is not supported"));
4348   }
4349 
4350   // Finally check if this instruction is supported on any other GPU.
4351   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4352     return Error(IDLoc, "instruction not supported on this GPU");
4353   }
4354 
4355   // Instruction not supported on any GPU. Probably a typo.
4356   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4357   return Error(IDLoc, "invalid instruction" + Suggestion);
4358 }
4359 
4360 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4361                                               OperandVector &Operands,
4362                                               MCStreamer &Out,
4363                                               uint64_t &ErrorInfo,
4364                                               bool MatchingInlineAsm) {
4365   MCInst Inst;
4366   unsigned Result = Match_Success;
4367   for (auto Variant : getMatchedVariants()) {
4368     uint64_t EI;
4369     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4370                                   Variant);
4371     // We order match statuses from least to most specific. We use most specific
4372     // status as resulting
4373     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4374     if ((R == Match_Success) ||
4375         (R == Match_PreferE32) ||
4376         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4377         (R == Match_InvalidOperand && Result != Match_MissingFeature
4378                                    && Result != Match_PreferE32) ||
4379         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4380                                    && Result != Match_MissingFeature
4381                                    && Result != Match_PreferE32)) {
4382       Result = R;
4383       ErrorInfo = EI;
4384     }
4385     if (R == Match_Success)
4386       break;
4387   }
4388 
4389   if (Result == Match_Success) {
4390     if (!validateInstruction(Inst, IDLoc, Operands)) {
4391       return true;
4392     }
4393     Inst.setLoc(IDLoc);
4394     Out.emitInstruction(Inst, getSTI());
4395     return false;
4396   }
4397 
4398   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4399   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4400     return true;
4401   }
4402 
4403   switch (Result) {
4404   default: break;
4405   case Match_MissingFeature:
4406     // It has been verified that the specified instruction
4407     // mnemonic is valid. A match was found but it requires
4408     // features which are not supported on this GPU.
4409     return Error(IDLoc, "operands are not valid for this GPU or mode");
4410 
4411   case Match_InvalidOperand: {
4412     SMLoc ErrorLoc = IDLoc;
4413     if (ErrorInfo != ~0ULL) {
4414       if (ErrorInfo >= Operands.size()) {
4415         return Error(IDLoc, "too few operands for instruction");
4416       }
4417       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4418       if (ErrorLoc == SMLoc())
4419         ErrorLoc = IDLoc;
4420     }
4421     return Error(ErrorLoc, "invalid operand for instruction");
4422   }
4423 
4424   case Match_PreferE32:
4425     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4426                         "should be encoded as e32");
4427   case Match_MnemonicFail:
4428     llvm_unreachable("Invalid instructions should have been handled already");
4429   }
4430   llvm_unreachable("Implement any new match types added!");
4431 }
4432 
4433 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4434   int64_t Tmp = -1;
4435   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4436     return true;
4437   }
4438   if (getParser().parseAbsoluteExpression(Tmp)) {
4439     return true;
4440   }
4441   Ret = static_cast<uint32_t>(Tmp);
4442   return false;
4443 }
4444 
4445 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4446                                                uint32_t &Minor) {
4447   if (ParseAsAbsoluteExpression(Major))
4448     return TokError("invalid major version");
4449 
4450   if (!trySkipToken(AsmToken::Comma))
4451     return TokError("minor version number required, comma expected");
4452 
4453   if (ParseAsAbsoluteExpression(Minor))
4454     return TokError("invalid minor version");
4455 
4456   return false;
4457 }
4458 
4459 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4460   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4461     return TokError("directive only supported for amdgcn architecture");
4462 
4463   std::string TargetIDDirective;
4464   SMLoc TargetStart = getTok().getLoc();
4465   if (getParser().parseEscapedString(TargetIDDirective))
4466     return true;
4467 
4468   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4469   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4470     return getParser().Error(TargetRange.Start,
4471         (Twine(".amdgcn_target directive's target id ") +
4472          Twine(TargetIDDirective) +
4473          Twine(" does not match the specified target id ") +
4474          Twine(getTargetStreamer().getTargetID()->toString())).str());
4475 
4476   return false;
4477 }
4478 
4479 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4480   return Error(Range.Start, "value out of range", Range);
4481 }
4482 
4483 bool AMDGPUAsmParser::calculateGPRBlocks(
4484     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4485     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4486     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4487     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4488   // TODO(scott.linder): These calculations are duplicated from
4489   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4490   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4491 
4492   unsigned NumVGPRs = NextFreeVGPR;
4493   unsigned NumSGPRs = NextFreeSGPR;
4494 
4495   if (Version.Major >= 10)
4496     NumSGPRs = 0;
4497   else {
4498     unsigned MaxAddressableNumSGPRs =
4499         IsaInfo::getAddressableNumSGPRs(&getSTI());
4500 
4501     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4502         NumSGPRs > MaxAddressableNumSGPRs)
4503       return OutOfRangeError(SGPRRange);
4504 
4505     NumSGPRs +=
4506         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4507 
4508     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4509         NumSGPRs > MaxAddressableNumSGPRs)
4510       return OutOfRangeError(SGPRRange);
4511 
4512     if (Features.test(FeatureSGPRInitBug))
4513       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4514   }
4515 
4516   VGPRBlocks =
4517       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4518   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4519 
4520   return false;
4521 }
4522 
4523 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4524   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4525     return TokError("directive only supported for amdgcn architecture");
4526 
4527   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4528     return TokError("directive only supported for amdhsa OS");
4529 
4530   StringRef KernelName;
4531   if (getParser().parseIdentifier(KernelName))
4532     return true;
4533 
4534   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4535 
4536   StringSet<> Seen;
4537 
4538   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4539 
4540   SMRange VGPRRange;
4541   uint64_t NextFreeVGPR = 0;
4542   uint64_t AccumOffset = 0;
4543   SMRange SGPRRange;
4544   uint64_t NextFreeSGPR = 0;
4545   unsigned UserSGPRCount = 0;
4546   bool ReserveVCC = true;
4547   bool ReserveFlatScr = true;
4548   Optional<bool> EnableWavefrontSize32;
4549 
4550   while (true) {
4551     while (trySkipToken(AsmToken::EndOfStatement));
4552 
4553     StringRef ID;
4554     SMRange IDRange = getTok().getLocRange();
4555     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4556       return true;
4557 
4558     if (ID == ".end_amdhsa_kernel")
4559       break;
4560 
4561     if (Seen.find(ID) != Seen.end())
4562       return TokError(".amdhsa_ directives cannot be repeated");
4563     Seen.insert(ID);
4564 
4565     SMLoc ValStart = getLoc();
4566     int64_t IVal;
4567     if (getParser().parseAbsoluteExpression(IVal))
4568       return true;
4569     SMLoc ValEnd = getLoc();
4570     SMRange ValRange = SMRange(ValStart, ValEnd);
4571 
4572     if (IVal < 0)
4573       return OutOfRangeError(ValRange);
4574 
4575     uint64_t Val = IVal;
4576 
4577 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4578   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4579     return OutOfRangeError(RANGE);                                             \
4580   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4581 
4582     if (ID == ".amdhsa_group_segment_fixed_size") {
4583       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4584         return OutOfRangeError(ValRange);
4585       KD.group_segment_fixed_size = Val;
4586     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4587       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4588         return OutOfRangeError(ValRange);
4589       KD.private_segment_fixed_size = Val;
4590     } else if (ID == ".amdhsa_kernarg_size") {
4591       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4592         return OutOfRangeError(ValRange);
4593       KD.kernarg_size = Val;
4594     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4595       if (hasArchitectedFlatScratch())
4596         return Error(IDRange.Start,
4597                      "directive is not supported with architected flat scratch",
4598                      IDRange);
4599       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4600                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4601                        Val, ValRange);
4602       if (Val)
4603         UserSGPRCount += 4;
4604     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4605       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4606                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4607                        ValRange);
4608       if (Val)
4609         UserSGPRCount += 2;
4610     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4611       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4612                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4613                        ValRange);
4614       if (Val)
4615         UserSGPRCount += 2;
4616     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4617       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4618                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4619                        Val, ValRange);
4620       if (Val)
4621         UserSGPRCount += 2;
4622     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4623       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4624                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4625                        ValRange);
4626       if (Val)
4627         UserSGPRCount += 2;
4628     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4629       if (hasArchitectedFlatScratch())
4630         return Error(IDRange.Start,
4631                      "directive is not supported with architected flat scratch",
4632                      IDRange);
4633       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4634                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4635                        ValRange);
4636       if (Val)
4637         UserSGPRCount += 2;
4638     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4639       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4640                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4641                        Val, ValRange);
4642       if (Val)
4643         UserSGPRCount += 1;
4644     } else if (ID == ".amdhsa_wavefront_size32") {
4645       if (IVersion.Major < 10)
4646         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4647       EnableWavefrontSize32 = Val;
4648       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4649                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4650                        Val, ValRange);
4651     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4652       if (hasArchitectedFlatScratch())
4653         return Error(IDRange.Start,
4654                      "directive is not supported with architected flat scratch",
4655                      IDRange);
4656       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4657                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4658     } else if (ID == ".amdhsa_enable_private_segment") {
4659       if (!hasArchitectedFlatScratch())
4660         return Error(
4661             IDRange.Start,
4662             "directive is not supported without architected flat scratch",
4663             IDRange);
4664       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4665                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4666     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4667       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4668                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4669                        ValRange);
4670     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4671       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4672                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4673                        ValRange);
4674     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4675       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4676                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4677                        ValRange);
4678     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4679       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4680                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4681                        ValRange);
4682     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4683       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4684                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4685                        ValRange);
4686     } else if (ID == ".amdhsa_next_free_vgpr") {
4687       VGPRRange = ValRange;
4688       NextFreeVGPR = Val;
4689     } else if (ID == ".amdhsa_next_free_sgpr") {
4690       SGPRRange = ValRange;
4691       NextFreeSGPR = Val;
4692     } else if (ID == ".amdhsa_accum_offset") {
4693       if (!isGFX90A())
4694         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4695       AccumOffset = Val;
4696     } else if (ID == ".amdhsa_reserve_vcc") {
4697       if (!isUInt<1>(Val))
4698         return OutOfRangeError(ValRange);
4699       ReserveVCC = Val;
4700     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4701       if (IVersion.Major < 7)
4702         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4703       if (hasArchitectedFlatScratch())
4704         return Error(IDRange.Start,
4705                      "directive is not supported with architected flat scratch",
4706                      IDRange);
4707       if (!isUInt<1>(Val))
4708         return OutOfRangeError(ValRange);
4709       ReserveFlatScr = Val;
4710     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4711       if (IVersion.Major < 8)
4712         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4713       if (!isUInt<1>(Val))
4714         return OutOfRangeError(ValRange);
4715       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4716         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4717                                  IDRange);
4718     } else if (ID == ".amdhsa_float_round_mode_32") {
4719       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4720                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4721     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4722       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4723                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4724     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4725       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4726                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4727     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4728       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4729                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4730                        ValRange);
4731     } else if (ID == ".amdhsa_dx10_clamp") {
4732       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4733                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4734     } else if (ID == ".amdhsa_ieee_mode") {
4735       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4736                        Val, ValRange);
4737     } else if (ID == ".amdhsa_fp16_overflow") {
4738       if (IVersion.Major < 9)
4739         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4740       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4741                        ValRange);
4742     } else if (ID == ".amdhsa_tg_split") {
4743       if (!isGFX90A())
4744         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4745       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4746                        ValRange);
4747     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4748       if (IVersion.Major < 10)
4749         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4750       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4751                        ValRange);
4752     } else if (ID == ".amdhsa_memory_ordered") {
4753       if (IVersion.Major < 10)
4754         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4755       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4756                        ValRange);
4757     } else if (ID == ".amdhsa_forward_progress") {
4758       if (IVersion.Major < 10)
4759         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4760       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4761                        ValRange);
4762     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4763       PARSE_BITS_ENTRY(
4764           KD.compute_pgm_rsrc2,
4765           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4766           ValRange);
4767     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4768       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4769                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4770                        Val, ValRange);
4771     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4772       PARSE_BITS_ENTRY(
4773           KD.compute_pgm_rsrc2,
4774           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4775           ValRange);
4776     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4777       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4778                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4779                        Val, ValRange);
4780     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4781       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4782                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4783                        Val, ValRange);
4784     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4785       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4786                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4787                        Val, ValRange);
4788     } else if (ID == ".amdhsa_exception_int_div_zero") {
4789       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4790                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4791                        Val, ValRange);
4792     } else {
4793       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4794     }
4795 
4796 #undef PARSE_BITS_ENTRY
4797   }
4798 
4799   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4800     return TokError(".amdhsa_next_free_vgpr directive is required");
4801 
4802   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4803     return TokError(".amdhsa_next_free_sgpr directive is required");
4804 
4805   unsigned VGPRBlocks;
4806   unsigned SGPRBlocks;
4807   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4808                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4809                          EnableWavefrontSize32, NextFreeVGPR,
4810                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4811                          SGPRBlocks))
4812     return true;
4813 
4814   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4815           VGPRBlocks))
4816     return OutOfRangeError(VGPRRange);
4817   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4818                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4819 
4820   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4821           SGPRBlocks))
4822     return OutOfRangeError(SGPRRange);
4823   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4824                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4825                   SGPRBlocks);
4826 
4827   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4828     return TokError("too many user SGPRs enabled");
4829   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4830                   UserSGPRCount);
4831 
4832   if (isGFX90A()) {
4833     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4834       return TokError(".amdhsa_accum_offset directive is required");
4835     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4836       return TokError("accum_offset should be in range [4..256] in "
4837                       "increments of 4");
4838     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4839       return TokError("accum_offset exceeds total VGPR allocation");
4840     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4841                     (AccumOffset / 4 - 1));
4842   }
4843 
4844   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4845       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4846       ReserveFlatScr);
4847   return false;
4848 }
4849 
4850 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4851   uint32_t Major;
4852   uint32_t Minor;
4853 
4854   if (ParseDirectiveMajorMinor(Major, Minor))
4855     return true;
4856 
4857   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4858   return false;
4859 }
4860 
4861 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4862   uint32_t Major;
4863   uint32_t Minor;
4864   uint32_t Stepping;
4865   StringRef VendorName;
4866   StringRef ArchName;
4867 
4868   // If this directive has no arguments, then use the ISA version for the
4869   // targeted GPU.
4870   if (isToken(AsmToken::EndOfStatement)) {
4871     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4872     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4873                                                         ISA.Stepping,
4874                                                         "AMD", "AMDGPU");
4875     return false;
4876   }
4877 
4878   if (ParseDirectiveMajorMinor(Major, Minor))
4879     return true;
4880 
4881   if (!trySkipToken(AsmToken::Comma))
4882     return TokError("stepping version number required, comma expected");
4883 
4884   if (ParseAsAbsoluteExpression(Stepping))
4885     return TokError("invalid stepping version");
4886 
4887   if (!trySkipToken(AsmToken::Comma))
4888     return TokError("vendor name required, comma expected");
4889 
4890   if (!parseString(VendorName, "invalid vendor name"))
4891     return true;
4892 
4893   if (!trySkipToken(AsmToken::Comma))
4894     return TokError("arch name required, comma expected");
4895 
4896   if (!parseString(ArchName, "invalid arch name"))
4897     return true;
4898 
4899   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4900                                                       VendorName, ArchName);
4901   return false;
4902 }
4903 
4904 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4905                                                amd_kernel_code_t &Header) {
4906   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4907   // assembly for backwards compatibility.
4908   if (ID == "max_scratch_backing_memory_byte_size") {
4909     Parser.eatToEndOfStatement();
4910     return false;
4911   }
4912 
4913   SmallString<40> ErrStr;
4914   raw_svector_ostream Err(ErrStr);
4915   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4916     return TokError(Err.str());
4917   }
4918   Lex();
4919 
4920   if (ID == "enable_wavefront_size32") {
4921     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4922       if (!isGFX10Plus())
4923         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4924       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4925         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4926     } else {
4927       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4928         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4929     }
4930   }
4931 
4932   if (ID == "wavefront_size") {
4933     if (Header.wavefront_size == 5) {
4934       if (!isGFX10Plus())
4935         return TokError("wavefront_size=5 is only allowed on GFX10+");
4936       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4937         return TokError("wavefront_size=5 requires +WavefrontSize32");
4938     } else if (Header.wavefront_size == 6) {
4939       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4940         return TokError("wavefront_size=6 requires +WavefrontSize64");
4941     }
4942   }
4943 
4944   if (ID == "enable_wgp_mode") {
4945     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4946         !isGFX10Plus())
4947       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4948   }
4949 
4950   if (ID == "enable_mem_ordered") {
4951     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4952         !isGFX10Plus())
4953       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4954   }
4955 
4956   if (ID == "enable_fwd_progress") {
4957     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4958         !isGFX10Plus())
4959       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4960   }
4961 
4962   return false;
4963 }
4964 
4965 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4966   amd_kernel_code_t Header;
4967   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4968 
4969   while (true) {
4970     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4971     // will set the current token to EndOfStatement.
4972     while(trySkipToken(AsmToken::EndOfStatement));
4973 
4974     StringRef ID;
4975     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4976       return true;
4977 
4978     if (ID == ".end_amd_kernel_code_t")
4979       break;
4980 
4981     if (ParseAMDKernelCodeTValue(ID, Header))
4982       return true;
4983   }
4984 
4985   getTargetStreamer().EmitAMDKernelCodeT(Header);
4986 
4987   return false;
4988 }
4989 
4990 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4991   StringRef KernelName;
4992   if (!parseId(KernelName, "expected symbol name"))
4993     return true;
4994 
4995   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4996                                            ELF::STT_AMDGPU_HSA_KERNEL);
4997 
4998   KernelScope.initialize(getContext());
4999   return false;
5000 }
5001 
5002 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5003   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5004     return Error(getLoc(),
5005                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5006                  "architectures");
5007   }
5008 
5009   auto TargetIDDirective = getLexer().getTok().getStringContents();
5010   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5011     return Error(getParser().getTok().getLoc(), "target id must match options");
5012 
5013   getTargetStreamer().EmitISAVersion();
5014   Lex();
5015 
5016   return false;
5017 }
5018 
5019 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5020   const char *AssemblerDirectiveBegin;
5021   const char *AssemblerDirectiveEnd;
5022   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5023       isHsaAbiVersion3Or4(&getSTI())
5024           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5025                             HSAMD::V3::AssemblerDirectiveEnd)
5026           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5027                             HSAMD::AssemblerDirectiveEnd);
5028 
5029   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5030     return Error(getLoc(),
5031                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5032                  "not available on non-amdhsa OSes")).str());
5033   }
5034 
5035   std::string HSAMetadataString;
5036   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5037                           HSAMetadataString))
5038     return true;
5039 
5040   if (isHsaAbiVersion3Or4(&getSTI())) {
5041     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5042       return Error(getLoc(), "invalid HSA metadata");
5043   } else {
5044     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5045       return Error(getLoc(), "invalid HSA metadata");
5046   }
5047 
5048   return false;
5049 }
5050 
5051 /// Common code to parse out a block of text (typically YAML) between start and
5052 /// end directives.
5053 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5054                                           const char *AssemblerDirectiveEnd,
5055                                           std::string &CollectString) {
5056 
5057   raw_string_ostream CollectStream(CollectString);
5058 
5059   getLexer().setSkipSpace(false);
5060 
5061   bool FoundEnd = false;
5062   while (!isToken(AsmToken::Eof)) {
5063     while (isToken(AsmToken::Space)) {
5064       CollectStream << getTokenStr();
5065       Lex();
5066     }
5067 
5068     if (trySkipId(AssemblerDirectiveEnd)) {
5069       FoundEnd = true;
5070       break;
5071     }
5072 
5073     CollectStream << Parser.parseStringToEndOfStatement()
5074                   << getContext().getAsmInfo()->getSeparatorString();
5075 
5076     Parser.eatToEndOfStatement();
5077   }
5078 
5079   getLexer().setSkipSpace(true);
5080 
5081   if (isToken(AsmToken::Eof) && !FoundEnd) {
5082     return TokError(Twine("expected directive ") +
5083                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5084   }
5085 
5086   CollectStream.flush();
5087   return false;
5088 }
5089 
5090 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5091 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5092   std::string String;
5093   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5094                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5095     return true;
5096 
5097   auto PALMetadata = getTargetStreamer().getPALMetadata();
5098   if (!PALMetadata->setFromString(String))
5099     return Error(getLoc(), "invalid PAL metadata");
5100   return false;
5101 }
5102 
5103 /// Parse the assembler directive for old linear-format PAL metadata.
5104 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5105   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5106     return Error(getLoc(),
5107                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5108                  "not available on non-amdpal OSes")).str());
5109   }
5110 
5111   auto PALMetadata = getTargetStreamer().getPALMetadata();
5112   PALMetadata->setLegacy();
5113   for (;;) {
5114     uint32_t Key, Value;
5115     if (ParseAsAbsoluteExpression(Key)) {
5116       return TokError(Twine("invalid value in ") +
5117                       Twine(PALMD::AssemblerDirective));
5118     }
5119     if (!trySkipToken(AsmToken::Comma)) {
5120       return TokError(Twine("expected an even number of values in ") +
5121                       Twine(PALMD::AssemblerDirective));
5122     }
5123     if (ParseAsAbsoluteExpression(Value)) {
5124       return TokError(Twine("invalid value in ") +
5125                       Twine(PALMD::AssemblerDirective));
5126     }
5127     PALMetadata->setRegister(Key, Value);
5128     if (!trySkipToken(AsmToken::Comma))
5129       break;
5130   }
5131   return false;
5132 }
5133 
5134 /// ParseDirectiveAMDGPULDS
5135 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5136 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5137   if (getParser().checkForValidSection())
5138     return true;
5139 
5140   StringRef Name;
5141   SMLoc NameLoc = getLoc();
5142   if (getParser().parseIdentifier(Name))
5143     return TokError("expected identifier in directive");
5144 
5145   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5146   if (parseToken(AsmToken::Comma, "expected ','"))
5147     return true;
5148 
5149   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5150 
5151   int64_t Size;
5152   SMLoc SizeLoc = getLoc();
5153   if (getParser().parseAbsoluteExpression(Size))
5154     return true;
5155   if (Size < 0)
5156     return Error(SizeLoc, "size must be non-negative");
5157   if (Size > LocalMemorySize)
5158     return Error(SizeLoc, "size is too large");
5159 
5160   int64_t Alignment = 4;
5161   if (trySkipToken(AsmToken::Comma)) {
5162     SMLoc AlignLoc = getLoc();
5163     if (getParser().parseAbsoluteExpression(Alignment))
5164       return true;
5165     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5166       return Error(AlignLoc, "alignment must be a power of two");
5167 
5168     // Alignment larger than the size of LDS is possible in theory, as long
5169     // as the linker manages to place to symbol at address 0, but we do want
5170     // to make sure the alignment fits nicely into a 32-bit integer.
5171     if (Alignment >= 1u << 31)
5172       return Error(AlignLoc, "alignment is too large");
5173   }
5174 
5175   if (parseToken(AsmToken::EndOfStatement,
5176                  "unexpected token in '.amdgpu_lds' directive"))
5177     return true;
5178 
5179   Symbol->redefineIfPossible();
5180   if (!Symbol->isUndefined())
5181     return Error(NameLoc, "invalid symbol redefinition");
5182 
5183   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5184   return false;
5185 }
5186 
5187 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5188   StringRef IDVal = DirectiveID.getString();
5189 
5190   if (isHsaAbiVersion3Or4(&getSTI())) {
5191     if (IDVal == ".amdhsa_kernel")
5192      return ParseDirectiveAMDHSAKernel();
5193 
5194     // TODO: Restructure/combine with PAL metadata directive.
5195     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5196       return ParseDirectiveHSAMetadata();
5197   } else {
5198     if (IDVal == ".hsa_code_object_version")
5199       return ParseDirectiveHSACodeObjectVersion();
5200 
5201     if (IDVal == ".hsa_code_object_isa")
5202       return ParseDirectiveHSACodeObjectISA();
5203 
5204     if (IDVal == ".amd_kernel_code_t")
5205       return ParseDirectiveAMDKernelCodeT();
5206 
5207     if (IDVal == ".amdgpu_hsa_kernel")
5208       return ParseDirectiveAMDGPUHsaKernel();
5209 
5210     if (IDVal == ".amd_amdgpu_isa")
5211       return ParseDirectiveISAVersion();
5212 
5213     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5214       return ParseDirectiveHSAMetadata();
5215   }
5216 
5217   if (IDVal == ".amdgcn_target")
5218     return ParseDirectiveAMDGCNTarget();
5219 
5220   if (IDVal == ".amdgpu_lds")
5221     return ParseDirectiveAMDGPULDS();
5222 
5223   if (IDVal == PALMD::AssemblerDirectiveBegin)
5224     return ParseDirectivePALMetadataBegin();
5225 
5226   if (IDVal == PALMD::AssemblerDirective)
5227     return ParseDirectivePALMetadata();
5228 
5229   return true;
5230 }
5231 
5232 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5233                                            unsigned RegNo) {
5234 
5235   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5236        R.isValid(); ++R) {
5237     if (*R == RegNo)
5238       return isGFX9Plus();
5239   }
5240 
5241   // GFX10 has 2 more SGPRs 104 and 105.
5242   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5243        R.isValid(); ++R) {
5244     if (*R == RegNo)
5245       return hasSGPR104_SGPR105();
5246   }
5247 
5248   switch (RegNo) {
5249   case AMDGPU::SRC_SHARED_BASE:
5250   case AMDGPU::SRC_SHARED_LIMIT:
5251   case AMDGPU::SRC_PRIVATE_BASE:
5252   case AMDGPU::SRC_PRIVATE_LIMIT:
5253   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5254     return isGFX9Plus();
5255   case AMDGPU::TBA:
5256   case AMDGPU::TBA_LO:
5257   case AMDGPU::TBA_HI:
5258   case AMDGPU::TMA:
5259   case AMDGPU::TMA_LO:
5260   case AMDGPU::TMA_HI:
5261     return !isGFX9Plus();
5262   case AMDGPU::XNACK_MASK:
5263   case AMDGPU::XNACK_MASK_LO:
5264   case AMDGPU::XNACK_MASK_HI:
5265     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5266   case AMDGPU::SGPR_NULL:
5267     return isGFX10Plus();
5268   default:
5269     break;
5270   }
5271 
5272   if (isCI())
5273     return true;
5274 
5275   if (isSI() || isGFX10Plus()) {
5276     // No flat_scr on SI.
5277     // On GFX10 flat scratch is not a valid register operand and can only be
5278     // accessed with s_setreg/s_getreg.
5279     switch (RegNo) {
5280     case AMDGPU::FLAT_SCR:
5281     case AMDGPU::FLAT_SCR_LO:
5282     case AMDGPU::FLAT_SCR_HI:
5283       return false;
5284     default:
5285       return true;
5286     }
5287   }
5288 
5289   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5290   // SI/CI have.
5291   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5292        R.isValid(); ++R) {
5293     if (*R == RegNo)
5294       return hasSGPR102_SGPR103();
5295   }
5296 
5297   return true;
5298 }
5299 
5300 OperandMatchResultTy
5301 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5302                               OperandMode Mode) {
5303   // Try to parse with a custom parser
5304   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5305 
5306   // If we successfully parsed the operand or if there as an error parsing,
5307   // we are done.
5308   //
5309   // If we are parsing after we reach EndOfStatement then this means we
5310   // are appending default values to the Operands list.  This is only done
5311   // by custom parser, so we shouldn't continue on to the generic parsing.
5312   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5313       isToken(AsmToken::EndOfStatement))
5314     return ResTy;
5315 
5316   SMLoc RBraceLoc;
5317   SMLoc LBraceLoc = getLoc();
5318   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5319     unsigned Prefix = Operands.size();
5320 
5321     for (;;) {
5322       auto Loc = getLoc();
5323       ResTy = parseReg(Operands);
5324       if (ResTy == MatchOperand_NoMatch)
5325         Error(Loc, "expected a register");
5326       if (ResTy != MatchOperand_Success)
5327         return MatchOperand_ParseFail;
5328 
5329       RBraceLoc = getLoc();
5330       if (trySkipToken(AsmToken::RBrac))
5331         break;
5332 
5333       if (!skipToken(AsmToken::Comma,
5334                      "expected a comma or a closing square bracket")) {
5335         return MatchOperand_ParseFail;
5336       }
5337     }
5338 
5339     if (Operands.size() - Prefix > 1) {
5340       Operands.insert(Operands.begin() + Prefix,
5341                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5342       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5343     }
5344 
5345     return MatchOperand_Success;
5346   }
5347 
5348   return parseRegOrImm(Operands);
5349 }
5350 
5351 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5352   // Clear any forced encodings from the previous instruction.
5353   setForcedEncodingSize(0);
5354   setForcedDPP(false);
5355   setForcedSDWA(false);
5356 
5357   if (Name.endswith("_e64")) {
5358     setForcedEncodingSize(64);
5359     return Name.substr(0, Name.size() - 4);
5360   } else if (Name.endswith("_e32")) {
5361     setForcedEncodingSize(32);
5362     return Name.substr(0, Name.size() - 4);
5363   } else if (Name.endswith("_dpp")) {
5364     setForcedDPP(true);
5365     return Name.substr(0, Name.size() - 4);
5366   } else if (Name.endswith("_sdwa")) {
5367     setForcedSDWA(true);
5368     return Name.substr(0, Name.size() - 5);
5369   }
5370   return Name;
5371 }
5372 
5373 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5374                                        StringRef Name,
5375                                        SMLoc NameLoc, OperandVector &Operands) {
5376   // Add the instruction mnemonic
5377   Name = parseMnemonicSuffix(Name);
5378   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5379 
5380   bool IsMIMG = Name.startswith("image_");
5381 
5382   while (!trySkipToken(AsmToken::EndOfStatement)) {
5383     OperandMode Mode = OperandMode_Default;
5384     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5385       Mode = OperandMode_NSA;
5386     CPolSeen = 0;
5387     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5388 
5389     if (Res != MatchOperand_Success) {
5390       checkUnsupportedInstruction(Name, NameLoc);
5391       if (!Parser.hasPendingError()) {
5392         // FIXME: use real operand location rather than the current location.
5393         StringRef Msg =
5394           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5395                                             "not a valid operand.";
5396         Error(getLoc(), Msg);
5397       }
5398       while (!trySkipToken(AsmToken::EndOfStatement)) {
5399         lex();
5400       }
5401       return true;
5402     }
5403 
5404     // Eat the comma or space if there is one.
5405     trySkipToken(AsmToken::Comma);
5406   }
5407 
5408   return false;
5409 }
5410 
5411 //===----------------------------------------------------------------------===//
5412 // Utility functions
5413 //===----------------------------------------------------------------------===//
5414 
5415 OperandMatchResultTy
5416 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5417 
5418   if (!trySkipId(Prefix, AsmToken::Colon))
5419     return MatchOperand_NoMatch;
5420 
5421   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5422 }
5423 
5424 OperandMatchResultTy
5425 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5426                                     AMDGPUOperand::ImmTy ImmTy,
5427                                     bool (*ConvertResult)(int64_t&)) {
5428   SMLoc S = getLoc();
5429   int64_t Value = 0;
5430 
5431   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5432   if (Res != MatchOperand_Success)
5433     return Res;
5434 
5435   if (ConvertResult && !ConvertResult(Value)) {
5436     Error(S, "invalid " + StringRef(Prefix) + " value.");
5437   }
5438 
5439   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5440   return MatchOperand_Success;
5441 }
5442 
5443 OperandMatchResultTy
5444 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5445                                              OperandVector &Operands,
5446                                              AMDGPUOperand::ImmTy ImmTy,
5447                                              bool (*ConvertResult)(int64_t&)) {
5448   SMLoc S = getLoc();
5449   if (!trySkipId(Prefix, AsmToken::Colon))
5450     return MatchOperand_NoMatch;
5451 
5452   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5453     return MatchOperand_ParseFail;
5454 
5455   unsigned Val = 0;
5456   const unsigned MaxSize = 4;
5457 
5458   // FIXME: How to verify the number of elements matches the number of src
5459   // operands?
5460   for (int I = 0; ; ++I) {
5461     int64_t Op;
5462     SMLoc Loc = getLoc();
5463     if (!parseExpr(Op))
5464       return MatchOperand_ParseFail;
5465 
5466     if (Op != 0 && Op != 1) {
5467       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5468       return MatchOperand_ParseFail;
5469     }
5470 
5471     Val |= (Op << I);
5472 
5473     if (trySkipToken(AsmToken::RBrac))
5474       break;
5475 
5476     if (I + 1 == MaxSize) {
5477       Error(getLoc(), "expected a closing square bracket");
5478       return MatchOperand_ParseFail;
5479     }
5480 
5481     if (!skipToken(AsmToken::Comma, "expected a comma"))
5482       return MatchOperand_ParseFail;
5483   }
5484 
5485   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5486   return MatchOperand_Success;
5487 }
5488 
5489 OperandMatchResultTy
5490 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5491                                AMDGPUOperand::ImmTy ImmTy) {
5492   int64_t Bit;
5493   SMLoc S = getLoc();
5494 
5495   if (trySkipId(Name)) {
5496     Bit = 1;
5497   } else if (trySkipId("no", Name)) {
5498     Bit = 0;
5499   } else {
5500     return MatchOperand_NoMatch;
5501   }
5502 
5503   if (Name == "r128" && !hasMIMG_R128()) {
5504     Error(S, "r128 modifier is not supported on this GPU");
5505     return MatchOperand_ParseFail;
5506   }
5507   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5508     Error(S, "a16 modifier is not supported on this GPU");
5509     return MatchOperand_ParseFail;
5510   }
5511 
5512   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5513     ImmTy = AMDGPUOperand::ImmTyR128A16;
5514 
5515   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5516   return MatchOperand_Success;
5517 }
5518 
5519 OperandMatchResultTy
5520 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5521   unsigned CPolOn = 0;
5522   unsigned CPolOff = 0;
5523   SMLoc S = getLoc();
5524 
5525   if (trySkipId("glc"))
5526     CPolOn = AMDGPU::CPol::GLC;
5527   else if (trySkipId("noglc"))
5528     CPolOff = AMDGPU::CPol::GLC;
5529   else if (trySkipId("slc"))
5530     CPolOn = AMDGPU::CPol::SLC;
5531   else if (trySkipId("noslc"))
5532     CPolOff = AMDGPU::CPol::SLC;
5533   else if (trySkipId("dlc"))
5534     CPolOn = AMDGPU::CPol::DLC;
5535   else if (trySkipId("nodlc"))
5536     CPolOff = AMDGPU::CPol::DLC;
5537   else if (trySkipId("scc"))
5538     CPolOn = AMDGPU::CPol::SCC;
5539   else if (trySkipId("noscc"))
5540     CPolOff = AMDGPU::CPol::SCC;
5541   else
5542     return MatchOperand_NoMatch;
5543 
5544   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5545     Error(S, "dlc modifier is not supported on this GPU");
5546     return MatchOperand_ParseFail;
5547   }
5548 
5549   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5550     Error(S, "scc modifier is not supported on this GPU");
5551     return MatchOperand_ParseFail;
5552   }
5553 
5554   if (CPolSeen & (CPolOn | CPolOff)) {
5555     Error(S, "duplicate cache policy modifier");
5556     return MatchOperand_ParseFail;
5557   }
5558 
5559   CPolSeen |= (CPolOn | CPolOff);
5560 
5561   for (unsigned I = 1; I != Operands.size(); ++I) {
5562     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5563     if (Op.isCPol()) {
5564       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5565       return MatchOperand_Success;
5566     }
5567   }
5568 
5569   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5570                                               AMDGPUOperand::ImmTyCPol));
5571 
5572   return MatchOperand_Success;
5573 }
5574 
5575 static void addOptionalImmOperand(
5576   MCInst& Inst, const OperandVector& Operands,
5577   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5578   AMDGPUOperand::ImmTy ImmT,
5579   int64_t Default = 0) {
5580   auto i = OptionalIdx.find(ImmT);
5581   if (i != OptionalIdx.end()) {
5582     unsigned Idx = i->second;
5583     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5584   } else {
5585     Inst.addOperand(MCOperand::createImm(Default));
5586   }
5587 }
5588 
5589 OperandMatchResultTy
5590 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5591                                        StringRef &Value,
5592                                        SMLoc &StringLoc) {
5593   if (!trySkipId(Prefix, AsmToken::Colon))
5594     return MatchOperand_NoMatch;
5595 
5596   StringLoc = getLoc();
5597   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5598                                                   : MatchOperand_ParseFail;
5599 }
5600 
5601 //===----------------------------------------------------------------------===//
5602 // MTBUF format
5603 //===----------------------------------------------------------------------===//
5604 
5605 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5606                                   int64_t MaxVal,
5607                                   int64_t &Fmt) {
5608   int64_t Val;
5609   SMLoc Loc = getLoc();
5610 
5611   auto Res = parseIntWithPrefix(Pref, Val);
5612   if (Res == MatchOperand_ParseFail)
5613     return false;
5614   if (Res == MatchOperand_NoMatch)
5615     return true;
5616 
5617   if (Val < 0 || Val > MaxVal) {
5618     Error(Loc, Twine("out of range ", StringRef(Pref)));
5619     return false;
5620   }
5621 
5622   Fmt = Val;
5623   return true;
5624 }
5625 
5626 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5627 // values to live in a joint format operand in the MCInst encoding.
5628 OperandMatchResultTy
5629 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5630   using namespace llvm::AMDGPU::MTBUFFormat;
5631 
5632   int64_t Dfmt = DFMT_UNDEF;
5633   int64_t Nfmt = NFMT_UNDEF;
5634 
5635   // dfmt and nfmt can appear in either order, and each is optional.
5636   for (int I = 0; I < 2; ++I) {
5637     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5638       return MatchOperand_ParseFail;
5639 
5640     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5641       return MatchOperand_ParseFail;
5642     }
5643     // Skip optional comma between dfmt/nfmt
5644     // but guard against 2 commas following each other.
5645     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5646         !peekToken().is(AsmToken::Comma)) {
5647       trySkipToken(AsmToken::Comma);
5648     }
5649   }
5650 
5651   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5652     return MatchOperand_NoMatch;
5653 
5654   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5655   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5656 
5657   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5658   return MatchOperand_Success;
5659 }
5660 
5661 OperandMatchResultTy
5662 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5663   using namespace llvm::AMDGPU::MTBUFFormat;
5664 
5665   int64_t Fmt = UFMT_UNDEF;
5666 
5667   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5668     return MatchOperand_ParseFail;
5669 
5670   if (Fmt == UFMT_UNDEF)
5671     return MatchOperand_NoMatch;
5672 
5673   Format = Fmt;
5674   return MatchOperand_Success;
5675 }
5676 
5677 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5678                                     int64_t &Nfmt,
5679                                     StringRef FormatStr,
5680                                     SMLoc Loc) {
5681   using namespace llvm::AMDGPU::MTBUFFormat;
5682   int64_t Format;
5683 
5684   Format = getDfmt(FormatStr);
5685   if (Format != DFMT_UNDEF) {
5686     Dfmt = Format;
5687     return true;
5688   }
5689 
5690   Format = getNfmt(FormatStr, getSTI());
5691   if (Format != NFMT_UNDEF) {
5692     Nfmt = Format;
5693     return true;
5694   }
5695 
5696   Error(Loc, "unsupported format");
5697   return false;
5698 }
5699 
5700 OperandMatchResultTy
5701 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5702                                           SMLoc FormatLoc,
5703                                           int64_t &Format) {
5704   using namespace llvm::AMDGPU::MTBUFFormat;
5705 
5706   int64_t Dfmt = DFMT_UNDEF;
5707   int64_t Nfmt = NFMT_UNDEF;
5708   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5709     return MatchOperand_ParseFail;
5710 
5711   if (trySkipToken(AsmToken::Comma)) {
5712     StringRef Str;
5713     SMLoc Loc = getLoc();
5714     if (!parseId(Str, "expected a format string") ||
5715         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5716       return MatchOperand_ParseFail;
5717     }
5718     if (Dfmt == DFMT_UNDEF) {
5719       Error(Loc, "duplicate numeric format");
5720       return MatchOperand_ParseFail;
5721     } else if (Nfmt == NFMT_UNDEF) {
5722       Error(Loc, "duplicate data format");
5723       return MatchOperand_ParseFail;
5724     }
5725   }
5726 
5727   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5728   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5729 
5730   if (isGFX10Plus()) {
5731     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5732     if (Ufmt == UFMT_UNDEF) {
5733       Error(FormatLoc, "unsupported format");
5734       return MatchOperand_ParseFail;
5735     }
5736     Format = Ufmt;
5737   } else {
5738     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5739   }
5740 
5741   return MatchOperand_Success;
5742 }
5743 
5744 OperandMatchResultTy
5745 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5746                                             SMLoc Loc,
5747                                             int64_t &Format) {
5748   using namespace llvm::AMDGPU::MTBUFFormat;
5749 
5750   auto Id = getUnifiedFormat(FormatStr);
5751   if (Id == UFMT_UNDEF)
5752     return MatchOperand_NoMatch;
5753 
5754   if (!isGFX10Plus()) {
5755     Error(Loc, "unified format is not supported on this GPU");
5756     return MatchOperand_ParseFail;
5757   }
5758 
5759   Format = Id;
5760   return MatchOperand_Success;
5761 }
5762 
5763 OperandMatchResultTy
5764 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5765   using namespace llvm::AMDGPU::MTBUFFormat;
5766   SMLoc Loc = getLoc();
5767 
5768   if (!parseExpr(Format))
5769     return MatchOperand_ParseFail;
5770   if (!isValidFormatEncoding(Format, getSTI())) {
5771     Error(Loc, "out of range format");
5772     return MatchOperand_ParseFail;
5773   }
5774 
5775   return MatchOperand_Success;
5776 }
5777 
5778 OperandMatchResultTy
5779 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5780   using namespace llvm::AMDGPU::MTBUFFormat;
5781 
5782   if (!trySkipId("format", AsmToken::Colon))
5783     return MatchOperand_NoMatch;
5784 
5785   if (trySkipToken(AsmToken::LBrac)) {
5786     StringRef FormatStr;
5787     SMLoc Loc = getLoc();
5788     if (!parseId(FormatStr, "expected a format string"))
5789       return MatchOperand_ParseFail;
5790 
5791     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5792     if (Res == MatchOperand_NoMatch)
5793       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5794     if (Res != MatchOperand_Success)
5795       return Res;
5796 
5797     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5798       return MatchOperand_ParseFail;
5799 
5800     return MatchOperand_Success;
5801   }
5802 
5803   return parseNumericFormat(Format);
5804 }
5805 
5806 OperandMatchResultTy
5807 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5808   using namespace llvm::AMDGPU::MTBUFFormat;
5809 
5810   int64_t Format = getDefaultFormatEncoding(getSTI());
5811   OperandMatchResultTy Res;
5812   SMLoc Loc = getLoc();
5813 
5814   // Parse legacy format syntax.
5815   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5816   if (Res == MatchOperand_ParseFail)
5817     return Res;
5818 
5819   bool FormatFound = (Res == MatchOperand_Success);
5820 
5821   Operands.push_back(
5822     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5823 
5824   if (FormatFound)
5825     trySkipToken(AsmToken::Comma);
5826 
5827   if (isToken(AsmToken::EndOfStatement)) {
5828     // We are expecting an soffset operand,
5829     // but let matcher handle the error.
5830     return MatchOperand_Success;
5831   }
5832 
5833   // Parse soffset.
5834   Res = parseRegOrImm(Operands);
5835   if (Res != MatchOperand_Success)
5836     return Res;
5837 
5838   trySkipToken(AsmToken::Comma);
5839 
5840   if (!FormatFound) {
5841     Res = parseSymbolicOrNumericFormat(Format);
5842     if (Res == MatchOperand_ParseFail)
5843       return Res;
5844     if (Res == MatchOperand_Success) {
5845       auto Size = Operands.size();
5846       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5847       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5848       Op.setImm(Format);
5849     }
5850     return MatchOperand_Success;
5851   }
5852 
5853   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5854     Error(getLoc(), "duplicate format");
5855     return MatchOperand_ParseFail;
5856   }
5857   return MatchOperand_Success;
5858 }
5859 
5860 //===----------------------------------------------------------------------===//
5861 // ds
5862 //===----------------------------------------------------------------------===//
5863 
5864 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5865                                     const OperandVector &Operands) {
5866   OptionalImmIndexMap OptionalIdx;
5867 
5868   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5869     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5870 
5871     // Add the register arguments
5872     if (Op.isReg()) {
5873       Op.addRegOperands(Inst, 1);
5874       continue;
5875     }
5876 
5877     // Handle optional arguments
5878     OptionalIdx[Op.getImmTy()] = i;
5879   }
5880 
5881   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5882   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5883   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5884 
5885   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5886 }
5887 
5888 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5889                                 bool IsGdsHardcoded) {
5890   OptionalImmIndexMap OptionalIdx;
5891 
5892   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5893     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5894 
5895     // Add the register arguments
5896     if (Op.isReg()) {
5897       Op.addRegOperands(Inst, 1);
5898       continue;
5899     }
5900 
5901     if (Op.isToken() && Op.getToken() == "gds") {
5902       IsGdsHardcoded = true;
5903       continue;
5904     }
5905 
5906     // Handle optional arguments
5907     OptionalIdx[Op.getImmTy()] = i;
5908   }
5909 
5910   AMDGPUOperand::ImmTy OffsetType =
5911     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5912      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5913      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5914                                                       AMDGPUOperand::ImmTyOffset;
5915 
5916   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5917 
5918   if (!IsGdsHardcoded) {
5919     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5920   }
5921   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5922 }
5923 
5924 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5925   OptionalImmIndexMap OptionalIdx;
5926 
5927   unsigned OperandIdx[4];
5928   unsigned EnMask = 0;
5929   int SrcIdx = 0;
5930 
5931   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5932     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5933 
5934     // Add the register arguments
5935     if (Op.isReg()) {
5936       assert(SrcIdx < 4);
5937       OperandIdx[SrcIdx] = Inst.size();
5938       Op.addRegOperands(Inst, 1);
5939       ++SrcIdx;
5940       continue;
5941     }
5942 
5943     if (Op.isOff()) {
5944       assert(SrcIdx < 4);
5945       OperandIdx[SrcIdx] = Inst.size();
5946       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5947       ++SrcIdx;
5948       continue;
5949     }
5950 
5951     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5952       Op.addImmOperands(Inst, 1);
5953       continue;
5954     }
5955 
5956     if (Op.isToken() && Op.getToken() == "done")
5957       continue;
5958 
5959     // Handle optional arguments
5960     OptionalIdx[Op.getImmTy()] = i;
5961   }
5962 
5963   assert(SrcIdx == 4);
5964 
5965   bool Compr = false;
5966   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5967     Compr = true;
5968     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5969     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5970     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5971   }
5972 
5973   for (auto i = 0; i < SrcIdx; ++i) {
5974     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5975       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5976     }
5977   }
5978 
5979   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5980   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5981 
5982   Inst.addOperand(MCOperand::createImm(EnMask));
5983 }
5984 
5985 //===----------------------------------------------------------------------===//
5986 // s_waitcnt
5987 //===----------------------------------------------------------------------===//
5988 
5989 static bool
5990 encodeCnt(
5991   const AMDGPU::IsaVersion ISA,
5992   int64_t &IntVal,
5993   int64_t CntVal,
5994   bool Saturate,
5995   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5996   unsigned (*decode)(const IsaVersion &Version, unsigned))
5997 {
5998   bool Failed = false;
5999 
6000   IntVal = encode(ISA, IntVal, CntVal);
6001   if (CntVal != decode(ISA, IntVal)) {
6002     if (Saturate) {
6003       IntVal = encode(ISA, IntVal, -1);
6004     } else {
6005       Failed = true;
6006     }
6007   }
6008   return Failed;
6009 }
6010 
6011 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6012 
6013   SMLoc CntLoc = getLoc();
6014   StringRef CntName = getTokenStr();
6015 
6016   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6017       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6018     return false;
6019 
6020   int64_t CntVal;
6021   SMLoc ValLoc = getLoc();
6022   if (!parseExpr(CntVal))
6023     return false;
6024 
6025   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6026 
6027   bool Failed = true;
6028   bool Sat = CntName.endswith("_sat");
6029 
6030   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6031     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6032   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6033     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6034   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6035     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6036   } else {
6037     Error(CntLoc, "invalid counter name " + CntName);
6038     return false;
6039   }
6040 
6041   if (Failed) {
6042     Error(ValLoc, "too large value for " + CntName);
6043     return false;
6044   }
6045 
6046   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6047     return false;
6048 
6049   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6050     if (isToken(AsmToken::EndOfStatement)) {
6051       Error(getLoc(), "expected a counter name");
6052       return false;
6053     }
6054   }
6055 
6056   return true;
6057 }
6058 
6059 OperandMatchResultTy
6060 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6061   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6062   int64_t Waitcnt = getWaitcntBitMask(ISA);
6063   SMLoc S = getLoc();
6064 
6065   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6066     while (!isToken(AsmToken::EndOfStatement)) {
6067       if (!parseCnt(Waitcnt))
6068         return MatchOperand_ParseFail;
6069     }
6070   } else {
6071     if (!parseExpr(Waitcnt))
6072       return MatchOperand_ParseFail;
6073   }
6074 
6075   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6076   return MatchOperand_Success;
6077 }
6078 
6079 bool
6080 AMDGPUOperand::isSWaitCnt() const {
6081   return isImm();
6082 }
6083 
6084 //===----------------------------------------------------------------------===//
6085 // hwreg
6086 //===----------------------------------------------------------------------===//
6087 
6088 bool
6089 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6090                                 OperandInfoTy &Offset,
6091                                 OperandInfoTy &Width) {
6092   using namespace llvm::AMDGPU::Hwreg;
6093 
6094   // The register may be specified by name or using a numeric code
6095   HwReg.Loc = getLoc();
6096   if (isToken(AsmToken::Identifier) &&
6097       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6098     HwReg.IsSymbolic = true;
6099     lex(); // skip register name
6100   } else if (!parseExpr(HwReg.Id, "a register name")) {
6101     return false;
6102   }
6103 
6104   if (trySkipToken(AsmToken::RParen))
6105     return true;
6106 
6107   // parse optional params
6108   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6109     return false;
6110 
6111   Offset.Loc = getLoc();
6112   if (!parseExpr(Offset.Id))
6113     return false;
6114 
6115   if (!skipToken(AsmToken::Comma, "expected a comma"))
6116     return false;
6117 
6118   Width.Loc = getLoc();
6119   return parseExpr(Width.Id) &&
6120          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6121 }
6122 
6123 bool
6124 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6125                                const OperandInfoTy &Offset,
6126                                const OperandInfoTy &Width) {
6127 
6128   using namespace llvm::AMDGPU::Hwreg;
6129 
6130   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6131     Error(HwReg.Loc,
6132           "specified hardware register is not supported on this GPU");
6133     return false;
6134   }
6135   if (!isValidHwreg(HwReg.Id)) {
6136     Error(HwReg.Loc,
6137           "invalid code of hardware register: only 6-bit values are legal");
6138     return false;
6139   }
6140   if (!isValidHwregOffset(Offset.Id)) {
6141     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6142     return false;
6143   }
6144   if (!isValidHwregWidth(Width.Id)) {
6145     Error(Width.Loc,
6146           "invalid bitfield width: only values from 1 to 32 are legal");
6147     return false;
6148   }
6149   return true;
6150 }
6151 
6152 OperandMatchResultTy
6153 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6154   using namespace llvm::AMDGPU::Hwreg;
6155 
6156   int64_t ImmVal = 0;
6157   SMLoc Loc = getLoc();
6158 
6159   if (trySkipId("hwreg", AsmToken::LParen)) {
6160     OperandInfoTy HwReg(ID_UNKNOWN_);
6161     OperandInfoTy Offset(OFFSET_DEFAULT_);
6162     OperandInfoTy Width(WIDTH_DEFAULT_);
6163     if (parseHwregBody(HwReg, Offset, Width) &&
6164         validateHwreg(HwReg, Offset, Width)) {
6165       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6166     } else {
6167       return MatchOperand_ParseFail;
6168     }
6169   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6170     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6171       Error(Loc, "invalid immediate: only 16-bit values are legal");
6172       return MatchOperand_ParseFail;
6173     }
6174   } else {
6175     return MatchOperand_ParseFail;
6176   }
6177 
6178   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6179   return MatchOperand_Success;
6180 }
6181 
6182 bool AMDGPUOperand::isHwreg() const {
6183   return isImmTy(ImmTyHwreg);
6184 }
6185 
6186 //===----------------------------------------------------------------------===//
6187 // sendmsg
6188 //===----------------------------------------------------------------------===//
6189 
6190 bool
6191 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6192                                   OperandInfoTy &Op,
6193                                   OperandInfoTy &Stream) {
6194   using namespace llvm::AMDGPU::SendMsg;
6195 
6196   Msg.Loc = getLoc();
6197   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6198     Msg.IsSymbolic = true;
6199     lex(); // skip message name
6200   } else if (!parseExpr(Msg.Id, "a message name")) {
6201     return false;
6202   }
6203 
6204   if (trySkipToken(AsmToken::Comma)) {
6205     Op.IsDefined = true;
6206     Op.Loc = getLoc();
6207     if (isToken(AsmToken::Identifier) &&
6208         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6209       lex(); // skip operation name
6210     } else if (!parseExpr(Op.Id, "an operation name")) {
6211       return false;
6212     }
6213 
6214     if (trySkipToken(AsmToken::Comma)) {
6215       Stream.IsDefined = true;
6216       Stream.Loc = getLoc();
6217       if (!parseExpr(Stream.Id))
6218         return false;
6219     }
6220   }
6221 
6222   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6223 }
6224 
6225 bool
6226 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6227                                  const OperandInfoTy &Op,
6228                                  const OperandInfoTy &Stream) {
6229   using namespace llvm::AMDGPU::SendMsg;
6230 
6231   // Validation strictness depends on whether message is specified
6232   // in a symbolc or in a numeric form. In the latter case
6233   // only encoding possibility is checked.
6234   bool Strict = Msg.IsSymbolic;
6235 
6236   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6237     Error(Msg.Loc, "invalid message id");
6238     return false;
6239   }
6240   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6241     if (Op.IsDefined) {
6242       Error(Op.Loc, "message does not support operations");
6243     } else {
6244       Error(Msg.Loc, "missing message operation");
6245     }
6246     return false;
6247   }
6248   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6249     Error(Op.Loc, "invalid operation id");
6250     return false;
6251   }
6252   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6253     Error(Stream.Loc, "message operation does not support streams");
6254     return false;
6255   }
6256   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6257     Error(Stream.Loc, "invalid message stream id");
6258     return false;
6259   }
6260   return true;
6261 }
6262 
6263 OperandMatchResultTy
6264 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6265   using namespace llvm::AMDGPU::SendMsg;
6266 
6267   int64_t ImmVal = 0;
6268   SMLoc Loc = getLoc();
6269 
6270   if (trySkipId("sendmsg", AsmToken::LParen)) {
6271     OperandInfoTy Msg(ID_UNKNOWN_);
6272     OperandInfoTy Op(OP_NONE_);
6273     OperandInfoTy Stream(STREAM_ID_NONE_);
6274     if (parseSendMsgBody(Msg, Op, Stream) &&
6275         validateSendMsg(Msg, Op, Stream)) {
6276       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6277     } else {
6278       return MatchOperand_ParseFail;
6279     }
6280   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6281     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6282       Error(Loc, "invalid immediate: only 16-bit values are legal");
6283       return MatchOperand_ParseFail;
6284     }
6285   } else {
6286     return MatchOperand_ParseFail;
6287   }
6288 
6289   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6290   return MatchOperand_Success;
6291 }
6292 
6293 bool AMDGPUOperand::isSendMsg() const {
6294   return isImmTy(ImmTySendMsg);
6295 }
6296 
6297 //===----------------------------------------------------------------------===//
6298 // v_interp
6299 //===----------------------------------------------------------------------===//
6300 
6301 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6302   StringRef Str;
6303   SMLoc S = getLoc();
6304 
6305   if (!parseId(Str))
6306     return MatchOperand_NoMatch;
6307 
6308   int Slot = StringSwitch<int>(Str)
6309     .Case("p10", 0)
6310     .Case("p20", 1)
6311     .Case("p0", 2)
6312     .Default(-1);
6313 
6314   if (Slot == -1) {
6315     Error(S, "invalid interpolation slot");
6316     return MatchOperand_ParseFail;
6317   }
6318 
6319   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6320                                               AMDGPUOperand::ImmTyInterpSlot));
6321   return MatchOperand_Success;
6322 }
6323 
6324 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6325   StringRef Str;
6326   SMLoc S = getLoc();
6327 
6328   if (!parseId(Str))
6329     return MatchOperand_NoMatch;
6330 
6331   if (!Str.startswith("attr")) {
6332     Error(S, "invalid interpolation attribute");
6333     return MatchOperand_ParseFail;
6334   }
6335 
6336   StringRef Chan = Str.take_back(2);
6337   int AttrChan = StringSwitch<int>(Chan)
6338     .Case(".x", 0)
6339     .Case(".y", 1)
6340     .Case(".z", 2)
6341     .Case(".w", 3)
6342     .Default(-1);
6343   if (AttrChan == -1) {
6344     Error(S, "invalid or missing interpolation attribute channel");
6345     return MatchOperand_ParseFail;
6346   }
6347 
6348   Str = Str.drop_back(2).drop_front(4);
6349 
6350   uint8_t Attr;
6351   if (Str.getAsInteger(10, Attr)) {
6352     Error(S, "invalid or missing interpolation attribute number");
6353     return MatchOperand_ParseFail;
6354   }
6355 
6356   if (Attr > 63) {
6357     Error(S, "out of bounds interpolation attribute number");
6358     return MatchOperand_ParseFail;
6359   }
6360 
6361   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6362 
6363   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6364                                               AMDGPUOperand::ImmTyInterpAttr));
6365   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6366                                               AMDGPUOperand::ImmTyAttrChan));
6367   return MatchOperand_Success;
6368 }
6369 
6370 //===----------------------------------------------------------------------===//
6371 // exp
6372 //===----------------------------------------------------------------------===//
6373 
6374 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6375   using namespace llvm::AMDGPU::Exp;
6376 
6377   StringRef Str;
6378   SMLoc S = getLoc();
6379 
6380   if (!parseId(Str))
6381     return MatchOperand_NoMatch;
6382 
6383   unsigned Id = getTgtId(Str);
6384   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6385     Error(S, (Id == ET_INVALID) ?
6386                 "invalid exp target" :
6387                 "exp target is not supported on this GPU");
6388     return MatchOperand_ParseFail;
6389   }
6390 
6391   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6392                                               AMDGPUOperand::ImmTyExpTgt));
6393   return MatchOperand_Success;
6394 }
6395 
6396 //===----------------------------------------------------------------------===//
6397 // parser helpers
6398 //===----------------------------------------------------------------------===//
6399 
6400 bool
6401 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6402   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6403 }
6404 
6405 bool
6406 AMDGPUAsmParser::isId(const StringRef Id) const {
6407   return isId(getToken(), Id);
6408 }
6409 
6410 bool
6411 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6412   return getTokenKind() == Kind;
6413 }
6414 
6415 bool
6416 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6417   if (isId(Id)) {
6418     lex();
6419     return true;
6420   }
6421   return false;
6422 }
6423 
6424 bool
6425 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6426   if (isToken(AsmToken::Identifier)) {
6427     StringRef Tok = getTokenStr();
6428     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6429       lex();
6430       return true;
6431     }
6432   }
6433   return false;
6434 }
6435 
6436 bool
6437 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6438   if (isId(Id) && peekToken().is(Kind)) {
6439     lex();
6440     lex();
6441     return true;
6442   }
6443   return false;
6444 }
6445 
6446 bool
6447 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6448   if (isToken(Kind)) {
6449     lex();
6450     return true;
6451   }
6452   return false;
6453 }
6454 
6455 bool
6456 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6457                            const StringRef ErrMsg) {
6458   if (!trySkipToken(Kind)) {
6459     Error(getLoc(), ErrMsg);
6460     return false;
6461   }
6462   return true;
6463 }
6464 
6465 bool
6466 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6467   SMLoc S = getLoc();
6468 
6469   const MCExpr *Expr;
6470   if (Parser.parseExpression(Expr))
6471     return false;
6472 
6473   if (Expr->evaluateAsAbsolute(Imm))
6474     return true;
6475 
6476   if (Expected.empty()) {
6477     Error(S, "expected absolute expression");
6478   } else {
6479     Error(S, Twine("expected ", Expected) +
6480              Twine(" or an absolute expression"));
6481   }
6482   return false;
6483 }
6484 
6485 bool
6486 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6487   SMLoc S = getLoc();
6488 
6489   const MCExpr *Expr;
6490   if (Parser.parseExpression(Expr))
6491     return false;
6492 
6493   int64_t IntVal;
6494   if (Expr->evaluateAsAbsolute(IntVal)) {
6495     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6496   } else {
6497     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6498   }
6499   return true;
6500 }
6501 
6502 bool
6503 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6504   if (isToken(AsmToken::String)) {
6505     Val = getToken().getStringContents();
6506     lex();
6507     return true;
6508   } else {
6509     Error(getLoc(), ErrMsg);
6510     return false;
6511   }
6512 }
6513 
6514 bool
6515 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6516   if (isToken(AsmToken::Identifier)) {
6517     Val = getTokenStr();
6518     lex();
6519     return true;
6520   } else {
6521     if (!ErrMsg.empty())
6522       Error(getLoc(), ErrMsg);
6523     return false;
6524   }
6525 }
6526 
6527 AsmToken
6528 AMDGPUAsmParser::getToken() const {
6529   return Parser.getTok();
6530 }
6531 
6532 AsmToken
6533 AMDGPUAsmParser::peekToken() {
6534   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6535 }
6536 
6537 void
6538 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6539   auto TokCount = getLexer().peekTokens(Tokens);
6540 
6541   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6542     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6543 }
6544 
6545 AsmToken::TokenKind
6546 AMDGPUAsmParser::getTokenKind() const {
6547   return getLexer().getKind();
6548 }
6549 
6550 SMLoc
6551 AMDGPUAsmParser::getLoc() const {
6552   return getToken().getLoc();
6553 }
6554 
6555 StringRef
6556 AMDGPUAsmParser::getTokenStr() const {
6557   return getToken().getString();
6558 }
6559 
6560 void
6561 AMDGPUAsmParser::lex() {
6562   Parser.Lex();
6563 }
6564 
6565 SMLoc
6566 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6567                                const OperandVector &Operands) const {
6568   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6569     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6570     if (Test(Op))
6571       return Op.getStartLoc();
6572   }
6573   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6574 }
6575 
6576 SMLoc
6577 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6578                            const OperandVector &Operands) const {
6579   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6580   return getOperandLoc(Test, Operands);
6581 }
6582 
6583 SMLoc
6584 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6585                            const OperandVector &Operands) const {
6586   auto Test = [=](const AMDGPUOperand& Op) {
6587     return Op.isRegKind() && Op.getReg() == Reg;
6588   };
6589   return getOperandLoc(Test, Operands);
6590 }
6591 
6592 SMLoc
6593 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6594   auto Test = [](const AMDGPUOperand& Op) {
6595     return Op.IsImmKindLiteral() || Op.isExpr();
6596   };
6597   return getOperandLoc(Test, Operands);
6598 }
6599 
6600 SMLoc
6601 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6602   auto Test = [](const AMDGPUOperand& Op) {
6603     return Op.isImmKindConst();
6604   };
6605   return getOperandLoc(Test, Operands);
6606 }
6607 
6608 //===----------------------------------------------------------------------===//
6609 // swizzle
6610 //===----------------------------------------------------------------------===//
6611 
6612 LLVM_READNONE
6613 static unsigned
6614 encodeBitmaskPerm(const unsigned AndMask,
6615                   const unsigned OrMask,
6616                   const unsigned XorMask) {
6617   using namespace llvm::AMDGPU::Swizzle;
6618 
6619   return BITMASK_PERM_ENC |
6620          (AndMask << BITMASK_AND_SHIFT) |
6621          (OrMask  << BITMASK_OR_SHIFT)  |
6622          (XorMask << BITMASK_XOR_SHIFT);
6623 }
6624 
6625 bool
6626 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6627                                      const unsigned MinVal,
6628                                      const unsigned MaxVal,
6629                                      const StringRef ErrMsg,
6630                                      SMLoc &Loc) {
6631   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6632     return false;
6633   }
6634   Loc = getLoc();
6635   if (!parseExpr(Op)) {
6636     return false;
6637   }
6638   if (Op < MinVal || Op > MaxVal) {
6639     Error(Loc, ErrMsg);
6640     return false;
6641   }
6642 
6643   return true;
6644 }
6645 
6646 bool
6647 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6648                                       const unsigned MinVal,
6649                                       const unsigned MaxVal,
6650                                       const StringRef ErrMsg) {
6651   SMLoc Loc;
6652   for (unsigned i = 0; i < OpNum; ++i) {
6653     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6654       return false;
6655   }
6656 
6657   return true;
6658 }
6659 
6660 bool
6661 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6662   using namespace llvm::AMDGPU::Swizzle;
6663 
6664   int64_t Lane[LANE_NUM];
6665   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6666                            "expected a 2-bit lane id")) {
6667     Imm = QUAD_PERM_ENC;
6668     for (unsigned I = 0; I < LANE_NUM; ++I) {
6669       Imm |= Lane[I] << (LANE_SHIFT * I);
6670     }
6671     return true;
6672   }
6673   return false;
6674 }
6675 
6676 bool
6677 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6678   using namespace llvm::AMDGPU::Swizzle;
6679 
6680   SMLoc Loc;
6681   int64_t GroupSize;
6682   int64_t LaneIdx;
6683 
6684   if (!parseSwizzleOperand(GroupSize,
6685                            2, 32,
6686                            "group size must be in the interval [2,32]",
6687                            Loc)) {
6688     return false;
6689   }
6690   if (!isPowerOf2_64(GroupSize)) {
6691     Error(Loc, "group size must be a power of two");
6692     return false;
6693   }
6694   if (parseSwizzleOperand(LaneIdx,
6695                           0, GroupSize - 1,
6696                           "lane id must be in the interval [0,group size - 1]",
6697                           Loc)) {
6698     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6699     return true;
6700   }
6701   return false;
6702 }
6703 
6704 bool
6705 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6706   using namespace llvm::AMDGPU::Swizzle;
6707 
6708   SMLoc Loc;
6709   int64_t GroupSize;
6710 
6711   if (!parseSwizzleOperand(GroupSize,
6712                            2, 32,
6713                            "group size must be in the interval [2,32]",
6714                            Loc)) {
6715     return false;
6716   }
6717   if (!isPowerOf2_64(GroupSize)) {
6718     Error(Loc, "group size must be a power of two");
6719     return false;
6720   }
6721 
6722   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6723   return true;
6724 }
6725 
6726 bool
6727 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6728   using namespace llvm::AMDGPU::Swizzle;
6729 
6730   SMLoc Loc;
6731   int64_t GroupSize;
6732 
6733   if (!parseSwizzleOperand(GroupSize,
6734                            1, 16,
6735                            "group size must be in the interval [1,16]",
6736                            Loc)) {
6737     return false;
6738   }
6739   if (!isPowerOf2_64(GroupSize)) {
6740     Error(Loc, "group size must be a power of two");
6741     return false;
6742   }
6743 
6744   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6745   return true;
6746 }
6747 
6748 bool
6749 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6750   using namespace llvm::AMDGPU::Swizzle;
6751 
6752   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6753     return false;
6754   }
6755 
6756   StringRef Ctl;
6757   SMLoc StrLoc = getLoc();
6758   if (!parseString(Ctl)) {
6759     return false;
6760   }
6761   if (Ctl.size() != BITMASK_WIDTH) {
6762     Error(StrLoc, "expected a 5-character mask");
6763     return false;
6764   }
6765 
6766   unsigned AndMask = 0;
6767   unsigned OrMask = 0;
6768   unsigned XorMask = 0;
6769 
6770   for (size_t i = 0; i < Ctl.size(); ++i) {
6771     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6772     switch(Ctl[i]) {
6773     default:
6774       Error(StrLoc, "invalid mask");
6775       return false;
6776     case '0':
6777       break;
6778     case '1':
6779       OrMask |= Mask;
6780       break;
6781     case 'p':
6782       AndMask |= Mask;
6783       break;
6784     case 'i':
6785       AndMask |= Mask;
6786       XorMask |= Mask;
6787       break;
6788     }
6789   }
6790 
6791   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6792   return true;
6793 }
6794 
6795 bool
6796 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6797 
6798   SMLoc OffsetLoc = getLoc();
6799 
6800   if (!parseExpr(Imm, "a swizzle macro")) {
6801     return false;
6802   }
6803   if (!isUInt<16>(Imm)) {
6804     Error(OffsetLoc, "expected a 16-bit offset");
6805     return false;
6806   }
6807   return true;
6808 }
6809 
6810 bool
6811 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6812   using namespace llvm::AMDGPU::Swizzle;
6813 
6814   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6815 
6816     SMLoc ModeLoc = getLoc();
6817     bool Ok = false;
6818 
6819     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6820       Ok = parseSwizzleQuadPerm(Imm);
6821     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6822       Ok = parseSwizzleBitmaskPerm(Imm);
6823     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6824       Ok = parseSwizzleBroadcast(Imm);
6825     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6826       Ok = parseSwizzleSwap(Imm);
6827     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6828       Ok = parseSwizzleReverse(Imm);
6829     } else {
6830       Error(ModeLoc, "expected a swizzle mode");
6831     }
6832 
6833     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
6834   }
6835 
6836   return false;
6837 }
6838 
6839 OperandMatchResultTy
6840 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6841   SMLoc S = getLoc();
6842   int64_t Imm = 0;
6843 
6844   if (trySkipId("offset")) {
6845 
6846     bool Ok = false;
6847     if (skipToken(AsmToken::Colon, "expected a colon")) {
6848       if (trySkipId("swizzle")) {
6849         Ok = parseSwizzleMacro(Imm);
6850       } else {
6851         Ok = parseSwizzleOffset(Imm);
6852       }
6853     }
6854 
6855     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6856 
6857     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6858   } else {
6859     // Swizzle "offset" operand is optional.
6860     // If it is omitted, try parsing other optional operands.
6861     return parseOptionalOpr(Operands);
6862   }
6863 }
6864 
6865 bool
6866 AMDGPUOperand::isSwizzle() const {
6867   return isImmTy(ImmTySwizzle);
6868 }
6869 
6870 //===----------------------------------------------------------------------===//
6871 // VGPR Index Mode
6872 //===----------------------------------------------------------------------===//
6873 
6874 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6875 
6876   using namespace llvm::AMDGPU::VGPRIndexMode;
6877 
6878   if (trySkipToken(AsmToken::RParen)) {
6879     return OFF;
6880   }
6881 
6882   int64_t Imm = 0;
6883 
6884   while (true) {
6885     unsigned Mode = 0;
6886     SMLoc S = getLoc();
6887 
6888     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6889       if (trySkipId(IdSymbolic[ModeId])) {
6890         Mode = 1 << ModeId;
6891         break;
6892       }
6893     }
6894 
6895     if (Mode == 0) {
6896       Error(S, (Imm == 0)?
6897                "expected a VGPR index mode or a closing parenthesis" :
6898                "expected a VGPR index mode");
6899       return UNDEF;
6900     }
6901 
6902     if (Imm & Mode) {
6903       Error(S, "duplicate VGPR index mode");
6904       return UNDEF;
6905     }
6906     Imm |= Mode;
6907 
6908     if (trySkipToken(AsmToken::RParen))
6909       break;
6910     if (!skipToken(AsmToken::Comma,
6911                    "expected a comma or a closing parenthesis"))
6912       return UNDEF;
6913   }
6914 
6915   return Imm;
6916 }
6917 
6918 OperandMatchResultTy
6919 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6920 
6921   using namespace llvm::AMDGPU::VGPRIndexMode;
6922 
6923   int64_t Imm = 0;
6924   SMLoc S = getLoc();
6925 
6926   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6927     Imm = parseGPRIdxMacro();
6928     if (Imm == UNDEF)
6929       return MatchOperand_ParseFail;
6930   } else {
6931     if (getParser().parseAbsoluteExpression(Imm))
6932       return MatchOperand_ParseFail;
6933     if (Imm < 0 || !isUInt<4>(Imm)) {
6934       Error(S, "invalid immediate: only 4-bit values are legal");
6935       return MatchOperand_ParseFail;
6936     }
6937   }
6938 
6939   Operands.push_back(
6940       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6941   return MatchOperand_Success;
6942 }
6943 
6944 bool AMDGPUOperand::isGPRIdxMode() const {
6945   return isImmTy(ImmTyGprIdxMode);
6946 }
6947 
6948 //===----------------------------------------------------------------------===//
6949 // sopp branch targets
6950 //===----------------------------------------------------------------------===//
6951 
6952 OperandMatchResultTy
6953 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6954 
6955   // Make sure we are not parsing something
6956   // that looks like a label or an expression but is not.
6957   // This will improve error messages.
6958   if (isRegister() || isModifier())
6959     return MatchOperand_NoMatch;
6960 
6961   if (!parseExpr(Operands))
6962     return MatchOperand_ParseFail;
6963 
6964   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6965   assert(Opr.isImm() || Opr.isExpr());
6966   SMLoc Loc = Opr.getStartLoc();
6967 
6968   // Currently we do not support arbitrary expressions as branch targets.
6969   // Only labels and absolute expressions are accepted.
6970   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6971     Error(Loc, "expected an absolute expression or a label");
6972   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6973     Error(Loc, "expected a 16-bit signed jump offset");
6974   }
6975 
6976   return MatchOperand_Success;
6977 }
6978 
6979 //===----------------------------------------------------------------------===//
6980 // Boolean holding registers
6981 //===----------------------------------------------------------------------===//
6982 
6983 OperandMatchResultTy
6984 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6985   return parseReg(Operands);
6986 }
6987 
6988 //===----------------------------------------------------------------------===//
6989 // mubuf
6990 //===----------------------------------------------------------------------===//
6991 
6992 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6993   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6994 }
6995 
6996 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6997                                    const OperandVector &Operands,
6998                                    bool IsAtomic,
6999                                    bool IsLds) {
7000   bool IsLdsOpcode = IsLds;
7001   bool HasLdsModifier = false;
7002   OptionalImmIndexMap OptionalIdx;
7003   unsigned FirstOperandIdx = 1;
7004   bool IsAtomicReturn = false;
7005 
7006   if (IsAtomic) {
7007     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7008       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7009       if (!Op.isCPol())
7010         continue;
7011       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7012       break;
7013     }
7014 
7015     if (!IsAtomicReturn) {
7016       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7017       if (NewOpc != -1)
7018         Inst.setOpcode(NewOpc);
7019     }
7020 
7021     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7022                       SIInstrFlags::IsAtomicRet;
7023   }
7024 
7025   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7026     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7027 
7028     // Add the register arguments
7029     if (Op.isReg()) {
7030       Op.addRegOperands(Inst, 1);
7031       // Insert a tied src for atomic return dst.
7032       // This cannot be postponed as subsequent calls to
7033       // addImmOperands rely on correct number of MC operands.
7034       if (IsAtomicReturn && i == FirstOperandIdx)
7035         Op.addRegOperands(Inst, 1);
7036       continue;
7037     }
7038 
7039     // Handle the case where soffset is an immediate
7040     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7041       Op.addImmOperands(Inst, 1);
7042       continue;
7043     }
7044 
7045     HasLdsModifier |= Op.isLDS();
7046 
7047     // Handle tokens like 'offen' which are sometimes hard-coded into the
7048     // asm string.  There are no MCInst operands for these.
7049     if (Op.isToken()) {
7050       continue;
7051     }
7052     assert(Op.isImm());
7053 
7054     // Handle optional arguments
7055     OptionalIdx[Op.getImmTy()] = i;
7056   }
7057 
7058   // This is a workaround for an llvm quirk which may result in an
7059   // incorrect instruction selection. Lds and non-lds versions of
7060   // MUBUF instructions are identical except that lds versions
7061   // have mandatory 'lds' modifier. However this modifier follows
7062   // optional modifiers and llvm asm matcher regards this 'lds'
7063   // modifier as an optional one. As a result, an lds version
7064   // of opcode may be selected even if it has no 'lds' modifier.
7065   if (IsLdsOpcode && !HasLdsModifier) {
7066     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7067     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7068       Inst.setOpcode(NoLdsOpcode);
7069       IsLdsOpcode = false;
7070     }
7071   }
7072 
7073   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7074   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7075 
7076   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7077     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7078   }
7079   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7080 }
7081 
7082 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7083   OptionalImmIndexMap OptionalIdx;
7084 
7085   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7086     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7087 
7088     // Add the register arguments
7089     if (Op.isReg()) {
7090       Op.addRegOperands(Inst, 1);
7091       continue;
7092     }
7093 
7094     // Handle the case where soffset is an immediate
7095     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7096       Op.addImmOperands(Inst, 1);
7097       continue;
7098     }
7099 
7100     // Handle tokens like 'offen' which are sometimes hard-coded into the
7101     // asm string.  There are no MCInst operands for these.
7102     if (Op.isToken()) {
7103       continue;
7104     }
7105     assert(Op.isImm());
7106 
7107     // Handle optional arguments
7108     OptionalIdx[Op.getImmTy()] = i;
7109   }
7110 
7111   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7112                         AMDGPUOperand::ImmTyOffset);
7113   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7114   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7115   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7116   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7117 }
7118 
7119 //===----------------------------------------------------------------------===//
7120 // mimg
7121 //===----------------------------------------------------------------------===//
7122 
7123 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7124                               bool IsAtomic) {
7125   unsigned I = 1;
7126   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7127   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7128     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7129   }
7130 
7131   if (IsAtomic) {
7132     // Add src, same as dst
7133     assert(Desc.getNumDefs() == 1);
7134     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7135   }
7136 
7137   OptionalImmIndexMap OptionalIdx;
7138 
7139   for (unsigned E = Operands.size(); I != E; ++I) {
7140     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7141 
7142     // Add the register arguments
7143     if (Op.isReg()) {
7144       Op.addRegOperands(Inst, 1);
7145     } else if (Op.isImmModifier()) {
7146       OptionalIdx[Op.getImmTy()] = I;
7147     } else if (!Op.isToken()) {
7148       llvm_unreachable("unexpected operand type");
7149     }
7150   }
7151 
7152   bool IsGFX10Plus = isGFX10Plus();
7153 
7154   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7155   if (IsGFX10Plus)
7156     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7157   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7158   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7159   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7160   if (IsGFX10Plus)
7161     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7162   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7163     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7164   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7165   if (!IsGFX10Plus)
7166     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7167   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7168 }
7169 
7170 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7171   cvtMIMG(Inst, Operands, true);
7172 }
7173 
7174 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7175   OptionalImmIndexMap OptionalIdx;
7176   bool IsAtomicReturn = false;
7177 
7178   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7179     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7180     if (!Op.isCPol())
7181       continue;
7182     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7183     break;
7184   }
7185 
7186   if (!IsAtomicReturn) {
7187     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7188     if (NewOpc != -1)
7189       Inst.setOpcode(NewOpc);
7190   }
7191 
7192   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7193                     SIInstrFlags::IsAtomicRet;
7194 
7195   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7196     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7197 
7198     // Add the register arguments
7199     if (Op.isReg()) {
7200       Op.addRegOperands(Inst, 1);
7201       if (IsAtomicReturn && i == 1)
7202         Op.addRegOperands(Inst, 1);
7203       continue;
7204     }
7205 
7206     // Handle the case where soffset is an immediate
7207     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7208       Op.addImmOperands(Inst, 1);
7209       continue;
7210     }
7211 
7212     // Handle tokens like 'offen' which are sometimes hard-coded into the
7213     // asm string.  There are no MCInst operands for these.
7214     if (Op.isToken()) {
7215       continue;
7216     }
7217     assert(Op.isImm());
7218 
7219     // Handle optional arguments
7220     OptionalIdx[Op.getImmTy()] = i;
7221   }
7222 
7223   if ((int)Inst.getNumOperands() <=
7224       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7225     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7226   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7227 }
7228 
7229 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7230                                       const OperandVector &Operands) {
7231   for (unsigned I = 1; I < Operands.size(); ++I) {
7232     auto &Operand = (AMDGPUOperand &)*Operands[I];
7233     if (Operand.isReg())
7234       Operand.addRegOperands(Inst, 1);
7235   }
7236 
7237   Inst.addOperand(MCOperand::createImm(1)); // a16
7238 }
7239 
7240 //===----------------------------------------------------------------------===//
7241 // smrd
7242 //===----------------------------------------------------------------------===//
7243 
7244 bool AMDGPUOperand::isSMRDOffset8() const {
7245   return isImm() && isUInt<8>(getImm());
7246 }
7247 
7248 bool AMDGPUOperand::isSMEMOffset() const {
7249   return isImm(); // Offset range is checked later by validator.
7250 }
7251 
7252 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7253   // 32-bit literals are only supported on CI and we only want to use them
7254   // when the offset is > 8-bits.
7255   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7256 }
7257 
7258 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7259   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7260 }
7261 
7262 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7263   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7264 }
7265 
7266 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7267   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7268 }
7269 
7270 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7271   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7272 }
7273 
7274 //===----------------------------------------------------------------------===//
7275 // vop3
7276 //===----------------------------------------------------------------------===//
7277 
/// Translate a "mul:N" output-modifier value into its encoded form.
///
/// Accepts 1, 2 and 4, rewriting \p Mul in place to 0, 1 and 2 respectively.
/// \returns false (leaving \p Mul untouched) for any other value.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
    Mul = 0;
    return true;
  case 2:
    Mul = 1;
    return true;
  case 4:
    Mul = 2;
    return true;
  default:
    return false;
  }
}
7285 
/// Translate a "div:N" output-modifier value into its encoded form.
///
/// Accepts 1 (rewritten to 0) and 2 (rewritten to 3).
/// \returns false (leaving \p Div untouched) for any other value.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
7299 
// Validate a bound_ctrl value and rewrite it to its encoding.
// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
// Any other value is rejected and left unchanged.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  const bool IsAccepted = (BoundCtrl == 0) || (BoundCtrl == 1);
  if (IsAccepted)
    BoundCtrl = 1;
  return IsAccepted;
}
7310 
7311 // Note: the order in this table matches the order of operands in AsmString.
7312 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7313   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7314   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7315   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7316   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7317   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7318   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7319   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7320   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7321   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7322   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7323   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7324   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7325   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7326   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7327   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7328   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7329   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7330   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7331   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7332   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7333   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7334   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7335   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7336   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7337   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7338   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7339   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7340   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7341   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7342   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7343   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7344   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7345   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7346   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7347   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7348   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7349   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7350   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7351   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7352   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7353   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7354 };
7355 
7356 void AMDGPUAsmParser::onBeginOfFile() {
7357   if (!getParser().getStreamer().getTargetStreamer() ||
7358       getSTI().getTargetTriple().getArch() == Triple::r600)
7359     return;
7360 
7361   if (!getTargetStreamer().getTargetID())
7362     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7363 
7364   if (isHsaAbiVersion3Or4(&getSTI()))
7365     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7366 }
7367 
7368 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7369 
7370   OperandMatchResultTy res = parseOptionalOpr(Operands);
7371 
7372   // This is a hack to enable hardcoded mandatory operands which follow
7373   // optional operands.
7374   //
7375   // Current design assumes that all operands after the first optional operand
7376   // are also optional. However implementation of some instructions violates
7377   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
7378   //
7379   // To alleviate this problem, we have to (implicitly) parse extra operands
7380   // to make sure autogenerated parser of custom operands never hit hardcoded
7381   // mandatory operands.
7382 
7383   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7384     if (res != MatchOperand_Success ||
7385         isToken(AsmToken::EndOfStatement))
7386       break;
7387 
7388     trySkipToken(AsmToken::Comma);
7389     res = parseOptionalOpr(Operands);
7390   }
7391 
7392   return res;
7393 }
7394 
7395 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7396   OperandMatchResultTy res;
7397   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7398     // try to parse any optional operand here
7399     if (Op.IsBit) {
7400       res = parseNamedBit(Op.Name, Operands, Op.Type);
7401     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7402       res = parseOModOperand(Operands);
7403     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7404                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7405                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7406       res = parseSDWASel(Operands, Op.Name, Op.Type);
7407     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7408       res = parseSDWADstUnused(Operands);
7409     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7410                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7411                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7412                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7413       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7414                                         Op.ConvertResult);
7415     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7416       res = parseDim(Operands);
7417     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7418       res = parseCPol(Operands);
7419     } else {
7420       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7421     }
7422     if (res != MatchOperand_NoMatch) {
7423       return res;
7424     }
7425   }
7426   return MatchOperand_NoMatch;
7427 }
7428 
7429 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7430   StringRef Name = getTokenStr();
7431   if (Name == "mul") {
7432     return parseIntWithPrefix("mul", Operands,
7433                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7434   }
7435 
7436   if (Name == "div") {
7437     return parseIntWithPrefix("div", Operands,
7438                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7439   }
7440 
7441   return MatchOperand_NoMatch;
7442 }
7443 
7444 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7445   cvtVOP3P(Inst, Operands);
7446 
7447   int Opc = Inst.getOpcode();
7448 
7449   int SrcNum;
7450   const int Ops[] = { AMDGPU::OpName::src0,
7451                       AMDGPU::OpName::src1,
7452                       AMDGPU::OpName::src2 };
7453   for (SrcNum = 0;
7454        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7455        ++SrcNum);
7456   assert(SrcNum > 0);
7457 
7458   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7459   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7460 
7461   if ((OpSel & (1 << SrcNum)) != 0) {
7462     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7463     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7464     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7465   }
7466 }
7467 
7468 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7469       // 1. This operand is input modifiers
7470   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7471       // 2. This is not last operand
7472       && Desc.NumOperands > (OpNum + 1)
7473       // 3. Next operand is register class
7474       && Desc.OpInfo[OpNum + 1].RegClass != -1
7475       // 4. Next register is not tied to any other operand
7476       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7477 }
7478 
7479 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7480 {
7481   OptionalImmIndexMap OptionalIdx;
7482   unsigned Opc = Inst.getOpcode();
7483 
7484   unsigned I = 1;
7485   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7486   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7487     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7488   }
7489 
7490   for (unsigned E = Operands.size(); I != E; ++I) {
7491     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7492     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7493       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7494     } else if (Op.isInterpSlot() ||
7495                Op.isInterpAttr() ||
7496                Op.isAttrChan()) {
7497       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7498     } else if (Op.isImmModifier()) {
7499       OptionalIdx[Op.getImmTy()] = I;
7500     } else {
7501       llvm_unreachable("unhandled operand type");
7502     }
7503   }
7504 
7505   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7506     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7507   }
7508 
7509   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7510     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7511   }
7512 
7513   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7514     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7515   }
7516 }
7517 
7518 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7519                               OptionalImmIndexMap &OptionalIdx) {
7520   unsigned Opc = Inst.getOpcode();
7521 
7522   unsigned I = 1;
7523   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7524   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7525     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7526   }
7527 
7528   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7529     // This instruction has src modifiers
7530     for (unsigned E = Operands.size(); I != E; ++I) {
7531       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7532       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7533         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7534       } else if (Op.isImmModifier()) {
7535         OptionalIdx[Op.getImmTy()] = I;
7536       } else if (Op.isRegOrImm()) {
7537         Op.addRegOrImmOperands(Inst, 1);
7538       } else {
7539         llvm_unreachable("unhandled operand type");
7540       }
7541     }
7542   } else {
7543     // No src modifiers
7544     for (unsigned E = Operands.size(); I != E; ++I) {
7545       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7546       if (Op.isMod()) {
7547         OptionalIdx[Op.getImmTy()] = I;
7548       } else {
7549         Op.addRegOrImmOperands(Inst, 1);
7550       }
7551     }
7552   }
7553 
7554   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7555     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7556   }
7557 
7558   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7559     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7560   }
7561 
7562   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7563   // it has src2 register operand that is tied to dst operand
7564   // we don't allow modifiers for this operand in assembler so src2_modifiers
7565   // should be 0.
7566   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7567       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7568       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7569       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7570       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7571       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7572       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7573       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7574       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7575       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7576       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7577     auto it = Inst.begin();
7578     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7579     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7580     ++it;
7581     // Copy the operand to ensure it's not invalidated when Inst grows.
7582     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7583   }
7584 }
7585 
7586 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7587   OptionalImmIndexMap OptionalIdx;
7588   cvtVOP3(Inst, Operands, OptionalIdx);
7589 }
7590 
7591 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7592                                OptionalImmIndexMap &OptIdx) {
7593   const int Opc = Inst.getOpcode();
7594   const MCInstrDesc &Desc = MII.get(Opc);
7595 
7596   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7597 
7598   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7599     assert(!IsPacked);
7600     Inst.addOperand(Inst.getOperand(0));
7601   }
7602 
7603   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
7604   // instruction, and then figure out where to actually put the modifiers
7605 
7606   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7607   if (OpSelIdx != -1) {
7608     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7609   }
7610 
7611   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7612   if (OpSelHiIdx != -1) {
7613     int DefaultVal = IsPacked ? -1 : 0;
7614     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7615                           DefaultVal);
7616   }
7617 
7618   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7619   if (NegLoIdx != -1) {
7620     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7621     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7622   }
7623 
7624   const int Ops[] = { AMDGPU::OpName::src0,
7625                       AMDGPU::OpName::src1,
7626                       AMDGPU::OpName::src2 };
7627   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7628                          AMDGPU::OpName::src1_modifiers,
7629                          AMDGPU::OpName::src2_modifiers };
7630 
7631   unsigned OpSel = 0;
7632   unsigned OpSelHi = 0;
7633   unsigned NegLo = 0;
7634   unsigned NegHi = 0;
7635 
7636   if (OpSelIdx != -1)
7637     OpSel = Inst.getOperand(OpSelIdx).getImm();
7638 
7639   if (OpSelHiIdx != -1)
7640     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7641 
7642   if (NegLoIdx != -1) {
7643     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7644     NegLo = Inst.getOperand(NegLoIdx).getImm();
7645     NegHi = Inst.getOperand(NegHiIdx).getImm();
7646   }
7647 
7648   for (int J = 0; J < 3; ++J) {
7649     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7650     if (OpIdx == -1)
7651       break;
7652 
7653     uint32_t ModVal = 0;
7654 
7655     if ((OpSel & (1 << J)) != 0)
7656       ModVal |= SISrcMods::OP_SEL_0;
7657 
7658     if ((OpSelHi & (1 << J)) != 0)
7659       ModVal |= SISrcMods::OP_SEL_1;
7660 
7661     if ((NegLo & (1 << J)) != 0)
7662       ModVal |= SISrcMods::NEG;
7663 
7664     if ((NegHi & (1 << J)) != 0)
7665       ModVal |= SISrcMods::NEG_HI;
7666 
7667     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7668 
7669     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7670   }
7671 }
7672 
7673 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7674   OptionalImmIndexMap OptIdx;
7675   cvtVOP3(Inst, Operands, OptIdx);
7676   cvtVOP3P(Inst, Operands, OptIdx);
7677 }
7678 
7679 //===----------------------------------------------------------------------===//
7680 // dpp
7681 //===----------------------------------------------------------------------===//
7682 
7683 bool AMDGPUOperand::isDPP8() const {
7684   return isImmTy(ImmTyDPP8);
7685 }
7686 
7687 bool AMDGPUOperand::isDPPCtrl() const {
7688   using namespace AMDGPU::DPP;
7689 
7690   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7691   if (result) {
7692     int64_t Imm = getImm();
7693     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7694            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7695            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7696            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7697            (Imm == DppCtrl::WAVE_SHL1) ||
7698            (Imm == DppCtrl::WAVE_ROL1) ||
7699            (Imm == DppCtrl::WAVE_SHR1) ||
7700            (Imm == DppCtrl::WAVE_ROR1) ||
7701            (Imm == DppCtrl::ROW_MIRROR) ||
7702            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7703            (Imm == DppCtrl::BCAST15) ||
7704            (Imm == DppCtrl::BCAST31) ||
7705            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7706            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7707   }
7708   return false;
7709 }
7710 
7711 //===----------------------------------------------------------------------===//
7712 // mAI
7713 //===----------------------------------------------------------------------===//
7714 
7715 bool AMDGPUOperand::isBLGP() const {
7716   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7717 }
7718 
7719 bool AMDGPUOperand::isCBSZ() const {
7720   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7721 }
7722 
7723 bool AMDGPUOperand::isABID() const {
7724   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7725 }
7726 
7727 bool AMDGPUOperand::isS16Imm() const {
7728   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7729 }
7730 
7731 bool AMDGPUOperand::isU16Imm() const {
7732   return isImm() && isUInt<16>(getImm());
7733 }
7734 
7735 //===----------------------------------------------------------------------===//
7736 // dim
7737 //===----------------------------------------------------------------------===//
7738 
7739 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7740   // We want to allow "dim:1D" etc.,
7741   // but the initial 1 is tokenized as an integer.
7742   std::string Token;
7743   if (isToken(AsmToken::Integer)) {
7744     SMLoc Loc = getToken().getEndLoc();
7745     Token = std::string(getTokenStr());
7746     lex();
7747     if (getLoc() != Loc)
7748       return false;
7749   }
7750 
7751   StringRef Suffix;
7752   if (!parseId(Suffix))
7753     return false;
7754   Token += Suffix;
7755 
7756   StringRef DimId = Token;
7757   if (DimId.startswith("SQ_RSRC_IMG_"))
7758     DimId = DimId.drop_front(12);
7759 
7760   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7761   if (!DimInfo)
7762     return false;
7763 
7764   Encoding = DimInfo->Encoding;
7765   return true;
7766 }
7767 
7768 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7769   if (!isGFX10Plus())
7770     return MatchOperand_NoMatch;
7771 
7772   SMLoc S = getLoc();
7773 
7774   if (!trySkipId("dim", AsmToken::Colon))
7775     return MatchOperand_NoMatch;
7776 
7777   unsigned Encoding;
7778   SMLoc Loc = getLoc();
7779   if (!parseDimId(Encoding)) {
7780     Error(Loc, "invalid dim value");
7781     return MatchOperand_ParseFail;
7782   }
7783 
7784   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7785                                               AMDGPUOperand::ImmTyDim));
7786   return MatchOperand_Success;
7787 }
7788 
7789 //===----------------------------------------------------------------------===//
7790 // dpp
7791 //===----------------------------------------------------------------------===//
7792 
7793 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7794   SMLoc S = getLoc();
7795 
7796   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7797     return MatchOperand_NoMatch;
7798 
7799   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7800 
7801   int64_t Sels[8];
7802 
7803   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7804     return MatchOperand_ParseFail;
7805 
7806   for (size_t i = 0; i < 8; ++i) {
7807     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7808       return MatchOperand_ParseFail;
7809 
7810     SMLoc Loc = getLoc();
7811     if (getParser().parseAbsoluteExpression(Sels[i]))
7812       return MatchOperand_ParseFail;
7813     if (0 > Sels[i] || 7 < Sels[i]) {
7814       Error(Loc, "expected a 3-bit value");
7815       return MatchOperand_ParseFail;
7816     }
7817   }
7818 
7819   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7820     return MatchOperand_ParseFail;
7821 
7822   unsigned DPP8 = 0;
7823   for (size_t i = 0; i < 8; ++i)
7824     DPP8 |= (Sels[i] << (i * 3));
7825 
7826   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7827   return MatchOperand_Success;
7828 }
7829 
7830 bool
7831 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7832                                     const OperandVector &Operands) {
7833   if (Ctrl == "row_newbcast")
7834     return isGFX90A();
7835 
7836   if (Ctrl == "row_share" ||
7837       Ctrl == "row_xmask")
7838     return isGFX10Plus();
7839 
7840   if (Ctrl == "wave_shl" ||
7841       Ctrl == "wave_shr" ||
7842       Ctrl == "wave_rol" ||
7843       Ctrl == "wave_ror" ||
7844       Ctrl == "row_bcast")
7845     return isVI() || isGFX9();
7846 
7847   return Ctrl == "row_mirror" ||
7848          Ctrl == "row_half_mirror" ||
7849          Ctrl == "quad_perm" ||
7850          Ctrl == "row_shl" ||
7851          Ctrl == "row_shr" ||
7852          Ctrl == "row_ror";
7853 }
7854 
7855 int64_t
7856 AMDGPUAsmParser::parseDPPCtrlPerm() {
7857   // quad_perm:[%d,%d,%d,%d]
7858 
7859   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7860     return -1;
7861 
7862   int64_t Val = 0;
7863   for (int i = 0; i < 4; ++i) {
7864     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7865       return -1;
7866 
7867     int64_t Temp;
7868     SMLoc Loc = getLoc();
7869     if (getParser().parseAbsoluteExpression(Temp))
7870       return -1;
7871     if (Temp < 0 || Temp > 3) {
7872       Error(Loc, "expected a 2-bit value");
7873       return -1;
7874     }
7875 
7876     Val += (Temp << i * 2);
7877   }
7878 
7879   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7880     return -1;
7881 
7882   return Val;
7883 }
7884 
7885 int64_t
7886 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7887   using namespace AMDGPU::DPP;
7888 
7889   // sel:%d
7890 
7891   int64_t Val;
7892   SMLoc Loc = getLoc();
7893 
7894   if (getParser().parseAbsoluteExpression(Val))
7895     return -1;
7896 
7897   struct DppCtrlCheck {
7898     int64_t Ctrl;
7899     int Lo;
7900     int Hi;
7901   };
7902 
7903   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7904     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7905     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7906     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7907     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7908     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7909     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7910     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7911     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7912     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7913     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7914     .Default({-1, 0, 0});
7915 
7916   bool Valid;
7917   if (Check.Ctrl == -1) {
7918     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7919     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7920   } else {
7921     Valid = Check.Lo <= Val && Val <= Check.Hi;
7922     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7923   }
7924 
7925   if (!Valid) {
7926     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7927     return -1;
7928   }
7929 
7930   return Val;
7931 }
7932 
7933 OperandMatchResultTy
7934 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7935   using namespace AMDGPU::DPP;
7936 
7937   if (!isToken(AsmToken::Identifier) ||
7938       !isSupportedDPPCtrl(getTokenStr(), Operands))
7939     return MatchOperand_NoMatch;
7940 
7941   SMLoc S = getLoc();
7942   int64_t Val = -1;
7943   StringRef Ctrl;
7944 
7945   parseId(Ctrl);
7946 
7947   if (Ctrl == "row_mirror") {
7948     Val = DppCtrl::ROW_MIRROR;
7949   } else if (Ctrl == "row_half_mirror") {
7950     Val = DppCtrl::ROW_HALF_MIRROR;
7951   } else {
7952     if (skipToken(AsmToken::Colon, "expected a colon")) {
7953       if (Ctrl == "quad_perm") {
7954         Val = parseDPPCtrlPerm();
7955       } else {
7956         Val = parseDPPCtrlSel(Ctrl);
7957       }
7958     }
7959   }
7960 
7961   if (Val == -1)
7962     return MatchOperand_ParseFail;
7963 
7964   Operands.push_back(
7965     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7966   return MatchOperand_Success;
7967 }
7968 
7969 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7970   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7971 }
7972 
7973 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7974   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7975 }
7976 
7977 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7978   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7979 }
7980 
7981 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7982   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7983 }
7984 
7985 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7986   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7987 }
7988 
7989 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7990   OptionalImmIndexMap OptionalIdx;
7991 
7992   unsigned Opc = Inst.getOpcode();
7993   bool HasModifiers =
7994       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
7995   unsigned I = 1;
7996   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7997   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7998     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7999   }
8000 
8001   int Fi = 0;
8002   for (unsigned E = Operands.size(); I != E; ++I) {
8003     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8004                                             MCOI::TIED_TO);
8005     if (TiedTo != -1) {
8006       assert((unsigned)TiedTo < Inst.getNumOperands());
8007       // handle tied old or src2 for MAC instructions
8008       Inst.addOperand(Inst.getOperand(TiedTo));
8009     }
8010     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8011     // Add the register arguments
8012     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8013       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8014       // Skip it.
8015       continue;
8016     }
8017 
8018     if (IsDPP8) {
8019       if (Op.isDPP8()) {
8020         Op.addImmOperands(Inst, 1);
8021       } else if (HasModifiers &&
8022                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8023         Op.addRegWithFPInputModsOperands(Inst, 2);
8024       } else if (Op.isFI()) {
8025         Fi = Op.getImm();
8026       } else if (Op.isReg()) {
8027         Op.addRegOperands(Inst, 1);
8028       } else {
8029         llvm_unreachable("Invalid operand type");
8030       }
8031     } else {
8032       if (HasModifiers &&
8033           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8034         Op.addRegWithFPInputModsOperands(Inst, 2);
8035       } else if (Op.isReg()) {
8036         Op.addRegOperands(Inst, 1);
8037       } else if (Op.isDPPCtrl()) {
8038         Op.addImmOperands(Inst, 1);
8039       } else if (Op.isImm()) {
8040         // Handle optional arguments
8041         OptionalIdx[Op.getImmTy()] = I;
8042       } else {
8043         llvm_unreachable("Invalid operand type");
8044       }
8045     }
8046   }
8047 
8048   if (IsDPP8) {
8049     using namespace llvm::AMDGPU::DPP;
8050     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8051   } else {
8052     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8053     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8054     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8055     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8056       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8057     }
8058   }
8059 }
8060 
8061 //===----------------------------------------------------------------------===//
8062 // sdwa
8063 //===----------------------------------------------------------------------===//
8064 
8065 OperandMatchResultTy
8066 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8067                               AMDGPUOperand::ImmTy Type) {
8068   using namespace llvm::AMDGPU::SDWA;
8069 
8070   SMLoc S = getLoc();
8071   StringRef Value;
8072   OperandMatchResultTy res;
8073 
8074   SMLoc StringLoc;
8075   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8076   if (res != MatchOperand_Success) {
8077     return res;
8078   }
8079 
8080   int64_t Int;
8081   Int = StringSwitch<int64_t>(Value)
8082         .Case("BYTE_0", SdwaSel::BYTE_0)
8083         .Case("BYTE_1", SdwaSel::BYTE_1)
8084         .Case("BYTE_2", SdwaSel::BYTE_2)
8085         .Case("BYTE_3", SdwaSel::BYTE_3)
8086         .Case("WORD_0", SdwaSel::WORD_0)
8087         .Case("WORD_1", SdwaSel::WORD_1)
8088         .Case("DWORD", SdwaSel::DWORD)
8089         .Default(0xffffffff);
8090 
8091   if (Int == 0xffffffff) {
8092     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8093     return MatchOperand_ParseFail;
8094   }
8095 
8096   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8097   return MatchOperand_Success;
8098 }
8099 
8100 OperandMatchResultTy
8101 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8102   using namespace llvm::AMDGPU::SDWA;
8103 
8104   SMLoc S = getLoc();
8105   StringRef Value;
8106   OperandMatchResultTy res;
8107 
8108   SMLoc StringLoc;
8109   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8110   if (res != MatchOperand_Success) {
8111     return res;
8112   }
8113 
8114   int64_t Int;
8115   Int = StringSwitch<int64_t>(Value)
8116         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8117         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8118         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8119         .Default(0xffffffff);
8120 
8121   if (Int == 0xffffffff) {
8122     Error(StringLoc, "invalid dst_unused value");
8123     return MatchOperand_ParseFail;
8124   }
8125 
8126   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8127   return MatchOperand_Success;
8128 }
8129 
8130 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8131   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8132 }
8133 
8134 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8135   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8136 }
8137 
8138 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8139   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8140 }
8141 
8142 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8143   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8144 }
8145 
8146 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8147   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8148 }
8149 
8150 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8151                               uint64_t BasicInstType,
8152                               bool SkipDstVcc,
8153                               bool SkipSrcVcc) {
8154   using namespace llvm::AMDGPU::SDWA;
8155 
8156   OptionalImmIndexMap OptionalIdx;
8157   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8158   bool SkippedVcc = false;
8159 
8160   unsigned I = 1;
8161   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8162   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8163     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8164   }
8165 
8166   for (unsigned E = Operands.size(); I != E; ++I) {
8167     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8168     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8169         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8170       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8171       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8172       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8173       // Skip VCC only if we didn't skip it on previous iteration.
8174       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8175       if (BasicInstType == SIInstrFlags::VOP2 &&
8176           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8177            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8178         SkippedVcc = true;
8179         continue;
8180       } else if (BasicInstType == SIInstrFlags::VOPC &&
8181                  Inst.getNumOperands() == 0) {
8182         SkippedVcc = true;
8183         continue;
8184       }
8185     }
8186     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8187       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8188     } else if (Op.isImm()) {
8189       // Handle optional arguments
8190       OptionalIdx[Op.getImmTy()] = I;
8191     } else {
8192       llvm_unreachable("Invalid operand type");
8193     }
8194     SkippedVcc = false;
8195   }
8196 
8197   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8198       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8199       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8200     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
8201     switch (BasicInstType) {
8202     case SIInstrFlags::VOP1:
8203       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8204       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8205         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8206       }
8207       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8208       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8209       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8210       break;
8211 
8212     case SIInstrFlags::VOP2:
8213       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8214       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8215         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8216       }
8217       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8218       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8219       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8220       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8221       break;
8222 
8223     case SIInstrFlags::VOPC:
8224       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8225         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8226       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8227       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8228       break;
8229 
8230     default:
8231       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8232     }
8233   }
8234 
8235   // special case v_mac_{f16, f32}:
8236   // it has src2 register operand that is tied to dst operand
8237   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8238       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8239     auto it = Inst.begin();
8240     std::advance(
8241       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8242     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8243   }
8244 }
8245 
8246 //===----------------------------------------------------------------------===//
8247 // mAI
8248 //===----------------------------------------------------------------------===//
8249 
8250 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8251   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8252 }
8253 
8254 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8255   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8256 }
8257 
8258 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8259   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8260 }
8261 
8262 /// Force static initialization.
8263 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8264   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8265   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8266 }
8267 
8268 #define GET_REGISTER_MATCHER
8269 #define GET_MATCHER_IMPLEMENTATION
8270 #define GET_MNEMONIC_SPELL_CHECKER
8271 #define GET_MNEMONIC_CHECKER
8272 #include "AMDGPUGenAsmMatcher.inc"
8273 
8274 // This fuction should be defined after auto-generated include so that we have
8275 // MatchClassKind enum defined
8276 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8277                                                      unsigned Kind) {
8278   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8279   // But MatchInstructionImpl() expects to meet token and fails to validate
8280   // operand. This method checks if we are given immediate operand but expect to
8281   // get corresponding token.
8282   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8283   switch (Kind) {
8284   case MCK_addr64:
8285     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8286   case MCK_gds:
8287     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8288   case MCK_lds:
8289     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8290   case MCK_idxen:
8291     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8292   case MCK_offen:
8293     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8294   case MCK_SSrcB32:
8295     // When operands have expression values, they will return true for isToken,
8296     // because it is not possible to distinguish between a token and an
8297     // expression at parse time. MatchInstructionImpl() will always try to
8298     // match an operand as a token, when isToken returns true, and when the
8299     // name of the expression is not a valid token, the match will fail,
8300     // so we need to handle it here.
8301     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8302   case MCK_SSrcF32:
8303     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8304   case MCK_SoppBrTarget:
8305     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8306   case MCK_VReg32OrOff:
8307     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8308   case MCK_InterpSlot:
8309     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8310   case MCK_Attr:
8311     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8312   case MCK_AttrChan:
8313     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8314   case MCK_ImmSMEMOffset:
8315     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8316   case MCK_SReg_64:
8317   case MCK_SReg_64_XEXEC:
8318     // Null is defined as a 32-bit register but
8319     // it should also be enabled with 64-bit operands.
8320     // The following code enables it for SReg_64 operands
8321     // used as source and destination. Remaining source
8322     // operands are handled in isInlinableImm.
8323     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8324   default:
8325     return Match_InvalidOperand;
8326   }
8327 }
8328 
8329 //===----------------------------------------------------------------------===//
8330 // endpgm
8331 //===----------------------------------------------------------------------===//
8332 
8333 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8334   SMLoc S = getLoc();
8335   int64_t Imm = 0;
8336 
8337   if (!parseExpr(Imm)) {
8338     // The operand is optional, if not present default to 0
8339     Imm = 0;
8340   }
8341 
8342   if (!isUInt<16>(Imm)) {
8343     Error(S, "expected a 16-bit value");
8344     return MatchOperand_ParseFail;
8345   }
8346 
8347   Operands.push_back(
8348       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8349   return MatchOperand_Success;
8350 }
8351 
8352 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8353