xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (revision e2eeea75eb8b6dd50c1298067a0655880d186734)
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
// Category tag for a parsed register. In the code visible here it is consumed
// by KernelScopeInfo::usesRegister, which counts IS_SGPR against the SGPR
// high-water mark, counts IS_VGPR and IS_AGPR against the VGPR high-water
// mark, and ignores every other kind.
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
// A single parsed assembly operand.
//
// The operand is a tagged union: `Kind` selects which member of the anonymous
// union (Tok / Imm / Reg / Expr) is meaningful. Instances are built through the
// static Create* factories at the bottom of the class, which fill in the
// payload and the source locations. The `is*` predicates classify the operand,
// the `add*Operands` methods append it to an MCInst, and `print` dumps it.
78 class AMDGPUOperand : public MCParsedAsmOperand {
     // Discriminator for the anonymous union declared further down.
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
     // Source range of the operand; the Create* factories set both ends.
86   SMLoc StartLoc, EndLoc;
     // Parser passed to the constructor; stored as a non-owning raw pointer.
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
     // Only records the kind and the parser; the union payload and the
     // locations are left for the Create* factories to fill in.
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
     // Source modifiers attached to a register or immediate operand.
     // Abs and Neg are treated as the floating-point modifiers and Sext as the
     // integer modifier (see hasFPModifiers / hasIntModifiers below).
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
       // Returns the bitwise OR of SISrcMods::ABS and SISrcMods::NEG for the
       // flags that are set (0 when neither is set).
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
       // Returns SISrcMods::SEXT when Sext is set, otherwise 0.
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
       // Returns the encoded modifiers: the FP encoding if any FP modifier is
       // set, else the integer encoding if Sext is set, else 0. Asserts that FP
       // and integer modifiers are not both present.
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
     // Tag stored in ImmOp::Type describing what an immediate operand stands
     // for. ImmTyNone is the untagged case: isImmModifier() is true for every
     // other value, and getModifiers()/setModifiers() only accept immediates
     // tagged ImmTyNone. printImmTy() must be kept in sync with this list.
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
     // Payload for Kind == Token: a pointer/length pair. The characters are not
     // copied (see CreateToken), so they must outlive the operand.
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
     // Payload for Kind == Immediate.
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
     // Payload for Kind == Register.
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
     // Exactly one member is valid, as selected by Kind (Expr for Expression).
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret it as a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
     // True for an expression operand whose (non-null) expression is an
     // MCSymbolRefExpr.
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
     // A register operand that carries no source modifiers. Use isRegKind() to
     // test for a register regardless of modifiers.
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
     // Accepts a register of class RCID, or an immediate that is inlinable or
     // usable as a literal for `type`. Unlike isRegOrInlineNoMods this does not
     // reject operands that have modifiers.
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
     // Per-type wrappers around isRegOrImmWithInputMods: the 16/32-bit variants
     // use VS_32, the 64-bit variants use VS_64.
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
     // True if the operand belongs to any of the listed VGPR register classes
     // (32 through 1024 bits wide, going by the class names).
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
     // Either an immediate tagged ImmTyOff or a 32-bit VGPR.
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
     // True when the operand is the SGPR_NULL register.
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
     // True for an immediate whose tag equals ImmT.
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
     // True for any tagged immediate, i.e. one whose type is not ImmTyNone.
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
     // Predicates for individual immediate tags. Most are a plain isImmTy()
     // check; isOffset, isOffset0, isOffset1 and isFORMAT additionally require
     // the value to fit an unsigned 16-bit (isOffset) or 8-bit field.
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
     // Accepts either offset tag and, unlike isOffset(), applies no range check.
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
     // True for a clamp or omod immediate.
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
     // A modifier-free register (isReg) or any immediate, tagged or not.
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
     // Accepts a register of class RCID or an immediate inlinable for `type`,
     // provided the operand carries no source modifiers. This is the building
     // block for the SCSrc*/VCSrc*/VISrc*/AISrc* predicates below.
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
     // SCSrc*: SReg_32 / SReg_64 register or inline constant of the given type.
     // The V2 variants simply forward to the scalar 16-bit predicate.
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
     // SSrc*: everything the matching SCSrc predicate accepts, plus a literal
     // immediate of the given type. The 32-bit variants also accept any
     // expression operand.
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
     // Aborts via llvm_unreachable if ever called; the return statement after
     // it is never reached.
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
     // NOTE(review): the three FP variants below delegate to the integer
     // (B32/B64/B16) inline-constant predicate rather than the F-typed one;
     // confirm this is intentional.
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
     // Aborts via llvm_unreachable if ever called; the return statement after
     // it is never reached.
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
     // Same shape as isSSrcB32 but checked against the SRegOrLds_32 class.
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
     // VCSrc*: VS_32 / VS_64 register or inline constant of the given type.
     // The V2 variants forward to the scalar 16-bit predicate.
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
     // VSrc*: everything the VCSrc predicate accepts, plus a literal immediate
     // of the given type; the 32-bit variants also accept any expression.
     // NOTE(review): isVSrcB32 and isVSrcB64 delegate to the F-typed VCSrc
     // predicate rather than the B-typed one; confirm this is intentional.
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcB16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
     // VISrc*: VGPR_32 register or inline constant of the given type.
     // isVISrcV2F16 additionally accepts whatever isVISrcB32 accepts.
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
     // AISrc*: the same family of predicates for the AGPR_32, AReg_128,
     // AReg_512 and AReg_1024 register classes. In every group the V2B16
     // variant forwards to B16 and the V2F16 variant is "F16 or B32".
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
     // KImm*: any operand usable as a literal immediate of the given FP type.
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
     // This operand class never reports itself as a memory operand.
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
     // Accepts an expression or any immediate.
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMEMOffset() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
     // Returns the name of the symbol referenced by an expression operand.
     // The assert only checks isExpr(); the cast<> additionally requires the
     // expression to be an MCSymbolRefExpr (see isSymbolRefExpr).
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
     // Returns the token text: the referenced symbol's name for an expression
     // operand, otherwise the stored pointer/length pair.
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   ImmTy getImmTy() const {
693     assert(isImm());
694     return Imm.Type;
695   }
696 
697   unsigned getReg() const override {
698     assert(isRegKind());
699     return Reg.RegNo;
700   }
701 
702   SMLoc getStartLoc() const override {
703     return StartLoc;
704   }
705 
706   SMLoc getEndLoc() const override {
707     return EndLoc;
708   }
709 
710   SMRange getLocRange() const {
711     return SMRange(StartLoc, EndLoc);
712   }
713 
     // Returns the modifiers of a register or of an untagged (ImmTyNone)
     // immediate; asserts on any other operand.
714   Modifiers getModifiers() const {
715     assert(isRegKind() || isImmTy(ImmTyNone));
716     return isRegKind() ? Reg.Mods : Imm.Mods;
717   }
718 
     // Stores Mods on a register or untagged immediate; same precondition as
     // getModifiers().
719   void setModifiers(Modifiers Mods) {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     if (isRegKind())
722       Reg.Mods = Mods;
723     else
724       Imm.Mods = Mods;
725   }
726 
     // The three queries below go through getModifiers() and therefore inherit
     // its assertion.
727   bool hasModifiers() const {
728     return getModifiers().hasModifiers();
729   }
730 
731   bool hasFPModifiers() const {
732     return getModifiers().hasFPModifiers();
733   }
734 
735   bool hasIntModifiers() const {
736     return getModifiers().hasIntModifiers();
737   }
738 
739   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
740 
741   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
742 
743   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
744 
745   template <unsigned Bitwidth>
746   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
747 
748   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
749     addKImmFPOperands<16>(Inst, N);
750   }
751 
752   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
753     addKImmFPOperands<32>(Inst, N);
754   }
755 
756   void addRegOperands(MCInst &Inst, unsigned N) const;
757 
758   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
759     addRegOperands(Inst, N);
760   }
761 
     // Appends this operand to Inst as a register, an expression, or (for
     // anything else) an immediate.
762   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
763     if (isRegKind())
764       addRegOperands(Inst, N);
765     else if (isExpr())
766       Inst.addOperand(MCOperand::createExpr(Expr));
767     else
768       addImmOperands(Inst, N);
769   }
770 
     // Appends two things to Inst: first an immediate holding the encoded
     // source modifiers, then the register or immediate itself. The immediate
     // is added with ApplyModifiers = false.
771   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
772     Modifiers Mods = getModifiers();
773     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
774     if (isRegKind()) {
775       addRegOperands(Inst, N);
776     } else {
777       addImmOperands(Inst, N, false);
778     }
779   }
780 
     // FP / integer flavours of the above; each asserts that no modifier of the
     // other family is present.
781   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
782     assert(!hasIntModifiers());
783     addRegOrImmWithInputModsOperands(Inst, N);
784   }
785 
786   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasFPModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
     // Register-only counterpart: appends the encoded modifiers followed by the
     // register, asserting that the operand is a register.
791   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
792     Modifiers Mods = getModifiers();
793     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
794     assert(isRegKind());
795     addRegOperands(Inst, N);
796   }
797 
798   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
799     assert(!hasIntModifiers());
800     addRegWithInputModsOperands(Inst, N);
801   }
802 
803   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasFPModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
     // Appends an immediate when the operand is one; otherwise asserts that it
     // is an expression and appends that expression.
808   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
809     if (isImm())
810       addImmOperands(Inst, N);
811     else {
812       assert(isExpr());
813       Inst.addOperand(MCOperand::createExpr(Expr));
814     }
815   }
816 
     // Writes a short name for Type to OS. Every ImmTy enumerator has its own
     // case and there is no default, so a new enumerator needs a new case here.
817   static void printImmTy(raw_ostream& OS, ImmTy Type) {
818     switch (Type) {
819     case ImmTyNone: OS << "None"; break;
820     case ImmTyGDS: OS << "GDS"; break;
821     case ImmTyLDS: OS << "LDS"; break;
822     case ImmTyOffen: OS << "Offen"; break;
823     case ImmTyIdxen: OS << "Idxen"; break;
824     case ImmTyAddr64: OS << "Addr64"; break;
825     case ImmTyOffset: OS << "Offset"; break;
826     case ImmTyInstOffset: OS << "InstOffset"; break;
827     case ImmTyOffset0: OS << "Offset0"; break;
828     case ImmTyOffset1: OS << "Offset1"; break;
829     case ImmTyDLC: OS << "DLC"; break;
830     case ImmTyGLC: OS << "GLC"; break;
831     case ImmTySLC: OS << "SLC"; break;
832     case ImmTySWZ: OS << "SWZ"; break;
833     case ImmTyTFE: OS << "TFE"; break;
834     case ImmTyD16: OS << "D16"; break;
835     case ImmTyFORMAT: OS << "FORMAT"; break;
836     case ImmTyClampSI: OS << "ClampSI"; break;
837     case ImmTyOModSI: OS << "OModSI"; break;
838     case ImmTyDPP8: OS << "DPP8"; break;
839     case ImmTyDppCtrl: OS << "DppCtrl"; break;
840     case ImmTyDppRowMask: OS << "DppRowMask"; break;
841     case ImmTyDppBankMask: OS << "DppBankMask"; break;
842     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
843     case ImmTyDppFi: OS << "FI"; break;
844     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
845     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
846     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
847     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
848     case ImmTyDMask: OS << "DMask"; break;
849     case ImmTyDim: OS << "Dim"; break;
850     case ImmTyUNorm: OS << "UNorm"; break;
851     case ImmTyDA: OS << "DA"; break;
852     case ImmTyR128A16: OS << "R128A16"; break;
853     case ImmTyA16: OS << "A16"; break;
854     case ImmTyLWE: OS << "LWE"; break;
855     case ImmTyOff: OS << "Off"; break;
856     case ImmTyExpTgt: OS << "ExpTgt"; break;
857     case ImmTyExpCompr: OS << "ExpCompr"; break;
858     case ImmTyExpVM: OS << "ExpVM"; break;
859     case ImmTyHwreg: OS << "Hwreg"; break;
860     case ImmTySendMsg: OS << "SendMsg"; break;
861     case ImmTyInterpSlot: OS << "InterpSlot"; break;
862     case ImmTyInterpAttr: OS << "InterpAttr"; break;
863     case ImmTyAttrChan: OS << "AttrChan"; break;
864     case ImmTyOpSel: OS << "OpSel"; break;
865     case ImmTyOpSelHi: OS << "OpSelHi"; break;
866     case ImmTyNegLo: OS << "NegLo"; break;
867     case ImmTyNegHi: OS << "NegHi"; break;
868     case ImmTySwizzle: OS << "Swizzle"; break;
869     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
870     case ImmTyHigh: OS << "High"; break;
871     case ImmTyBLGP: OS << "BLGP"; break;
872     case ImmTyCBSZ: OS << "CBSZ"; break;
873     case ImmTyABID: OS << "ABID"; break;
874     case ImmTyEndpgm: OS << "Endpgm"; break;
875     }
876   }
877 
     // Dumps the operand in a per-kind textual form: register number plus
     // modifiers, immediate value (with its type name when tagged) plus
     // modifiers, quoted token text, or the expression.
878   void print(raw_ostream &OS) const override {
879     switch (Kind) {
880     case Register:
881       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
882       break;
883     case Immediate:
884       OS << '<' << getImm();
885       if (getImmTy() != ImmTyNone) {
886         OS << " type: "; printImmTy(OS, getImmTy());
887       }
888       OS << " mods: " << Imm.Mods << '>';
889       break;
890     case Token:
891       OS << '\'' << getToken() << '\'';
892       break;
893     case Expression:
894       OS << "<expr " << *Expr << '>';
895       break;
896     }
897   }
898 
     // Builds an immediate operand with value Val, tag Type and default
     // (empty) modifiers. Both StartLoc and EndLoc are set to Loc.
899   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
900                                       int64_t Val, SMLoc Loc,
901                                       ImmTy Type = ImmTyNone,
902                                       bool IsFPImm = false) {
903     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
904     Op->Imm.Val = Val;
905     Op->Imm.IsFPImm = IsFPImm;
906     Op->Imm.Type = Type;
907     Op->Imm.Mods = Modifiers();
908     Op->StartLoc = Loc;
909     Op->EndLoc = Loc;
910     return Op;
911   }
912 
     // Builds a token operand referring to Str. Only the data pointer and
     // length are stored; the characters are not copied. Both locations are set
     // to Loc. HasExplicitEncodingSize is not used by this function body.
913   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
914                                         StringRef Str, SMLoc Loc,
915                                         bool HasExplicitEncodingSize = true) {
916     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
917     Res->Tok.Data = Str.data();
918     Res->Tok.Length = Str.size();
919     Res->StartLoc = Loc;
920     Res->EndLoc = Loc;
921     return Res;
922   }
923 
     // Builds a register operand for RegNo with default (empty) modifiers,
     // spanning the source range S..E.
924   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
925                                       unsigned RegNo, SMLoc S,
926                                       SMLoc E) {
927     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
928     Op->Reg.RegNo = RegNo;
929     Op->Reg.Mods = Modifiers();
930     Op->StartLoc = S;
931     Op->EndLoc = E;
932     return Op;
933   }
934 
     // Builds an expression operand wrapping Expr. The pointer is stored as
     // given; both locations are set to S.
935   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
936                                        const class MCExpr *Expr, SMLoc S) {
937     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
938     Op->Expr = Expr;
939     Op->StartLoc = S;
940     Op->EndLoc = S;
941     return Op;
942   }
943 };
944 
// Streams the three modifier flags of Mods to OS as labelled values and
// returns OS so calls can be chained. AMDGPUOperand::print uses this for its
// "mods:" field.
// NOTE(review): the "neg" label is followed by a space while "abs" and "sext"
// are not; left untouched because the text is runtime output.
945 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
946   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
947   return OS;
948 }
949 
950 //===----------------------------------------------------------------------===//
951 // AsmParser
952 //===----------------------------------------------------------------------===//
953 
954 // Holds info related to the current kernel, e.g. count of SGPRs used.
955 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
956 // .amdgpu_hsa_kernel or at EOF.
//
// The class keeps, for SGPRs and for VGPRs, the lowest register index that is
// above every index reported so far. Whenever one of those values grows and a
// context has been attached, the new value is written to the assembler symbol
// ".kernel.sgpr_count" or ".kernel.vgpr_count" respectively.
957 class KernelScopeInfo {
     // One past the highest index seen so far; -1 until initialize() runs or a
     // register is reported.
958   int SgprIndexUnusedMin = -1;
959   int VgprIndexUnusedMin = -1;
     // Context used to create and update the count symbols. While it is null
     // the counters are still tracked but no symbol is touched.
960   MCContext *Ctx = nullptr;
961 
     // Records that SGPR index i is in use. If i is at or above the current
     // first-unused index, the index becomes i + 1 and, when a context is
     // attached, ".kernel.sgpr_count" is set to that value.
962   void usesSgprAt(int i) {
963     if (i >= SgprIndexUnusedMin) {
964       SgprIndexUnusedMin = ++i;
965       if (Ctx) {
966         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
967         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
968       }
969     }
970   }
971 
     // VGPR counterpart of usesSgprAt; updates ".kernel.vgpr_count".
972   void usesVgprAt(int i) {
973     if (i >= VgprIndexUnusedMin) {
974       VgprIndexUnusedMin = ++i;
975       if (Ctx) {
976         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
977         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
978       }
979     }
980   }
981 
982 public:
983   KernelScopeInfo() = default;
984 
     // Attaches Context and restarts counting. Each counter is reset to -1 and
     // then passed through usesXgprAt(-1), which raises it to 0 and sets the
     // matching count symbol to 0.
985   void initialize(MCContext &Context) {
986     Ctx = &Context;
987     usesSgprAt(SgprIndexUnusedMin = -1);
988     usesVgprAt(VgprIndexUnusedMin = -1);
989   }
990 
     // Reports a register that starts at DwordRegIndex and spans RegWidth
     // units, by marking its last index (DwordRegIndex + RegWidth - 1) as used.
     // SGPRs update the SGPR counter; AGPRs and VGPRs both update the VGPR
     // counter; every other kind is ignored.
     // NOTE(review): RegWidth appears to be measured in dwords, matching
     // DwordRegIndex -- confirm against the callers.
991   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
992     switch (RegKind) {
993       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
994       case IS_AGPR: // fall through
995       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
996       default: break;
997     }
998   }
999 };
1000 
1001 class AMDGPUAsmParser : public MCTargetAsmParser {
1002   MCAsmParser &Parser;
1003 
1004   // Number of extra operands parsed after the first optional operand.
1005   // This may be necessary to skip hardcoded mandatory operands.
1006   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1007 
1008   unsigned ForcedEncodingSize = 0;
1009   bool ForcedDPP = false;
1010   bool ForcedSDWA = false;
1011   KernelScopeInfo KernelScope;
1012 
1013   /// @name Auto-generated Match Functions
1014   /// {
1015 
1016 #define GET_ASSEMBLER_HEADER
1017 #include "AMDGPUGenAsmMatcher.inc"
1018 
1019   /// }
1020 
1021 private:
1022   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1023   bool OutOfRangeError(SMRange Range);
1024   /// Calculate VGPR/SGPR blocks required for given target, reserved
1025   /// registers, and user-specified NextFreeXGPR values.
1026   ///
1027   /// \param Features [in] Target features, used for bug corrections.
1028   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1029   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1030   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1031   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1032   /// descriptor field, if valid.
1033   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1034   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1035   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1036   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1037   /// \param VGPRBlocks [out] Result VGPR block count.
1038   /// \param SGPRBlocks [out] Result SGPR block count.
1039   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1040                           bool FlatScrUsed, bool XNACKUsed,
1041                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1042                           SMRange VGPRRange, unsigned NextFreeSGPR,
1043                           SMRange SGPRRange, unsigned &VGPRBlocks,
1044                           unsigned &SGPRBlocks);
1045   bool ParseDirectiveAMDGCNTarget();
1046   bool ParseDirectiveAMDHSAKernel();
1047   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1048   bool ParseDirectiveHSACodeObjectVersion();
1049   bool ParseDirectiveHSACodeObjectISA();
1050   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1051   bool ParseDirectiveAMDKernelCodeT();
1052   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1053   bool ParseDirectiveAMDGPUHsaKernel();
1054 
1055   bool ParseDirectiveISAVersion();
1056   bool ParseDirectiveHSAMetadata();
1057   bool ParseDirectivePALMetadataBegin();
1058   bool ParseDirectivePALMetadata();
1059   bool ParseDirectiveAMDGPULDS();
1060 
1061   /// Common code to parse out a block of text (typically YAML) between start and
1062   /// end directives.
1063   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1064                            const char *AssemblerDirectiveEnd,
1065                            std::string &CollectString);
1066 
1067   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1068                              RegisterKind RegKind, unsigned Reg1);
1069   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1070                            unsigned &RegNum, unsigned &RegWidth,
1071                            bool RestoreOnFailure = false);
1072   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1073                            unsigned &RegNum, unsigned &RegWidth,
1074                            SmallVectorImpl<AsmToken> &Tokens);
1075   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1076                            unsigned &RegWidth,
1077                            SmallVectorImpl<AsmToken> &Tokens);
1078   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1079                            unsigned &RegWidth,
1080                            SmallVectorImpl<AsmToken> &Tokens);
1081   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1082                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1083   bool ParseRegRange(unsigned& Num, unsigned& Width);
1084   unsigned getRegularReg(RegisterKind RegKind,
1085                          unsigned RegNum,
1086                          unsigned RegWidth);
1087 
1088   bool isRegister();
1089   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1090   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1091   void initializeGprCountSymbol(RegisterKind RegKind);
1092   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1093                              unsigned RegWidth);
1094   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1095                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1096   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1097                  bool IsGdsHardcoded);
1098 
1099 public:
1100   enum AMDGPUMatchResultTy {
1101     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1102   };
1103   enum OperandMode {
1104     OperandMode_Default,
1105     OperandMode_NSA,
1106   };
1107 
1108   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1109 
1110   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1111                const MCInstrInfo &MII,
1112                const MCTargetOptions &Options)
1113       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1114     MCAsmParserExtension::Initialize(Parser);
1115 
1116     if (getFeatureBits().none()) {
1117       // Set default features.
1118       copySTI().ToggleFeature("southern-islands");
1119     }
1120 
1121     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1122 
1123     {
1124       // TODO: make those pre-defined variables read-only.
1125       // Currently there is none suitable machinery in the core llvm-mc for this.
1126       // MCSymbol::isRedefinable is intended for another purpose, and
1127       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1128       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1129       MCContext &Ctx = getContext();
1130       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1131         MCSymbol *Sym =
1132             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1133         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1134         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1135         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1136         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1137         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1138       } else {
1139         MCSymbol *Sym =
1140             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1141         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1142         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1143         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1144         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1145         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1146       }
1147       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1148         initializeGprCountSymbol(IS_VGPR);
1149         initializeGprCountSymbol(IS_SGPR);
1150       } else
1151         KernelScope.initialize(getContext());
1152     }
1153   }
1154 
1155   bool hasXNACK() const {
1156     return AMDGPU::hasXNACK(getSTI());
1157   }
1158 
1159   bool hasMIMG_R128() const {
1160     return AMDGPU::hasMIMG_R128(getSTI());
1161   }
1162 
1163   bool hasPackedD16() const {
1164     return AMDGPU::hasPackedD16(getSTI());
1165   }
1166 
1167   bool hasGFX10A16() const {
1168     return AMDGPU::hasGFX10A16(getSTI());
1169   }
1170 
1171   bool isSI() const {
1172     return AMDGPU::isSI(getSTI());
1173   }
1174 
1175   bool isCI() const {
1176     return AMDGPU::isCI(getSTI());
1177   }
1178 
1179   bool isVI() const {
1180     return AMDGPU::isVI(getSTI());
1181   }
1182 
1183   bool isGFX9() const {
1184     return AMDGPU::isGFX9(getSTI());
1185   }
1186 
1187   bool isGFX10() const {
1188     return AMDGPU::isGFX10(getSTI());
1189   }
1190 
1191   bool isGFX10_BEncoding() const {
1192     return AMDGPU::isGFX10_BEncoding(getSTI());
1193   }
1194 
1195   bool hasInv2PiInlineImm() const {
1196     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1197   }
1198 
1199   bool hasFlatOffsets() const {
1200     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1201   }
1202 
1203   bool hasSGPR102_SGPR103() const {
1204     return !isVI() && !isGFX9();
1205   }
1206 
1207   bool hasSGPR104_SGPR105() const {
1208     return isGFX10();
1209   }
1210 
1211   bool hasIntClamp() const {
1212     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1213   }
1214 
1215   AMDGPUTargetStreamer &getTargetStreamer() {
1216     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1217     return static_cast<AMDGPUTargetStreamer &>(TS);
1218   }
1219 
1220   const MCRegisterInfo *getMRI() const {
1221     // We need this const_cast because for some reason getContext() is not const
1222     // in MCAsmParser.
1223     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1224   }
1225 
1226   const MCInstrInfo *getMII() const {
1227     return &MII;
1228   }
1229 
1230   const FeatureBitset &getFeatureBits() const {
1231     return getSTI().getFeatureBits();
1232   }
1233 
1234   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1235   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1236   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1237 
1238   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1239   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1240   bool isForcedDPP() const { return ForcedDPP; }
1241   bool isForcedSDWA() const { return ForcedSDWA; }
1242   ArrayRef<unsigned> getMatchedVariants() const;
1243 
1244   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1245   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1246                      bool RestoreOnFailure);
1247   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1248   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1249                                         SMLoc &EndLoc) override;
1250   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1251   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1252                                       unsigned Kind) override;
1253   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1254                                OperandVector &Operands, MCStreamer &Out,
1255                                uint64_t &ErrorInfo,
1256                                bool MatchingInlineAsm) override;
1257   bool ParseDirective(AsmToken DirectiveID) override;
1258   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1259                                     OperandMode Mode = OperandMode_Default);
1260   StringRef parseMnemonicSuffix(StringRef Name);
1261   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1262                         SMLoc NameLoc, OperandVector &Operands) override;
1263   //bool ProcessInstruction(MCInst &Inst);
1264 
1265   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1266 
1267   OperandMatchResultTy
1268   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1269                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1270                      bool (*ConvertResult)(int64_t &) = nullptr);
1271 
1272   OperandMatchResultTy
1273   parseOperandArrayWithPrefix(const char *Prefix,
1274                               OperandVector &Operands,
1275                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1276                               bool (*ConvertResult)(int64_t&) = nullptr);
1277 
1278   OperandMatchResultTy
1279   parseNamedBit(const char *Name, OperandVector &Operands,
1280                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1281   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1282                                              StringRef &Value);
1283 
1284   bool isModifier();
1285   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1286   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1287   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1288   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1289   bool parseSP3NegModifier();
1290   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1291   OperandMatchResultTy parseReg(OperandVector &Operands);
1292   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1293   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1294   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1295   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1296   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1297   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1298   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1299 
1300   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1301   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1302   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1303   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1304 
1305   bool parseCnt(int64_t &IntVal);
1306   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1307   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1308 
1309 private:
1310   struct OperandInfoTy {
1311     int64_t Id;
1312     bool IsSymbolic = false;
1313     bool IsDefined = false;
1314 
1315     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1316   };
1317 
1318   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1319   bool validateSendMsg(const OperandInfoTy &Msg,
1320                        const OperandInfoTy &Op,
1321                        const OperandInfoTy &Stream,
1322                        const SMLoc Loc);
1323 
1324   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1325   bool validateHwreg(const OperandInfoTy &HwReg,
1326                      const int64_t Offset,
1327                      const int64_t Width,
1328                      const SMLoc Loc);
1329 
1330   void errorExpTgt();
1331   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1332   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1333   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1334 
1335   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1336   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1337   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1338   bool validateSOPLiteral(const MCInst &Inst) const;
1339   bool validateConstantBusLimitations(const MCInst &Inst);
1340   bool validateEarlyClobberLimitations(const MCInst &Inst);
1341   bool validateIntClampSupported(const MCInst &Inst);
1342   bool validateMIMGAtomicDMask(const MCInst &Inst);
1343   bool validateMIMGGatherDMask(const MCInst &Inst);
1344   bool validateMovrels(const MCInst &Inst);
1345   bool validateMIMGDataSize(const MCInst &Inst);
1346   bool validateMIMGAddrSize(const MCInst &Inst);
1347   bool validateMIMGD16(const MCInst &Inst);
1348   bool validateMIMGDim(const MCInst &Inst);
1349   bool validateLdsDirect(const MCInst &Inst);
1350   bool validateOpSel(const MCInst &Inst);
1351   bool validateVccOperand(unsigned Reg) const;
1352   bool validateVOP3Literal(const MCInst &Inst) const;
1353   bool validateMAIAccWrite(const MCInst &Inst);
1354   unsigned getConstantBusLimit(unsigned Opcode) const;
1355   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1356   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1357   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1358 
1359   bool isId(const StringRef Id) const;
1360   bool isId(const AsmToken &Token, const StringRef Id) const;
1361   bool isToken(const AsmToken::TokenKind Kind) const;
1362   bool trySkipId(const StringRef Id);
1363   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1364   bool trySkipToken(const AsmToken::TokenKind Kind);
1365   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1366   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1367   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1368   AsmToken::TokenKind getTokenKind() const;
1369   bool parseExpr(int64_t &Imm);
1370   bool parseExpr(OperandVector &Operands);
1371   StringRef getTokenStr() const;
1372   AsmToken peekToken();
1373   AsmToken getToken() const;
1374   SMLoc getLoc() const;
1375   void lex();
1376 
1377 public:
1378   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1379   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1380 
1381   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1382   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1383   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1384   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1385   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1386   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1387 
1388   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1389                             const unsigned MinVal,
1390                             const unsigned MaxVal,
1391                             const StringRef ErrMsg);
1392   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1393   bool parseSwizzleOffset(int64_t &Imm);
1394   bool parseSwizzleMacro(int64_t &Imm);
1395   bool parseSwizzleQuadPerm(int64_t &Imm);
1396   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1397   bool parseSwizzleBroadcast(int64_t &Imm);
1398   bool parseSwizzleSwap(int64_t &Imm);
1399   bool parseSwizzleReverse(int64_t &Imm);
1400 
1401   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1402   int64_t parseGPRIdxMacro();
1403 
1404   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1405   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1406   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1407   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1408   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1409 
1410   AMDGPUOperand::Ptr defaultDLC() const;
1411   AMDGPUOperand::Ptr defaultGLC() const;
1412   AMDGPUOperand::Ptr defaultSLC() const;
1413 
1414   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1415   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1416   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1417   AMDGPUOperand::Ptr defaultFlatOffset() const;
1418 
1419   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1420 
1421   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1422                OptionalImmIndexMap &OptionalIdx);
1423   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1424   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1425   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1426 
1427   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1428 
1429   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1430                bool IsAtomic = false);
1431   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1432 
1433   OperandMatchResultTy parseDim(OperandVector &Operands);
1434   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1435   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1436   AMDGPUOperand::Ptr defaultRowMask() const;
1437   AMDGPUOperand::Ptr defaultBankMask() const;
1438   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1439   AMDGPUOperand::Ptr defaultFI() const;
1440   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1441   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1442 
1443   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1444                                     AMDGPUOperand::ImmTy Type);
1445   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1446   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1447   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1448   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1449   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1450   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1451   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1452                uint64_t BasicInstType,
1453                bool SkipDstVcc = false,
1454                bool SkipSrcVcc = false);
1455 
1456   AMDGPUOperand::Ptr defaultBLGP() const;
1457   AMDGPUOperand::Ptr defaultCBSZ() const;
1458   AMDGPUOperand::Ptr defaultABID() const;
1459 
1460   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1461   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1462 };
1463 
1464 struct OptionalOperand {
1465   const char *Name;
1466   AMDGPUOperand::ImmTy Type;
1467   bool IsBit;
1468   bool (*ConvertResult)(int64_t&);
1469 };
1470 
1471 } // end anonymous namespace
1472 
1473 // May be called with integer type with equivalent bitwidth.
1474 static const fltSemantics *getFltSemantics(unsigned Size) {
1475   switch (Size) {
1476   case 4:
1477     return &APFloat::IEEEsingle();
1478   case 8:
1479     return &APFloat::IEEEdouble();
1480   case 2:
1481     return &APFloat::IEEEhalf();
1482   default:
1483     llvm_unreachable("unsupported fp type");
1484   }
1485 }
1486 
1487 static const fltSemantics *getFltSemantics(MVT VT) {
1488   return getFltSemantics(VT.getSizeInBits() / 8);
1489 }
1490 
1491 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1492   switch (OperandType) {
1493   case AMDGPU::OPERAND_REG_IMM_INT32:
1494   case AMDGPU::OPERAND_REG_IMM_FP32:
1495   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1496   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1497   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1498   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1499     return &APFloat::IEEEsingle();
1500   case AMDGPU::OPERAND_REG_IMM_INT64:
1501   case AMDGPU::OPERAND_REG_IMM_FP64:
1502   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1503   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1504     return &APFloat::IEEEdouble();
1505   case AMDGPU::OPERAND_REG_IMM_INT16:
1506   case AMDGPU::OPERAND_REG_IMM_FP16:
1507   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1508   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1509   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1510   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1511   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1512   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1513   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1514   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1515   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1516   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1517     return &APFloat::IEEEhalf();
1518   default:
1519     llvm_unreachable("unsupported fp type");
1520   }
1521 }
1522 
1523 //===----------------------------------------------------------------------===//
1524 // Operand
1525 //===----------------------------------------------------------------------===//
1526 
1527 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1528   bool Lost;
1529 
1530   // Convert literal to single precision
1531   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1532                                                APFloat::rmNearestTiesToEven,
1533                                                &Lost);
1534   // We allow precision lost but not overflow or underflow
1535   if (Status != APFloat::opOK &&
1536       Lost &&
1537       ((Status & APFloat::opOverflow)  != 0 ||
1538        (Status & APFloat::opUnderflow) != 0)) {
1539     return false;
1540   }
1541 
1542   return true;
1543 }
1544 
1545 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1546   return isUIntN(Size, Val) || isIntN(Size, Val);
1547 }
1548 
1549 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1550   if (VT.getScalarType() == MVT::i16) {
1551     // FP immediate values are broken.
1552     return isInlinableIntLiteral(Val);
1553   }
1554 
1555   // f16/v2f16 operands work correctly for all values.
1556   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1557 }
1558 
1559 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1560 
1561   // This is a hack to enable named inline values like
1562   // shared_base with both 32-bit and 64-bit operands.
1563   // Note that these values are defined as
1564   // 32-bit operands only.
1565   if (isInlineValue()) {
1566     return true;
1567   }
1568 
1569   if (!isImmTy(ImmTyNone)) {
1570     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1571     return false;
1572   }
1573   // TODO: We should avoid using host float here. It would be better to
1574   // check the float bit values which is what a few other places do.
1575   // We've had bot failures before due to weird NaN support on mips hosts.
1576 
1577   APInt Literal(64, Imm.Val);
1578 
1579   if (Imm.IsFPImm) { // We got fp literal token
1580     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1581       return AMDGPU::isInlinableLiteral64(Imm.Val,
1582                                           AsmParser->hasInv2PiInlineImm());
1583     }
1584 
1585     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1586     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1587       return false;
1588 
1589     if (type.getScalarSizeInBits() == 16) {
1590       return isInlineableLiteralOp16(
1591         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1592         type, AsmParser->hasInv2PiInlineImm());
1593     }
1594 
1595     // Check if single precision literal is inlinable
1596     return AMDGPU::isInlinableLiteral32(
1597       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1598       AsmParser->hasInv2PiInlineImm());
1599   }
1600 
1601   // We got int literal token.
1602   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1603     return AMDGPU::isInlinableLiteral64(Imm.Val,
1604                                         AsmParser->hasInv2PiInlineImm());
1605   }
1606 
1607   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1608     return false;
1609   }
1610 
1611   if (type.getScalarSizeInBits() == 16) {
1612     return isInlineableLiteralOp16(
1613       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1614       type, AsmParser->hasInv2PiInlineImm());
1615   }
1616 
1617   return AMDGPU::isInlinableLiteral32(
1618     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1619     AsmParser->hasInv2PiInlineImm());
1620 }
1621 
1622 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1623   // Check that this immediate can be added as literal
1624   if (!isImmTy(ImmTyNone)) {
1625     return false;
1626   }
1627 
1628   if (!Imm.IsFPImm) {
1629     // We got int literal token.
1630 
1631     if (type == MVT::f64 && hasFPModifiers()) {
1632       // Cannot apply fp modifiers to int literals preserving the same semantics
1633       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1634       // disable these cases.
1635       return false;
1636     }
1637 
1638     unsigned Size = type.getSizeInBits();
1639     if (Size == 64)
1640       Size = 32;
1641 
1642     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1643     // types.
1644     return isSafeTruncation(Imm.Val, Size);
1645   }
1646 
1647   // We got fp literal token
1648   if (type == MVT::f64) { // Expected 64-bit fp operand
1649     // We would set low 64-bits of literal to zeroes but we accept this literals
1650     return true;
1651   }
1652 
1653   if (type == MVT::i64) { // Expected 64-bit int operand
1654     // We don't allow fp literals in 64-bit integer instructions. It is
1655     // unclear how we should encode them.
1656     return false;
1657   }
1658 
1659   // We allow fp literals with f16x2 operands assuming that the specified
1660   // literal goes into the lower half and the upper half is zero. We also
1661   // require that the literal may be losslesly converted to f16.
1662   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1663                      (type == MVT::v2i16)? MVT::i16 : type;
1664 
1665   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1666   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1667 }
1668 
1669 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1670   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1671 }
1672 
1673 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1674   if (AsmParser->isVI())
1675     return isVReg32();
1676   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1677     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1678   else
1679     return false;
1680 }
1681 
1682 bool AMDGPUOperand::isSDWAFP16Operand() const {
1683   return isSDWAOperand(MVT::f16);
1684 }
1685 
1686 bool AMDGPUOperand::isSDWAFP32Operand() const {
1687   return isSDWAOperand(MVT::f32);
1688 }
1689 
1690 bool AMDGPUOperand::isSDWAInt16Operand() const {
1691   return isSDWAOperand(MVT::i16);
1692 }
1693 
1694 bool AMDGPUOperand::isSDWAInt32Operand() const {
1695   return isSDWAOperand(MVT::i32);
1696 }
1697 
1698 bool AMDGPUOperand::isBoolReg() const {
1699   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1700          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1701 }
1702 
1703 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1704 {
1705   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1706   assert(Size == 2 || Size == 4 || Size == 8);
1707 
1708   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1709 
1710   if (Imm.Mods.Abs) {
1711     Val &= ~FpSignMask;
1712   }
1713   if (Imm.Mods.Neg) {
1714     Val ^= FpSignMask;
1715   }
1716 
1717   return Val;
1718 }
1719 
1720 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1721   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1722                              Inst.getNumOperands())) {
1723     addLiteralImmOperand(Inst, Imm.Val,
1724                          ApplyModifiers &
1725                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1726   } else {
1727     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1728     Inst.addOperand(MCOperand::createImm(Imm.Val));
1729   }
1730 }
1731 
/// Encodes the literal \p Val for the next operand slot of \p Inst and appends
/// it as an immediate operand.
///
/// The encoding depends on two things: whether the parsed token was a
/// floating-point literal (Imm.IsFPImm, in which case the value is treated as
/// the bit pattern of an IEEE double) and the operand type recorded in the
/// instruction description for the slot being filled.
///
/// \param Inst           Instruction being built; exactly one operand is
///                       appended on every returning path.
/// \param Val            Literal value (bit pattern of a double for FP
///                       tokens, integer value otherwise).
/// \param ApplyModifiers If true, the operand's abs/neg modifiers are folded
///                       into \p Val before encoding.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  // The operand being added will occupy the slot after the existing ones.
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // An FP token is still held as a double at this point, so the sign bit to
    // modify is that of a 64-bit value regardless of the operand's own size.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      // Inlinable 64-bit values are emitted with their full bit pattern.
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high 32 bits of the double are kept as the operand value.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      // Set by convert() below; its value is not inspected here.
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the double to the float semantics selected for this operand
      // type by getOpFltSemantics().
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    // Inline constants keep the full (sign-extended) value ...
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // ... everything else is masked down to its low 32 bits.
    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Non-inlinable 64-bit integer literal: only the low 32 bits are emitted.
    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Not an inline constant: masked down to its low 16 bits.
    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    // These operand types are only expected to reach this point with a
    // 16-bit inline constant; the asserts document that expectation.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
1878 
1879 template <unsigned Bitwidth>
1880 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1881   APInt Literal(64, Imm.Val);
1882 
1883   if (!Imm.IsFPImm) {
1884     // We got int literal token.
1885     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1886     return;
1887   }
1888 
1889   bool Lost;
1890   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1891   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1892                     APFloat::rmNearestTiesToEven, &Lost);
1893   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1894 }
1895 
/// Appends this register to \p Inst as a register operand. The parsed
/// register is first passed through AMDGPU::getMCReg() together with the
/// parser's subtarget info. \p N is not used.
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
1899 
1900 static bool isInlineValue(unsigned Reg) {
1901   switch (Reg) {
1902   case AMDGPU::SRC_SHARED_BASE:
1903   case AMDGPU::SRC_SHARED_LIMIT:
1904   case AMDGPU::SRC_PRIVATE_BASE:
1905   case AMDGPU::SRC_PRIVATE_LIMIT:
1906   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1907     return true;
1908   case AMDGPU::SRC_VCCZ:
1909   case AMDGPU::SRC_EXECZ:
1910   case AMDGPU::SRC_SCC:
1911     return true;
1912   case AMDGPU::SGPR_NULL:
1913     return true;
1914   default:
1915     return false;
1916   }
1917 }
1918 
/// Returns true if this operand is a register and that register is accepted
/// by the file-local ::isInlineValue(unsigned) helper.
bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}
1922 
1923 //===----------------------------------------------------------------------===//
1924 // AsmParser
1925 //===----------------------------------------------------------------------===//
1926 
1927 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1928   if (Is == IS_VGPR) {
1929     switch (RegWidth) {
1930       default: return -1;
1931       case 1: return AMDGPU::VGPR_32RegClassID;
1932       case 2: return AMDGPU::VReg_64RegClassID;
1933       case 3: return AMDGPU::VReg_96RegClassID;
1934       case 4: return AMDGPU::VReg_128RegClassID;
1935       case 5: return AMDGPU::VReg_160RegClassID;
1936       case 6: return AMDGPU::VReg_192RegClassID;
1937       case 8: return AMDGPU::VReg_256RegClassID;
1938       case 16: return AMDGPU::VReg_512RegClassID;
1939       case 32: return AMDGPU::VReg_1024RegClassID;
1940     }
1941   } else if (Is == IS_TTMP) {
1942     switch (RegWidth) {
1943       default: return -1;
1944       case 1: return AMDGPU::TTMP_32RegClassID;
1945       case 2: return AMDGPU::TTMP_64RegClassID;
1946       case 4: return AMDGPU::TTMP_128RegClassID;
1947       case 8: return AMDGPU::TTMP_256RegClassID;
1948       case 16: return AMDGPU::TTMP_512RegClassID;
1949     }
1950   } else if (Is == IS_SGPR) {
1951     switch (RegWidth) {
1952       default: return -1;
1953       case 1: return AMDGPU::SGPR_32RegClassID;
1954       case 2: return AMDGPU::SGPR_64RegClassID;
1955       case 3: return AMDGPU::SGPR_96RegClassID;
1956       case 4: return AMDGPU::SGPR_128RegClassID;
1957       case 5: return AMDGPU::SGPR_160RegClassID;
1958       case 6: return AMDGPU::SGPR_192RegClassID;
1959       case 8: return AMDGPU::SGPR_256RegClassID;
1960       case 16: return AMDGPU::SGPR_512RegClassID;
1961     }
1962   } else if (Is == IS_AGPR) {
1963     switch (RegWidth) {
1964       default: return -1;
1965       case 1: return AMDGPU::AGPR_32RegClassID;
1966       case 2: return AMDGPU::AReg_64RegClassID;
1967       case 3: return AMDGPU::AReg_96RegClassID;
1968       case 4: return AMDGPU::AReg_128RegClassID;
1969       case 5: return AMDGPU::AReg_160RegClassID;
1970       case 6: return AMDGPU::AReg_192RegClassID;
1971       case 8: return AMDGPU::AReg_256RegClassID;
1972       case 16: return AMDGPU::AReg_512RegClassID;
1973       case 32: return AMDGPU::AReg_1024RegClassID;
1974     }
1975   }
1976   return -1;
1977 }
1978 
1979 static unsigned getSpecialRegForName(StringRef RegName) {
1980   return StringSwitch<unsigned>(RegName)
1981     .Case("exec", AMDGPU::EXEC)
1982     .Case("vcc", AMDGPU::VCC)
1983     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1984     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1985     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1986     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1987     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1988     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1989     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1990     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1991     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1992     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1993     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1994     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1995     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1996     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1997     .Case("m0", AMDGPU::M0)
1998     .Case("vccz", AMDGPU::SRC_VCCZ)
1999     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2000     .Case("execz", AMDGPU::SRC_EXECZ)
2001     .Case("src_execz", AMDGPU::SRC_EXECZ)
2002     .Case("scc", AMDGPU::SRC_SCC)
2003     .Case("src_scc", AMDGPU::SRC_SCC)
2004     .Case("tba", AMDGPU::TBA)
2005     .Case("tma", AMDGPU::TMA)
2006     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2007     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2008     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2009     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2010     .Case("vcc_lo", AMDGPU::VCC_LO)
2011     .Case("vcc_hi", AMDGPU::VCC_HI)
2012     .Case("exec_lo", AMDGPU::EXEC_LO)
2013     .Case("exec_hi", AMDGPU::EXEC_HI)
2014     .Case("tma_lo", AMDGPU::TMA_LO)
2015     .Case("tma_hi", AMDGPU::TMA_HI)
2016     .Case("tba_lo", AMDGPU::TBA_LO)
2017     .Case("tba_hi", AMDGPU::TBA_HI)
2018     .Case("pc", AMDGPU::PC_REG)
2019     .Case("null", AMDGPU::SGPR_NULL)
2020     .Default(AMDGPU::NoRegister);
2021 }
2022 
2023 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2024                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2025   auto R = parseRegister();
2026   if (!R) return true;
2027   assert(R->isReg());
2028   RegNo = R->getReg();
2029   StartLoc = R->getStartLoc();
2030   EndLoc = R->getEndLoc();
2031   return false;
2032 }
2033 
/// Three-argument form of ParseRegister(): forwards to the four-argument
/// overload with RestoreOnFailure set to false. Returns false on success and
/// true on failure, like the overload it calls.
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
2038 
2039 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2040                                                        SMLoc &StartLoc,
2041                                                        SMLoc &EndLoc) {
2042   bool Result =
2043       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2044   bool PendingErrors = getParser().hasPendingError();
2045   getParser().clearPendingErrors();
2046   if (PendingErrors)
2047     return MatchOperand_ParseFail;
2048   if (Result)
2049     return MatchOperand_NoMatch;
2050   return MatchOperand_Success;
2051 }
2052 
2053 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2054                                             RegisterKind RegKind, unsigned Reg1) {
2055   switch (RegKind) {
2056   case IS_SPECIAL:
2057     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2058       Reg = AMDGPU::EXEC;
2059       RegWidth = 2;
2060       return true;
2061     }
2062     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2063       Reg = AMDGPU::FLAT_SCR;
2064       RegWidth = 2;
2065       return true;
2066     }
2067     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2068       Reg = AMDGPU::XNACK_MASK;
2069       RegWidth = 2;
2070       return true;
2071     }
2072     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2073       Reg = AMDGPU::VCC;
2074       RegWidth = 2;
2075       return true;
2076     }
2077     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2078       Reg = AMDGPU::TBA;
2079       RegWidth = 2;
2080       return true;
2081     }
2082     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2083       Reg = AMDGPU::TMA;
2084       RegWidth = 2;
2085       return true;
2086     }
2087     return false;
2088   case IS_VGPR:
2089   case IS_SGPR:
2090   case IS_AGPR:
2091   case IS_TTMP:
2092     if (Reg1 != Reg + RegWidth) {
2093       return false;
2094     }
2095     RegWidth++;
2096     return true;
2097   default:
2098     llvm_unreachable("unexpected register kind");
2099   }
2100 }
2101 
// Associates the textual prefix of a regular (indexable) register name with
// the kind of register it denotes.
struct RegInfo {
  StringLiteral Name; // Name prefix, e.g. "v" or "ttmp".
  RegisterKind Kind;  // Register kind selected by that prefix.
};
2106 
// Prefixes of the regular register names. getRegularRegInfo() returns the
// first entry whose name is a prefix of the token, so the order matters:
// "acc" must be listed before "a" to be matched at all.
static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};
2114 
2115 static bool isRegularReg(RegisterKind Kind) {
2116   return Kind == IS_VGPR ||
2117          Kind == IS_SGPR ||
2118          Kind == IS_TTMP ||
2119          Kind == IS_AGPR;
2120 }
2121 
2122 static const RegInfo* getRegularRegInfo(StringRef Str) {
2123   for (const RegInfo &Reg : RegularRegisters)
2124     if (Str.startswith(Reg.Name))
2125       return &Reg;
2126   return nullptr;
2127 }
2128 
// Parses Str as a base-10 unsigned integer into Num. The result of
// StringRef::getAsInteger() is negated, so this returns true when the string
// was parsed successfully (callers treat true as "got a register index").
static bool getRegNum(StringRef Str, unsigned& Num) {
  return !Str.getAsInteger(10, Num);
}
2132 
2133 bool
2134 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2135                             const AsmToken &NextToken) const {
2136 
2137   // A list of consecutive registers: [s0,s1,s2,s3]
2138   if (Token.is(AsmToken::LBrac))
2139     return true;
2140 
2141   if (!Token.is(AsmToken::Identifier))
2142     return false;
2143 
2144   // A single register like s0 or a range of registers like s[0:1]
2145 
2146   StringRef Str = Token.getString();
2147   const RegInfo *Reg = getRegularRegInfo(Str);
2148   if (Reg) {
2149     StringRef RegName = Reg->Name;
2150     StringRef RegSuffix = Str.substr(RegName.size());
2151     if (!RegSuffix.empty()) {
2152       unsigned Num;
2153       // A single register with an index: rXX
2154       if (getRegNum(RegSuffix, Num))
2155         return true;
2156     } else {
2157       // A range of registers: r[XX:YY].
2158       if (NextToken.is(AsmToken::LBrac))
2159         return true;
2160     }
2161   }
2162 
2163   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2164 }
2165 
// Convenience overload: applies isRegister(Token, NextToken) to the current
// token and the token that follows it.
bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}
2171 
2172 unsigned
2173 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2174                                unsigned RegNum,
2175                                unsigned RegWidth) {
2176 
2177   assert(isRegularReg(RegKind));
2178 
2179   unsigned AlignSize = 1;
2180   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2181     // SGPR and TTMP registers must be aligned.
2182     // Max required alignment is 4 dwords.
2183     AlignSize = std::min(RegWidth, 4u);
2184   }
2185 
2186   if (RegNum % AlignSize != 0)
2187     return AMDGPU::NoRegister;
2188 
2189   unsigned RegIdx = RegNum / AlignSize;
2190   int RCID = getRegClass(RegKind, RegWidth);
2191   if (RCID == -1)
2192     return AMDGPU::NoRegister;
2193 
2194   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2195   const MCRegisterClass RC = TRI->getRegClass(RCID);
2196   if (RegIdx >= RC.getNumRegs())
2197     return AMDGPU::NoRegister;
2198 
2199   return RC.getRegister(RegIdx);
2200 }
2201 
2202 bool
2203 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2204   int64_t RegLo, RegHi;
2205   if (!trySkipToken(AsmToken::LBrac))
2206     return false;
2207 
2208   if (!parseExpr(RegLo))
2209     return false;
2210 
2211   if (trySkipToken(AsmToken::Colon)) {
2212     if (!parseExpr(RegHi))
2213       return false;
2214   } else {
2215     RegHi = RegLo;
2216   }
2217 
2218   if (!trySkipToken(AsmToken::RBrac))
2219     return false;
2220 
2221   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2222     return false;
2223 
2224   Num = static_cast<unsigned>(RegLo);
2225   Width = (RegHi - RegLo) + 1;
2226   return true;
2227 }
2228 
2229 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2230                                           unsigned &RegNum, unsigned &RegWidth,
2231                                           SmallVectorImpl<AsmToken> &Tokens) {
2232   assert(isToken(AsmToken::Identifier));
2233   unsigned Reg = getSpecialRegForName(getTokenStr());
2234   if (Reg) {
2235     RegNum = 0;
2236     RegWidth = 1;
2237     RegKind = IS_SPECIAL;
2238     Tokens.push_back(getToken());
2239     lex(); // skip register name
2240   }
2241   return Reg;
2242 }
2243 
2244 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2245                                           unsigned &RegNum, unsigned &RegWidth,
2246                                           SmallVectorImpl<AsmToken> &Tokens) {
2247   assert(isToken(AsmToken::Identifier));
2248   StringRef RegName = getTokenStr();
2249 
2250   const RegInfo *RI = getRegularRegInfo(RegName);
2251   if (!RI)
2252     return AMDGPU::NoRegister;
2253   Tokens.push_back(getToken());
2254   lex(); // skip register name
2255 
2256   RegKind = RI->Kind;
2257   StringRef RegSuffix = RegName.substr(RI->Name.size());
2258   if (!RegSuffix.empty()) {
2259     // Single 32-bit register: vXX.
2260     if (!getRegNum(RegSuffix, RegNum))
2261       return AMDGPU::NoRegister;
2262     RegWidth = 1;
2263   } else {
2264     // Range of registers: v[XX:YY]. ":YY" is optional.
2265     if (!ParseRegRange(RegNum, RegWidth))
2266       return AMDGPU::NoRegister;
2267   }
2268 
2269   return getRegularReg(RegKind, RegNum, RegWidth);
2270 }
2271 
2272 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2273                                        unsigned &RegWidth,
2274                                        SmallVectorImpl<AsmToken> &Tokens) {
2275   unsigned Reg = AMDGPU::NoRegister;
2276 
2277   if (!trySkipToken(AsmToken::LBrac))
2278     return AMDGPU::NoRegister;
2279 
2280   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2281 
2282   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2283     return AMDGPU::NoRegister;
2284   if (RegWidth != 1)
2285     return AMDGPU::NoRegister;
2286 
2287   for (; trySkipToken(AsmToken::Comma); ) {
2288     RegisterKind NextRegKind;
2289     unsigned NextReg, NextRegNum, NextRegWidth;
2290 
2291     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
2292                              Tokens))
2293       return AMDGPU::NoRegister;
2294     if (NextRegWidth != 1)
2295       return AMDGPU::NoRegister;
2296     if (NextRegKind != RegKind)
2297       return AMDGPU::NoRegister;
2298     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2299       return AMDGPU::NoRegister;
2300   }
2301 
2302   if (!trySkipToken(AsmToken::RBrac))
2303     return AMDGPU::NoRegister;
2304 
2305   if (isRegularReg(RegKind))
2306     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2307 
2308   return Reg;
2309 }
2310 
2311 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2312                                           unsigned &RegNum, unsigned &RegWidth,
2313                                           SmallVectorImpl<AsmToken> &Tokens) {
2314   Reg = AMDGPU::NoRegister;
2315 
2316   if (isToken(AsmToken::Identifier)) {
2317     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2318     if (Reg == AMDGPU::NoRegister)
2319       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2320   } else {
2321     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2322   }
2323 
2324   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2325   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2326 }
2327 
2328 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2329                                           unsigned &RegNum, unsigned &RegWidth,
2330                                           bool RestoreOnFailure) {
2331   Reg = AMDGPU::NoRegister;
2332 
2333   SmallVector<AsmToken, 1> Tokens;
2334   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2335     if (RestoreOnFailure) {
2336       while (!Tokens.empty()) {
2337         getLexer().UnLex(Tokens.pop_back_val());
2338       }
2339     }
2340     return true;
2341   }
2342   return false;
2343 }
2344 
2345 Optional<StringRef>
2346 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2347   switch (RegKind) {
2348   case IS_VGPR:
2349     return StringRef(".amdgcn.next_free_vgpr");
2350   case IS_SGPR:
2351     return StringRef(".amdgcn.next_free_sgpr");
2352   default:
2353     return None;
2354   }
2355 }
2356 
2357 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2358   auto SymbolName = getGprCountSymbolName(RegKind);
2359   assert(SymbolName && "initializing invalid register kind");
2360   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2361   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2362 }
2363 
2364 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2365                                             unsigned DwordRegIndex,
2366                                             unsigned RegWidth) {
2367   // Symbols are only defined for GCN targets
2368   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2369     return true;
2370 
2371   auto SymbolName = getGprCountSymbolName(RegKind);
2372   if (!SymbolName)
2373     return true;
2374   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2375 
2376   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2377   int64_t OldCount;
2378 
2379   if (!Sym->isVariable())
2380     return !Error(getParser().getTok().getLoc(),
2381                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2382   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2383     return !Error(
2384         getParser().getTok().getLoc(),
2385         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2386 
2387   if (OldCount <= NewMax)
2388     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2389 
2390   return true;
2391 }
2392 
2393 std::unique_ptr<AMDGPUOperand>
2394 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2395   const auto &Tok = Parser.getTok();
2396   SMLoc StartLoc = Tok.getLoc();
2397   SMLoc EndLoc = Tok.getEndLoc();
2398   RegisterKind RegKind;
2399   unsigned Reg, RegNum, RegWidth;
2400 
2401   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2402     //FIXME: improve error messages (bug 41303).
2403     Error(StartLoc, "not a valid operand.");
2404     return nullptr;
2405   }
2406   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2407     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2408       return nullptr;
2409   } else
2410     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2411   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2412 }
2413 
2414 OperandMatchResultTy
2415 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2416   // TODO: add syntactic sugar for 1/(2*PI)
2417 
2418   assert(!isRegister());
2419   assert(!isModifier());
2420 
2421   const auto& Tok = getToken();
2422   const auto& NextTok = peekToken();
2423   bool IsReal = Tok.is(AsmToken::Real);
2424   SMLoc S = getLoc();
2425   bool Negate = false;
2426 
2427   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2428     lex();
2429     IsReal = true;
2430     Negate = true;
2431   }
2432 
2433   if (IsReal) {
2434     // Floating-point expressions are not supported.
2435     // Can only allow floating-point literals with an
2436     // optional sign.
2437 
2438     StringRef Num = getTokenStr();
2439     lex();
2440 
2441     APFloat RealVal(APFloat::IEEEdouble());
2442     auto roundMode = APFloat::rmNearestTiesToEven;
2443     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2444       return MatchOperand_ParseFail;
2445     }
2446     if (Negate)
2447       RealVal.changeSign();
2448 
2449     Operands.push_back(
2450       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2451                                AMDGPUOperand::ImmTyNone, true));
2452 
2453     return MatchOperand_Success;
2454 
2455   } else {
2456     int64_t IntVal;
2457     const MCExpr *Expr;
2458     SMLoc S = getLoc();
2459 
2460     if (HasSP3AbsModifier) {
2461       // This is a workaround for handling expressions
2462       // as arguments of SP3 'abs' modifier, for example:
2463       //     |1.0|
2464       //     |-1|
2465       //     |1+x|
2466       // This syntax is not compatible with syntax of standard
2467       // MC expressions (due to the trailing '|').
2468       SMLoc EndLoc;
2469       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2470         return MatchOperand_ParseFail;
2471     } else {
2472       if (Parser.parseExpression(Expr))
2473         return MatchOperand_ParseFail;
2474     }
2475 
2476     if (Expr->evaluateAsAbsolute(IntVal)) {
2477       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2478     } else {
2479       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2480     }
2481 
2482     return MatchOperand_Success;
2483   }
2484 
2485   return MatchOperand_NoMatch;
2486 }
2487 
2488 OperandMatchResultTy
2489 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2490   if (!isRegister())
2491     return MatchOperand_NoMatch;
2492 
2493   if (auto R = parseRegister()) {
2494     assert(R->isReg());
2495     Operands.push_back(std::move(R));
2496     return MatchOperand_Success;
2497   }
2498   return MatchOperand_ParseFail;
2499 }
2500 
2501 OperandMatchResultTy
2502 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2503   auto res = parseReg(Operands);
2504   if (res != MatchOperand_NoMatch) {
2505     return res;
2506   } else if (isModifier()) {
2507     return MatchOperand_NoMatch;
2508   } else {
2509     return parseImm(Operands, HasSP3AbsMod);
2510   }
2511 }
2512 
2513 bool
2514 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2515   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2516     const auto &str = Token.getString();
2517     return str == "abs" || str == "neg" || str == "sext";
2518   }
2519   return false;
2520 }
2521 
// Returns true if the tokens have the shape "name:", i.e. an identifier
// immediately followed by a colon.
bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
}
2526 
// Returns true if the tokens start an operand modifier: either a named one
// (see isNamedOperandModifier) or a '|' token.
bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}
2531 
// Returns true if the tokens start either a register or an operand modifier.
bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}
2536 
2537 // Check if this is an operand modifier or an opcode modifier
2538 // which may look like an expression but it is not. We should
2539 // avoid parsing these modifiers as expressions. Currently
2540 // recognized sequences are:
2541 //   |...|
2542 //   abs(...)
2543 //   neg(...)
2544 //   sext(...)
2545 //   -reg
2546 //   -|...|
2547 //   -abs(...)
2548 //   name:...
2549 // Note that simple opcode modifiers like 'gds' may be parsed as
2550 // expressions; this is a special case. See getExpressionAsToken.
2551 //
2552 bool
2553 AMDGPUAsmParser::isModifier() {
2554 
2555   AsmToken Tok = getToken();
2556   AsmToken NextToken[2];
2557   peekTokens(NextToken);
2558 
2559   return isOperandModifier(Tok, NextToken[0]) ||
2560          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2561          isOpcodeModifierWithVal(Tok, NextToken[0]);
2562 }
2563 
2564 // Check if the current token is an SP3 'neg' modifier.
2565 // Currently this modifier is allowed in the following context:
2566 //
2567 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2568 // 2. Before an 'abs' modifier: -abs(...)
2569 // 3. Before an SP3 'abs' modifier: -|...|
2570 //
2571 // In all other cases "-" is handled as a part
2572 // of an expression that follows the sign.
2573 //
2574 // Note: When "-" is followed by an integer literal,
2575 // this is interpreted as integer negation rather
2576 // than a floating-point NEG modifier applied to N.
2577 // Beside being contr-intuitive, such use of floating-point
2578 // NEG modifier would have resulted in different meaning
2579 // of integer literals used with VOP1/2/C and VOP3,
2580 // for example:
2581 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2582 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2583 // Negative fp literals with preceding "-" are
2584 // handled likewise for unifomtity
2585 //
2586 bool
2587 AMDGPUAsmParser::parseSP3NegModifier() {
2588 
2589   AsmToken NextToken[2];
2590   peekTokens(NextToken);
2591 
2592   if (isToken(AsmToken::Minus) &&
2593       (isRegister(NextToken[0], NextToken[1]) ||
2594        NextToken[0].is(AsmToken::Pipe) ||
2595        isId(NextToken[0], "abs"))) {
2596     lex();
2597     return true;
2598   }
2599 
2600   return false;
2601 }
2602 
2603 OperandMatchResultTy
2604 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2605                                               bool AllowImm) {
2606   bool Neg, SP3Neg;
2607   bool Abs, SP3Abs;
2608   SMLoc Loc;
2609 
2610   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2611   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2612     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2613     return MatchOperand_ParseFail;
2614   }
2615 
2616   SP3Neg = parseSP3NegModifier();
2617 
2618   Loc = getLoc();
2619   Neg = trySkipId("neg");
2620   if (Neg && SP3Neg) {
2621     Error(Loc, "expected register or immediate");
2622     return MatchOperand_ParseFail;
2623   }
2624   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2625     return MatchOperand_ParseFail;
2626 
2627   Abs = trySkipId("abs");
2628   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2629     return MatchOperand_ParseFail;
2630 
2631   Loc = getLoc();
2632   SP3Abs = trySkipToken(AsmToken::Pipe);
2633   if (Abs && SP3Abs) {
2634     Error(Loc, "expected register or immediate");
2635     return MatchOperand_ParseFail;
2636   }
2637 
2638   OperandMatchResultTy Res;
2639   if (AllowImm) {
2640     Res = parseRegOrImm(Operands, SP3Abs);
2641   } else {
2642     Res = parseReg(Operands);
2643   }
2644   if (Res != MatchOperand_Success) {
2645     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2646   }
2647 
2648   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2649     return MatchOperand_ParseFail;
2650   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2651     return MatchOperand_ParseFail;
2652   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2653     return MatchOperand_ParseFail;
2654 
2655   AMDGPUOperand::Modifiers Mods;
2656   Mods.Abs = Abs || SP3Abs;
2657   Mods.Neg = Neg || SP3Neg;
2658 
2659   if (Mods.hasFPModifiers()) {
2660     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2661     if (Op.isExpr()) {
2662       Error(Op.getStartLoc(), "expected an absolute expression");
2663       return MatchOperand_ParseFail;
2664     }
2665     Op.setModifiers(Mods);
2666   }
2667   return MatchOperand_Success;
2668 }
2669 
2670 OperandMatchResultTy
2671 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2672                                                bool AllowImm) {
2673   bool Sext = trySkipId("sext");
2674   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2675     return MatchOperand_ParseFail;
2676 
2677   OperandMatchResultTy Res;
2678   if (AllowImm) {
2679     Res = parseRegOrImm(Operands);
2680   } else {
2681     Res = parseReg(Operands);
2682   }
2683   if (Res != MatchOperand_Success) {
2684     return Sext? MatchOperand_ParseFail : Res;
2685   }
2686 
2687   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2688     return MatchOperand_ParseFail;
2689 
2690   AMDGPUOperand::Modifiers Mods;
2691   Mods.Sext = Sext;
2692 
2693   if (Mods.hasIntModifiers()) {
2694     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2695     if (Op.isExpr()) {
2696       Error(Op.getStartLoc(), "expected an absolute expression");
2697       return MatchOperand_ParseFail;
2698     }
2699     Op.setModifiers(Mods);
2700   }
2701 
2702   return MatchOperand_Success;
2703 }
2704 
2705 OperandMatchResultTy
2706 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2707   return parseRegOrImmWithFPInputMods(Operands, false);
2708 }
2709 
2710 OperandMatchResultTy
2711 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2712   return parseRegOrImmWithIntInputMods(Operands, false);
2713 }
2714 
2715 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2716   auto Loc = getLoc();
2717   if (trySkipId("off")) {
2718     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2719                                                 AMDGPUOperand::ImmTyOff, false));
2720     return MatchOperand_Success;
2721   }
2722 
2723   if (!isRegister())
2724     return MatchOperand_NoMatch;
2725 
2726   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2727   if (Reg) {
2728     Operands.push_back(std::move(Reg));
2729     return MatchOperand_Success;
2730   }
2731 
2732   return MatchOperand_ParseFail;
2733 
2734 }
2735 
2736 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2737   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2738 
2739   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2740       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2741       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2742       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2743     return Match_InvalidOperand;
2744 
2745   if ((TSFlags & SIInstrFlags::VOP3) &&
2746       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2747       getForcedEncodingSize() != 64)
2748     return Match_PreferE32;
2749 
2750   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2751       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2752     // v_mac_f32/16 allow only dst_sel == DWORD;
2753     auto OpNum =
2754         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2755     const auto &Op = Inst.getOperand(OpNum);
2756     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2757       return Match_InvalidOperand;
2758     }
2759   }
2760 
2761   return Match_Success;
2762 }
2763 
2764 // What asm variants we should check
2765 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2766   if (getForcedEncodingSize() == 32) {
2767     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2768     return makeArrayRef(Variants);
2769   }
2770 
2771   if (isForcedVOP3()) {
2772     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2773     return makeArrayRef(Variants);
2774   }
2775 
2776   if (isForcedSDWA()) {
2777     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2778                                         AMDGPUAsmVariants::SDWA9};
2779     return makeArrayRef(Variants);
2780   }
2781 
2782   if (isForcedDPP()) {
2783     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2784     return makeArrayRef(Variants);
2785   }
2786 
2787   static const unsigned Variants[] = {
2788     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2789     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2790   };
2791 
2792   return makeArrayRef(Variants);
2793 }
2794 
2795 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2796   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2797   const unsigned Num = Desc.getNumImplicitUses();
2798   for (unsigned i = 0; i < Num; ++i) {
2799     unsigned Reg = Desc.ImplicitUses[i];
2800     switch (Reg) {
2801     case AMDGPU::FLAT_SCR:
2802     case AMDGPU::VCC:
2803     case AMDGPU::VCC_LO:
2804     case AMDGPU::VCC_HI:
2805     case AMDGPU::M0:
2806       return Reg;
2807     default:
2808       break;
2809     }
2810   }
2811   return AMDGPU::NoRegister;
2812 }
2813 
2814 // NB: This code is correct only when used to check constant
2815 // bus limitations because GFX7 support no f16 inline constants.
2816 // Note that there are no cases when a GFX7 opcode violates
2817 // constant bus limitations due to the use of an f16 constant.
2818 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2819                                        unsigned OpIdx) const {
2820   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2821 
2822   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2823     return false;
2824   }
2825 
2826   const MCOperand &MO = Inst.getOperand(OpIdx);
2827 
2828   int64_t Val = MO.getImm();
2829   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2830 
2831   switch (OpSize) { // expected operand size
2832   case 8:
2833     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2834   case 4:
2835     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2836   case 2: {
2837     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2838     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2839         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2840         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2841       return AMDGPU::isInlinableIntLiteral(Val);
2842 
2843     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2844         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2845         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2846       return AMDGPU::isInlinableIntLiteralV216(Val);
2847 
2848     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2849         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2850         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2851       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2852 
2853     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2854   }
2855   default:
2856     llvm_unreachable("invalid operand size");
2857   }
2858 }
2859 
2860 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2861   if (!isGFX10())
2862     return 1;
2863 
2864   switch (Opcode) {
2865   // 64-bit shift instructions can use only one scalar value input
2866   case AMDGPU::V_LSHLREV_B64:
2867   case AMDGPU::V_LSHLREV_B64_gfx10:
2868   case AMDGPU::V_LSHL_B64:
2869   case AMDGPU::V_LSHRREV_B64:
2870   case AMDGPU::V_LSHRREV_B64_gfx10:
2871   case AMDGPU::V_LSHR_B64:
2872   case AMDGPU::V_ASHRREV_I64:
2873   case AMDGPU::V_ASHRREV_I64_gfx10:
2874   case AMDGPU::V_ASHR_I64:
2875     return 1;
2876   default:
2877     return 2;
2878   }
2879 }
2880 
2881 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2882   const MCOperand &MO = Inst.getOperand(OpIdx);
2883   if (MO.isImm()) {
2884     return !isInlineConstant(Inst, OpIdx);
2885   } else if (MO.isReg()) {
2886     auto Reg = MO.getReg();
2887     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2888     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2889   } else {
2890     return true;
2891   }
2892 }
2893 
2894 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2895   const unsigned Opcode = Inst.getOpcode();
2896   const MCInstrDesc &Desc = MII.get(Opcode);
2897   unsigned ConstantBusUseCount = 0;
2898   unsigned NumLiterals = 0;
2899   unsigned LiteralSize;
2900 
2901   if (Desc.TSFlags &
2902       (SIInstrFlags::VOPC |
2903        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2904        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2905        SIInstrFlags::SDWA)) {
2906     // Check special imm operands (used by madmk, etc)
2907     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2908       ++ConstantBusUseCount;
2909     }
2910 
2911     SmallDenseSet<unsigned> SGPRsUsed;
2912     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2913     if (SGPRUsed != AMDGPU::NoRegister) {
2914       SGPRsUsed.insert(SGPRUsed);
2915       ++ConstantBusUseCount;
2916     }
2917 
2918     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2919     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2920     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2921 
2922     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2923 
2924     for (int OpIdx : OpIndices) {
2925       if (OpIdx == -1) break;
2926 
2927       const MCOperand &MO = Inst.getOperand(OpIdx);
2928       if (usesConstantBus(Inst, OpIdx)) {
2929         if (MO.isReg()) {
2930           const unsigned Reg = mc2PseudoReg(MO.getReg());
2931           // Pairs of registers with a partial intersections like these
2932           //   s0, s[0:1]
2933           //   flat_scratch_lo, flat_scratch
2934           //   flat_scratch_lo, flat_scratch_hi
2935           // are theoretically valid but they are disabled anyway.
2936           // Note that this code mimics SIInstrInfo::verifyInstruction
2937           if (!SGPRsUsed.count(Reg)) {
2938             SGPRsUsed.insert(Reg);
2939             ++ConstantBusUseCount;
2940           }
2941         } else { // Expression or a literal
2942 
2943           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2944             continue; // special operand like VINTERP attr_chan
2945 
2946           // An instruction may use only one literal.
2947           // This has been validated on the previous step.
2948           // See validateVOP3Literal.
2949           // This literal may be used as more than one operand.
2950           // If all these operands are of the same size,
2951           // this literal counts as one scalar value.
2952           // Otherwise it counts as 2 scalar values.
2953           // See "GFX10 Shader Programming", section 3.6.2.3.
2954 
2955           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2956           if (Size < 4) Size = 4;
2957 
2958           if (NumLiterals == 0) {
2959             NumLiterals = 1;
2960             LiteralSize = Size;
2961           } else if (LiteralSize != Size) {
2962             NumLiterals = 2;
2963           }
2964         }
2965       }
2966     }
2967   }
2968   ConstantBusUseCount += NumLiterals;
2969 
2970   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2971 }
2972 
2973 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2974   const unsigned Opcode = Inst.getOpcode();
2975   const MCInstrDesc &Desc = MII.get(Opcode);
2976 
2977   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2978   if (DstIdx == -1 ||
2979       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2980     return true;
2981   }
2982 
2983   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2984 
2985   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2986   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2987   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2988 
2989   assert(DstIdx != -1);
2990   const MCOperand &Dst = Inst.getOperand(DstIdx);
2991   assert(Dst.isReg());
2992   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2993 
2994   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2995 
2996   for (int SrcIdx : SrcIndices) {
2997     if (SrcIdx == -1) break;
2998     const MCOperand &Src = Inst.getOperand(SrcIdx);
2999     if (Src.isReg()) {
3000       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3001       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3002         return false;
3003       }
3004     }
3005   }
3006 
3007   return true;
3008 }
3009 
3010 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3011 
3012   const unsigned Opc = Inst.getOpcode();
3013   const MCInstrDesc &Desc = MII.get(Opc);
3014 
3015   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3016     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3017     assert(ClampIdx != -1);
3018     return Inst.getOperand(ClampIdx).getImm() == 0;
3019   }
3020 
3021   return true;
3022 }
3023 
3024 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3025 
3026   const unsigned Opc = Inst.getOpcode();
3027   const MCInstrDesc &Desc = MII.get(Opc);
3028 
3029   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3030     return true;
3031 
3032   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3033   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3034   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3035 
3036   assert(VDataIdx != -1);
3037   assert(DMaskIdx != -1);
3038   assert(TFEIdx != -1);
3039 
3040   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3041   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3042   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3043   if (DMask == 0)
3044     DMask = 1;
3045 
3046   unsigned DataSize =
3047     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3048   if (hasPackedD16()) {
3049     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3050     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3051       DataSize = (DataSize + 1) / 2;
3052   }
3053 
3054   return (VDataSize / 4) == DataSize + TFESize;
3055 }
3056 
3057 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3058   const unsigned Opc = Inst.getOpcode();
3059   const MCInstrDesc &Desc = MII.get(Opc);
3060 
3061   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3062     return true;
3063 
3064   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3065   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3066       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3067   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3068   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3069   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3070 
3071   assert(VAddr0Idx != -1);
3072   assert(SrsrcIdx != -1);
3073   assert(DimIdx != -1);
3074   assert(SrsrcIdx > VAddr0Idx);
3075 
3076   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3077   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3078   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3079   unsigned VAddrSize =
3080       IsNSA ? SrsrcIdx - VAddr0Idx
3081             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3082 
3083   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3084                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3085                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3086                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3087   if (!IsNSA) {
3088     if (AddrSize > 8)
3089       AddrSize = 16;
3090     else if (AddrSize > 4)
3091       AddrSize = 8;
3092   }
3093 
3094   return VAddrSize == AddrSize;
3095 }
3096 
3097 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3098 
3099   const unsigned Opc = Inst.getOpcode();
3100   const MCInstrDesc &Desc = MII.get(Opc);
3101 
3102   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3103     return true;
3104   if (!Desc.mayLoad() || !Desc.mayStore())
3105     return true; // Not atomic
3106 
3107   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3108   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3109 
3110   // This is an incomplete check because image_atomic_cmpswap
3111   // may only use 0x3 and 0xf while other atomic operations
3112   // may use 0x1 and 0x3. However these limitations are
3113   // verified when we check that dmask matches dst size.
3114   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3115 }
3116 
3117 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3118 
3119   const unsigned Opc = Inst.getOpcode();
3120   const MCInstrDesc &Desc = MII.get(Opc);
3121 
3122   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3123     return true;
3124 
3125   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3126   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3127 
3128   // GATHER4 instructions use dmask in a different fashion compared to
3129   // other MIMG instructions. The only useful DMASK values are
3130   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3131   // (red,red,red,red) etc.) The ISA document doesn't mention
3132   // this.
3133   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3134 }
3135 
3136 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3137 {
3138   switch (Opcode) {
3139   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3140   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3141   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3142     return true;
3143   default:
3144     return false;
3145   }
3146 }
3147 
3148 // movrels* opcodes should only allow VGPRS as src0.
3149 // This is specified in .td description for vop1/vop3,
3150 // but sdwa is handled differently. See isSDWAOperand.
3151 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3152 
3153   const unsigned Opc = Inst.getOpcode();
3154   const MCInstrDesc &Desc = MII.get(Opc);
3155 
3156   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3157     return true;
3158 
3159   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3160   assert(Src0Idx != -1);
3161 
3162   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3163   if (!Src0.isReg())
3164     return false;
3165 
3166   auto Reg = Src0.getReg();
3167   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3168   return !isSGPR(mc2PseudoReg(Reg), TRI);
3169 }
3170 
3171 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3172 
3173   const unsigned Opc = Inst.getOpcode();
3174 
3175   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3176     return true;
3177 
3178   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3179   assert(Src0Idx != -1);
3180 
3181   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3182   if (!Src0.isReg())
3183     return true;
3184 
3185   auto Reg = Src0.getReg();
3186   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3187   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3188     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3189     return false;
3190   }
3191 
3192   return true;
3193 }
3194 
3195 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3196 
3197   const unsigned Opc = Inst.getOpcode();
3198   const MCInstrDesc &Desc = MII.get(Opc);
3199 
3200   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3201     return true;
3202 
3203   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3204   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3205     if (isCI() || isSI())
3206       return false;
3207   }
3208 
3209   return true;
3210 }
3211 
3212 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3213   const unsigned Opc = Inst.getOpcode();
3214   const MCInstrDesc &Desc = MII.get(Opc);
3215 
3216   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3217     return true;
3218 
3219   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3220   if (DimIdx < 0)
3221     return true;
3222 
3223   long Imm = Inst.getOperand(DimIdx).getImm();
3224   if (Imm < 0 || Imm >= 8)
3225     return false;
3226 
3227   return true;
3228 }
3229 
3230 static bool IsRevOpcode(const unsigned Opcode)
3231 {
3232   switch (Opcode) {
3233   case AMDGPU::V_SUBREV_F32_e32:
3234   case AMDGPU::V_SUBREV_F32_e64:
3235   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3236   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3237   case AMDGPU::V_SUBREV_F32_e32_vi:
3238   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3239   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3240   case AMDGPU::V_SUBREV_F32_e64_vi:
3241 
3242   case AMDGPU::V_SUBREV_I32_e32:
3243   case AMDGPU::V_SUBREV_I32_e64:
3244   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3245   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3246 
3247   case AMDGPU::V_SUBBREV_U32_e32:
3248   case AMDGPU::V_SUBBREV_U32_e64:
3249   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3250   case AMDGPU::V_SUBBREV_U32_e32_vi:
3251   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3252   case AMDGPU::V_SUBBREV_U32_e64_vi:
3253 
3254   case AMDGPU::V_SUBREV_U32_e32:
3255   case AMDGPU::V_SUBREV_U32_e64:
3256   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3257   case AMDGPU::V_SUBREV_U32_e32_vi:
3258   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3259   case AMDGPU::V_SUBREV_U32_e64_vi:
3260 
3261   case AMDGPU::V_SUBREV_F16_e32:
3262   case AMDGPU::V_SUBREV_F16_e64:
3263   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3264   case AMDGPU::V_SUBREV_F16_e32_vi:
3265   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3266   case AMDGPU::V_SUBREV_F16_e64_vi:
3267 
3268   case AMDGPU::V_SUBREV_U16_e32:
3269   case AMDGPU::V_SUBREV_U16_e64:
3270   case AMDGPU::V_SUBREV_U16_e32_vi:
3271   case AMDGPU::V_SUBREV_U16_e64_vi:
3272 
3273   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3274   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3275   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3276 
3277   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3278   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3279 
3280   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3281   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3282 
3283   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3284   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3285 
3286   case AMDGPU::V_LSHRREV_B32_e32:
3287   case AMDGPU::V_LSHRREV_B32_e64:
3288   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3289   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3290   case AMDGPU::V_LSHRREV_B32_e32_vi:
3291   case AMDGPU::V_LSHRREV_B32_e64_vi:
3292   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3293   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3294 
3295   case AMDGPU::V_ASHRREV_I32_e32:
3296   case AMDGPU::V_ASHRREV_I32_e64:
3297   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3298   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3299   case AMDGPU::V_ASHRREV_I32_e32_vi:
3300   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3301   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3302   case AMDGPU::V_ASHRREV_I32_e64_vi:
3303 
3304   case AMDGPU::V_LSHLREV_B32_e32:
3305   case AMDGPU::V_LSHLREV_B32_e64:
3306   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3307   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3308   case AMDGPU::V_LSHLREV_B32_e32_vi:
3309   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3310   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3311   case AMDGPU::V_LSHLREV_B32_e64_vi:
3312 
3313   case AMDGPU::V_LSHLREV_B16_e32:
3314   case AMDGPU::V_LSHLREV_B16_e64:
3315   case AMDGPU::V_LSHLREV_B16_e32_vi:
3316   case AMDGPU::V_LSHLREV_B16_e64_vi:
3317   case AMDGPU::V_LSHLREV_B16_gfx10:
3318 
3319   case AMDGPU::V_LSHRREV_B16_e32:
3320   case AMDGPU::V_LSHRREV_B16_e64:
3321   case AMDGPU::V_LSHRREV_B16_e32_vi:
3322   case AMDGPU::V_LSHRREV_B16_e64_vi:
3323   case AMDGPU::V_LSHRREV_B16_gfx10:
3324 
3325   case AMDGPU::V_ASHRREV_I16_e32:
3326   case AMDGPU::V_ASHRREV_I16_e64:
3327   case AMDGPU::V_ASHRREV_I16_e32_vi:
3328   case AMDGPU::V_ASHRREV_I16_e64_vi:
3329   case AMDGPU::V_ASHRREV_I16_gfx10:
3330 
3331   case AMDGPU::V_LSHLREV_B64:
3332   case AMDGPU::V_LSHLREV_B64_gfx10:
3333   case AMDGPU::V_LSHLREV_B64_vi:
3334 
3335   case AMDGPU::V_LSHRREV_B64:
3336   case AMDGPU::V_LSHRREV_B64_gfx10:
3337   case AMDGPU::V_LSHRREV_B64_vi:
3338 
3339   case AMDGPU::V_ASHRREV_I64:
3340   case AMDGPU::V_ASHRREV_I64_gfx10:
3341   case AMDGPU::V_ASHRREV_I64_vi:
3342 
3343   case AMDGPU::V_PK_LSHLREV_B16:
3344   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3345   case AMDGPU::V_PK_LSHLREV_B16_vi:
3346 
3347   case AMDGPU::V_PK_LSHRREV_B16:
3348   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3349   case AMDGPU::V_PK_LSHRREV_B16_vi:
3350   case AMDGPU::V_PK_ASHRREV_I16:
3351   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3352   case AMDGPU::V_PK_ASHRREV_I16_vi:
3353     return true;
3354   default:
3355     return false;
3356   }
3357 }
3358 
3359 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3360 
3361   using namespace SIInstrFlags;
3362   const unsigned Opcode = Inst.getOpcode();
3363   const MCInstrDesc &Desc = MII.get(Opcode);
3364 
3365   // lds_direct register is defined so that it can be used
3366   // with 9-bit operands only. Ignore encodings which do not accept these.
3367   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3368     return true;
3369 
3370   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3371   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3372   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3373 
3374   const int SrcIndices[] = { Src1Idx, Src2Idx };
3375 
3376   // lds_direct cannot be specified as either src1 or src2.
3377   for (int SrcIdx : SrcIndices) {
3378     if (SrcIdx == -1) break;
3379     const MCOperand &Src = Inst.getOperand(SrcIdx);
3380     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3381       return false;
3382     }
3383   }
3384 
3385   if (Src0Idx == -1)
3386     return true;
3387 
3388   const MCOperand &Src = Inst.getOperand(Src0Idx);
3389   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3390     return true;
3391 
3392   // lds_direct is specified as src0. Check additional limitations.
3393   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3394 }
3395 
3396 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3397   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3398     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3399     if (Op.isFlatOffset())
3400       return Op.getStartLoc();
3401   }
3402   return getLoc();
3403 }
3404 
3405 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3406                                          const OperandVector &Operands) {
3407   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3408   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3409     return true;
3410 
3411   auto Opcode = Inst.getOpcode();
3412   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3413   assert(OpNum != -1);
3414 
3415   const auto &Op = Inst.getOperand(OpNum);
3416   if (!hasFlatOffsets() && Op.getImm() != 0) {
3417     Error(getFlatOffsetLoc(Operands),
3418           "flat offset modifier is not supported on this GPU");
3419     return false;
3420   }
3421 
3422   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3423   // For FLAT segment the offset must be positive;
3424   // MSB is ignored and forced to zero.
3425   unsigned OffsetSize = isGFX9() ? 13 : 12;
3426   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3427     if (!isIntN(OffsetSize, Op.getImm())) {
3428       Error(getFlatOffsetLoc(Operands),
3429             isGFX9() ? "expected a 13-bit signed offset" :
3430                        "expected a 12-bit signed offset");
3431       return false;
3432     }
3433   } else {
3434     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3435       Error(getFlatOffsetLoc(Operands),
3436             isGFX9() ? "expected a 12-bit unsigned offset" :
3437                        "expected an 11-bit unsigned offset");
3438       return false;
3439     }
3440   }
3441 
3442   return true;
3443 }
3444 
3445 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3446   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3447     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3448     if (Op.isSMEMOffset())
3449       return Op.getStartLoc();
3450   }
3451   return getLoc();
3452 }
3453 
3454 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3455                                          const OperandVector &Operands) {
3456   if (isCI() || isSI())
3457     return true;
3458 
3459   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3460   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3461     return true;
3462 
3463   auto Opcode = Inst.getOpcode();
3464   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3465   if (OpNum == -1)
3466     return true;
3467 
3468   const auto &Op = Inst.getOperand(OpNum);
3469   if (!Op.isImm())
3470     return true;
3471 
3472   uint64_t Offset = Op.getImm();
3473   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3474   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3475       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3476     return true;
3477 
3478   Error(getSMEMOffsetLoc(Operands),
3479         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3480                                "expected a 21-bit signed offset");
3481 
3482   return false;
3483 }
3484 
3485 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3486   unsigned Opcode = Inst.getOpcode();
3487   const MCInstrDesc &Desc = MII.get(Opcode);
3488   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3489     return true;
3490 
3491   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3492   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3493 
3494   const int OpIndices[] = { Src0Idx, Src1Idx };
3495 
3496   unsigned NumExprs = 0;
3497   unsigned NumLiterals = 0;
3498   uint32_t LiteralValue;
3499 
3500   for (int OpIdx : OpIndices) {
3501     if (OpIdx == -1) break;
3502 
3503     const MCOperand &MO = Inst.getOperand(OpIdx);
3504     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3505     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3506       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3507         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3508         if (NumLiterals == 0 || LiteralValue != Value) {
3509           LiteralValue = Value;
3510           ++NumLiterals;
3511         }
3512       } else if (MO.isExpr()) {
3513         ++NumExprs;
3514       }
3515     }
3516   }
3517 
3518   return NumLiterals + NumExprs <= 1;
3519 }
3520 
3521 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3522   const unsigned Opc = Inst.getOpcode();
3523   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3524       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3525     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3526     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3527 
3528     if (OpSel & ~3)
3529       return false;
3530   }
3531   return true;
3532 }
3533 
3534 // Check if VCC register matches wavefront size
3535 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3536   auto FB = getFeatureBits();
3537   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3538     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3539 }
3540 
3541 // VOP3 literal is only allowed in GFX10+ and only one can be used
3542 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3543   unsigned Opcode = Inst.getOpcode();
3544   const MCInstrDesc &Desc = MII.get(Opcode);
3545   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3546     return true;
3547 
3548   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3549   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3550   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3551 
3552   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3553 
3554   unsigned NumExprs = 0;
3555   unsigned NumLiterals = 0;
3556   uint32_t LiteralValue;
3557 
3558   for (int OpIdx : OpIndices) {
3559     if (OpIdx == -1) break;
3560 
3561     const MCOperand &MO = Inst.getOperand(OpIdx);
3562     if (!MO.isImm() && !MO.isExpr())
3563       continue;
3564     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3565       continue;
3566 
3567     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3568         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3569       return false;
3570 
3571     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3572       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3573       if (NumLiterals == 0 || LiteralValue != Value) {
3574         LiteralValue = Value;
3575         ++NumLiterals;
3576       }
3577     } else if (MO.isExpr()) {
3578       ++NumExprs;
3579     }
3580   }
3581   NumLiterals += NumExprs;
3582 
3583   return !NumLiterals ||
3584          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3585 }
3586 
3587 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3588                                           const SMLoc &IDLoc,
3589                                           const OperandVector &Operands) {
3590   if (!validateLdsDirect(Inst)) {
3591     Error(IDLoc,
3592       "invalid use of lds_direct");
3593     return false;
3594   }
3595   if (!validateSOPLiteral(Inst)) {
3596     Error(IDLoc,
3597       "only one literal operand is allowed");
3598     return false;
3599   }
3600   if (!validateVOP3Literal(Inst)) {
3601     Error(IDLoc,
3602       "invalid literal operand");
3603     return false;
3604   }
3605   if (!validateConstantBusLimitations(Inst)) {
3606     Error(IDLoc,
3607       "invalid operand (violates constant bus restrictions)");
3608     return false;
3609   }
3610   if (!validateEarlyClobberLimitations(Inst)) {
3611     Error(IDLoc,
3612       "destination must be different than all sources");
3613     return false;
3614   }
3615   if (!validateIntClampSupported(Inst)) {
3616     Error(IDLoc,
3617       "integer clamping is not supported on this GPU");
3618     return false;
3619   }
3620   if (!validateOpSel(Inst)) {
3621     Error(IDLoc,
3622       "invalid op_sel operand");
3623     return false;
3624   }
3625   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3626   if (!validateMIMGD16(Inst)) {
3627     Error(IDLoc,
3628       "d16 modifier is not supported on this GPU");
3629     return false;
3630   }
3631   if (!validateMIMGDim(Inst)) {
3632     Error(IDLoc, "dim modifier is required on this GPU");
3633     return false;
3634   }
3635   if (!validateMIMGDataSize(Inst)) {
3636     Error(IDLoc,
3637       "image data size does not match dmask and tfe");
3638     return false;
3639   }
3640   if (!validateMIMGAddrSize(Inst)) {
3641     Error(IDLoc,
3642       "image address size does not match dim and a16");
3643     return false;
3644   }
3645   if (!validateMIMGAtomicDMask(Inst)) {
3646     Error(IDLoc,
3647       "invalid atomic image dmask");
3648     return false;
3649   }
3650   if (!validateMIMGGatherDMask(Inst)) {
3651     Error(IDLoc,
3652       "invalid image_gather dmask: only one bit must be set");
3653     return false;
3654   }
3655   if (!validateMovrels(Inst)) {
3656     Error(IDLoc, "source operand must be a VGPR");
3657     return false;
3658   }
3659   if (!validateFlatOffset(Inst, Operands)) {
3660     return false;
3661   }
3662   if (!validateSMEMOffset(Inst, Operands)) {
3663     return false;
3664   }
3665   if (!validateMAIAccWrite(Inst)) {
3666     return false;
3667   }
3668 
3669   return true;
3670 }
3671 
3672 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3673                                             const FeatureBitset &FBS,
3674                                             unsigned VariantID = 0);
3675 
3676 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3677                                               OperandVector &Operands,
3678                                               MCStreamer &Out,
3679                                               uint64_t &ErrorInfo,
3680                                               bool MatchingInlineAsm) {
3681   MCInst Inst;
3682   unsigned Result = Match_Success;
3683   for (auto Variant : getMatchedVariants()) {
3684     uint64_t EI;
3685     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3686                                   Variant);
3687     // We order match statuses from least to most specific. We use most specific
3688     // status as resulting
3689     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3690     if ((R == Match_Success) ||
3691         (R == Match_PreferE32) ||
3692         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3693         (R == Match_InvalidOperand && Result != Match_MissingFeature
3694                                    && Result != Match_PreferE32) ||
3695         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3696                                    && Result != Match_MissingFeature
3697                                    && Result != Match_PreferE32)) {
3698       Result = R;
3699       ErrorInfo = EI;
3700     }
3701     if (R == Match_Success)
3702       break;
3703   }
3704 
3705   switch (Result) {
3706   default: break;
3707   case Match_Success:
3708     if (!validateInstruction(Inst, IDLoc, Operands)) {
3709       return true;
3710     }
3711     Inst.setLoc(IDLoc);
3712     Out.emitInstruction(Inst, getSTI());
3713     return false;
3714 
3715   case Match_MissingFeature:
3716     return Error(IDLoc, "instruction not supported on this GPU");
3717 
3718   case Match_MnemonicFail: {
3719     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3720     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3721         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3722     return Error(IDLoc, "invalid instruction" + Suggestion,
3723                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3724   }
3725 
3726   case Match_InvalidOperand: {
3727     SMLoc ErrorLoc = IDLoc;
3728     if (ErrorInfo != ~0ULL) {
3729       if (ErrorInfo >= Operands.size()) {
3730         return Error(IDLoc, "too few operands for instruction");
3731       }
3732       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3733       if (ErrorLoc == SMLoc())
3734         ErrorLoc = IDLoc;
3735     }
3736     return Error(ErrorLoc, "invalid operand for instruction");
3737   }
3738 
3739   case Match_PreferE32:
3740     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3741                         "should be encoded as e32");
3742   }
3743   llvm_unreachable("Implement any new match types added!");
3744 }
3745 
3746 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3747   int64_t Tmp = -1;
3748   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3749     return true;
3750   }
3751   if (getParser().parseAbsoluteExpression(Tmp)) {
3752     return true;
3753   }
3754   Ret = static_cast<uint32_t>(Tmp);
3755   return false;
3756 }
3757 
3758 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3759                                                uint32_t &Minor) {
3760   if (ParseAsAbsoluteExpression(Major))
3761     return TokError("invalid major version");
3762 
3763   if (getLexer().isNot(AsmToken::Comma))
3764     return TokError("minor version number required, comma expected");
3765   Lex();
3766 
3767   if (ParseAsAbsoluteExpression(Minor))
3768     return TokError("invalid minor version");
3769 
3770   return false;
3771 }
3772 
3773 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3774   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3775     return TokError("directive only supported for amdgcn architecture");
3776 
3777   std::string Target;
3778 
3779   SMLoc TargetStart = getTok().getLoc();
3780   if (getParser().parseEscapedString(Target))
3781     return true;
3782   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3783 
3784   std::string ExpectedTarget;
3785   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3786   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3787 
3788   if (Target != ExpectedTargetOS.str())
3789     return getParser().Error(TargetRange.Start, "target must match options",
3790                              TargetRange);
3791 
3792   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3793   return false;
3794 }
3795 
3796 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3797   return getParser().Error(Range.Start, "value out of range", Range);
3798 }
3799 
3800 bool AMDGPUAsmParser::calculateGPRBlocks(
3801     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3802     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3803     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3804     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3805   // TODO(scott.linder): These calculations are duplicated from
3806   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3807   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3808 
3809   unsigned NumVGPRs = NextFreeVGPR;
3810   unsigned NumSGPRs = NextFreeSGPR;
3811 
3812   if (Version.Major >= 10)
3813     NumSGPRs = 0;
3814   else {
3815     unsigned MaxAddressableNumSGPRs =
3816         IsaInfo::getAddressableNumSGPRs(&getSTI());
3817 
3818     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3819         NumSGPRs > MaxAddressableNumSGPRs)
3820       return OutOfRangeError(SGPRRange);
3821 
3822     NumSGPRs +=
3823         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3824 
3825     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3826         NumSGPRs > MaxAddressableNumSGPRs)
3827       return OutOfRangeError(SGPRRange);
3828 
3829     if (Features.test(FeatureSGPRInitBug))
3830       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3831   }
3832 
3833   VGPRBlocks =
3834       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3835   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3836 
3837   return false;
3838 }
3839 
3840 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3841   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3842     return TokError("directive only supported for amdgcn architecture");
3843 
3844   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3845     return TokError("directive only supported for amdhsa OS");
3846 
3847   StringRef KernelName;
3848   if (getParser().parseIdentifier(KernelName))
3849     return true;
3850 
3851   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3852 
3853   StringSet<> Seen;
3854 
3855   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3856 
3857   SMRange VGPRRange;
3858   uint64_t NextFreeVGPR = 0;
3859   SMRange SGPRRange;
3860   uint64_t NextFreeSGPR = 0;
3861   unsigned UserSGPRCount = 0;
3862   bool ReserveVCC = true;
3863   bool ReserveFlatScr = true;
3864   bool ReserveXNACK = hasXNACK();
3865   Optional<bool> EnableWavefrontSize32;
3866 
3867   while (true) {
3868     while (getLexer().is(AsmToken::EndOfStatement))
3869       Lex();
3870 
3871     if (getLexer().isNot(AsmToken::Identifier))
3872       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3873 
3874     StringRef ID = getTok().getIdentifier();
3875     SMRange IDRange = getTok().getLocRange();
3876     Lex();
3877 
3878     if (ID == ".end_amdhsa_kernel")
3879       break;
3880 
3881     if (Seen.find(ID) != Seen.end())
3882       return TokError(".amdhsa_ directives cannot be repeated");
3883     Seen.insert(ID);
3884 
3885     SMLoc ValStart = getTok().getLoc();
3886     int64_t IVal;
3887     if (getParser().parseAbsoluteExpression(IVal))
3888       return true;
3889     SMLoc ValEnd = getTok().getLoc();
3890     SMRange ValRange = SMRange(ValStart, ValEnd);
3891 
3892     if (IVal < 0)
3893       return OutOfRangeError(ValRange);
3894 
3895     uint64_t Val = IVal;
3896 
3897 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3898   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3899     return OutOfRangeError(RANGE);                                             \
3900   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3901 
3902     if (ID == ".amdhsa_group_segment_fixed_size") {
3903       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3904         return OutOfRangeError(ValRange);
3905       KD.group_segment_fixed_size = Val;
3906     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3907       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3908         return OutOfRangeError(ValRange);
3909       KD.private_segment_fixed_size = Val;
3910     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3911       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3912                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3913                        Val, ValRange);
3914       if (Val)
3915         UserSGPRCount += 4;
3916     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3917       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3918                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3919                        ValRange);
3920       if (Val)
3921         UserSGPRCount += 2;
3922     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3923       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3924                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3925                        ValRange);
3926       if (Val)
3927         UserSGPRCount += 2;
3928     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3929       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3930                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3931                        Val, ValRange);
3932       if (Val)
3933         UserSGPRCount += 2;
3934     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3935       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3936                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3937                        ValRange);
3938       if (Val)
3939         UserSGPRCount += 2;
3940     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3941       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3942                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3943                        ValRange);
3944       if (Val)
3945         UserSGPRCount += 2;
3946     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3947       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3948                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3949                        Val, ValRange);
3950       if (Val)
3951         UserSGPRCount += 1;
3952     } else if (ID == ".amdhsa_wavefront_size32") {
3953       if (IVersion.Major < 10)
3954         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3955                                  IDRange);
3956       EnableWavefrontSize32 = Val;
3957       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3958                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3959                        Val, ValRange);
3960     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3961       PARSE_BITS_ENTRY(
3962           KD.compute_pgm_rsrc2,
3963           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3964           ValRange);
3965     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3966       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3967                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3968                        ValRange);
3969     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3970       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3971                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3972                        ValRange);
3973     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3974       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3975                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3976                        ValRange);
3977     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3978       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3979                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3980                        ValRange);
3981     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3982       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3983                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3984                        ValRange);
3985     } else if (ID == ".amdhsa_next_free_vgpr") {
3986       VGPRRange = ValRange;
3987       NextFreeVGPR = Val;
3988     } else if (ID == ".amdhsa_next_free_sgpr") {
3989       SGPRRange = ValRange;
3990       NextFreeSGPR = Val;
3991     } else if (ID == ".amdhsa_reserve_vcc") {
3992       if (!isUInt<1>(Val))
3993         return OutOfRangeError(ValRange);
3994       ReserveVCC = Val;
3995     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3996       if (IVersion.Major < 7)
3997         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3998                                  IDRange);
3999       if (!isUInt<1>(Val))
4000         return OutOfRangeError(ValRange);
4001       ReserveFlatScr = Val;
4002     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4003       if (IVersion.Major < 8)
4004         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4005                                  IDRange);
4006       if (!isUInt<1>(Val))
4007         return OutOfRangeError(ValRange);
4008       ReserveXNACK = Val;
4009     } else if (ID == ".amdhsa_float_round_mode_32") {
4010       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4011                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4012     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4013       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4014                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4015     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4016       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4017                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4018     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4019       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4020                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4021                        ValRange);
4022     } else if (ID == ".amdhsa_dx10_clamp") {
4023       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4024                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4025     } else if (ID == ".amdhsa_ieee_mode") {
4026       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4027                        Val, ValRange);
4028     } else if (ID == ".amdhsa_fp16_overflow") {
4029       if (IVersion.Major < 9)
4030         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4031                                  IDRange);
4032       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4033                        ValRange);
4034     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4035       if (IVersion.Major < 10)
4036         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4037                                  IDRange);
4038       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4039                        ValRange);
4040     } else if (ID == ".amdhsa_memory_ordered") {
4041       if (IVersion.Major < 10)
4042         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4043                                  IDRange);
4044       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4045                        ValRange);
4046     } else if (ID == ".amdhsa_forward_progress") {
4047       if (IVersion.Major < 10)
4048         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4049                                  IDRange);
4050       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4051                        ValRange);
4052     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4053       PARSE_BITS_ENTRY(
4054           KD.compute_pgm_rsrc2,
4055           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4056           ValRange);
4057     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4058       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4059                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4060                        Val, ValRange);
4061     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4062       PARSE_BITS_ENTRY(
4063           KD.compute_pgm_rsrc2,
4064           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4065           ValRange);
4066     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4067       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4068                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4069                        Val, ValRange);
4070     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4071       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4072                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4073                        Val, ValRange);
4074     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4075       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4076                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4077                        Val, ValRange);
4078     } else if (ID == ".amdhsa_exception_int_div_zero") {
4079       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4080                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4081                        Val, ValRange);
4082     } else {
4083       return getParser().Error(IDRange.Start,
4084                                "unknown .amdhsa_kernel directive", IDRange);
4085     }
4086 
4087 #undef PARSE_BITS_ENTRY
4088   }
4089 
4090   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4091     return TokError(".amdhsa_next_free_vgpr directive is required");
4092 
4093   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4094     return TokError(".amdhsa_next_free_sgpr directive is required");
4095 
4096   unsigned VGPRBlocks;
4097   unsigned SGPRBlocks;
4098   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4099                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4100                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4101                          SGPRBlocks))
4102     return true;
4103 
4104   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4105           VGPRBlocks))
4106     return OutOfRangeError(VGPRRange);
4107   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4108                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4109 
4110   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4111           SGPRBlocks))
4112     return OutOfRangeError(SGPRRange);
4113   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4114                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4115                   SGPRBlocks);
4116 
4117   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4118     return TokError("too many user SGPRs enabled");
4119   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4120                   UserSGPRCount);
4121 
4122   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4123       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4124       ReserveFlatScr, ReserveXNACK);
4125   return false;
4126 }
4127 
4128 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4129   uint32_t Major;
4130   uint32_t Minor;
4131 
4132   if (ParseDirectiveMajorMinor(Major, Minor))
4133     return true;
4134 
4135   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4136   return false;
4137 }
4138 
4139 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4140   uint32_t Major;
4141   uint32_t Minor;
4142   uint32_t Stepping;
4143   StringRef VendorName;
4144   StringRef ArchName;
4145 
4146   // If this directive has no arguments, then use the ISA version for the
4147   // targeted GPU.
4148   if (getLexer().is(AsmToken::EndOfStatement)) {
4149     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4150     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4151                                                       ISA.Stepping,
4152                                                       "AMD", "AMDGPU");
4153     return false;
4154   }
4155 
4156   if (ParseDirectiveMajorMinor(Major, Minor))
4157     return true;
4158 
4159   if (getLexer().isNot(AsmToken::Comma))
4160     return TokError("stepping version number required, comma expected");
4161   Lex();
4162 
4163   if (ParseAsAbsoluteExpression(Stepping))
4164     return TokError("invalid stepping version");
4165 
4166   if (getLexer().isNot(AsmToken::Comma))
4167     return TokError("vendor name required, comma expected");
4168   Lex();
4169 
4170   if (getLexer().isNot(AsmToken::String))
4171     return TokError("invalid vendor name");
4172 
4173   VendorName = getLexer().getTok().getStringContents();
4174   Lex();
4175 
4176   if (getLexer().isNot(AsmToken::Comma))
4177     return TokError("arch name required, comma expected");
4178   Lex();
4179 
4180   if (getLexer().isNot(AsmToken::String))
4181     return TokError("invalid arch name");
4182 
4183   ArchName = getLexer().getTok().getStringContents();
4184   Lex();
4185 
4186   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4187                                                     VendorName, ArchName);
4188   return false;
4189 }
4190 
4191 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4192                                                amd_kernel_code_t &Header) {
4193   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4194   // assembly for backwards compatibility.
4195   if (ID == "max_scratch_backing_memory_byte_size") {
4196     Parser.eatToEndOfStatement();
4197     return false;
4198   }
4199 
4200   SmallString<40> ErrStr;
4201   raw_svector_ostream Err(ErrStr);
4202   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4203     return TokError(Err.str());
4204   }
4205   Lex();
4206 
4207   if (ID == "enable_wavefront_size32") {
4208     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4209       if (!isGFX10())
4210         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4211       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4212         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4213     } else {
4214       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4215         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4216     }
4217   }
4218 
4219   if (ID == "wavefront_size") {
4220     if (Header.wavefront_size == 5) {
4221       if (!isGFX10())
4222         return TokError("wavefront_size=5 is only allowed on GFX10+");
4223       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4224         return TokError("wavefront_size=5 requires +WavefrontSize32");
4225     } else if (Header.wavefront_size == 6) {
4226       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4227         return TokError("wavefront_size=6 requires +WavefrontSize64");
4228     }
4229   }
4230 
4231   if (ID == "enable_wgp_mode") {
4232     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4233       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4234   }
4235 
4236   if (ID == "enable_mem_ordered") {
4237     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4238       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4239   }
4240 
4241   if (ID == "enable_fwd_progress") {
4242     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4243       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4244   }
4245 
4246   return false;
4247 }
4248 
4249 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4250   amd_kernel_code_t Header;
4251   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4252 
4253   while (true) {
4254     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4255     // will set the current token to EndOfStatement.
4256     while(getLexer().is(AsmToken::EndOfStatement))
4257       Lex();
4258 
4259     if (getLexer().isNot(AsmToken::Identifier))
4260       return TokError("expected value identifier or .end_amd_kernel_code_t");
4261 
4262     StringRef ID = getLexer().getTok().getIdentifier();
4263     Lex();
4264 
4265     if (ID == ".end_amd_kernel_code_t")
4266       break;
4267 
4268     if (ParseAMDKernelCodeTValue(ID, Header))
4269       return true;
4270   }
4271 
4272   getTargetStreamer().EmitAMDKernelCodeT(Header);
4273 
4274   return false;
4275 }
4276 
4277 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4278   if (getLexer().isNot(AsmToken::Identifier))
4279     return TokError("expected symbol name");
4280 
4281   StringRef KernelName = Parser.getTok().getString();
4282 
4283   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4284                                            ELF::STT_AMDGPU_HSA_KERNEL);
4285   Lex();
4286   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4287     KernelScope.initialize(getContext());
4288   return false;
4289 }
4290 
4291 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4292   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4293     return Error(getParser().getTok().getLoc(),
4294                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4295                  "architectures");
4296   }
4297 
4298   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4299 
4300   std::string ISAVersionStringFromSTI;
4301   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4302   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4303 
4304   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4305     return Error(getParser().getTok().getLoc(),
4306                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4307                  "arguments specified through the command line");
4308   }
4309 
4310   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4311   Lex();
4312 
4313   return false;
4314 }
4315 
4316 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4317   const char *AssemblerDirectiveBegin;
4318   const char *AssemblerDirectiveEnd;
4319   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4320       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4321           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4322                             HSAMD::V3::AssemblerDirectiveEnd)
4323           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4324                             HSAMD::AssemblerDirectiveEnd);
4325 
4326   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4327     return Error(getParser().getTok().getLoc(),
4328                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4329                  "not available on non-amdhsa OSes")).str());
4330   }
4331 
4332   std::string HSAMetadataString;
4333   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4334                           HSAMetadataString))
4335     return true;
4336 
4337   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4338     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4339       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4340   } else {
4341     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4342       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4343   }
4344 
4345   return false;
4346 }
4347 
4348 /// Common code to parse out a block of text (typically YAML) between start and
4349 /// end directives.
4350 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4351                                           const char *AssemblerDirectiveEnd,
4352                                           std::string &CollectString) {
4353 
4354   raw_string_ostream CollectStream(CollectString);
4355 
4356   getLexer().setSkipSpace(false);
4357 
4358   bool FoundEnd = false;
4359   while (!getLexer().is(AsmToken::Eof)) {
4360     while (getLexer().is(AsmToken::Space)) {
4361       CollectStream << getLexer().getTok().getString();
4362       Lex();
4363     }
4364 
4365     if (getLexer().is(AsmToken::Identifier)) {
4366       StringRef ID = getLexer().getTok().getIdentifier();
4367       if (ID == AssemblerDirectiveEnd) {
4368         Lex();
4369         FoundEnd = true;
4370         break;
4371       }
4372     }
4373 
4374     CollectStream << Parser.parseStringToEndOfStatement()
4375                   << getContext().getAsmInfo()->getSeparatorString();
4376 
4377     Parser.eatToEndOfStatement();
4378   }
4379 
4380   getLexer().setSkipSpace(true);
4381 
4382   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4383     return TokError(Twine("expected directive ") +
4384                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4385   }
4386 
4387   CollectStream.flush();
4388   return false;
4389 }
4390 
4391 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4392 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4393   std::string String;
4394   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4395                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4396     return true;
4397 
4398   auto PALMetadata = getTargetStreamer().getPALMetadata();
4399   if (!PALMetadata->setFromString(String))
4400     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4401   return false;
4402 }
4403 
4404 /// Parse the assembler directive for old linear-format PAL metadata.
4405 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4406   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4407     return Error(getParser().getTok().getLoc(),
4408                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4409                  "not available on non-amdpal OSes")).str());
4410   }
4411 
4412   auto PALMetadata = getTargetStreamer().getPALMetadata();
4413   PALMetadata->setLegacy();
4414   for (;;) {
4415     uint32_t Key, Value;
4416     if (ParseAsAbsoluteExpression(Key)) {
4417       return TokError(Twine("invalid value in ") +
4418                       Twine(PALMD::AssemblerDirective));
4419     }
4420     if (getLexer().isNot(AsmToken::Comma)) {
4421       return TokError(Twine("expected an even number of values in ") +
4422                       Twine(PALMD::AssemblerDirective));
4423     }
4424     Lex();
4425     if (ParseAsAbsoluteExpression(Value)) {
4426       return TokError(Twine("invalid value in ") +
4427                       Twine(PALMD::AssemblerDirective));
4428     }
4429     PALMetadata->setRegister(Key, Value);
4430     if (getLexer().isNot(AsmToken::Comma))
4431       break;
4432     Lex();
4433   }
4434   return false;
4435 }
4436 
4437 /// ParseDirectiveAMDGPULDS
4438 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4439 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4440   if (getParser().checkForValidSection())
4441     return true;
4442 
4443   StringRef Name;
4444   SMLoc NameLoc = getLexer().getLoc();
4445   if (getParser().parseIdentifier(Name))
4446     return TokError("expected identifier in directive");
4447 
4448   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4449   if (parseToken(AsmToken::Comma, "expected ','"))
4450     return true;
4451 
4452   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4453 
4454   int64_t Size;
4455   SMLoc SizeLoc = getLexer().getLoc();
4456   if (getParser().parseAbsoluteExpression(Size))
4457     return true;
4458   if (Size < 0)
4459     return Error(SizeLoc, "size must be non-negative");
4460   if (Size > LocalMemorySize)
4461     return Error(SizeLoc, "size is too large");
4462 
4463   int64_t Alignment = 4;
4464   if (getLexer().is(AsmToken::Comma)) {
4465     Lex();
4466     SMLoc AlignLoc = getLexer().getLoc();
4467     if (getParser().parseAbsoluteExpression(Alignment))
4468       return true;
4469     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4470       return Error(AlignLoc, "alignment must be a power of two");
4471 
4472     // Alignment larger than the size of LDS is possible in theory, as long
4473     // as the linker manages to place to symbol at address 0, but we do want
4474     // to make sure the alignment fits nicely into a 32-bit integer.
4475     if (Alignment >= 1u << 31)
4476       return Error(AlignLoc, "alignment is too large");
4477   }
4478 
4479   if (parseToken(AsmToken::EndOfStatement,
4480                  "unexpected token in '.amdgpu_lds' directive"))
4481     return true;
4482 
4483   Symbol->redefineIfPossible();
4484   if (!Symbol->isUndefined())
4485     return Error(NameLoc, "invalid symbol redefinition");
4486 
4487   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4488   return false;
4489 }
4490 
4491 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4492   StringRef IDVal = DirectiveID.getString();
4493 
4494   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4495     if (IDVal == ".amdgcn_target")
4496       return ParseDirectiveAMDGCNTarget();
4497 
4498     if (IDVal == ".amdhsa_kernel")
4499       return ParseDirectiveAMDHSAKernel();
4500 
4501     // TODO: Restructure/combine with PAL metadata directive.
4502     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4503       return ParseDirectiveHSAMetadata();
4504   } else {
4505     if (IDVal == ".hsa_code_object_version")
4506       return ParseDirectiveHSACodeObjectVersion();
4507 
4508     if (IDVal == ".hsa_code_object_isa")
4509       return ParseDirectiveHSACodeObjectISA();
4510 
4511     if (IDVal == ".amd_kernel_code_t")
4512       return ParseDirectiveAMDKernelCodeT();
4513 
4514     if (IDVal == ".amdgpu_hsa_kernel")
4515       return ParseDirectiveAMDGPUHsaKernel();
4516 
4517     if (IDVal == ".amd_amdgpu_isa")
4518       return ParseDirectiveISAVersion();
4519 
4520     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4521       return ParseDirectiveHSAMetadata();
4522   }
4523 
4524   if (IDVal == ".amdgpu_lds")
4525     return ParseDirectiveAMDGPULDS();
4526 
4527   if (IDVal == PALMD::AssemblerDirectiveBegin)
4528     return ParseDirectivePALMetadataBegin();
4529 
4530   if (IDVal == PALMD::AssemblerDirective)
4531     return ParseDirectivePALMetadata();
4532 
4533   return true;
4534 }
4535 
4536 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4537                                            unsigned RegNo) const {
4538 
4539   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4540        R.isValid(); ++R) {
4541     if (*R == RegNo)
4542       return isGFX9() || isGFX10();
4543   }
4544 
4545   // GFX10 has 2 more SGPRs 104 and 105.
4546   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4547        R.isValid(); ++R) {
4548     if (*R == RegNo)
4549       return hasSGPR104_SGPR105();
4550   }
4551 
4552   switch (RegNo) {
4553   case AMDGPU::SRC_SHARED_BASE:
4554   case AMDGPU::SRC_SHARED_LIMIT:
4555   case AMDGPU::SRC_PRIVATE_BASE:
4556   case AMDGPU::SRC_PRIVATE_LIMIT:
4557   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4558     return !isCI() && !isSI() && !isVI();
4559   case AMDGPU::TBA:
4560   case AMDGPU::TBA_LO:
4561   case AMDGPU::TBA_HI:
4562   case AMDGPU::TMA:
4563   case AMDGPU::TMA_LO:
4564   case AMDGPU::TMA_HI:
4565     return !isGFX9() && !isGFX10();
4566   case AMDGPU::XNACK_MASK:
4567   case AMDGPU::XNACK_MASK_LO:
4568   case AMDGPU::XNACK_MASK_HI:
4569     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4570   case AMDGPU::SGPR_NULL:
4571     return isGFX10();
4572   default:
4573     break;
4574   }
4575 
4576   if (isCI())
4577     return true;
4578 
4579   if (isSI() || isGFX10()) {
4580     // No flat_scr on SI.
4581     // On GFX10 flat scratch is not a valid register operand and can only be
4582     // accessed with s_setreg/s_getreg.
4583     switch (RegNo) {
4584     case AMDGPU::FLAT_SCR:
4585     case AMDGPU::FLAT_SCR_LO:
4586     case AMDGPU::FLAT_SCR_HI:
4587       return false;
4588     default:
4589       return true;
4590     }
4591   }
4592 
4593   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4594   // SI/CI have.
4595   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4596        R.isValid(); ++R) {
4597     if (*R == RegNo)
4598       return hasSGPR102_SGPR103();
4599   }
4600 
4601   return true;
4602 }
4603 
4604 OperandMatchResultTy
4605 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4606                               OperandMode Mode) {
4607   // Try to parse with a custom parser
4608   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4609 
4610   // If we successfully parsed the operand or if there as an error parsing,
4611   // we are done.
4612   //
4613   // If we are parsing after we reach EndOfStatement then this means we
4614   // are appending default values to the Operands list.  This is only done
4615   // by custom parser, so we shouldn't continue on to the generic parsing.
4616   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4617       getLexer().is(AsmToken::EndOfStatement))
4618     return ResTy;
4619 
4620   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4621     unsigned Prefix = Operands.size();
4622     SMLoc LBraceLoc = getTok().getLoc();
4623     Parser.Lex(); // eat the '['
4624 
4625     for (;;) {
4626       ResTy = parseReg(Operands);
4627       if (ResTy != MatchOperand_Success)
4628         return ResTy;
4629 
4630       if (getLexer().is(AsmToken::RBrac))
4631         break;
4632 
4633       if (getLexer().isNot(AsmToken::Comma))
4634         return MatchOperand_ParseFail;
4635       Parser.Lex();
4636     }
4637 
4638     if (Operands.size() - Prefix > 1) {
4639       Operands.insert(Operands.begin() + Prefix,
4640                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4641       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4642                                                     getTok().getLoc()));
4643     }
4644 
4645     Parser.Lex(); // eat the ']'
4646     return MatchOperand_Success;
4647   }
4648 
4649   return parseRegOrImm(Operands);
4650 }
4651 
4652 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4653   // Clear any forced encodings from the previous instruction.
4654   setForcedEncodingSize(0);
4655   setForcedDPP(false);
4656   setForcedSDWA(false);
4657 
4658   if (Name.endswith("_e64")) {
4659     setForcedEncodingSize(64);
4660     return Name.substr(0, Name.size() - 4);
4661   } else if (Name.endswith("_e32")) {
4662     setForcedEncodingSize(32);
4663     return Name.substr(0, Name.size() - 4);
4664   } else if (Name.endswith("_dpp")) {
4665     setForcedDPP(true);
4666     return Name.substr(0, Name.size() - 4);
4667   } else if (Name.endswith("_sdwa")) {
4668     setForcedSDWA(true);
4669     return Name.substr(0, Name.size() - 5);
4670   }
4671   return Name;
4672 }
4673 
4674 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4675                                        StringRef Name,
4676                                        SMLoc NameLoc, OperandVector &Operands) {
4677   // Add the instruction mnemonic
4678   Name = parseMnemonicSuffix(Name);
4679   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4680 
4681   bool IsMIMG = Name.startswith("image_");
4682 
4683   while (!getLexer().is(AsmToken::EndOfStatement)) {
4684     OperandMode Mode = OperandMode_Default;
4685     if (IsMIMG && isGFX10() && Operands.size() == 2)
4686       Mode = OperandMode_NSA;
4687     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4688 
4689     // Eat the comma or space if there is one.
4690     if (getLexer().is(AsmToken::Comma))
4691       Parser.Lex();
4692 
4693     switch (Res) {
4694       case MatchOperand_Success: break;
4695       case MatchOperand_ParseFail:
4696         // FIXME: use real operand location rather than the current location.
4697         Error(getLexer().getLoc(), "failed parsing operand.");
4698         while (!getLexer().is(AsmToken::EndOfStatement)) {
4699           Parser.Lex();
4700         }
4701         return true;
4702       case MatchOperand_NoMatch:
4703         // FIXME: use real operand location rather than the current location.
4704         Error(getLexer().getLoc(), "not a valid operand.");
4705         while (!getLexer().is(AsmToken::EndOfStatement)) {
4706           Parser.Lex();
4707         }
4708         return true;
4709     }
4710   }
4711 
4712   return false;
4713 }
4714 
4715 //===----------------------------------------------------------------------===//
4716 // Utility functions
4717 //===----------------------------------------------------------------------===//
4718 
4719 OperandMatchResultTy
4720 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4721 
4722   if (!trySkipId(Prefix, AsmToken::Colon))
4723     return MatchOperand_NoMatch;
4724 
4725   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4726 }
4727 
4728 OperandMatchResultTy
4729 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4730                                     AMDGPUOperand::ImmTy ImmTy,
4731                                     bool (*ConvertResult)(int64_t&)) {
4732   SMLoc S = getLoc();
4733   int64_t Value = 0;
4734 
4735   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4736   if (Res != MatchOperand_Success)
4737     return Res;
4738 
4739   if (ConvertResult && !ConvertResult(Value)) {
4740     Error(S, "invalid " + StringRef(Prefix) + " value.");
4741   }
4742 
4743   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4744   return MatchOperand_Success;
4745 }
4746 
4747 OperandMatchResultTy
4748 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4749                                              OperandVector &Operands,
4750                                              AMDGPUOperand::ImmTy ImmTy,
4751                                              bool (*ConvertResult)(int64_t&)) {
4752   SMLoc S = getLoc();
4753   if (!trySkipId(Prefix, AsmToken::Colon))
4754     return MatchOperand_NoMatch;
4755 
4756   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4757     return MatchOperand_ParseFail;
4758 
4759   unsigned Val = 0;
4760   const unsigned MaxSize = 4;
4761 
4762   // FIXME: How to verify the number of elements matches the number of src
4763   // operands?
4764   for (int I = 0; ; ++I) {
4765     int64_t Op;
4766     SMLoc Loc = getLoc();
4767     if (!parseExpr(Op))
4768       return MatchOperand_ParseFail;
4769 
4770     if (Op != 0 && Op != 1) {
4771       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4772       return MatchOperand_ParseFail;
4773     }
4774 
4775     Val |= (Op << I);
4776 
4777     if (trySkipToken(AsmToken::RBrac))
4778       break;
4779 
4780     if (I + 1 == MaxSize) {
4781       Error(getLoc(), "expected a closing square bracket");
4782       return MatchOperand_ParseFail;
4783     }
4784 
4785     if (!skipToken(AsmToken::Comma, "expected a comma"))
4786       return MatchOperand_ParseFail;
4787   }
4788 
4789   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4790   return MatchOperand_Success;
4791 }
4792 
4793 OperandMatchResultTy
4794 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4795                                AMDGPUOperand::ImmTy ImmTy) {
4796   int64_t Bit = 0;
4797   SMLoc S = Parser.getTok().getLoc();
4798 
4799   // We are at the end of the statement, and this is a default argument, so
4800   // use a default value.
4801   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4802     switch(getLexer().getKind()) {
4803       case AsmToken::Identifier: {
4804         StringRef Tok = Parser.getTok().getString();
4805         if (Tok == Name) {
4806           if (Tok == "r128" && !hasMIMG_R128())
4807             Error(S, "r128 modifier is not supported on this GPU");
4808           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4809             Error(S, "a16 modifier is not supported on this GPU");
4810           Bit = 1;
4811           Parser.Lex();
4812         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4813           Bit = 0;
4814           Parser.Lex();
4815         } else {
4816           return MatchOperand_NoMatch;
4817         }
4818         break;
4819       }
4820       default:
4821         return MatchOperand_NoMatch;
4822     }
4823   }
4824 
4825   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4826     return MatchOperand_ParseFail;
4827 
4828   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4829     ImmTy = AMDGPUOperand::ImmTyR128A16;
4830 
4831   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4832   return MatchOperand_Success;
4833 }
4834 
4835 static void addOptionalImmOperand(
4836   MCInst& Inst, const OperandVector& Operands,
4837   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4838   AMDGPUOperand::ImmTy ImmT,
4839   int64_t Default = 0) {
4840   auto i = OptionalIdx.find(ImmT);
4841   if (i != OptionalIdx.end()) {
4842     unsigned Idx = i->second;
4843     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4844   } else {
4845     Inst.addOperand(MCOperand::createImm(Default));
4846   }
4847 }
4848 
4849 OperandMatchResultTy
4850 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4851   if (getLexer().isNot(AsmToken::Identifier)) {
4852     return MatchOperand_NoMatch;
4853   }
4854   StringRef Tok = Parser.getTok().getString();
4855   if (Tok != Prefix) {
4856     return MatchOperand_NoMatch;
4857   }
4858 
4859   Parser.Lex();
4860   if (getLexer().isNot(AsmToken::Colon)) {
4861     return MatchOperand_ParseFail;
4862   }
4863 
4864   Parser.Lex();
4865   if (getLexer().isNot(AsmToken::Identifier)) {
4866     return MatchOperand_ParseFail;
4867   }
4868 
4869   Value = Parser.getTok().getString();
4870   return MatchOperand_Success;
4871 }
4872 
4873 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4874 // values to live in a joint format operand in the MCInst encoding.
4875 OperandMatchResultTy
4876 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4877   SMLoc S = Parser.getTok().getLoc();
4878   int64_t Dfmt = 0, Nfmt = 0;
4879   // dfmt and nfmt can appear in either order, and each is optional.
4880   bool GotDfmt = false, GotNfmt = false;
4881   while (!GotDfmt || !GotNfmt) {
4882     if (!GotDfmt) {
4883       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4884       if (Res != MatchOperand_NoMatch) {
4885         if (Res != MatchOperand_Success)
4886           return Res;
4887         if (Dfmt >= 16) {
4888           Error(Parser.getTok().getLoc(), "out of range dfmt");
4889           return MatchOperand_ParseFail;
4890         }
4891         GotDfmt = true;
4892         Parser.Lex();
4893         continue;
4894       }
4895     }
4896     if (!GotNfmt) {
4897       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4898       if (Res != MatchOperand_NoMatch) {
4899         if (Res != MatchOperand_Success)
4900           return Res;
4901         if (Nfmt >= 8) {
4902           Error(Parser.getTok().getLoc(), "out of range nfmt");
4903           return MatchOperand_ParseFail;
4904         }
4905         GotNfmt = true;
4906         Parser.Lex();
4907         continue;
4908       }
4909     }
4910     break;
4911   }
4912   if (!GotDfmt && !GotNfmt)
4913     return MatchOperand_NoMatch;
4914   auto Format = Dfmt | Nfmt << 4;
4915   Operands.push_back(
4916       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4917   return MatchOperand_Success;
4918 }
4919 
4920 //===----------------------------------------------------------------------===//
4921 // ds
4922 //===----------------------------------------------------------------------===//
4923 
4924 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4925                                     const OperandVector &Operands) {
4926   OptionalImmIndexMap OptionalIdx;
4927 
4928   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4929     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4930 
4931     // Add the register arguments
4932     if (Op.isReg()) {
4933       Op.addRegOperands(Inst, 1);
4934       continue;
4935     }
4936 
4937     // Handle optional arguments
4938     OptionalIdx[Op.getImmTy()] = i;
4939   }
4940 
4941   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4942   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4943   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4944 
4945   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4946 }
4947 
4948 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4949                                 bool IsGdsHardcoded) {
4950   OptionalImmIndexMap OptionalIdx;
4951 
4952   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4953     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4954 
4955     // Add the register arguments
4956     if (Op.isReg()) {
4957       Op.addRegOperands(Inst, 1);
4958       continue;
4959     }
4960 
4961     if (Op.isToken() && Op.getToken() == "gds") {
4962       IsGdsHardcoded = true;
4963       continue;
4964     }
4965 
4966     // Handle optional arguments
4967     OptionalIdx[Op.getImmTy()] = i;
4968   }
4969 
4970   AMDGPUOperand::ImmTy OffsetType =
4971     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4972      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4973      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4974                                                       AMDGPUOperand::ImmTyOffset;
4975 
4976   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4977 
4978   if (!IsGdsHardcoded) {
4979     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4980   }
4981   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4982 }
4983 
4984 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4985   OptionalImmIndexMap OptionalIdx;
4986 
4987   unsigned OperandIdx[4];
4988   unsigned EnMask = 0;
4989   int SrcIdx = 0;
4990 
4991   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4992     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4993 
4994     // Add the register arguments
4995     if (Op.isReg()) {
4996       assert(SrcIdx < 4);
4997       OperandIdx[SrcIdx] = Inst.size();
4998       Op.addRegOperands(Inst, 1);
4999       ++SrcIdx;
5000       continue;
5001     }
5002 
5003     if (Op.isOff()) {
5004       assert(SrcIdx < 4);
5005       OperandIdx[SrcIdx] = Inst.size();
5006       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5007       ++SrcIdx;
5008       continue;
5009     }
5010 
5011     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5012       Op.addImmOperands(Inst, 1);
5013       continue;
5014     }
5015 
5016     if (Op.isToken() && Op.getToken() == "done")
5017       continue;
5018 
5019     // Handle optional arguments
5020     OptionalIdx[Op.getImmTy()] = i;
5021   }
5022 
5023   assert(SrcIdx == 4);
5024 
5025   bool Compr = false;
5026   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5027     Compr = true;
5028     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5029     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5030     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5031   }
5032 
5033   for (auto i = 0; i < SrcIdx; ++i) {
5034     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5035       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5036     }
5037   }
5038 
5039   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5040   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5041 
5042   Inst.addOperand(MCOperand::createImm(EnMask));
5043 }
5044 
5045 //===----------------------------------------------------------------------===//
5046 // s_waitcnt
5047 //===----------------------------------------------------------------------===//
5048 
5049 static bool
5050 encodeCnt(
5051   const AMDGPU::IsaVersion ISA,
5052   int64_t &IntVal,
5053   int64_t CntVal,
5054   bool Saturate,
5055   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5056   unsigned (*decode)(const IsaVersion &Version, unsigned))
5057 {
5058   bool Failed = false;
5059 
5060   IntVal = encode(ISA, IntVal, CntVal);
5061   if (CntVal != decode(ISA, IntVal)) {
5062     if (Saturate) {
5063       IntVal = encode(ISA, IntVal, -1);
5064     } else {
5065       Failed = true;
5066     }
5067   }
5068   return Failed;
5069 }
5070 
5071 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5072 
5073   SMLoc CntLoc = getLoc();
5074   StringRef CntName = getTokenStr();
5075 
5076   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5077       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5078     return false;
5079 
5080   int64_t CntVal;
5081   SMLoc ValLoc = getLoc();
5082   if (!parseExpr(CntVal))
5083     return false;
5084 
5085   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5086 
5087   bool Failed = true;
5088   bool Sat = CntName.endswith("_sat");
5089 
5090   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5091     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5092   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5093     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5094   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5095     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5096   } else {
5097     Error(CntLoc, "invalid counter name " + CntName);
5098     return false;
5099   }
5100 
5101   if (Failed) {
5102     Error(ValLoc, "too large value for " + CntName);
5103     return false;
5104   }
5105 
5106   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5107     return false;
5108 
5109   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5110     if (isToken(AsmToken::EndOfStatement)) {
5111       Error(getLoc(), "expected a counter name");
5112       return false;
5113     }
5114   }
5115 
5116   return true;
5117 }
5118 
5119 OperandMatchResultTy
5120 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5121   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5122   int64_t Waitcnt = getWaitcntBitMask(ISA);
5123   SMLoc S = getLoc();
5124 
5125   // If parse failed, do not return error code
5126   // to avoid excessive error messages.
5127   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5128     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
5129   } else {
5130     parseExpr(Waitcnt);
5131   }
5132 
5133   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5134   return MatchOperand_Success;
5135 }
5136 
5137 bool
5138 AMDGPUOperand::isSWaitCnt() const {
5139   return isImm();
5140 }
5141 
5142 //===----------------------------------------------------------------------===//
5143 // hwreg
5144 //===----------------------------------------------------------------------===//
5145 
5146 bool
5147 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5148                                 int64_t &Offset,
5149                                 int64_t &Width) {
5150   using namespace llvm::AMDGPU::Hwreg;
5151 
5152   // The register may be specified by name or using a numeric code
5153   if (isToken(AsmToken::Identifier) &&
5154       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5155     HwReg.IsSymbolic = true;
5156     lex(); // skip message name
5157   } else if (!parseExpr(HwReg.Id)) {
5158     return false;
5159   }
5160 
5161   if (trySkipToken(AsmToken::RParen))
5162     return true;
5163 
5164   // parse optional params
5165   return
5166     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5167     parseExpr(Offset) &&
5168     skipToken(AsmToken::Comma, "expected a comma") &&
5169     parseExpr(Width) &&
5170     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5171 }
5172 
5173 bool
5174 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5175                                const int64_t Offset,
5176                                const int64_t Width,
5177                                const SMLoc Loc) {
5178 
5179   using namespace llvm::AMDGPU::Hwreg;
5180 
5181   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5182     Error(Loc, "specified hardware register is not supported on this GPU");
5183     return false;
5184   } else if (!isValidHwreg(HwReg.Id)) {
5185     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5186     return false;
5187   } else if (!isValidHwregOffset(Offset)) {
5188     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5189     return false;
5190   } else if (!isValidHwregWidth(Width)) {
5191     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5192     return false;
5193   }
5194   return true;
5195 }
5196 
5197 OperandMatchResultTy
5198 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5199   using namespace llvm::AMDGPU::Hwreg;
5200 
5201   int64_t ImmVal = 0;
5202   SMLoc Loc = getLoc();
5203 
5204   // If parse failed, do not return error code
5205   // to avoid excessive error messages.
5206   if (trySkipId("hwreg", AsmToken::LParen)) {
5207     OperandInfoTy HwReg(ID_UNKNOWN_);
5208     int64_t Offset = OFFSET_DEFAULT_;
5209     int64_t Width = WIDTH_DEFAULT_;
5210     if (parseHwregBody(HwReg, Offset, Width) &&
5211         validateHwreg(HwReg, Offset, Width, Loc)) {
5212       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5213     }
5214   } else if (parseExpr(ImmVal)) {
5215     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5216       Error(Loc, "invalid immediate: only 16-bit values are legal");
5217   }
5218 
5219   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5220   return MatchOperand_Success;
5221 }
5222 
5223 bool AMDGPUOperand::isHwreg() const {
5224   return isImmTy(ImmTyHwreg);
5225 }
5226 
5227 //===----------------------------------------------------------------------===//
5228 // sendmsg
5229 //===----------------------------------------------------------------------===//
5230 
5231 bool
5232 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5233                                   OperandInfoTy &Op,
5234                                   OperandInfoTy &Stream) {
5235   using namespace llvm::AMDGPU::SendMsg;
5236 
5237   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5238     Msg.IsSymbolic = true;
5239     lex(); // skip message name
5240   } else if (!parseExpr(Msg.Id)) {
5241     return false;
5242   }
5243 
5244   if (trySkipToken(AsmToken::Comma)) {
5245     Op.IsDefined = true;
5246     if (isToken(AsmToken::Identifier) &&
5247         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5248       lex(); // skip operation name
5249     } else if (!parseExpr(Op.Id)) {
5250       return false;
5251     }
5252 
5253     if (trySkipToken(AsmToken::Comma)) {
5254       Stream.IsDefined = true;
5255       if (!parseExpr(Stream.Id))
5256         return false;
5257     }
5258   }
5259 
5260   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5261 }
5262 
5263 bool
5264 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5265                                  const OperandInfoTy &Op,
5266                                  const OperandInfoTy &Stream,
5267                                  const SMLoc S) {
5268   using namespace llvm::AMDGPU::SendMsg;
5269 
5270   // Validation strictness depends on whether message is specified
5271   // in a symbolc or in a numeric form. In the latter case
5272   // only encoding possibility is checked.
5273   bool Strict = Msg.IsSymbolic;
5274 
5275   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5276     Error(S, "invalid message id");
5277     return false;
5278   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5279     Error(S, Op.IsDefined ?
5280              "message does not support operations" :
5281              "missing message operation");
5282     return false;
5283   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5284     Error(S, "invalid operation id");
5285     return false;
5286   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5287     Error(S, "message operation does not support streams");
5288     return false;
5289   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5290     Error(S, "invalid message stream id");
5291     return false;
5292   }
5293   return true;
5294 }
5295 
5296 OperandMatchResultTy
5297 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5298   using namespace llvm::AMDGPU::SendMsg;
5299 
5300   int64_t ImmVal = 0;
5301   SMLoc Loc = getLoc();
5302 
5303   // If parse failed, do not return error code
5304   // to avoid excessive error messages.
5305   if (trySkipId("sendmsg", AsmToken::LParen)) {
5306     OperandInfoTy Msg(ID_UNKNOWN_);
5307     OperandInfoTy Op(OP_NONE_);
5308     OperandInfoTy Stream(STREAM_ID_NONE_);
5309     if (parseSendMsgBody(Msg, Op, Stream) &&
5310         validateSendMsg(Msg, Op, Stream, Loc)) {
5311       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5312     }
5313   } else if (parseExpr(ImmVal)) {
5314     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5315       Error(Loc, "invalid immediate: only 16-bit values are legal");
5316   }
5317 
5318   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5319   return MatchOperand_Success;
5320 }
5321 
5322 bool AMDGPUOperand::isSendMsg() const {
5323   return isImmTy(ImmTySendMsg);
5324 }
5325 
5326 //===----------------------------------------------------------------------===//
5327 // v_interp
5328 //===----------------------------------------------------------------------===//
5329 
5330 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5331   if (getLexer().getKind() != AsmToken::Identifier)
5332     return MatchOperand_NoMatch;
5333 
5334   StringRef Str = Parser.getTok().getString();
5335   int Slot = StringSwitch<int>(Str)
5336     .Case("p10", 0)
5337     .Case("p20", 1)
5338     .Case("p0", 2)
5339     .Default(-1);
5340 
5341   SMLoc S = Parser.getTok().getLoc();
5342   if (Slot == -1)
5343     return MatchOperand_ParseFail;
5344 
5345   Parser.Lex();
5346   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5347                                               AMDGPUOperand::ImmTyInterpSlot));
5348   return MatchOperand_Success;
5349 }
5350 
5351 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5352   if (getLexer().getKind() != AsmToken::Identifier)
5353     return MatchOperand_NoMatch;
5354 
5355   StringRef Str = Parser.getTok().getString();
5356   if (!Str.startswith("attr"))
5357     return MatchOperand_NoMatch;
5358 
5359   StringRef Chan = Str.take_back(2);
5360   int AttrChan = StringSwitch<int>(Chan)
5361     .Case(".x", 0)
5362     .Case(".y", 1)
5363     .Case(".z", 2)
5364     .Case(".w", 3)
5365     .Default(-1);
5366   if (AttrChan == -1)
5367     return MatchOperand_ParseFail;
5368 
5369   Str = Str.drop_back(2).drop_front(4);
5370 
5371   uint8_t Attr;
5372   if (Str.getAsInteger(10, Attr))
5373     return MatchOperand_ParseFail;
5374 
5375   SMLoc S = Parser.getTok().getLoc();
5376   Parser.Lex();
5377   if (Attr > 63) {
5378     Error(S, "out of bounds attr");
5379     return MatchOperand_Success;
5380   }
5381 
5382   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5383 
5384   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5385                                               AMDGPUOperand::ImmTyInterpAttr));
5386   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5387                                               AMDGPUOperand::ImmTyAttrChan));
5388   return MatchOperand_Success;
5389 }
5390 
5391 //===----------------------------------------------------------------------===//
5392 // exp
5393 //===----------------------------------------------------------------------===//
5394 
5395 void AMDGPUAsmParser::errorExpTgt() {
5396   Error(Parser.getTok().getLoc(), "invalid exp target");
5397 }
5398 
5399 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5400                                                       uint8_t &Val) {
5401   if (Str == "null") {
5402     Val = 9;
5403     return MatchOperand_Success;
5404   }
5405 
5406   if (Str.startswith("mrt")) {
5407     Str = Str.drop_front(3);
5408     if (Str == "z") { // == mrtz
5409       Val = 8;
5410       return MatchOperand_Success;
5411     }
5412 
5413     if (Str.getAsInteger(10, Val))
5414       return MatchOperand_ParseFail;
5415 
5416     if (Val > 7)
5417       errorExpTgt();
5418 
5419     return MatchOperand_Success;
5420   }
5421 
5422   if (Str.startswith("pos")) {
5423     Str = Str.drop_front(3);
5424     if (Str.getAsInteger(10, Val))
5425       return MatchOperand_ParseFail;
5426 
5427     if (Val > 4 || (Val == 4 && !isGFX10()))
5428       errorExpTgt();
5429 
5430     Val += 12;
5431     return MatchOperand_Success;
5432   }
5433 
5434   if (isGFX10() && Str == "prim") {
5435     Val = 20;
5436     return MatchOperand_Success;
5437   }
5438 
5439   if (Str.startswith("param")) {
5440     Str = Str.drop_front(5);
5441     if (Str.getAsInteger(10, Val))
5442       return MatchOperand_ParseFail;
5443 
5444     if (Val >= 32)
5445       errorExpTgt();
5446 
5447     Val += 32;
5448     return MatchOperand_Success;
5449   }
5450 
5451   if (Str.startswith("invalid_target_")) {
5452     Str = Str.drop_front(15);
5453     if (Str.getAsInteger(10, Val))
5454       return MatchOperand_ParseFail;
5455 
5456     errorExpTgt();
5457     return MatchOperand_Success;
5458   }
5459 
5460   return MatchOperand_NoMatch;
5461 }
5462 
5463 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5464   uint8_t Val;
5465   StringRef Str = Parser.getTok().getString();
5466 
5467   auto Res = parseExpTgtImpl(Str, Val);
5468   if (Res != MatchOperand_Success)
5469     return Res;
5470 
5471   SMLoc S = Parser.getTok().getLoc();
5472   Parser.Lex();
5473 
5474   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5475                                               AMDGPUOperand::ImmTyExpTgt));
5476   return MatchOperand_Success;
5477 }
5478 
5479 //===----------------------------------------------------------------------===//
5480 // parser helpers
5481 //===----------------------------------------------------------------------===//
5482 
5483 bool
5484 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5485   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5486 }
5487 
5488 bool
5489 AMDGPUAsmParser::isId(const StringRef Id) const {
5490   return isId(getToken(), Id);
5491 }
5492 
5493 bool
5494 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5495   return getTokenKind() == Kind;
5496 }
5497 
5498 bool
5499 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5500   if (isId(Id)) {
5501     lex();
5502     return true;
5503   }
5504   return false;
5505 }
5506 
5507 bool
5508 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5509   if (isId(Id) && peekToken().is(Kind)) {
5510     lex();
5511     lex();
5512     return true;
5513   }
5514   return false;
5515 }
5516 
5517 bool
5518 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5519   if (isToken(Kind)) {
5520     lex();
5521     return true;
5522   }
5523   return false;
5524 }
5525 
5526 bool
5527 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5528                            const StringRef ErrMsg) {
5529   if (!trySkipToken(Kind)) {
5530     Error(getLoc(), ErrMsg);
5531     return false;
5532   }
5533   return true;
5534 }
5535 
5536 bool
5537 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5538   return !getParser().parseAbsoluteExpression(Imm);
5539 }
5540 
5541 bool
5542 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5543   SMLoc S = getLoc();
5544 
5545   const MCExpr *Expr;
5546   if (Parser.parseExpression(Expr))
5547     return false;
5548 
5549   int64_t IntVal;
5550   if (Expr->evaluateAsAbsolute(IntVal)) {
5551     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5552   } else {
5553     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5554   }
5555   return true;
5556 }
5557 
5558 bool
5559 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5560   if (isToken(AsmToken::String)) {
5561     Val = getToken().getStringContents();
5562     lex();
5563     return true;
5564   } else {
5565     Error(getLoc(), ErrMsg);
5566     return false;
5567   }
5568 }
5569 
5570 AsmToken
5571 AMDGPUAsmParser::getToken() const {
5572   return Parser.getTok();
5573 }
5574 
5575 AsmToken
5576 AMDGPUAsmParser::peekToken() {
5577   return getLexer().peekTok();
5578 }
5579 
5580 void
5581 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5582   auto TokCount = getLexer().peekTokens(Tokens);
5583 
5584   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5585     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5586 }
5587 
5588 AsmToken::TokenKind
5589 AMDGPUAsmParser::getTokenKind() const {
5590   return getLexer().getKind();
5591 }
5592 
5593 SMLoc
5594 AMDGPUAsmParser::getLoc() const {
5595   return getToken().getLoc();
5596 }
5597 
5598 StringRef
5599 AMDGPUAsmParser::getTokenStr() const {
5600   return getToken().getString();
5601 }
5602 
5603 void
5604 AMDGPUAsmParser::lex() {
5605   Parser.Lex();
5606 }
5607 
5608 //===----------------------------------------------------------------------===//
5609 // swizzle
5610 //===----------------------------------------------------------------------===//
5611 
5612 LLVM_READNONE
5613 static unsigned
5614 encodeBitmaskPerm(const unsigned AndMask,
5615                   const unsigned OrMask,
5616                   const unsigned XorMask) {
5617   using namespace llvm::AMDGPU::Swizzle;
5618 
5619   return BITMASK_PERM_ENC |
5620          (AndMask << BITMASK_AND_SHIFT) |
5621          (OrMask  << BITMASK_OR_SHIFT)  |
5622          (XorMask << BITMASK_XOR_SHIFT);
5623 }
5624 
5625 bool
5626 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5627                                       const unsigned MinVal,
5628                                       const unsigned MaxVal,
5629                                       const StringRef ErrMsg) {
5630   for (unsigned i = 0; i < OpNum; ++i) {
5631     if (!skipToken(AsmToken::Comma, "expected a comma")){
5632       return false;
5633     }
5634     SMLoc ExprLoc = Parser.getTok().getLoc();
5635     if (!parseExpr(Op[i])) {
5636       return false;
5637     }
5638     if (Op[i] < MinVal || Op[i] > MaxVal) {
5639       Error(ExprLoc, ErrMsg);
5640       return false;
5641     }
5642   }
5643 
5644   return true;
5645 }
5646 
5647 bool
5648 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5649   using namespace llvm::AMDGPU::Swizzle;
5650 
5651   int64_t Lane[LANE_NUM];
5652   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5653                            "expected a 2-bit lane id")) {
5654     Imm = QUAD_PERM_ENC;
5655     for (unsigned I = 0; I < LANE_NUM; ++I) {
5656       Imm |= Lane[I] << (LANE_SHIFT * I);
5657     }
5658     return true;
5659   }
5660   return false;
5661 }
5662 
5663 bool
5664 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5665   using namespace llvm::AMDGPU::Swizzle;
5666 
5667   SMLoc S = Parser.getTok().getLoc();
5668   int64_t GroupSize;
5669   int64_t LaneIdx;
5670 
5671   if (!parseSwizzleOperands(1, &GroupSize,
5672                             2, 32,
5673                             "group size must be in the interval [2,32]")) {
5674     return false;
5675   }
5676   if (!isPowerOf2_64(GroupSize)) {
5677     Error(S, "group size must be a power of two");
5678     return false;
5679   }
5680   if (parseSwizzleOperands(1, &LaneIdx,
5681                            0, GroupSize - 1,
5682                            "lane id must be in the interval [0,group size - 1]")) {
5683     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5684     return true;
5685   }
5686   return false;
5687 }
5688 
5689 bool
5690 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5691   using namespace llvm::AMDGPU::Swizzle;
5692 
5693   SMLoc S = Parser.getTok().getLoc();
5694   int64_t GroupSize;
5695 
5696   if (!parseSwizzleOperands(1, &GroupSize,
5697       2, 32, "group size must be in the interval [2,32]")) {
5698     return false;
5699   }
5700   if (!isPowerOf2_64(GroupSize)) {
5701     Error(S, "group size must be a power of two");
5702     return false;
5703   }
5704 
5705   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5706   return true;
5707 }
5708 
5709 bool
5710 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5711   using namespace llvm::AMDGPU::Swizzle;
5712 
5713   SMLoc S = Parser.getTok().getLoc();
5714   int64_t GroupSize;
5715 
5716   if (!parseSwizzleOperands(1, &GroupSize,
5717       1, 16, "group size must be in the interval [1,16]")) {
5718     return false;
5719   }
5720   if (!isPowerOf2_64(GroupSize)) {
5721     Error(S, "group size must be a power of two");
5722     return false;
5723   }
5724 
5725   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5726   return true;
5727 }
5728 
5729 bool
5730 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5731   using namespace llvm::AMDGPU::Swizzle;
5732 
5733   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5734     return false;
5735   }
5736 
5737   StringRef Ctl;
5738   SMLoc StrLoc = Parser.getTok().getLoc();
5739   if (!parseString(Ctl)) {
5740     return false;
5741   }
5742   if (Ctl.size() != BITMASK_WIDTH) {
5743     Error(StrLoc, "expected a 5-character mask");
5744     return false;
5745   }
5746 
5747   unsigned AndMask = 0;
5748   unsigned OrMask = 0;
5749   unsigned XorMask = 0;
5750 
5751   for (size_t i = 0; i < Ctl.size(); ++i) {
5752     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5753     switch(Ctl[i]) {
5754     default:
5755       Error(StrLoc, "invalid mask");
5756       return false;
5757     case '0':
5758       break;
5759     case '1':
5760       OrMask |= Mask;
5761       break;
5762     case 'p':
5763       AndMask |= Mask;
5764       break;
5765     case 'i':
5766       AndMask |= Mask;
5767       XorMask |= Mask;
5768       break;
5769     }
5770   }
5771 
5772   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5773   return true;
5774 }
5775 
5776 bool
5777 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5778 
5779   SMLoc OffsetLoc = Parser.getTok().getLoc();
5780 
5781   if (!parseExpr(Imm)) {
5782     return false;
5783   }
5784   if (!isUInt<16>(Imm)) {
5785     Error(OffsetLoc, "expected a 16-bit offset");
5786     return false;
5787   }
5788   return true;
5789 }
5790 
5791 bool
5792 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5793   using namespace llvm::AMDGPU::Swizzle;
5794 
5795   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
5796 
5797     SMLoc ModeLoc = Parser.getTok().getLoc();
5798     bool Ok = false;
5799 
5800     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5801       Ok = parseSwizzleQuadPerm(Imm);
5802     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5803       Ok = parseSwizzleBitmaskPerm(Imm);
5804     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5805       Ok = parseSwizzleBroadcast(Imm);
5806     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5807       Ok = parseSwizzleSwap(Imm);
5808     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5809       Ok = parseSwizzleReverse(Imm);
5810     } else {
5811       Error(ModeLoc, "expected a swizzle mode");
5812     }
5813 
5814     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
5815   }
5816 
5817   return false;
5818 }
5819 
5820 OperandMatchResultTy
5821 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5822   SMLoc S = Parser.getTok().getLoc();
5823   int64_t Imm = 0;
5824 
5825   if (trySkipId("offset")) {
5826 
5827     bool Ok = false;
5828     if (skipToken(AsmToken::Colon, "expected a colon")) {
5829       if (trySkipId("swizzle")) {
5830         Ok = parseSwizzleMacro(Imm);
5831       } else {
5832         Ok = parseSwizzleOffset(Imm);
5833       }
5834     }
5835 
5836     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5837 
5838     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5839   } else {
5840     // Swizzle "offset" operand is optional.
5841     // If it is omitted, try parsing other optional operands.
5842     return parseOptionalOpr(Operands);
5843   }
5844 }
5845 
5846 bool
5847 AMDGPUOperand::isSwizzle() const {
5848   return isImmTy(ImmTySwizzle);
5849 }
5850 
5851 //===----------------------------------------------------------------------===//
5852 // VGPR Index Mode
5853 //===----------------------------------------------------------------------===//
5854 
5855 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5856 
5857   using namespace llvm::AMDGPU::VGPRIndexMode;
5858 
5859   if (trySkipToken(AsmToken::RParen)) {
5860     return OFF;
5861   }
5862 
5863   int64_t Imm = 0;
5864 
5865   while (true) {
5866     unsigned Mode = 0;
5867     SMLoc S = Parser.getTok().getLoc();
5868 
5869     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5870       if (trySkipId(IdSymbolic[ModeId])) {
5871         Mode = 1 << ModeId;
5872         break;
5873       }
5874     }
5875 
5876     if (Mode == 0) {
5877       Error(S, (Imm == 0)?
5878                "expected a VGPR index mode or a closing parenthesis" :
5879                "expected a VGPR index mode");
5880       break;
5881     }
5882 
5883     if (Imm & Mode) {
5884       Error(S, "duplicate VGPR index mode");
5885       break;
5886     }
5887     Imm |= Mode;
5888 
5889     if (trySkipToken(AsmToken::RParen))
5890       break;
5891     if (!skipToken(AsmToken::Comma,
5892                    "expected a comma or a closing parenthesis"))
5893       break;
5894   }
5895 
5896   return Imm;
5897 }
5898 
5899 OperandMatchResultTy
5900 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5901 
5902   int64_t Imm = 0;
5903   SMLoc S = Parser.getTok().getLoc();
5904 
5905   if (getLexer().getKind() == AsmToken::Identifier &&
5906       Parser.getTok().getString() == "gpr_idx" &&
5907       getLexer().peekTok().is(AsmToken::LParen)) {
5908 
5909     Parser.Lex();
5910     Parser.Lex();
5911 
5912     // If parse failed, trigger an error but do not return error code
5913     // to avoid excessive error messages.
5914     Imm = parseGPRIdxMacro();
5915 
5916   } else {
5917     if (getParser().parseAbsoluteExpression(Imm))
5918       return MatchOperand_NoMatch;
5919     if (Imm < 0 || !isUInt<4>(Imm)) {
5920       Error(S, "invalid immediate: only 4-bit values are legal");
5921     }
5922   }
5923 
5924   Operands.push_back(
5925       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5926   return MatchOperand_Success;
5927 }
5928 
5929 bool AMDGPUOperand::isGPRIdxMode() const {
5930   return isImmTy(ImmTyGprIdxMode);
5931 }
5932 
5933 //===----------------------------------------------------------------------===//
5934 // sopp branch targets
5935 //===----------------------------------------------------------------------===//
5936 
5937 OperandMatchResultTy
5938 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5939 
5940   // Make sure we are not parsing something
5941   // that looks like a label or an expression but is not.
5942   // This will improve error messages.
5943   if (isRegister() || isModifier())
5944     return MatchOperand_NoMatch;
5945 
5946   if (parseExpr(Operands)) {
5947 
5948     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5949     assert(Opr.isImm() || Opr.isExpr());
5950     SMLoc Loc = Opr.getStartLoc();
5951 
5952     // Currently we do not support arbitrary expressions as branch targets.
5953     // Only labels and absolute expressions are accepted.
5954     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5955       Error(Loc, "expected an absolute expression or a label");
5956     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5957       Error(Loc, "expected a 16-bit signed jump offset");
5958     }
5959   }
5960 
5961   return MatchOperand_Success; // avoid excessive error messages
5962 }
5963 
5964 //===----------------------------------------------------------------------===//
5965 // Boolean holding registers
5966 //===----------------------------------------------------------------------===//
5967 
5968 OperandMatchResultTy
5969 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5970   return parseReg(Operands);
5971 }
5972 
5973 //===----------------------------------------------------------------------===//
5974 // mubuf
5975 //===----------------------------------------------------------------------===//
5976 
5977 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5978   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5979 }
5980 
5981 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5982   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5983 }
5984 
5985 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5986   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5987 }
5988 
5989 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5990                                const OperandVector &Operands,
5991                                bool IsAtomic,
5992                                bool IsAtomicReturn,
5993                                bool IsLds) {
5994   bool IsLdsOpcode = IsLds;
5995   bool HasLdsModifier = false;
5996   OptionalImmIndexMap OptionalIdx;
5997   assert(IsAtomicReturn ? IsAtomic : true);
5998   unsigned FirstOperandIdx = 1;
5999 
6000   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6001     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6002 
6003     // Add the register arguments
6004     if (Op.isReg()) {
6005       Op.addRegOperands(Inst, 1);
6006       // Insert a tied src for atomic return dst.
6007       // This cannot be postponed as subsequent calls to
6008       // addImmOperands rely on correct number of MC operands.
6009       if (IsAtomicReturn && i == FirstOperandIdx)
6010         Op.addRegOperands(Inst, 1);
6011       continue;
6012     }
6013 
6014     // Handle the case where soffset is an immediate
6015     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6016       Op.addImmOperands(Inst, 1);
6017       continue;
6018     }
6019 
6020     HasLdsModifier |= Op.isLDS();
6021 
6022     // Handle tokens like 'offen' which are sometimes hard-coded into the
6023     // asm string.  There are no MCInst operands for these.
6024     if (Op.isToken()) {
6025       continue;
6026     }
6027     assert(Op.isImm());
6028 
6029     // Handle optional arguments
6030     OptionalIdx[Op.getImmTy()] = i;
6031   }
6032 
6033   // This is a workaround for an llvm quirk which may result in an
6034   // incorrect instruction selection. Lds and non-lds versions of
6035   // MUBUF instructions are identical except that lds versions
6036   // have mandatory 'lds' modifier. However this modifier follows
6037   // optional modifiers and llvm asm matcher regards this 'lds'
6038   // modifier as an optional one. As a result, an lds version
6039   // of opcode may be selected even if it has no 'lds' modifier.
6040   if (IsLdsOpcode && !HasLdsModifier) {
6041     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6042     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6043       Inst.setOpcode(NoLdsOpcode);
6044       IsLdsOpcode = false;
6045     }
6046   }
6047 
6048   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6049   if (!IsAtomic) { // glc is hard-coded.
6050     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6051   }
6052   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6053 
6054   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6055     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6056   }
6057 
6058   if (isGFX10())
6059     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6060 }
6061 
6062 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6063   OptionalImmIndexMap OptionalIdx;
6064 
6065   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6066     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6067 
6068     // Add the register arguments
6069     if (Op.isReg()) {
6070       Op.addRegOperands(Inst, 1);
6071       continue;
6072     }
6073 
6074     // Handle the case where soffset is an immediate
6075     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6076       Op.addImmOperands(Inst, 1);
6077       continue;
6078     }
6079 
6080     // Handle tokens like 'offen' which are sometimes hard-coded into the
6081     // asm string.  There are no MCInst operands for these.
6082     if (Op.isToken()) {
6083       continue;
6084     }
6085     assert(Op.isImm());
6086 
6087     // Handle optional arguments
6088     OptionalIdx[Op.getImmTy()] = i;
6089   }
6090 
6091   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6092                         AMDGPUOperand::ImmTyOffset);
6093   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6094   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6095   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6096   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6097 
6098   if (isGFX10())
6099     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6100 }
6101 
6102 //===----------------------------------------------------------------------===//
6103 // mimg
6104 //===----------------------------------------------------------------------===//
6105 
6106 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6107                               bool IsAtomic) {
6108   unsigned I = 1;
6109   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6110   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6111     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6112   }
6113 
6114   if (IsAtomic) {
6115     // Add src, same as dst
6116     assert(Desc.getNumDefs() == 1);
6117     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6118   }
6119 
6120   OptionalImmIndexMap OptionalIdx;
6121 
6122   for (unsigned E = Operands.size(); I != E; ++I) {
6123     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6124 
6125     // Add the register arguments
6126     if (Op.isReg()) {
6127       Op.addRegOperands(Inst, 1);
6128     } else if (Op.isImmModifier()) {
6129       OptionalIdx[Op.getImmTy()] = I;
6130     } else if (!Op.isToken()) {
6131       llvm_unreachable("unexpected operand type");
6132     }
6133   }
6134 
6135   bool IsGFX10 = isGFX10();
6136 
6137   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6138   if (IsGFX10)
6139     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6140   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6141   if (IsGFX10)
6142     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6143   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6144   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6145   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6146   if (IsGFX10)
6147     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6148   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6149   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6150   if (!IsGFX10)
6151     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6152   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6153 }
6154 
6155 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6156   cvtMIMG(Inst, Operands, true);
6157 }
6158 
6159 //===----------------------------------------------------------------------===//
6160 // smrd
6161 //===----------------------------------------------------------------------===//
6162 
6163 bool AMDGPUOperand::isSMRDOffset8() const {
6164   return isImm() && isUInt<8>(getImm());
6165 }
6166 
6167 bool AMDGPUOperand::isSMEMOffset() const {
6168   return isImm(); // Offset range is checked later by validator.
6169 }
6170 
6171 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6172   // 32-bit literals are only supported on CI and we only want to use them
6173   // when the offset is > 8-bits.
6174   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6175 }
6176 
6177 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6178   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6179 }
6180 
6181 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6182   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6183 }
6184 
6185 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6186   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6187 }
6188 
6189 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6190   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6191 }
6192 
6193 //===----------------------------------------------------------------------===//
6194 // vop3
6195 //===----------------------------------------------------------------------===//
6196 
/// Translate an output-modifier multiplier in place.
///
/// Accepts 1, 2 or 4 and rewrites \p Mul to 0, 1 or 2 respectively (the value
/// shifted right by one). Any other value is rejected and left untouched.
/// \returns true if \p Mul was accepted and converted.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
6204 
/// Translate an output-modifier divisor in place.
///
/// Accepts 1 (rewritten to 0) and 2 (rewritten to 3). Any other value is
/// rejected and left untouched.
/// \returns true if \p Div was accepted and converted.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
6218 
/// Translate a parsed bound_ctrl value in place.
///
/// Accepts 0 (rewritten to 1) and -1 (rewritten to 0). Any other value is
/// rejected and left untouched.
/// \returns true if \p BoundCtrl was accepted and converted.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  switch (BoundCtrl) {
  case 0:
    BoundCtrl = 1;
    return true;
  case -1:
    BoundCtrl = 0;
    return true;
  default:
    return false;
  }
}
6232 
6233 // Note: the order in this table matches the order of operands in AsmString.
6234 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6235   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6236   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6237   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6238   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6239   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6240   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6241   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6242   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6243   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6244   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6245   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6246   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6247   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6248   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6249   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6250   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6251   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6252   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6253   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6254   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6255   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6256   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6257   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6258   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6259   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6260   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6261   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6262   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6263   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6264   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6265   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6266   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6267   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6268   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6269   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6270   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6271   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6272   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6273   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6274   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6275   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6276   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6277   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6278   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6279 };
6280 
6281 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6282 
6283   OperandMatchResultTy res = parseOptionalOpr(Operands);
6284 
6285   // This is a hack to enable hardcoded mandatory operands which follow
6286   // optional operands.
6287   //
6288   // Current design assumes that all operands after the first optional operand
6289   // are also optional. However implementation of some instructions violates
6290   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
6291   //
6292   // To alleviate this problem, we have to (implicitly) parse extra operands
6293   // to make sure autogenerated parser of custom operands never hit hardcoded
6294   // mandatory operands.
6295 
6296   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6297     if (res != MatchOperand_Success ||
6298         isToken(AsmToken::EndOfStatement))
6299       break;
6300 
6301     trySkipToken(AsmToken::Comma);
6302     res = parseOptionalOpr(Operands);
6303   }
6304 
6305   return res;
6306 }
6307 
6308 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6309   OperandMatchResultTy res;
6310   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6311     // try to parse any optional operand here
6312     if (Op.IsBit) {
6313       res = parseNamedBit(Op.Name, Operands, Op.Type);
6314     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6315       res = parseOModOperand(Operands);
6316     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6317                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6318                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6319       res = parseSDWASel(Operands, Op.Name, Op.Type);
6320     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6321       res = parseSDWADstUnused(Operands);
6322     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6323                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6324                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6325                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6326       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6327                                         Op.ConvertResult);
6328     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6329       res = parseDim(Operands);
6330     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6331       res = parseDfmtNfmt(Operands);
6332     } else {
6333       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6334     }
6335     if (res != MatchOperand_NoMatch) {
6336       return res;
6337     }
6338   }
6339   return MatchOperand_NoMatch;
6340 }
6341 
6342 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6343   StringRef Name = Parser.getTok().getString();
6344   if (Name == "mul") {
6345     return parseIntWithPrefix("mul", Operands,
6346                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6347   }
6348 
6349   if (Name == "div") {
6350     return parseIntWithPrefix("div", Operands,
6351                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6352   }
6353 
6354   return MatchOperand_NoMatch;
6355 }
6356 
6357 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6358   cvtVOP3P(Inst, Operands);
6359 
6360   int Opc = Inst.getOpcode();
6361 
6362   int SrcNum;
6363   const int Ops[] = { AMDGPU::OpName::src0,
6364                       AMDGPU::OpName::src1,
6365                       AMDGPU::OpName::src2 };
6366   for (SrcNum = 0;
6367        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6368        ++SrcNum);
6369   assert(SrcNum > 0);
6370 
6371   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6372   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6373 
6374   if ((OpSel & (1 << SrcNum)) != 0) {
6375     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6376     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6377     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6378   }
6379 }
6380 
6381 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6382       // 1. This operand is input modifiers
6383   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6384       // 2. This is not last operand
6385       && Desc.NumOperands > (OpNum + 1)
6386       // 3. Next operand is register class
6387       && Desc.OpInfo[OpNum + 1].RegClass != -1
6388       // 4. Next register is not tied to any other operand
6389       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6390 }
6391 
6392 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6393 {
6394   OptionalImmIndexMap OptionalIdx;
6395   unsigned Opc = Inst.getOpcode();
6396 
6397   unsigned I = 1;
6398   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6399   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6400     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6401   }
6402 
6403   for (unsigned E = Operands.size(); I != E; ++I) {
6404     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6405     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6406       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6407     } else if (Op.isInterpSlot() ||
6408                Op.isInterpAttr() ||
6409                Op.isAttrChan()) {
6410       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6411     } else if (Op.isImmModifier()) {
6412       OptionalIdx[Op.getImmTy()] = I;
6413     } else {
6414       llvm_unreachable("unhandled operand type");
6415     }
6416   }
6417 
6418   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6419     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6420   }
6421 
6422   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6423     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6424   }
6425 
6426   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6427     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6428   }
6429 }
6430 
6431 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6432                               OptionalImmIndexMap &OptionalIdx) {
6433   unsigned Opc = Inst.getOpcode();
6434 
6435   unsigned I = 1;
6436   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6437   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6438     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6439   }
6440 
6441   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6442     // This instruction has src modifiers
6443     for (unsigned E = Operands.size(); I != E; ++I) {
6444       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6445       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6446         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6447       } else if (Op.isImmModifier()) {
6448         OptionalIdx[Op.getImmTy()] = I;
6449       } else if (Op.isRegOrImm()) {
6450         Op.addRegOrImmOperands(Inst, 1);
6451       } else {
6452         llvm_unreachable("unhandled operand type");
6453       }
6454     }
6455   } else {
6456     // No src modifiers
6457     for (unsigned E = Operands.size(); I != E; ++I) {
6458       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6459       if (Op.isMod()) {
6460         OptionalIdx[Op.getImmTy()] = I;
6461       } else {
6462         Op.addRegOrImmOperands(Inst, 1);
6463       }
6464     }
6465   }
6466 
6467   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6468     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6469   }
6470 
6471   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6472     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6473   }
6474 
6475   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6476   // it has src2 register operand that is tied to dst operand
6477   // we don't allow modifiers for this operand in assembler so src2_modifiers
6478   // should be 0.
6479   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6480       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6481       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6482       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6483       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6484       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6485       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6486     auto it = Inst.begin();
6487     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6488     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6489     ++it;
6490     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6491   }
6492 }
6493 
6494 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6495   OptionalImmIndexMap OptionalIdx;
6496   cvtVOP3(Inst, Operands, OptionalIdx);
6497 }
6498 
6499 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6500                                const OperandVector &Operands) {
6501   OptionalImmIndexMap OptIdx;
6502   const int Opc = Inst.getOpcode();
6503   const MCInstrDesc &Desc = MII.get(Opc);
6504 
6505   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6506 
6507   cvtVOP3(Inst, Operands, OptIdx);
6508 
6509   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6510     assert(!IsPacked);
6511     Inst.addOperand(Inst.getOperand(0));
6512   }
6513 
6514   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6515   // instruction, and then figure out where to actually put the modifiers
6516 
6517   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6518 
6519   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6520   if (OpSelHiIdx != -1) {
6521     int DefaultVal = IsPacked ? -1 : 0;
6522     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6523                           DefaultVal);
6524   }
6525 
6526   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6527   if (NegLoIdx != -1) {
6528     assert(IsPacked);
6529     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6530     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6531   }
6532 
6533   const int Ops[] = { AMDGPU::OpName::src0,
6534                       AMDGPU::OpName::src1,
6535                       AMDGPU::OpName::src2 };
6536   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6537                          AMDGPU::OpName::src1_modifiers,
6538                          AMDGPU::OpName::src2_modifiers };
6539 
6540   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6541 
6542   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6543   unsigned OpSelHi = 0;
6544   unsigned NegLo = 0;
6545   unsigned NegHi = 0;
6546 
6547   if (OpSelHiIdx != -1) {
6548     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6549   }
6550 
6551   if (NegLoIdx != -1) {
6552     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6553     NegLo = Inst.getOperand(NegLoIdx).getImm();
6554     NegHi = Inst.getOperand(NegHiIdx).getImm();
6555   }
6556 
6557   for (int J = 0; J < 3; ++J) {
6558     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6559     if (OpIdx == -1)
6560       break;
6561 
6562     uint32_t ModVal = 0;
6563 
6564     if ((OpSel & (1 << J)) != 0)
6565       ModVal |= SISrcMods::OP_SEL_0;
6566 
6567     if ((OpSelHi & (1 << J)) != 0)
6568       ModVal |= SISrcMods::OP_SEL_1;
6569 
6570     if ((NegLo & (1 << J)) != 0)
6571       ModVal |= SISrcMods::NEG;
6572 
6573     if ((NegHi & (1 << J)) != 0)
6574       ModVal |= SISrcMods::NEG_HI;
6575 
6576     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6577 
6578     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6579   }
6580 }
6581 
6582 //===----------------------------------------------------------------------===//
6583 // dpp
6584 //===----------------------------------------------------------------------===//
6585 
6586 bool AMDGPUOperand::isDPP8() const {
6587   return isImmTy(ImmTyDPP8);
6588 }
6589 
6590 bool AMDGPUOperand::isDPPCtrl() const {
6591   using namespace AMDGPU::DPP;
6592 
6593   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6594   if (result) {
6595     int64_t Imm = getImm();
6596     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6597            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6598            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6599            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6600            (Imm == DppCtrl::WAVE_SHL1) ||
6601            (Imm == DppCtrl::WAVE_ROL1) ||
6602            (Imm == DppCtrl::WAVE_SHR1) ||
6603            (Imm == DppCtrl::WAVE_ROR1) ||
6604            (Imm == DppCtrl::ROW_MIRROR) ||
6605            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6606            (Imm == DppCtrl::BCAST15) ||
6607            (Imm == DppCtrl::BCAST31) ||
6608            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6609            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6610   }
6611   return false;
6612 }
6613 
6614 //===----------------------------------------------------------------------===//
6615 // mAI
6616 //===----------------------------------------------------------------------===//
6617 
6618 bool AMDGPUOperand::isBLGP() const {
6619   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6620 }
6621 
6622 bool AMDGPUOperand::isCBSZ() const {
6623   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6624 }
6625 
6626 bool AMDGPUOperand::isABID() const {
6627   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6628 }
6629 
6630 bool AMDGPUOperand::isS16Imm() const {
6631   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6632 }
6633 
6634 bool AMDGPUOperand::isU16Imm() const {
6635   return isImm() && isUInt<16>(getImm());
6636 }
6637 
6638 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6639   if (!isGFX10())
6640     return MatchOperand_NoMatch;
6641 
6642   SMLoc S = Parser.getTok().getLoc();
6643 
6644   if (getLexer().isNot(AsmToken::Identifier))
6645     return MatchOperand_NoMatch;
6646   if (getLexer().getTok().getString() != "dim")
6647     return MatchOperand_NoMatch;
6648 
6649   Parser.Lex();
6650   if (getLexer().isNot(AsmToken::Colon))
6651     return MatchOperand_ParseFail;
6652 
6653   Parser.Lex();
6654 
6655   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6656   // integer.
6657   std::string Token;
6658   if (getLexer().is(AsmToken::Integer)) {
6659     SMLoc Loc = getLexer().getTok().getEndLoc();
6660     Token = std::string(getLexer().getTok().getString());
6661     Parser.Lex();
6662     if (getLexer().getTok().getLoc() != Loc)
6663       return MatchOperand_ParseFail;
6664   }
6665   if (getLexer().isNot(AsmToken::Identifier))
6666     return MatchOperand_ParseFail;
6667   Token += getLexer().getTok().getString();
6668 
6669   StringRef DimId = Token;
6670   if (DimId.startswith("SQ_RSRC_IMG_"))
6671     DimId = DimId.substr(12);
6672 
6673   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6674   if (!DimInfo)
6675     return MatchOperand_ParseFail;
6676 
6677   Parser.Lex();
6678 
6679   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6680                                               AMDGPUOperand::ImmTyDim));
6681   return MatchOperand_Success;
6682 }
6683 
6684 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6685   SMLoc S = Parser.getTok().getLoc();
6686   StringRef Prefix;
6687 
6688   if (getLexer().getKind() == AsmToken::Identifier) {
6689     Prefix = Parser.getTok().getString();
6690   } else {
6691     return MatchOperand_NoMatch;
6692   }
6693 
6694   if (Prefix != "dpp8")
6695     return parseDPPCtrl(Operands);
6696   if (!isGFX10())
6697     return MatchOperand_NoMatch;
6698 
6699   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6700 
6701   int64_t Sels[8];
6702 
6703   Parser.Lex();
6704   if (getLexer().isNot(AsmToken::Colon))
6705     return MatchOperand_ParseFail;
6706 
6707   Parser.Lex();
6708   if (getLexer().isNot(AsmToken::LBrac))
6709     return MatchOperand_ParseFail;
6710 
6711   Parser.Lex();
6712   if (getParser().parseAbsoluteExpression(Sels[0]))
6713     return MatchOperand_ParseFail;
6714   if (0 > Sels[0] || 7 < Sels[0])
6715     return MatchOperand_ParseFail;
6716 
6717   for (size_t i = 1; i < 8; ++i) {
6718     if (getLexer().isNot(AsmToken::Comma))
6719       return MatchOperand_ParseFail;
6720 
6721     Parser.Lex();
6722     if (getParser().parseAbsoluteExpression(Sels[i]))
6723       return MatchOperand_ParseFail;
6724     if (0 > Sels[i] || 7 < Sels[i])
6725       return MatchOperand_ParseFail;
6726   }
6727 
6728   if (getLexer().isNot(AsmToken::RBrac))
6729     return MatchOperand_ParseFail;
6730   Parser.Lex();
6731 
6732   unsigned DPP8 = 0;
6733   for (size_t i = 0; i < 8; ++i)
6734     DPP8 |= (Sels[i] << (i * 3));
6735 
6736   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6737   return MatchOperand_Success;
6738 }
6739 
6740 OperandMatchResultTy
6741 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6742   using namespace AMDGPU::DPP;
6743 
6744   SMLoc S = Parser.getTok().getLoc();
6745   StringRef Prefix;
6746   int64_t Int;
6747 
6748   if (getLexer().getKind() == AsmToken::Identifier) {
6749     Prefix = Parser.getTok().getString();
6750   } else {
6751     return MatchOperand_NoMatch;
6752   }
6753 
6754   if (Prefix == "row_mirror") {
6755     Int = DppCtrl::ROW_MIRROR;
6756     Parser.Lex();
6757   } else if (Prefix == "row_half_mirror") {
6758     Int = DppCtrl::ROW_HALF_MIRROR;
6759     Parser.Lex();
6760   } else {
6761     // Check to prevent parseDPPCtrlOps from eating invalid tokens
6762     if (Prefix != "quad_perm"
6763         && Prefix != "row_shl"
6764         && Prefix != "row_shr"
6765         && Prefix != "row_ror"
6766         && Prefix != "wave_shl"
6767         && Prefix != "wave_rol"
6768         && Prefix != "wave_shr"
6769         && Prefix != "wave_ror"
6770         && Prefix != "row_bcast"
6771         && Prefix != "row_share"
6772         && Prefix != "row_xmask") {
6773       return MatchOperand_NoMatch;
6774     }
6775 
6776     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6777       return MatchOperand_NoMatch;
6778 
6779     if (!isVI() && !isGFX9() &&
6780         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6781          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6782          Prefix == "row_bcast"))
6783       return MatchOperand_NoMatch;
6784 
6785     Parser.Lex();
6786     if (getLexer().isNot(AsmToken::Colon))
6787       return MatchOperand_ParseFail;
6788 
6789     if (Prefix == "quad_perm") {
6790       // quad_perm:[%d,%d,%d,%d]
6791       Parser.Lex();
6792       if (getLexer().isNot(AsmToken::LBrac))
6793         return MatchOperand_ParseFail;
6794       Parser.Lex();
6795 
6796       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6797         return MatchOperand_ParseFail;
6798 
6799       for (int i = 0; i < 3; ++i) {
6800         if (getLexer().isNot(AsmToken::Comma))
6801           return MatchOperand_ParseFail;
6802         Parser.Lex();
6803 
6804         int64_t Temp;
6805         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6806           return MatchOperand_ParseFail;
6807         const int shift = i*2 + 2;
6808         Int += (Temp << shift);
6809       }
6810 
6811       if (getLexer().isNot(AsmToken::RBrac))
6812         return MatchOperand_ParseFail;
6813       Parser.Lex();
6814     } else {
6815       // sel:%d
6816       Parser.Lex();
6817       if (getParser().parseAbsoluteExpression(Int))
6818         return MatchOperand_ParseFail;
6819 
6820       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6821         Int |= DppCtrl::ROW_SHL0;
6822       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6823         Int |= DppCtrl::ROW_SHR0;
6824       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6825         Int |= DppCtrl::ROW_ROR0;
6826       } else if (Prefix == "wave_shl" && 1 == Int) {
6827         Int = DppCtrl::WAVE_SHL1;
6828       } else if (Prefix == "wave_rol" && 1 == Int) {
6829         Int = DppCtrl::WAVE_ROL1;
6830       } else if (Prefix == "wave_shr" && 1 == Int) {
6831         Int = DppCtrl::WAVE_SHR1;
6832       } else if (Prefix == "wave_ror" && 1 == Int) {
6833         Int = DppCtrl::WAVE_ROR1;
6834       } else if (Prefix == "row_bcast") {
6835         if (Int == 15) {
6836           Int = DppCtrl::BCAST15;
6837         } else if (Int == 31) {
6838           Int = DppCtrl::BCAST31;
6839         } else {
6840           return MatchOperand_ParseFail;
6841         }
6842       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6843         Int |= DppCtrl::ROW_SHARE_FIRST;
6844       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6845         Int |= DppCtrl::ROW_XMASK_FIRST;
6846       } else {
6847         return MatchOperand_ParseFail;
6848       }
6849     }
6850   }
6851 
6852   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6853   return MatchOperand_Success;
6854 }
6855 
6856 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6857   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6858 }
6859 
6860 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6861   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6862 }
6863 
6864 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6865   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6866 }
6867 
6868 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6869   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6870 }
6871 
6872 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6873   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6874 }
6875 
6876 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6877   OptionalImmIndexMap OptionalIdx;
6878 
6879   unsigned I = 1;
6880   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6881   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6882     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6883   }
6884 
6885   int Fi = 0;
6886   for (unsigned E = Operands.size(); I != E; ++I) {
6887     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6888                                             MCOI::TIED_TO);
6889     if (TiedTo != -1) {
6890       assert((unsigned)TiedTo < Inst.getNumOperands());
6891       // handle tied old or src2 for MAC instructions
6892       Inst.addOperand(Inst.getOperand(TiedTo));
6893     }
6894     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6895     // Add the register arguments
6896     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6897       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
6898       // Skip it.
6899       continue;
6900     }
6901 
6902     if (IsDPP8) {
6903       if (Op.isDPP8()) {
6904         Op.addImmOperands(Inst, 1);
6905       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6906         Op.addRegWithFPInputModsOperands(Inst, 2);
6907       } else if (Op.isFI()) {
6908         Fi = Op.getImm();
6909       } else if (Op.isReg()) {
6910         Op.addRegOperands(Inst, 1);
6911       } else {
6912         llvm_unreachable("Invalid operand type");
6913       }
6914     } else {
6915       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6916         Op.addRegWithFPInputModsOperands(Inst, 2);
6917       } else if (Op.isDPPCtrl()) {
6918         Op.addImmOperands(Inst, 1);
6919       } else if (Op.isImm()) {
6920         // Handle optional arguments
6921         OptionalIdx[Op.getImmTy()] = I;
6922       } else {
6923         llvm_unreachable("Invalid operand type");
6924       }
6925     }
6926   }
6927 
6928   if (IsDPP8) {
6929     using namespace llvm::AMDGPU::DPP;
6930     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6931   } else {
6932     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6933     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6934     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6935     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6936       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6937     }
6938   }
6939 }
6940 
6941 //===----------------------------------------------------------------------===//
6942 // sdwa
6943 //===----------------------------------------------------------------------===//
6944 
6945 OperandMatchResultTy
6946 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6947                               AMDGPUOperand::ImmTy Type) {
6948   using namespace llvm::AMDGPU::SDWA;
6949 
6950   SMLoc S = Parser.getTok().getLoc();
6951   StringRef Value;
6952   OperandMatchResultTy res;
6953 
6954   res = parseStringWithPrefix(Prefix, Value);
6955   if (res != MatchOperand_Success) {
6956     return res;
6957   }
6958 
6959   int64_t Int;
6960   Int = StringSwitch<int64_t>(Value)
6961         .Case("BYTE_0", SdwaSel::BYTE_0)
6962         .Case("BYTE_1", SdwaSel::BYTE_1)
6963         .Case("BYTE_2", SdwaSel::BYTE_2)
6964         .Case("BYTE_3", SdwaSel::BYTE_3)
6965         .Case("WORD_0", SdwaSel::WORD_0)
6966         .Case("WORD_1", SdwaSel::WORD_1)
6967         .Case("DWORD", SdwaSel::DWORD)
6968         .Default(0xffffffff);
6969   Parser.Lex(); // eat last token
6970 
6971   if (Int == 0xffffffff) {
6972     return MatchOperand_ParseFail;
6973   }
6974 
6975   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6976   return MatchOperand_Success;
6977 }
6978 
6979 OperandMatchResultTy
6980 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6981   using namespace llvm::AMDGPU::SDWA;
6982 
6983   SMLoc S = Parser.getTok().getLoc();
6984   StringRef Value;
6985   OperandMatchResultTy res;
6986 
6987   res = parseStringWithPrefix("dst_unused", Value);
6988   if (res != MatchOperand_Success) {
6989     return res;
6990   }
6991 
6992   int64_t Int;
6993   Int = StringSwitch<int64_t>(Value)
6994         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6995         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6996         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6997         .Default(0xffffffff);
6998   Parser.Lex(); // eat last token
6999 
7000   if (Int == 0xffffffff) {
7001     return MatchOperand_ParseFail;
7002   }
7003 
7004   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7005   return MatchOperand_Success;
7006 }
7007 
7008 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7009   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7010 }
7011 
7012 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7013   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7014 }
7015 
7016 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7017   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7018 }
7019 
7020 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7021   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7022 }
7023 
7024 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7025   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7026 }
7027 
7028 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7029                               uint64_t BasicInstType,
7030                               bool SkipDstVcc,
7031                               bool SkipSrcVcc) {
7032   using namespace llvm::AMDGPU::SDWA;
7033 
7034   OptionalImmIndexMap OptionalIdx;
7035   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7036   bool SkippedVcc = false;
7037 
7038   unsigned I = 1;
7039   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7040   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7041     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7042   }
7043 
7044   for (unsigned E = Operands.size(); I != E; ++I) {
7045     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7046     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7047         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7048       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
7049       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7050       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7051       // Skip VCC only if we didn't skip it on previous iteration.
7052       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7053       if (BasicInstType == SIInstrFlags::VOP2 &&
7054           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7055            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7056         SkippedVcc = true;
7057         continue;
7058       } else if (BasicInstType == SIInstrFlags::VOPC &&
7059                  Inst.getNumOperands() == 0) {
7060         SkippedVcc = true;
7061         continue;
7062       }
7063     }
7064     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7065       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7066     } else if (Op.isImm()) {
7067       // Handle optional arguments
7068       OptionalIdx[Op.getImmTy()] = I;
7069     } else {
7070       llvm_unreachable("Invalid operand type");
7071     }
7072     SkippedVcc = false;
7073   }
7074 
7075   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7076       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7077       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7078     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
7079     switch (BasicInstType) {
7080     case SIInstrFlags::VOP1:
7081       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7082       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7083         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7084       }
7085       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7086       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7087       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7088       break;
7089 
7090     case SIInstrFlags::VOP2:
7091       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7092       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7093         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7094       }
7095       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7096       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7097       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7098       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7099       break;
7100 
7101     case SIInstrFlags::VOPC:
7102       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7103         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7104       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7105       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7106       break;
7107 
7108     default:
7109       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7110     }
7111   }
7112 
7113   // special case v_mac_{f16, f32}:
7114   // it has src2 register operand that is tied to dst operand
7115   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7116       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7117     auto it = Inst.begin();
7118     std::advance(
7119       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7120     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7121   }
7122 }
7123 
7124 //===----------------------------------------------------------------------===//
7125 // mAI
7126 //===----------------------------------------------------------------------===//
7127 
7128 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7129   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7130 }
7131 
7132 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7133   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7134 }
7135 
7136 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7137   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7138 }
7139 
7140 /// Force static initialization.
7141 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7142   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7143   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7144 }
7145 
7146 #define GET_REGISTER_MATCHER
7147 #define GET_MATCHER_IMPLEMENTATION
7148 #define GET_MNEMONIC_SPELL_CHECKER
7149 #include "AMDGPUGenAsmMatcher.inc"
7150 
7151 // This fuction should be defined after auto-generated include so that we have
7152 // MatchClassKind enum defined
7153 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7154                                                      unsigned Kind) {
7155   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7156   // But MatchInstructionImpl() expects to meet token and fails to validate
7157   // operand. This method checks if we are given immediate operand but expect to
7158   // get corresponding token.
7159   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7160   switch (Kind) {
7161   case MCK_addr64:
7162     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7163   case MCK_gds:
7164     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7165   case MCK_lds:
7166     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7167   case MCK_glc:
7168     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7169   case MCK_idxen:
7170     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7171   case MCK_offen:
7172     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7173   case MCK_SSrcB32:
7174     // When operands have expression values, they will return true for isToken,
7175     // because it is not possible to distinguish between a token and an
7176     // expression at parse time. MatchInstructionImpl() will always try to
7177     // match an operand as a token, when isToken returns true, and when the
7178     // name of the expression is not a valid token, the match will fail,
7179     // so we need to handle it here.
7180     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7181   case MCK_SSrcF32:
7182     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7183   case MCK_SoppBrTarget:
7184     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7185   case MCK_VReg32OrOff:
7186     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7187   case MCK_InterpSlot:
7188     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7189   case MCK_Attr:
7190     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7191   case MCK_AttrChan:
7192     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7193   case MCK_ImmSMEMOffset:
7194     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7195   case MCK_SReg_64:
7196   case MCK_SReg_64_XEXEC:
7197     // Null is defined as a 32-bit register but
7198     // it should also be enabled with 64-bit operands.
7199     // The following code enables it for SReg_64 operands
7200     // used as source and destination. Remaining source
7201     // operands are handled in isInlinableImm.
7202     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7203   default:
7204     return Match_InvalidOperand;
7205   }
7206 }
7207 
7208 //===----------------------------------------------------------------------===//
7209 // endpgm
7210 //===----------------------------------------------------------------------===//
7211 
7212 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7213   SMLoc S = Parser.getTok().getLoc();
7214   int64_t Imm = 0;
7215 
7216   if (!parseExpr(Imm)) {
7217     // The operand is optional, if not present default to 0
7218     Imm = 0;
7219   }
7220 
7221   if (!isUInt<16>(Imm)) {
7222     Error(S, "expected a 16-bit value");
7223     return MatchOperand_ParseFail;
7224   }
7225 
7226   Operands.push_back(
7227       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7228   return MatchOperand_Success;
7229 }
7230 
7231 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7232