//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
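
  // Example (illustrative): for a source operand written as "-|v0|" the
  // parser sets Neg and Abs, so getModifiersOperand() packs them as
  // SISrcMods::NEG | SISrcMods::ABS; for "sext(v0)" it sets Sext and returns
  // SISrcMods::SEXT. The assert in getModifiersOperand() enforces that FP
  // and integer modifiers never appear on the same operand.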

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }
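
  // Example (illustrative): a trailing "gds" on a DS instruction may first be
  // parsed as an expression referring to a symbol named "gds"; the check above
  // lets the matcher treat that symbol's name as the 'gds' token.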

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};
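
// Example (illustrative): parsing code typically builds operands through the
// factory helpers above, e.g.
//   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S));
//   Operands.push_back(AMDGPUOperand::CreateReg(this, RegNo, S, E));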

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: // fall through
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};
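
// Example (illustrative): after parsing a use of s[4:7], the parser calls
//   KernelScope.usesRegister(IS_SGPR, /*DwordRegIndex=*/4, /*RegWidth=*/4);
// which invokes usesSgprAt(7) and raises .kernel.sgpr_count to 8.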

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }
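
  // Example (illustrative): the pre-defined symbols set up above can be
  // referenced from assembly source, e.g.
  //   .if .option.machine_version_major >= 9
  //   .endif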

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                uint64_t BasicInstType, bool skipVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with an integer type of equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the operand type's floating-point semantics.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
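
// Example (illustrative): converting 0.1 to f16 merely loses precision
// (opInexact) and is accepted, while converting 1.0e10 to f16 overflows
// (opOverflow) and is rejected.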

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}
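
// Example (illustrative): isSafeTruncation(0xFFFF, 16) and
// isSafeTruncation(-1, 16) are true, but isSafeTruncation(0x1FFFF, 16) is
// false, since 0x1FFFF fits in 16 bits neither as unsigned nor as signed.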

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like shared_base with both
  // 32-bit and 64-bit operands. Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not).
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got an fp literal token.
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if the single-precision literal is inlinable.
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got an int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
1561 
1562 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1563   // Check that this immediate can be added as literal
1564   if (!isImmTy(ImmTyNone)) {
1565     return false;
1566   }
1567 
1568   if (!Imm.IsFPImm) {
1569     // We got int literal token.
1570 
1571     if (type == MVT::f64 && hasFPModifiers()) {
1572       // FP modifiers cannot be applied to int literals while preserving the
1573       // same semantics for VOP1/2/C and VOP3 because of integer truncation.
1574       // To avoid ambiguity, disable these cases.
1575       return false;
1576     }
1577 
1578     unsigned Size = type.getSizeInBits();
1579     if (Size == 64)
1580       Size = 32;
1581 
1582     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1583     // types.
1584     return isSafeTruncation(Imm.Val, Size);
1585   }
1586 
1587   // We got fp literal token
1588   if (type == MVT::f64) { // Expected 64-bit fp operand
1589     // Such literals are accepted even though their low 32 bits will be zeroed out.
1590     return true;
1591   }
1592 
1593   if (type == MVT::i64) { // Expected 64-bit int operand
1594     // We don't allow fp literals in 64-bit integer instructions. It is
1595     // unclear how we should encode them.
1596     return false;
1597   }
1598 
1599   // We allow fp literals with f16x2 operands assuming that the specified
1600   // literal goes into the lower half and the upper half is zero. We also
1601   // require that the literal can be losslessly converted to f16.
1602   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1603                      (type == MVT::v2i16)? MVT::i16 : type;
1604 
1605   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1606   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1607 }
1608 
1609 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1610   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1611 }
1612 
1613 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1614   if (AsmParser->isVI())
1615     return isVReg32();
1616   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1617     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1618   else
1619     return false;
1620 }
1621 
1622 bool AMDGPUOperand::isSDWAFP16Operand() const {
1623   return isSDWAOperand(MVT::f16);
1624 }
1625 
1626 bool AMDGPUOperand::isSDWAFP32Operand() const {
1627   return isSDWAOperand(MVT::f32);
1628 }
1629 
1630 bool AMDGPUOperand::isSDWAInt16Operand() const {
1631   return isSDWAOperand(MVT::i16);
1632 }
1633 
1634 bool AMDGPUOperand::isSDWAInt32Operand() const {
1635   return isSDWAOperand(MVT::i32);
1636 }
1637 
1638 bool AMDGPUOperand::isBoolReg() const {
1639   return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
1640     isSCSrcB64() : isSCSrcB32();
1641 }
1642 
1643 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1644 {
1645   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1646   assert(Size == 2 || Size == 4 || Size == 8);
1647 
1648   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1649 
1650   if (Imm.Mods.Abs) {
1651     Val &= ~FpSignMask;
1652   }
1653   if (Imm.Mods.Neg) {
1654     Val ^= FpSignMask;
1655   }
1656 
1657   return Val;
1658 }
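// For example, for a 4-byte operand FpSignMask is 0x80000000, so the 'neg'
// modifier turns the literal 1.0 (0x3F800000) into -1.0 (0xBF800000), and
// the 'abs' modifier turns -2.0 (0xC0000000) into 2.0 (0x40000000).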
1659 
1660 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1661   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1662                              Inst.getNumOperands())) {
1663     addLiteralImmOperand(Inst, Imm.Val,
1664                          ApplyModifiers &&
1665                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1666   } else {
1667     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1668     Inst.addOperand(MCOperand::createImm(Imm.Val));
1669   }
1670 }
1671 
1672 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1673   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1674   auto OpNum = Inst.getNumOperands();
1675   // Check that this operand accepts literals
1676   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1677 
1678   if (ApplyModifiers) {
1679     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1680     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1681     Val = applyInputFPModifiers(Val, Size);
1682   }
1683 
1684   APInt Literal(64, Val);
1685   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1686 
1687   if (Imm.IsFPImm) { // We got fp literal token
1688     switch (OpTy) {
1689     case AMDGPU::OPERAND_REG_IMM_INT64:
1690     case AMDGPU::OPERAND_REG_IMM_FP64:
1691     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1692     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1693       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1694                                        AsmParser->hasInv2PiInlineImm())) {
1695         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1696         return;
1697       }
1698 
1699       // Non-inlineable
1700       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1701         // For fp operands we check if low 32 bits are zeros
1702         if (Literal.getLoBits(32) != 0) {
1703           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1704           "Can't encode literal as exact 64-bit floating-point operand. "
1705           "Low 32-bits will be set to zero");
1706         }
1707 
1708         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1709         return;
1710       }
1711 
1712       // We don't allow fp literals in 64-bit integer instructions. It is
1713       // unclear how we should encode them. This case should be checked earlier
1714       // in predicate methods (isLiteralImm())
1715       llvm_unreachable("fp literal in 64-bit integer instruction.");
1716 
1717     case AMDGPU::OPERAND_REG_IMM_INT32:
1718     case AMDGPU::OPERAND_REG_IMM_FP32:
1719     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1720     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1721     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1722     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1723     case AMDGPU::OPERAND_REG_IMM_INT16:
1724     case AMDGPU::OPERAND_REG_IMM_FP16:
1725     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1726     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1727     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1728     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1729     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1730     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1731     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1732     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1733     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1734     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1735       bool lost;
1736       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1737       // Convert the literal to the FP semantics of the operand type
1738       FPLiteral.convert(*getOpFltSemantics(OpTy),
1739                         APFloat::rmNearestTiesToEven, &lost);
1740       // We allow precision loss but not overflow or underflow. This should be
1741       // checked earlier in isLiteralImm()
1742 
1743       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1744       Inst.addOperand(MCOperand::createImm(ImmVal));
1745       return;
1746     }
1747     default:
1748       llvm_unreachable("invalid operand size");
1749     }
1750 
1751     return;
1752   }
1753 
1754   // We got int literal token.
1755   // Only sign extend inline immediates.
1756   switch (OpTy) {
1757   case AMDGPU::OPERAND_REG_IMM_INT32:
1758   case AMDGPU::OPERAND_REG_IMM_FP32:
1759   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1760   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1761   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1762   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1763   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1764   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1765     if (isSafeTruncation(Val, 32) &&
1766         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1767                                      AsmParser->hasInv2PiInlineImm())) {
1768       Inst.addOperand(MCOperand::createImm(Val));
1769       return;
1770     }
1771 
1772     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1773     return;
1774 
1775   case AMDGPU::OPERAND_REG_IMM_INT64:
1776   case AMDGPU::OPERAND_REG_IMM_FP64:
1777   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1778   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1779     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1780       Inst.addOperand(MCOperand::createImm(Val));
1781       return;
1782     }
1783 
1784     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1785     return;
1786 
1787   case AMDGPU::OPERAND_REG_IMM_INT16:
1788   case AMDGPU::OPERAND_REG_IMM_FP16:
1789   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1790   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1791   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1792   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1793     if (isSafeTruncation(Val, 16) &&
1794         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1795                                      AsmParser->hasInv2PiInlineImm())) {
1796       Inst.addOperand(MCOperand::createImm(Val));
1797       return;
1798     }
1799 
1800     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1801     return;
1802 
1803   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1804   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1805   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1806   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1807     assert(isSafeTruncation(Val, 16));
1808     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1809                                         AsmParser->hasInv2PiInlineImm()));
1810 
1811     Inst.addOperand(MCOperand::createImm(Val));
1812     return;
1813   }
1814   default:
1815     llvm_unreachable("invalid operand size");
1816   }
1817 }
1818 
1819 template <unsigned Bitwidth>
1820 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1821   APInt Literal(64, Imm.Val);
1822 
1823   if (!Imm.IsFPImm) {
1824     // We got int literal token.
1825     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1826     return;
1827   }
1828 
1829   bool Lost;
1830   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1831   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1832                     APFloat::rmNearestTiesToEven, &Lost);
1833   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1834 }
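// Illustrative usage, assuming the usual KImm operand widths: a 16-bit K
// constant (e.g. the one in v_madmk_f16) would be added via
// addKImmFPOperands<16>, and a 32-bit K constant (e.g. the one in
// v_madmk_f32) via addKImmFPOperands<32>.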
1835 
1836 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1837   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1838 }
1839 
1840 static bool isInlineValue(unsigned Reg) {
1841   switch (Reg) {
1842   case AMDGPU::SRC_SHARED_BASE:
1843   case AMDGPU::SRC_SHARED_LIMIT:
1844   case AMDGPU::SRC_PRIVATE_BASE:
1845   case AMDGPU::SRC_PRIVATE_LIMIT:
1846   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1847     return true;
1848   case AMDGPU::SRC_VCCZ:
1849   case AMDGPU::SRC_EXECZ:
1850   case AMDGPU::SRC_SCC:
1851     return true;
1852   default:
1853     return false;
1854   }
1855 }
1856 
1857 bool AMDGPUOperand::isInlineValue() const {
1858   return isRegKind() && ::isInlineValue(getReg());
1859 }
1860 
1861 //===----------------------------------------------------------------------===//
1862 // AsmParser
1863 //===----------------------------------------------------------------------===//
1864 
1865 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1866   if (Is == IS_VGPR) {
1867     switch (RegWidth) {
1868       default: return -1;
1869       case 1: return AMDGPU::VGPR_32RegClassID;
1870       case 2: return AMDGPU::VReg_64RegClassID;
1871       case 3: return AMDGPU::VReg_96RegClassID;
1872       case 4: return AMDGPU::VReg_128RegClassID;
1873       case 8: return AMDGPU::VReg_256RegClassID;
1874       case 16: return AMDGPU::VReg_512RegClassID;
1875     }
1876   } else if (Is == IS_TTMP) {
1877     switch (RegWidth) {
1878       default: return -1;
1879       case 1: return AMDGPU::TTMP_32RegClassID;
1880       case 2: return AMDGPU::TTMP_64RegClassID;
1881       case 4: return AMDGPU::TTMP_128RegClassID;
1882       case 8: return AMDGPU::TTMP_256RegClassID;
1883       case 16: return AMDGPU::TTMP_512RegClassID;
1884     }
1885   } else if (Is == IS_SGPR) {
1886     switch (RegWidth) {
1887       default: return -1;
1888       case 1: return AMDGPU::SGPR_32RegClassID;
1889       case 2: return AMDGPU::SGPR_64RegClassID;
1890       case 4: return AMDGPU::SGPR_128RegClassID;
1891       case 8: return AMDGPU::SGPR_256RegClassID;
1892       case 16: return AMDGPU::SGPR_512RegClassID;
1893     }
1894   } else if (Is == IS_AGPR) {
1895     switch (RegWidth) {
1896       default: return -1;
1897       case 1: return AMDGPU::AGPR_32RegClassID;
1898       case 2: return AMDGPU::AReg_64RegClassID;
1899       case 4: return AMDGPU::AReg_128RegClassID;
1900       case 16: return AMDGPU::AReg_512RegClassID;
1901       case 32: return AMDGPU::AReg_1024RegClassID;
1902     }
1903   }
1904   return -1;
1905 }
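// For example, (IS_VGPR, RegWidth == 4) maps to VReg_128RegClassID (four
// consecutive 32-bit VGPRs), while an unsupported width such as
// (IS_SGPR, 3) yields -1 and is rejected by the caller.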
1906 
1907 static unsigned getSpecialRegForName(StringRef RegName) {
1908   return StringSwitch<unsigned>(RegName)
1909     .Case("exec", AMDGPU::EXEC)
1910     .Case("vcc", AMDGPU::VCC)
1911     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1912     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1913     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1914     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1915     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1916     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1917     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1918     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1919     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1920     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1921     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1922     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1923     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1924     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1925     .Case("m0", AMDGPU::M0)
1926     .Case("vccz", AMDGPU::SRC_VCCZ)
1927     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1928     .Case("execz", AMDGPU::SRC_EXECZ)
1929     .Case("src_execz", AMDGPU::SRC_EXECZ)
1930     .Case("scc", AMDGPU::SRC_SCC)
1931     .Case("src_scc", AMDGPU::SRC_SCC)
1932     .Case("tba", AMDGPU::TBA)
1933     .Case("tma", AMDGPU::TMA)
1934     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1935     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1936     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1937     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1938     .Case("vcc_lo", AMDGPU::VCC_LO)
1939     .Case("vcc_hi", AMDGPU::VCC_HI)
1940     .Case("exec_lo", AMDGPU::EXEC_LO)
1941     .Case("exec_hi", AMDGPU::EXEC_HI)
1942     .Case("tma_lo", AMDGPU::TMA_LO)
1943     .Case("tma_hi", AMDGPU::TMA_HI)
1944     .Case("tba_lo", AMDGPU::TBA_LO)
1945     .Case("tba_hi", AMDGPU::TBA_HI)
1946     .Case("null", AMDGPU::SGPR_NULL)
1947     .Default(0);
1948 }
1949 
1950 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1951                                     SMLoc &EndLoc) {
1952   auto R = parseRegister();
1953   if (!R) return true;
1954   assert(R->isReg());
1955   RegNo = R->getReg();
1956   StartLoc = R->getStartLoc();
1957   EndLoc = R->getEndLoc();
1958   return false;
1959 }
1960 
1961 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1962                                             RegisterKind RegKind, unsigned Reg1,
1963                                             unsigned RegNum) {
1964   switch (RegKind) {
1965   case IS_SPECIAL:
1966     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1967       Reg = AMDGPU::EXEC;
1968       RegWidth = 2;
1969       return true;
1970     }
1971     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1972       Reg = AMDGPU::FLAT_SCR;
1973       RegWidth = 2;
1974       return true;
1975     }
1976     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1977       Reg = AMDGPU::XNACK_MASK;
1978       RegWidth = 2;
1979       return true;
1980     }
1981     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1982       Reg = AMDGPU::VCC;
1983       RegWidth = 2;
1984       return true;
1985     }
1986     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1987       Reg = AMDGPU::TBA;
1988       RegWidth = 2;
1989       return true;
1990     }
1991     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1992       Reg = AMDGPU::TMA;
1993       RegWidth = 2;
1994       return true;
1995     }
1996     return false;
1997   case IS_VGPR:
1998   case IS_SGPR:
1999   case IS_AGPR:
2000   case IS_TTMP:
2001     if (Reg1 != Reg + RegWidth) {
2002       return false;
2003     }
2004     RegWidth++;
2005     return true;
2006   default:
2007     llvm_unreachable("unexpected register kind");
2008   }
2009 }
2010 
2011 static const StringRef Registers[] = {
2012   { "v" },
2013   { "s" },
2014   { "ttmp" },
2015   { "acc" },
2016   { "a" },
2017 };
2018 
2019 bool
2020 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2021                             const AsmToken &NextToken) const {
2022 
2023   // A list of consecutive registers: [s0,s1,s2,s3]
2024   if (Token.is(AsmToken::LBrac))
2025     return true;
2026 
2027   if (!Token.is(AsmToken::Identifier))
2028     return false;
2029 
2030   // A single register like s0 or a range of registers like s[0:1]
2031 
2032   StringRef RegName = Token.getString();
2033 
2034   for (StringRef Reg : Registers) {
2035     if (RegName.startswith(Reg)) {
2036       if (Reg.size() < RegName.size()) {
2037         unsigned RegNum;
2038         // A single register with an index: rXX
2039         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2040           return true;
2041       } else {
2042         // A range of registers: r[XX:YY].
2043         if (NextToken.is(AsmToken::LBrac))
2044           return true;
2045       }
2046     }
2047   }
2048 
2049   return getSpecialRegForName(RegName) != 0;
2050 }
2051 
2052 bool
2053 AMDGPUAsmParser::isRegister()
2054 {
2055   return isRegister(getToken(), peekToken());
2056 }
2057 
2058 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2059                                           unsigned &RegNum, unsigned &RegWidth,
2060                                           unsigned *DwordRegIndex) {
2061   if (DwordRegIndex) { *DwordRegIndex = 0; }
2062   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2063   if (getLexer().is(AsmToken::Identifier)) {
2064     StringRef RegName = Parser.getTok().getString();
2065     if ((Reg = getSpecialRegForName(RegName))) {
2066       Parser.Lex();
2067       RegKind = IS_SPECIAL;
2068     } else {
2069       unsigned RegNumIndex = 0;
2070       if (RegName[0] == 'v') {
2071         RegNumIndex = 1;
2072         RegKind = IS_VGPR;
2073       } else if (RegName[0] == 's') {
2074         RegNumIndex = 1;
2075         RegKind = IS_SGPR;
2076       } else if (RegName[0] == 'a') {
2077         RegNumIndex = RegName.startswith("acc") ? 3 : 1;
2078         RegKind = IS_AGPR;
2079       } else if (RegName.startswith("ttmp")) {
2080         RegNumIndex = strlen("ttmp");
2081         RegKind = IS_TTMP;
2082       } else {
2083         return false;
2084       }
2085       if (RegName.size() > RegNumIndex) {
2086         // Single 32-bit register: vXX.
2087         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
2088           return false;
2089         Parser.Lex();
2090         RegWidth = 1;
2091       } else {
2092         // Range of registers: v[XX:YY]. ":YY" is optional.
2093         Parser.Lex();
2094         int64_t RegLo, RegHi;
2095         if (getLexer().isNot(AsmToken::LBrac))
2096           return false;
2097         Parser.Lex();
2098 
2099         if (getParser().parseAbsoluteExpression(RegLo))
2100           return false;
2101 
2102         const bool isRBrace = getLexer().is(AsmToken::RBrac);
2103         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
2104           return false;
2105         Parser.Lex();
2106 
2107         if (isRBrace) {
2108           RegHi = RegLo;
2109         } else {
2110           if (getParser().parseAbsoluteExpression(RegHi))
2111             return false;
2112 
2113           if (getLexer().isNot(AsmToken::RBrac))
2114             return false;
2115           Parser.Lex();
2116         }
2117         RegNum = (unsigned) RegLo;
2118         RegWidth = (RegHi - RegLo) + 1;
2119       }
2120     }
2121   } else if (getLexer().is(AsmToken::LBrac)) {
2122     // List of consecutive registers: [s0,s1,s2,s3]
2123     Parser.Lex();
2124     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
2125       return false;
2126     if (RegWidth != 1)
2127       return false;
2128     RegisterKind RegKind1;
2129     unsigned Reg1, RegNum1, RegWidth1;
2130     do {
2131       if (getLexer().is(AsmToken::Comma)) {
2132         Parser.Lex();
2133       } else if (getLexer().is(AsmToken::RBrac)) {
2134         Parser.Lex();
2135         break;
2136       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
2137         if (RegWidth1 != 1) {
2138           return false;
2139         }
2140         if (RegKind1 != RegKind) {
2141           return false;
2142         }
2143         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
2144           return false;
2145         }
2146       } else {
2147         return false;
2148       }
2149     } while (true);
2150   } else {
2151     return false;
2152   }
2153   switch (RegKind) {
2154   case IS_SPECIAL:
2155     RegNum = 0;
2156     RegWidth = 1;
2157     break;
2158   case IS_VGPR:
2159   case IS_SGPR:
2160   case IS_AGPR:
2161   case IS_TTMP:
2162   {
2163     unsigned Size = 1;
2164     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2165       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
2166       Size = std::min(RegWidth, 4u);
2167     }
2168     if (RegNum % Size != 0)
2169       return false;
2170     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
2171     RegNum = RegNum / Size;
2172     int RCID = getRegClass(RegKind, RegWidth);
2173     if (RCID == -1)
2174       return false;
2175     const MCRegisterClass RC = TRI->getRegClass(RCID);
2176     if (RegNum >= RC.getNumRegs())
2177       return false;
2178     Reg = RC.getRegister(RegNum);
2179     break;
2180   }
2181 
2182   default:
2183     llvm_unreachable("unexpected register kind");
2184   }
2185 
2186   if (!subtargetHasRegister(*TRI, Reg))
2187     return false;
2188   return true;
2189 }
2190 
2191 Optional<StringRef>
2192 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2193   switch (RegKind) {
2194   case IS_VGPR:
2195     return StringRef(".amdgcn.next_free_vgpr");
2196   case IS_SGPR:
2197     return StringRef(".amdgcn.next_free_sgpr");
2198   default:
2199     return None;
2200   }
2201 }
2202 
2203 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2204   auto SymbolName = getGprCountSymbolName(RegKind);
2205   assert(SymbolName && "initializing invalid register kind");
2206   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2207   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2208 }
2209 
2210 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2211                                             unsigned DwordRegIndex,
2212                                             unsigned RegWidth) {
2213   // Symbols are only defined for GCN targets
2214   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2215     return true;
2216 
2217   auto SymbolName = getGprCountSymbolName(RegKind);
2218   if (!SymbolName)
2219     return true;
2220   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2221 
2222   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2223   int64_t OldCount;
2224 
2225   if (!Sym->isVariable())
2226     return !Error(getParser().getTok().getLoc(),
2227                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2228   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2229     return !Error(
2230         getParser().getTok().getLoc(),
2231         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2232 
2233   if (OldCount <= NewMax)
2234     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2235 
2236   return true;
2237 }
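// For example, after parsing v[4:7] (DwordRegIndex == 4, RegWidth == 4),
// .amdgcn.next_free_vgpr is raised to at least 8, i.e. one past the
// highest VGPR index used so far.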
2238 
2239 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2240   const auto &Tok = Parser.getTok();
2241   SMLoc StartLoc = Tok.getLoc();
2242   SMLoc EndLoc = Tok.getEndLoc();
2243   RegisterKind RegKind;
2244   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2245 
2246   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2247     // FIXME: Improve error messages (bug 41303).
2248     Error(StartLoc, "not a valid operand.");
2249     return nullptr;
2250   }
2251   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2252     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2253       return nullptr;
2254   } else
2255     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2256   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2257 }
2258 
2259 OperandMatchResultTy
2260 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2261   // TODO: add syntactic sugar for 1/(2*PI)
2262 
2263   assert(!isRegister());
2264   assert(!isModifier());
2265 
2266   const auto& Tok = getToken();
2267   const auto& NextTok = peekToken();
2268   bool IsReal = Tok.is(AsmToken::Real);
2269   SMLoc S = getLoc();
2270   bool Negate = false;
2271 
2272   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2273     lex();
2274     IsReal = true;
2275     Negate = true;
2276   }
2277 
2278   if (IsReal) {
2279     // Floating-point expressions are not supported.
2280     // Only floating-point literals with an
2281     // optional sign are accepted.
2282 
2283     StringRef Num = getTokenStr();
2284     lex();
2285 
2286     APFloat RealVal(APFloat::IEEEdouble());
2287     auto roundMode = APFloat::rmNearestTiesToEven;
2288     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2289       return MatchOperand_ParseFail;
2290     }
2291     if (Negate)
2292       RealVal.changeSign();
2293 
2294     Operands.push_back(
2295       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2296                                AMDGPUOperand::ImmTyNone, true));
2297 
2298     return MatchOperand_Success;
2299 
2300   } else {
2301     int64_t IntVal;
2302     const MCExpr *Expr;
2303     SMLoc S = getLoc();
2304 
2305     if (HasSP3AbsModifier) {
2306       // This is a workaround for handling expressions
2307       // as arguments of SP3 'abs' modifier, for example:
2308       //     |1.0|
2309       //     |-1|
2310       //     |1+x|
2311       // This syntax is not compatible with the syntax of standard
2312       // MC expressions (due to the trailing '|').
2313       SMLoc EndLoc;
2314       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2315         return MatchOperand_ParseFail;
2316     } else {
2317       if (Parser.parseExpression(Expr))
2318         return MatchOperand_ParseFail;
2319     }
2320 
2321     if (Expr->evaluateAsAbsolute(IntVal)) {
2322       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2323     } else {
2324       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2325     }
2326 
2327     return MatchOperand_Success;
2328   }
2331 }
2332 
2333 OperandMatchResultTy
2334 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2335   if (!isRegister())
2336     return MatchOperand_NoMatch;
2337 
2338   if (auto R = parseRegister()) {
2339     assert(R->isReg());
2340     Operands.push_back(std::move(R));
2341     return MatchOperand_Success;
2342   }
2343   return MatchOperand_ParseFail;
2344 }
2345 
2346 OperandMatchResultTy
2347 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2348   auto res = parseReg(Operands);
2349   if (res != MatchOperand_NoMatch) {
2350     return res;
2351   } else if (isModifier()) {
2352     return MatchOperand_NoMatch;
2353   } else {
2354     return parseImm(Operands, HasSP3AbsMod);
2355   }
2356 }
2357 
2358 bool
2359 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2360   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2361     const auto &str = Token.getString();
2362     return str == "abs" || str == "neg" || str == "sext";
2363   }
2364   return false;
2365 }
2366 
2367 bool
2368 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2369   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2370 }
2371 
2372 bool
2373 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2374   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2375 }
2376 
2377 bool
2378 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2379   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2380 }
2381 
2382 // Check if this is an operand modifier or an opcode modifier
2383 // which may look like an expression but is not. We should
2384 // avoid parsing these modifiers as expressions. Currently
2385 // recognized sequences are:
2386 //   |...|
2387 //   abs(...)
2388 //   neg(...)
2389 //   sext(...)
2390 //   -reg
2391 //   -|...|
2392 //   -abs(...)
2393 //   name:...
2394 // Note that simple opcode modifiers like 'gds' may be parsed as
2395 // expressions; this is a special case. See getExpressionAsToken.
2396 //
2397 bool
2398 AMDGPUAsmParser::isModifier() {
2399 
2400   AsmToken Tok = getToken();
2401   AsmToken NextToken[2];
2402   peekTokens(NextToken);
2403 
2404   return isOperandModifier(Tok, NextToken[0]) ||
2405          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2406          isOpcodeModifierWithVal(Tok, NextToken[0]);
2407 }
2408 
2409 // Check if the current token is an SP3 'neg' modifier.
2410 // Currently this modifier is allowed in the following context:
2411 //
2412 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2413 // 2. Before an 'abs' modifier: -abs(...)
2414 // 3. Before an SP3 'abs' modifier: -|...|
2415 //
2416 // In all other cases "-" is handled as a part
2417 // of an expression that follows the sign.
2418 //
2419 // Note: When "-" is followed by an integer literal,
2420 // this is interpreted as integer negation rather
2421 // than a floating-point NEG modifier applied to the literal.
2422 // Besides being counter-intuitive, such use of a floating-point
2423 // NEG modifier would have resulted in different meanings
2424 // of integer literals used with VOP1/2/C and VOP3,
2425 // for example:
2426 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2427 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2428 // Negative fp literals with a preceding "-" are
2429 // handled likewise for uniformity.
2430 //
2431 bool
2432 AMDGPUAsmParser::parseSP3NegModifier() {
2433 
2434   AsmToken NextToken[2];
2435   peekTokens(NextToken);
2436 
2437   if (isToken(AsmToken::Minus) &&
2438       (isRegister(NextToken[0], NextToken[1]) ||
2439        NextToken[0].is(AsmToken::Pipe) ||
2440        isId(NextToken[0], "abs"))) {
2441     lex();
2442     return true;
2443   }
2444 
2445   return false;
2446 }
2447 
2448 OperandMatchResultTy
2449 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2450                                               bool AllowImm) {
2451   bool Neg, SP3Neg;
2452   bool Abs, SP3Abs;
2453   SMLoc Loc;
2454 
2455   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2456   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2457     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2458     return MatchOperand_ParseFail;
2459   }
2460 
2461   SP3Neg = parseSP3NegModifier();
2462 
2463   Loc = getLoc();
2464   Neg = trySkipId("neg");
2465   if (Neg && SP3Neg) {
2466     Error(Loc, "expected register or immediate");
2467     return MatchOperand_ParseFail;
2468   }
2469   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2470     return MatchOperand_ParseFail;
2471 
2472   Abs = trySkipId("abs");
2473   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2474     return MatchOperand_ParseFail;
2475 
2476   Loc = getLoc();
2477   SP3Abs = trySkipToken(AsmToken::Pipe);
2478   if (Abs && SP3Abs) {
2479     Error(Loc, "expected register or immediate");
2480     return MatchOperand_ParseFail;
2481   }
2482 
2483   OperandMatchResultTy Res;
2484   if (AllowImm) {
2485     Res = parseRegOrImm(Operands, SP3Abs);
2486   } else {
2487     Res = parseReg(Operands);
2488   }
2489   if (Res != MatchOperand_Success) {
2490     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2491   }
2492 
2493   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2494     return MatchOperand_ParseFail;
2495   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2496     return MatchOperand_ParseFail;
2497   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2498     return MatchOperand_ParseFail;
2499 
2500   AMDGPUOperand::Modifiers Mods;
2501   Mods.Abs = Abs || SP3Abs;
2502   Mods.Neg = Neg || SP3Neg;
2503 
2504   if (Mods.hasFPModifiers()) {
2505     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2506     if (Op.isExpr()) {
2507       Error(Op.getStartLoc(), "expected an absolute expression");
2508       return MatchOperand_ParseFail;
2509     }
2510     Op.setModifiers(Mods);
2511   }
2512   return MatchOperand_Success;
2513 }
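// For example, both "-abs(v1)" and the SP3 form "-|v1|" parse to a v1
// register operand with Mods.Neg and Mods.Abs set, while mixing the two
// spellings of the same modifier, e.g. "abs(|v1|)", is rejected.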
2514 
2515 OperandMatchResultTy
2516 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2517                                                bool AllowImm) {
2518   bool Sext = trySkipId("sext");
2519   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2520     return MatchOperand_ParseFail;
2521 
2522   OperandMatchResultTy Res;
2523   if (AllowImm) {
2524     Res = parseRegOrImm(Operands);
2525   } else {
2526     Res = parseReg(Operands);
2527   }
2528   if (Res != MatchOperand_Success) {
2529     return Sext? MatchOperand_ParseFail : Res;
2530   }
2531 
2532   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2533     return MatchOperand_ParseFail;
2534 
2535   AMDGPUOperand::Modifiers Mods;
2536   Mods.Sext = Sext;
2537 
2538   if (Mods.hasIntModifiers()) {
2539     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2540     if (Op.isExpr()) {
2541       Error(Op.getStartLoc(), "expected an absolute expression");
2542       return MatchOperand_ParseFail;
2543     }
2544     Op.setModifiers(Mods);
2545   }
2546 
2547   return MatchOperand_Success;
2548 }
2549 
2550 OperandMatchResultTy
2551 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2552   return parseRegOrImmWithFPInputMods(Operands, false);
2553 }
2554 
2555 OperandMatchResultTy
2556 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2557   return parseRegOrImmWithIntInputMods(Operands, false);
2558 }
2559 
2560 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2561   auto Loc = getLoc();
2562   if (trySkipId("off")) {
2563     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2564                                                 AMDGPUOperand::ImmTyOff, false));
2565     return MatchOperand_Success;
2566   }
2567 
2568   if (!isRegister())
2569     return MatchOperand_NoMatch;
2570 
2571   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2572   if (Reg) {
2573     Operands.push_back(std::move(Reg));
2574     return MatchOperand_Success;
2575   }
2576 
2577   return MatchOperand_ParseFail;
2578 
2579 }
2580 
2581 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2582   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2583 
2584   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2585       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2586       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2587       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2588     return Match_InvalidOperand;
2589 
2590   if ((TSFlags & SIInstrFlags::VOP3) &&
2591       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2592       getForcedEncodingSize() != 64)
2593     return Match_PreferE32;
2594 
2595   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2596       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2597     // v_mac_f32/16 allow only dst_sel == DWORD.
2598     auto OpNum =
2599         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2600     const auto &Op = Inst.getOperand(OpNum);
2601     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2602       return Match_InvalidOperand;
2603     }
2604   }
2605 
2606   return Match_Success;
2607 }
2608 
2609 // What asm variants we should check
2610 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2611   if (getForcedEncodingSize() == 32) {
2612     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2613     return makeArrayRef(Variants);
2614   }
2615 
2616   if (isForcedVOP3()) {
2617     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2618     return makeArrayRef(Variants);
2619   }
2620 
2621   if (isForcedSDWA()) {
2622     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2623                                         AMDGPUAsmVariants::SDWA9};
2624     return makeArrayRef(Variants);
2625   }
2626 
2627   if (isForcedDPP()) {
2628     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2629     return makeArrayRef(Variants);
2630   }
2631 
2632   static const unsigned Variants[] = {
2633     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2634     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2635   };
2636 
2637   return makeArrayRef(Variants);
2638 }
2639 
2640 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2641   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2642   const unsigned Num = Desc.getNumImplicitUses();
2643   for (unsigned i = 0; i < Num; ++i) {
2644     unsigned Reg = Desc.ImplicitUses[i];
2645     switch (Reg) {
2646     case AMDGPU::FLAT_SCR:
2647     case AMDGPU::VCC:
2648     case AMDGPU::VCC_LO:
2649     case AMDGPU::VCC_HI:
2650     case AMDGPU::M0:
2651     case AMDGPU::SGPR_NULL:
2652       return Reg;
2653     default:
2654       break;
2655     }
2656   }
2657   return AMDGPU::NoRegister;
2658 }
2659 
2660 // NB: This code is correct only when used to check constant
2661 // bus limitations because GFX7 supports no f16 inline constants.
2662 // Note that there are no cases when a GFX7 opcode violates
2663 // constant bus limitations due to the use of an f16 constant.
2664 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2665                                        unsigned OpIdx) const {
2666   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2667 
2668   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2669     return false;
2670   }
2671 
2672   const MCOperand &MO = Inst.getOperand(OpIdx);
2673 
2674   int64_t Val = MO.getImm();
2675   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2676 
2677   switch (OpSize) { // expected operand size
2678   case 8:
2679     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2680   case 4:
2681     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2682   case 2: {
2683     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2684     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2685         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2686         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2687         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2688         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2689         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2690       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2691     } else {
2692       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2693     }
2694   }
2695   default:
2696     llvm_unreachable("invalid operand size");
2697   }
2698 }
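// For reference, the inlinable values are the integers -16..64 plus a small
// set of FP constants (0.0, +-0.5, +-1.0, +-2.0, +-4.0, and 1/(2*pi) on
// subtargets reporting hasInv2PiInlineImm()); anything else must be encoded
// as a literal and therefore occupies the constant bus.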
2699 
2700 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2701   const MCOperand &MO = Inst.getOperand(OpIdx);
2702   if (MO.isImm()) {
2703     return !isInlineConstant(Inst, OpIdx);
2704   }
2705   return !MO.isReg() ||
2706          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2707 }
2708 
2709 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2710   const unsigned Opcode = Inst.getOpcode();
2711   const MCInstrDesc &Desc = MII.get(Opcode);
2712   unsigned ConstantBusUseCount = 0;
2713   unsigned NumLiterals = 0;
2714   unsigned LiteralSize;
2715 
2716   if (Desc.TSFlags &
2717       (SIInstrFlags::VOPC |
2718        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2719        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2720        SIInstrFlags::SDWA)) {
2721     // Check special imm operands (used by madmk, etc)
2722     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2723       ++ConstantBusUseCount;
2724     }
2725 
2726     SmallDenseSet<unsigned> SGPRsUsed;
2727     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2728     if (SGPRUsed != AMDGPU::NoRegister) {
2729       SGPRsUsed.insert(SGPRUsed);
2730       ++ConstantBusUseCount;
2731     }
2732 
2733     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2734     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2735     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2736 
2737     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2738 
2739     for (int OpIdx : OpIndices) {
2740       if (OpIdx == -1) break;
2741 
2742       const MCOperand &MO = Inst.getOperand(OpIdx);
2743       if (usesConstantBus(Inst, OpIdx)) {
2744         if (MO.isReg()) {
2745           const unsigned Reg = mc2PseudoReg(MO.getReg());
2746           // Pairs of registers with a partial intersection like these
2747           //   s0, s[0:1]
2748           //   flat_scratch_lo, flat_scratch
2749           //   flat_scratch_lo, flat_scratch_hi
2750           // are theoretically valid but they are disabled anyway.
2751           // Note that this code mimics SIInstrInfo::verifyInstruction
2752           if (!SGPRsUsed.count(Reg)) {
2753             SGPRsUsed.insert(Reg);
2754             ++ConstantBusUseCount;
2755           }
2756         } else { // Expression or a literal
2757 
2758           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2759             continue; // special operand like VINTERP attr_chan
2760 
2761           // An instruction may use only one literal.
2762           // This has been validated in a previous step.
2763           // See validateVOP3Literal.
2764           // This literal may be used as more than one operand.
2765           // If all these operands are of the same size,
2766           // this literal counts as one scalar value.
2767           // Otherwise it counts as 2 scalar values.
2768           // See "GFX10 Shader Programming", section 3.6.2.3.
2769 
2770           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2771           if (Size < 4) Size = 4;
2772 
2773           if (NumLiterals == 0) {
2774             NumLiterals = 1;
2775             LiteralSize = Size;
2776           } else if (LiteralSize != Size) {
2777             NumLiterals = 2;
2778           }
2779         }
2780       }
2781     }
2782   }
2783   ConstantBusUseCount += NumLiterals;
2784 
2785   if (isGFX10())
2786     return ConstantBusUseCount <= 2;
2787 
2788   return ConstantBusUseCount <= 1;
2789 }
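// For example, "v_add_f32_e64 v0, s0, s1" reads two different SGPRs over
// the constant bus, so it is rejected on pre-GFX10 targets (limit 1) but
// accepted on GFX10 (limit 2).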
2790 
2791 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2792   const unsigned Opcode = Inst.getOpcode();
2793   const MCInstrDesc &Desc = MII.get(Opcode);
2794 
2795   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2796   if (DstIdx == -1 ||
2797       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2798     return true;
2799   }
2800 
2801   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2802 
2803   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2804   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2805   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2806 
2807   assert(DstIdx != -1);
2808   const MCOperand &Dst = Inst.getOperand(DstIdx);
2809   assert(Dst.isReg());
2810   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2811 
2812   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2813 
2814   for (int SrcIdx : SrcIndices) {
2815     if (SrcIdx == -1) break;
2816     const MCOperand &Src = Inst.getOperand(SrcIdx);
2817     if (Src.isReg()) {
2818       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2819       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2820         return false;
2821       }
2822     }
2823   }
2824 
2825   return true;
2826 }
2827 
2828 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2829 
2830   const unsigned Opc = Inst.getOpcode();
2831   const MCInstrDesc &Desc = MII.get(Opc);
2832 
2833   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2834     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2835     assert(ClampIdx != -1);
2836     return Inst.getOperand(ClampIdx).getImm() == 0;
2837   }
2838 
2839   return true;
2840 }
2841 
2842 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2843 
2844   const unsigned Opc = Inst.getOpcode();
2845   const MCInstrDesc &Desc = MII.get(Opc);
2846 
2847   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2848     return true;
2849 
2850   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2851   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2852   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2853 
2854   assert(VDataIdx != -1);
2855   assert(DMaskIdx != -1);
2856   assert(TFEIdx != -1);
2857 
2858   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2859   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2860   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2861   if (DMask == 0)
2862     DMask = 1;
2863 
2864   unsigned DataSize =
2865     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2866   if (hasPackedD16()) {
2867     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2868     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2869       DataSize = (DataSize + 1) / 2;
2870   }
2871 
2872   return (VDataSize / 4) == DataSize + TFESize;
2873 }
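// For example, with dmask == 0b0111 (and d16 clear) three channels are
// enabled, so vdata must be a 3-dword register, or 4 dwords when tfe is
// also set, since tfe appends one extra dword for the texture-fail status.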
2874 
2875 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2876   const unsigned Opc = Inst.getOpcode();
2877   const MCInstrDesc &Desc = MII.get(Opc);
2878 
2879   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2880     return true;
2881 
2882   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2883   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2884       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2885   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2886   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2887   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2888 
2889   assert(VAddr0Idx != -1);
2890   assert(SrsrcIdx != -1);
2891   assert(DimIdx != -1);
2892   assert(SrsrcIdx > VAddr0Idx);
2893 
2894   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2895   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2896   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2897   unsigned VAddrSize =
2898       IsNSA ? SrsrcIdx - VAddr0Idx
2899             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2900 
2901   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2902                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2903                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2904                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2905   if (!IsNSA) {
2906     if (AddrSize > 8)
2907       AddrSize = 16;
2908     else if (AddrSize > 4)
2909       AddrSize = 8;
2910   }
2911 
2912   return VAddrSize == AddrSize;
2913 }
2914 
2915 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2916 
2917   const unsigned Opc = Inst.getOpcode();
2918   const MCInstrDesc &Desc = MII.get(Opc);
2919 
2920   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2921     return true;
2922   if (!Desc.mayLoad() || !Desc.mayStore())
2923     return true; // Not atomic
2924 
2925   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2926   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2927 
2928   // This is an incomplete check because image_atomic_cmpswap
2929   // may only use 0x3 and 0xf while other atomic operations
2930   // may use 0x1 and 0x3. However these limitations are
2931   // verified when we check that dmask matches dst size.
2932   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2933 }
2934 
2935 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2936 
2937   const unsigned Opc = Inst.getOpcode();
2938   const MCInstrDesc &Desc = MII.get(Opc);
2939 
2940   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2941     return true;
2942 
2943   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2944   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2945 
2946   // GATHER4 instructions use dmask in a different fashion compared to
2947   // other MIMG instructions. The only useful DMASK values are
2948   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2949   // (red,red,red,red) etc.) The ISA document doesn't mention
2950   // this.
2951   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2952 }
2953 
2954 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2955 
2956   const unsigned Opc = Inst.getOpcode();
2957   const MCInstrDesc &Desc = MII.get(Opc);
2958 
2959   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2960     return true;
2961 
2962   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2963   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2964     if (isCI() || isSI())
2965       return false;
2966   }
2967 
2968   return true;
2969 }
2970 
2971 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2972   const unsigned Opc = Inst.getOpcode();
2973   const MCInstrDesc &Desc = MII.get(Opc);
2974 
2975   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2976     return true;
2977 
2978   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2979   if (DimIdx < 0)
2980     return true;
2981 
2982   long Imm = Inst.getOperand(DimIdx).getImm();
2983   if (Imm < 0 || Imm >= 8)
2984     return false;
2985 
2986   return true;
2987 }
2988 
2989 static bool IsRevOpcode(const unsigned Opcode)
2990 {
2991   switch (Opcode) {
2992   case AMDGPU::V_SUBREV_F32_e32:
2993   case AMDGPU::V_SUBREV_F32_e64:
2994   case AMDGPU::V_SUBREV_F32_e32_gfx10:
2995   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
2996   case AMDGPU::V_SUBREV_F32_e32_vi:
2997   case AMDGPU::V_SUBREV_F32_e64_gfx10:
2998   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
2999   case AMDGPU::V_SUBREV_F32_e64_vi:
3000 
3001   case AMDGPU::V_SUBREV_I32_e32:
3002   case AMDGPU::V_SUBREV_I32_e64:
3003   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3004   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3005 
3006   case AMDGPU::V_SUBBREV_U32_e32:
3007   case AMDGPU::V_SUBBREV_U32_e64:
3008   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3009   case AMDGPU::V_SUBBREV_U32_e32_vi:
3010   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3011   case AMDGPU::V_SUBBREV_U32_e64_vi:
3012 
3013   case AMDGPU::V_SUBREV_U32_e32:
3014   case AMDGPU::V_SUBREV_U32_e64:
3015   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3016   case AMDGPU::V_SUBREV_U32_e32_vi:
3017   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3018   case AMDGPU::V_SUBREV_U32_e64_vi:
3019 
3020   case AMDGPU::V_SUBREV_F16_e32:
3021   case AMDGPU::V_SUBREV_F16_e64:
3022   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3023   case AMDGPU::V_SUBREV_F16_e32_vi:
3024   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3025   case AMDGPU::V_SUBREV_F16_e64_vi:
3026 
3027   case AMDGPU::V_SUBREV_U16_e32:
3028   case AMDGPU::V_SUBREV_U16_e64:
3029   case AMDGPU::V_SUBREV_U16_e32_vi:
3030   case AMDGPU::V_SUBREV_U16_e64_vi:
3031 
3032   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3033   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3034   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3035 
3036   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3037   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3038 
3039   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3040   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3041 
3042   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3043   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3044 
3045   case AMDGPU::V_LSHRREV_B32_e32:
3046   case AMDGPU::V_LSHRREV_B32_e64:
3047   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3048   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3049   case AMDGPU::V_LSHRREV_B32_e32_vi:
3050   case AMDGPU::V_LSHRREV_B32_e64_vi:
3051   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3052   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3053 
3054   case AMDGPU::V_ASHRREV_I32_e32:
3055   case AMDGPU::V_ASHRREV_I32_e64:
3056   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3057   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3058   case AMDGPU::V_ASHRREV_I32_e32_vi:
3059   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3060   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3061   case AMDGPU::V_ASHRREV_I32_e64_vi:
3062 
3063   case AMDGPU::V_LSHLREV_B32_e32:
3064   case AMDGPU::V_LSHLREV_B32_e64:
3065   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3066   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3067   case AMDGPU::V_LSHLREV_B32_e32_vi:
3068   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3069   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3070   case AMDGPU::V_LSHLREV_B32_e64_vi:
3071 
3072   case AMDGPU::V_LSHLREV_B16_e32:
3073   case AMDGPU::V_LSHLREV_B16_e64:
3074   case AMDGPU::V_LSHLREV_B16_e32_vi:
3075   case AMDGPU::V_LSHLREV_B16_e64_vi:
3076   case AMDGPU::V_LSHLREV_B16_gfx10:
3077 
3078   case AMDGPU::V_LSHRREV_B16_e32:
3079   case AMDGPU::V_LSHRREV_B16_e64:
3080   case AMDGPU::V_LSHRREV_B16_e32_vi:
3081   case AMDGPU::V_LSHRREV_B16_e64_vi:
3082   case AMDGPU::V_LSHRREV_B16_gfx10:
3083 
3084   case AMDGPU::V_ASHRREV_I16_e32:
3085   case AMDGPU::V_ASHRREV_I16_e64:
3086   case AMDGPU::V_ASHRREV_I16_e32_vi:
3087   case AMDGPU::V_ASHRREV_I16_e64_vi:
3088   case AMDGPU::V_ASHRREV_I16_gfx10:
3089 
3090   case AMDGPU::V_LSHLREV_B64:
3091   case AMDGPU::V_LSHLREV_B64_gfx10:
3092   case AMDGPU::V_LSHLREV_B64_vi:
3093 
3094   case AMDGPU::V_LSHRREV_B64:
3095   case AMDGPU::V_LSHRREV_B64_gfx10:
3096   case AMDGPU::V_LSHRREV_B64_vi:
3097 
3098   case AMDGPU::V_ASHRREV_I64:
3099   case AMDGPU::V_ASHRREV_I64_gfx10:
3100   case AMDGPU::V_ASHRREV_I64_vi:
3101 
3102   case AMDGPU::V_PK_LSHLREV_B16:
3103   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3104   case AMDGPU::V_PK_LSHLREV_B16_vi:
3105 
3106   case AMDGPU::V_PK_LSHRREV_B16:
3107   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3108   case AMDGPU::V_PK_LSHRREV_B16_vi:
3109   case AMDGPU::V_PK_ASHRREV_I16:
3110   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3111   case AMDGPU::V_PK_ASHRREV_I16_vi:
3112     return true;
3113   default:
3114     return false;
3115   }
3116 }
3117 
3118 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3119 
3120   using namespace SIInstrFlags;
3121   const unsigned Opcode = Inst.getOpcode();
3122   const MCInstrDesc &Desc = MII.get(Opcode);
3123 
3124   // The lds_direct register is defined so that it can be used only with
3125   // 9-bit source operands. Ignore encodings which do not accept these.
3126   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3127     return true;
3128 
3129   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3130   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3131   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3132 
3133   const int SrcIndices[] = { Src1Idx, Src2Idx };
3134 
3135   // lds_direct cannot be specified as either src1 or src2.
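  // Illustrative examples (assembly sketches, not from the original source):
  //   v_mov_b32 v0, lds_direct         // OK: lds_direct appears as src0
  //   v_add_f32 v0, v1, lds_direct     // rejected: lds_direct appears as src1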
3136   for (int SrcIdx : SrcIndices) {
3137     if (SrcIdx == -1) break;
3138     const MCOperand &Src = Inst.getOperand(SrcIdx);
3139     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3140       return false;
3141     }
3142   }
3143 
3144   if (Src0Idx == -1)
3145     return true;
3146 
3147   const MCOperand &Src = Inst.getOperand(Src0Idx);
3148   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3149     return true;
3150 
3151   // lds_direct is specified as src0. Check additional limitations.
3152   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3153 }
3154 
3155 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3156   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3157     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3158     if (Op.isFlatOffset())
3159       return Op.getStartLoc();
3160   }
3161   return getLoc();
3162 }
3163 
3164 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3165                                          const OperandVector &Operands) {
3166   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3167   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3168     return true;
3169 
3170   auto Opcode = Inst.getOpcode();
3171   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3172   assert(OpNum != -1);
3173 
3174   const auto &Op = Inst.getOperand(OpNum);
3175   if (!hasFlatOffsets() && Op.getImm() != 0) {
3176     Error(getFlatOffsetLoc(Operands),
3177           "flat offset modifier is not supported on this GPU");
3178     return false;
3179   }
3180 
3181   // The address offset is 13-bit signed on GFX9 and 12-bit signed on GFX10.
3182   // For FLAT segment instructions the offset must be non-negative;
3183   // the MSB is ignored and forced to zero.
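  // E.g. on GFX9 "global_load_dword v0, v[0:1], off offset:-4096" is valid
  // (signed 13-bit range), while a FLAT segment access such as
  // "flat_load_dword" must use a non-negative offset (illustrative examples).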
3184   unsigned OffsetSize = isGFX9() ? 13 : 12;
3185   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3186     if (!isIntN(OffsetSize, Op.getImm())) {
3187       Error(getFlatOffsetLoc(Operands),
3188             isGFX9() ? "expected a 13-bit signed offset" :
3189                        "expected a 12-bit signed offset");
3190       return false;
3191     }
3192   } else {
3193     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3194       Error(getFlatOffsetLoc(Operands),
3195             isGFX9() ? "expected a 12-bit unsigned offset" :
3196                        "expected an 11-bit unsigned offset");
3197       return false;
3198     }
3199   }
3200 
3201   return true;
3202 }
3203 
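// SOP2/SOPC instructions can encode at most one unique 32-bit literal;
// repeating the same literal value in both sources is allowed because the
// value is encoded only once.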
3204 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3205   unsigned Opcode = Inst.getOpcode();
3206   const MCInstrDesc &Desc = MII.get(Opcode);
3207   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3208     return true;
3209 
3210   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3211   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3212 
3213   const int OpIndices[] = { Src0Idx, Src1Idx };
3214 
3215   unsigned NumLiterals = 0;
3216   uint32_t LiteralValue = 0; // Only meaningful once NumLiterals > 0.
3217 
3218   for (int OpIdx : OpIndices) {
3219     if (OpIdx == -1) break;
3220 
3221     const MCOperand &MO = Inst.getOperand(OpIdx);
3222     if (MO.isImm() &&
3223         // Exclude special imm operands (like the one used by s_set_gpr_idx_on).
3224         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3225         !isInlineConstant(Inst, OpIdx)) {
3226       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3227       if (NumLiterals == 0 || LiteralValue != Value) {
3228         LiteralValue = Value;
3229         ++NumLiterals;
3230       }
3231     }
3232   }
3233 
3234   return NumLiterals <= 1;
3235 }
3236 
3237 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3238   const unsigned Opc = Inst.getOpcode();
3239   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3240       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3241     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3242     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3243 
3244     if (OpSel & ~3)
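    // Only the two low op_sel bits are meaningful for these opcodes;
    // reject a value that sets any higher bit.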
3245       return false;
3246   }
3247   return true;
3248 }
3249 
3250 // Check that the VCC register matches the wavefront size in use.
3251 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3252   auto FB = getFeatureBits();
3253   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3254     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3255 }
3256 
3257 // A VOP3 literal is only allowed on GFX10+, and at most one may be used.
3258 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3259   unsigned Opcode = Inst.getOpcode();
3260   const MCInstrDesc &Desc = MII.get(Opcode);
3261   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3262     return true;
3263 
3264   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3265   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3266   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3267 
3268   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3269 
3270   unsigned NumLiterals = 0;
3271   uint32_t LiteralValue = 0; // Only meaningful once NumLiterals > 0.
3272 
3273   for (int OpIdx : OpIndices) {
3274     if (OpIdx == -1) break;
3275 
3276     const MCOperand &MO = Inst.getOperand(OpIdx);
3277     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3278       continue;
3279 
3280     if (!isInlineConstant(Inst, OpIdx)) {
3281       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3282       if (NumLiterals == 0 || LiteralValue != Value) {
3283         LiteralValue = Value;
3284         ++NumLiterals;
3285       }
3286     }
3287   }
3288 
3289   return !NumLiterals ||
3290          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3291 }
3292 
3293 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3294                                           const SMLoc &IDLoc,
3295                                           const OperandVector &Operands) {
3296   if (!validateLdsDirect(Inst)) {
3297     Error(IDLoc,
3298       "invalid use of lds_direct");
3299     return false;
3300   }
3301   if (!validateSOPLiteral(Inst)) {
3302     Error(IDLoc,
3303       "only one literal operand is allowed");
3304     return false;
3305   }
3306   if (!validateVOP3Literal(Inst)) {
3307     Error(IDLoc,
3308       "invalid literal operand");
3309     return false;
3310   }
3311   if (!validateConstantBusLimitations(Inst)) {
3312     Error(IDLoc,
3313       "invalid operand (violates constant bus restrictions)");
3314     return false;
3315   }
3316   if (!validateEarlyClobberLimitations(Inst)) {
3317     Error(IDLoc,
3318       "destination must be different than all sources");
3319     return false;
3320   }
3321   if (!validateIntClampSupported(Inst)) {
3322     Error(IDLoc,
3323       "integer clamping is not supported on this GPU");
3324     return false;
3325   }
3326   if (!validateOpSel(Inst)) {
3327     Error(IDLoc,
3328       "invalid op_sel operand");
3329     return false;
3330   }
3331   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3332   if (!validateMIMGD16(Inst)) {
3333     Error(IDLoc,
3334       "d16 modifier is not supported on this GPU");
3335     return false;
3336   }
3337   if (!validateMIMGDim(Inst)) {
3338     Error(IDLoc, "dim modifier is required on this GPU");
3339     return false;
3340   }
3341   if (!validateMIMGDataSize(Inst)) {
3342     Error(IDLoc,
3343       "image data size does not match dmask and tfe");
3344     return false;
3345   }
3346   if (!validateMIMGAddrSize(Inst)) {
3347     Error(IDLoc,
3348       "image address size does not match dim and a16");
3349     return false;
3350   }
3351   if (!validateMIMGAtomicDMask(Inst)) {
3352     Error(IDLoc,
3353       "invalid atomic image dmask");
3354     return false;
3355   }
3356   if (!validateMIMGGatherDMask(Inst)) {
3357     Error(IDLoc,
3358       "invalid image_gather dmask: only one bit must be set");
3359     return false;
3360   }
3361   if (!validateFlatOffset(Inst, Operands)) {
3362     return false;
3363   }
3364 
3365   return true;
3366 }
3367 
3368 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3369                                             const FeatureBitset &FBS,
3370                                             unsigned VariantID = 0);
3371 
3372 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3373                                               OperandVector &Operands,
3374                                               MCStreamer &Out,
3375                                               uint64_t &ErrorInfo,
3376                                               bool MatchingInlineAsm) {
3377   MCInst Inst;
3378   unsigned Result = Match_Success;
3379   for (auto Variant : getMatchedVariants()) {
3380     uint64_t EI;
3381     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3382                                   Variant);
3383     // Match statuses are ordered from least to most specific; keep the most
3384     // specific status seen so far as the result:
3385     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3386     if ((R == Match_Success) ||
3387         (R == Match_PreferE32) ||
3388         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3389         (R == Match_InvalidOperand && Result != Match_MissingFeature
3390                                    && Result != Match_PreferE32) ||
3391         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3392                                    && Result != Match_MissingFeature
3393                                    && Result != Match_PreferE32)) {
3394       Result = R;
3395       ErrorInfo = EI;
3396     }
3397     if (R == Match_Success)
3398       break;
3399   }
3400 
3401   switch (Result) {
3402   default: break;
3403   case Match_Success:
3404     if (!validateInstruction(Inst, IDLoc, Operands)) {
3405       return true;
3406     }
3407     Inst.setLoc(IDLoc);
3408     Out.EmitInstruction(Inst, getSTI());
3409     return false;
3410 
3411   case Match_MissingFeature:
3412     return Error(IDLoc, "instruction not supported on this GPU");
3413 
3414   case Match_MnemonicFail: {
3415     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3416     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3417         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3418     return Error(IDLoc, "invalid instruction" + Suggestion,
3419                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3420   }
3421 
3422   case Match_InvalidOperand: {
3423     SMLoc ErrorLoc = IDLoc;
3424     if (ErrorInfo != ~0ULL) {
3425       if (ErrorInfo >= Operands.size()) {
3426         return Error(IDLoc, "too few operands for instruction");
3427       }
3428       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3429       if (ErrorLoc == SMLoc())
3430         ErrorLoc = IDLoc;
3431     }
3432     return Error(ErrorLoc, "invalid operand for instruction");
3433   }
3434 
3435   case Match_PreferE32:
3436     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3437                         "should be encoded as e32");
3438   }
3439   llvm_unreachable("Implement any new match types added!");
3440 }
3441 
3442 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3443   int64_t Tmp = -1;
3444   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3445     return true;
3446   }
3447   if (getParser().parseAbsoluteExpression(Tmp)) {
3448     return true;
3449   }
3450   Ret = static_cast<uint32_t>(Tmp);
3451   return false;
3452 }
3453 
3454 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3455                                                uint32_t &Minor) {
3456   if (ParseAsAbsoluteExpression(Major))
3457     return TokError("invalid major version");
3458 
3459   if (getLexer().isNot(AsmToken::Comma))
3460     return TokError("minor version number required, comma expected");
3461   Lex();
3462 
3463   if (ParseAsAbsoluteExpression(Minor))
3464     return TokError("invalid minor version");
3465 
3466   return false;
3467 }
3468 
3469 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3470   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3471     return TokError("directive only supported for amdgcn architecture");
3472 
3473   std::string Target;
3474 
3475   SMLoc TargetStart = getTok().getLoc();
3476   if (getParser().parseEscapedString(Target))
3477     return true;
3478   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3479 
3480   std::string ExpectedTarget;
3481   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3482   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3483 
3484   if (Target != ExpectedTargetOS.str())
3485     return getParser().Error(TargetRange.Start, "target must match options",
3486                              TargetRange);
3487 
3488   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3489   return false;
3490 }
3491 
3492 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3493   return getParser().Error(Range.Start, "value out of range", Range);
3494 }
3495 
3496 bool AMDGPUAsmParser::calculateGPRBlocks(
3497     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3498     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3499     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3500     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3501   // TODO(scott.linder): These calculations are duplicated from
3502   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3503   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3504 
3505   unsigned NumVGPRs = NextFreeVGPR;
3506   unsigned NumSGPRs = NextFreeSGPR;
3507 
3508   if (Version.Major >= 10)
3509     NumSGPRs = 0;
3510   else {
3511     unsigned MaxAddressableNumSGPRs =
3512         IsaInfo::getAddressableNumSGPRs(&getSTI());
3513 
3514     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3515         NumSGPRs > MaxAddressableNumSGPRs)
3516       return OutOfRangeError(SGPRRange);
3517 
3518     NumSGPRs +=
3519         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3520 
3521     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3522         NumSGPRs > MaxAddressableNumSGPRs)
3523       return OutOfRangeError(SGPRRange);
3524 
3525     if (Features.test(FeatureSGPRInitBug))
3526       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3527   }
3528 
3529   VGPRBlocks =
3530       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3531   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3532 
3533   return false;
3534 }
3535 
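// Parses a .amdhsa_kernel directive block. A minimal illustrative example:
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// Only the two *_next_free_* directives are required; every other field
// keeps its getDefaultAmdhsaKernelDescriptor() value.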
3536 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3537   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3538     return TokError("directive only supported for amdgcn architecture");
3539 
3540   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3541     return TokError("directive only supported for amdhsa OS");
3542 
3543   StringRef KernelName;
3544   if (getParser().parseIdentifier(KernelName))
3545     return true;
3546 
3547   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3548 
3549   StringSet<> Seen;
3550 
3551   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3552 
3553   SMRange VGPRRange;
3554   uint64_t NextFreeVGPR = 0;
3555   SMRange SGPRRange;
3556   uint64_t NextFreeSGPR = 0;
3557   unsigned UserSGPRCount = 0;
3558   bool ReserveVCC = true;
3559   bool ReserveFlatScr = true;
3560   bool ReserveXNACK = hasXNACK();
3561   Optional<bool> EnableWavefrontSize32;
3562 
3563   while (true) {
3564     while (getLexer().is(AsmToken::EndOfStatement))
3565       Lex();
3566 
3567     if (getLexer().isNot(AsmToken::Identifier))
3568       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3569 
3570     StringRef ID = getTok().getIdentifier();
3571     SMRange IDRange = getTok().getLocRange();
3572     Lex();
3573 
3574     if (ID == ".end_amdhsa_kernel")
3575       break;
3576 
3577     if (Seen.find(ID) != Seen.end())
3578       return TokError(".amdhsa_ directives cannot be repeated");
3579     Seen.insert(ID);
3580 
3581     SMLoc ValStart = getTok().getLoc();
3582     int64_t IVal;
3583     if (getParser().parseAbsoluteExpression(IVal))
3584       return true;
3585     SMLoc ValEnd = getTok().getLoc();
3586     SMRange ValRange = SMRange(ValStart, ValEnd);
3587 
3588     if (IVal < 0)
3589       return OutOfRangeError(ValRange);
3590 
3591     uint64_t Val = IVal;
3592 
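// PARSE_BITS_ENTRY range-checks VALUE against the field width ENTRY##_WIDTH
// (reporting an out-of-range error against RANGE on failure) and then packs
// VALUE into FIELD via AMDHSA_BITS_SET.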
3593 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3594   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3595     return OutOfRangeError(RANGE);                                             \
3596   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3597 
3598     if (ID == ".amdhsa_group_segment_fixed_size") {
3599       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3600         return OutOfRangeError(ValRange);
3601       KD.group_segment_fixed_size = Val;
3602     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3603       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3604         return OutOfRangeError(ValRange);
3605       KD.private_segment_fixed_size = Val;
3606     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3607       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3608                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3609                        Val, ValRange);
3610       UserSGPRCount += 4;
3611     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3612       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3613                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3614                        ValRange);
3615       UserSGPRCount += 2;
3616     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3617       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3618                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3619                        ValRange);
3620       UserSGPRCount += 2;
3621     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3622       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3623                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3624                        Val, ValRange);
3625       UserSGPRCount += 2;
3626     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3627       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3628                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3629                        ValRange);
3630       UserSGPRCount += 2;
3631     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3632       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3633                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3634                        ValRange);
3635       UserSGPRCount += 2;
3636     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3637       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3638                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3639                        Val, ValRange);
3640       UserSGPRCount += 1;
3641     } else if (ID == ".amdhsa_wavefront_size32") {
3642       if (IVersion.Major < 10)
3643         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3644                                  IDRange);
3645       EnableWavefrontSize32 = Val;
3646       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3647                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3648                        Val, ValRange);
3649     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3650       PARSE_BITS_ENTRY(
3651           KD.compute_pgm_rsrc2,
3652           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3653           ValRange);
3654     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3655       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3656                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3657                        ValRange);
3658     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3659       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3660                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3661                        ValRange);
3662     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3663       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3664                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3665                        ValRange);
3666     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3667       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3668                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3669                        ValRange);
3670     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3671       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3672                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3673                        ValRange);
3674     } else if (ID == ".amdhsa_next_free_vgpr") {
3675       VGPRRange = ValRange;
3676       NextFreeVGPR = Val;
3677     } else if (ID == ".amdhsa_next_free_sgpr") {
3678       SGPRRange = ValRange;
3679       NextFreeSGPR = Val;
3680     } else if (ID == ".amdhsa_reserve_vcc") {
3681       if (!isUInt<1>(Val))
3682         return OutOfRangeError(ValRange);
3683       ReserveVCC = Val;
3684     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3685       if (IVersion.Major < 7)
3686         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3687                                  IDRange);
3688       if (!isUInt<1>(Val))
3689         return OutOfRangeError(ValRange);
3690       ReserveFlatScr = Val;
3691     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3692       if (IVersion.Major < 8)
3693         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3694                                  IDRange);
3695       if (!isUInt<1>(Val))
3696         return OutOfRangeError(ValRange);
3697       ReserveXNACK = Val;
3698     } else if (ID == ".amdhsa_float_round_mode_32") {
3699       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3700                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3701     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3702       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3703                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3704     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3705       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3706                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3707     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3708       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3709                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3710                        ValRange);
3711     } else if (ID == ".amdhsa_dx10_clamp") {
3712       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3713                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3714     } else if (ID == ".amdhsa_ieee_mode") {
3715       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3716                        Val, ValRange);
3717     } else if (ID == ".amdhsa_fp16_overflow") {
3718       if (IVersion.Major < 9)
3719         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3720                                  IDRange);
3721       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3722                        ValRange);
3723     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3724       if (IVersion.Major < 10)
3725         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3726                                  IDRange);
3727       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3728                        ValRange);
3729     } else if (ID == ".amdhsa_memory_ordered") {
3730       if (IVersion.Major < 10)
3731         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3732                                  IDRange);
3733       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3734                        ValRange);
3735     } else if (ID == ".amdhsa_forward_progress") {
3736       if (IVersion.Major < 10)
3737         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3738                                  IDRange);
3739       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3740                        ValRange);
3741     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3742       PARSE_BITS_ENTRY(
3743           KD.compute_pgm_rsrc2,
3744           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3745           ValRange);
3746     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3747       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3748                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3749                        Val, ValRange);
3750     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3751       PARSE_BITS_ENTRY(
3752           KD.compute_pgm_rsrc2,
3753           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3754           ValRange);
3755     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3756       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3757                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3758                        Val, ValRange);
3759     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3760       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3761                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3762                        Val, ValRange);
3763     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3764       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3765                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3766                        Val, ValRange);
3767     } else if (ID == ".amdhsa_exception_int_div_zero") {
3768       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3769                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3770                        Val, ValRange);
3771     } else {
3772       return getParser().Error(IDRange.Start,
3773                                "unknown .amdhsa_kernel directive", IDRange);
3774     }
3775 
3776 #undef PARSE_BITS_ENTRY
3777   }
3778 
3779   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3780     return TokError(".amdhsa_next_free_vgpr directive is required");
3781 
3782   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3783     return TokError(".amdhsa_next_free_sgpr directive is required");
3784 
3785   unsigned VGPRBlocks;
3786   unsigned SGPRBlocks;
3787   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3788                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3789                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3790                          SGPRBlocks))
3791     return true;
3792 
3793   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3794           VGPRBlocks))
3795     return OutOfRangeError(VGPRRange);
3796   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3797                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3798 
3799   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3800           SGPRBlocks))
3801     return OutOfRangeError(SGPRRange);
3802   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3803                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3804                   SGPRBlocks);
3805 
3806   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3807     return TokError("too many user SGPRs enabled");
3808   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3809                   UserSGPRCount);
3810 
3811   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3812       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3813       ReserveFlatScr, ReserveXNACK);
3814   return false;
3815 }
3816 
3817 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3818   uint32_t Major;
3819   uint32_t Minor;
3820 
3821   if (ParseDirectiveMajorMinor(Major, Minor))
3822     return true;
3823 
3824   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3825   return false;
3826 }
3827 
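// With no arguments the directive emits the ISA version of the targeted GPU;
// otherwise the full five-argument form is expected, e.g. (illustrative):
//   .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"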
3828 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3829   uint32_t Major;
3830   uint32_t Minor;
3831   uint32_t Stepping;
3832   StringRef VendorName;
3833   StringRef ArchName;
3834 
3835   // If this directive has no arguments, then use the ISA version for the
3836   // targeted GPU.
3837   if (getLexer().is(AsmToken::EndOfStatement)) {
3838     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3839     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3840                                                       ISA.Stepping,
3841                                                       "AMD", "AMDGPU");
3842     return false;
3843   }
3844 
3845   if (ParseDirectiveMajorMinor(Major, Minor))
3846     return true;
3847 
3848   if (getLexer().isNot(AsmToken::Comma))
3849     return TokError("stepping version number required, comma expected");
3850   Lex();
3851 
3852   if (ParseAsAbsoluteExpression(Stepping))
3853     return TokError("invalid stepping version");
3854 
3855   if (getLexer().isNot(AsmToken::Comma))
3856     return TokError("vendor name required, comma expected");
3857   Lex();
3858 
3859   if (getLexer().isNot(AsmToken::String))
3860     return TokError("invalid vendor name");
3861 
3862   VendorName = getLexer().getTok().getStringContents();
3863   Lex();
3864 
3865   if (getLexer().isNot(AsmToken::Comma))
3866     return TokError("arch name required, comma expected");
3867   Lex();
3868 
3869   if (getLexer().isNot(AsmToken::String))
3870     return TokError("invalid arch name");
3871 
3872   ArchName = getLexer().getTok().getStringContents();
3873   Lex();
3874 
3875   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3876                                                     VendorName, ArchName);
3877   return false;
3878 }
3879 
3880 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3881                                                amd_kernel_code_t &Header) {
3882   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3883   // assembly for backwards compatibility.
3884   if (ID == "max_scratch_backing_memory_byte_size") {
3885     Parser.eatToEndOfStatement();
3886     return false;
3887   }
3888 
3889   SmallString<40> ErrStr;
3890   raw_svector_ostream Err(ErrStr);
3891   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3892     return TokError(Err.str());
3893   }
3894   Lex();
3895 
3896   if (ID == "enable_wavefront_size32") {
3897     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3898       if (!isGFX10())
3899         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3900       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3901         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3902     } else {
3903       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3904         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3905     }
3906   }
3907 
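  // Note: wavefront_size holds the log2 of the wavefront size, so 5 means
  // wave32 and 6 means wave64.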
3908   if (ID == "wavefront_size") {
3909     if (Header.wavefront_size == 5) {
3910       if (!isGFX10())
3911         return TokError("wavefront_size=5 is only allowed on GFX10+");
3912       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3913         return TokError("wavefront_size=5 requires +WavefrontSize32");
3914     } else if (Header.wavefront_size == 6) {
3915       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3916         return TokError("wavefront_size=6 requires +WavefrontSize64");
3917     }
3918   }
3919 
3920   if (ID == "enable_wgp_mode") {
3921     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3922       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3923   }
3924 
3925   if (ID == "enable_mem_ordered") {
3926     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3927       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3928   }
3929 
3930   if (ID == "enable_fwd_progress") {
3931     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3932       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3933   }
3934 
3935   return false;
3936 }
3937 
3938 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3939   amd_kernel_code_t Header;
3940   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3941 
3942   while (true) {
3943     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3944     // will set the current token to EndOfStatement.
3945     while(getLexer().is(AsmToken::EndOfStatement))
3946       Lex();
3947 
3948     if (getLexer().isNot(AsmToken::Identifier))
3949       return TokError("expected value identifier or .end_amd_kernel_code_t");
3950 
3951     StringRef ID = getLexer().getTok().getIdentifier();
3952     Lex();
3953 
3954     if (ID == ".end_amd_kernel_code_t")
3955       break;
3956 
3957     if (ParseAMDKernelCodeTValue(ID, Header))
3958       return true;
3959   }
3960 
3961   getTargetStreamer().EmitAMDKernelCodeT(Header);
3962 
3963   return false;
3964 }
3965 
3966 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3967   if (getLexer().isNot(AsmToken::Identifier))
3968     return TokError("expected symbol name");
3969 
3970   StringRef KernelName = Parser.getTok().getString();
3971 
3972   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3973                                            ELF::STT_AMDGPU_HSA_KERNEL);
3974   Lex();
3975   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3976     KernelScope.initialize(getContext());
3977   return false;
3978 }
3979 
3980 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3981   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3982     return Error(getParser().getTok().getLoc(),
3983                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3984                  "architectures");
3985   }
3986 
3987   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3988 
3989   std::string ISAVersionStringFromSTI;
3990   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3991   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3992 
3993   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3994     return Error(getParser().getTok().getLoc(),
3995                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3996                  "arguments specified through the command line");
3997   }
3998 
3999   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4000   Lex();
4001 
4002   return false;
4003 }
4004 
4005 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4006   const char *AssemblerDirectiveBegin;
4007   const char *AssemblerDirectiveEnd;
4008   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4009       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4010           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4011                             HSAMD::V3::AssemblerDirectiveEnd)
4012           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4013                             HSAMD::AssemblerDirectiveEnd);
4014 
4015   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4016     return Error(getParser().getTok().getLoc(),
4017                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4018                  "not available on non-amdhsa OSes")).str());
4019   }
4020 
4021   std::string HSAMetadataString;
4022   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4023                           HSAMetadataString))
4024     return true;
4025 
4026   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4027     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4028       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4029   } else {
4030     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4031       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4032   }
4033 
4034   return false;
4035 }
4036 
4037 /// Common code to parse out a block of text (typically YAML) between start and
4038 /// end directives.
4039 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4040                                           const char *AssemblerDirectiveEnd,
4041                                           std::string &CollectString) {
4042 
4043   raw_string_ostream CollectStream(CollectString);
4044 
4045   getLexer().setSkipSpace(false);
4046 
4047   bool FoundEnd = false;
4048   while (!getLexer().is(AsmToken::Eof)) {
4049     while (getLexer().is(AsmToken::Space)) {
4050       CollectStream << getLexer().getTok().getString();
4051       Lex();
4052     }
4053 
4054     if (getLexer().is(AsmToken::Identifier)) {
4055       StringRef ID = getLexer().getTok().getIdentifier();
4056       if (ID == AssemblerDirectiveEnd) {
4057         Lex();
4058         FoundEnd = true;
4059         break;
4060       }
4061     }
4062 
4063     CollectStream << Parser.parseStringToEndOfStatement()
4064                   << getContext().getAsmInfo()->getSeparatorString();
4065 
4066     Parser.eatToEndOfStatement();
4067   }
4068 
4069   getLexer().setSkipSpace(true);
4070 
4071   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4072     return TokError(Twine("expected directive ") +
4073                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4074   }
4075 
4076   CollectStream.flush();
4077   return false;
4078 }
4079 
4080 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4081 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4082   std::string String;
4083   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4084                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4085     return true;
4086 
4087   auto PALMetadata = getTargetStreamer().getPALMetadata();
4088   if (!PALMetadata->setFromString(String))
4089     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4090   return false;
4091 }
4092 
4093 /// Parse the assembler directive for old linear-format PAL metadata.
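/// The legacy format is a flat, comma-separated list of register/value
/// pairs, e.g. (illustrative): .amd_amdgpu_pal_metadata 0x2c0a,0x42000000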
4094 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4095   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4096     return Error(getParser().getTok().getLoc(),
4097                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4098                  "not available on non-amdpal OSes")).str());
4099   }
4100 
4101   auto PALMetadata = getTargetStreamer().getPALMetadata();
4102   PALMetadata->setLegacy();
4103   for (;;) {
4104     uint32_t Key, Value;
4105     if (ParseAsAbsoluteExpression(Key)) {
4106       return TokError(Twine("invalid value in ") +
4107                       Twine(PALMD::AssemblerDirective));
4108     }
4109     if (getLexer().isNot(AsmToken::Comma)) {
4110       return TokError(Twine("expected an even number of values in ") +
4111                       Twine(PALMD::AssemblerDirective));
4112     }
4113     Lex();
4114     if (ParseAsAbsoluteExpression(Value)) {
4115       return TokError(Twine("invalid value in ") +
4116                       Twine(PALMD::AssemblerDirective));
4117     }
4118     PALMetadata->setRegister(Key, Value);
4119     if (getLexer().isNot(AsmToken::Comma))
4120       break;
4121     Lex();
4122   }
4123   return false;
4124 }
4125 
4126 /// ParseDirectiveAMDGPULDS
4127 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
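///  e.g. ".amdgpu_lds lds_buf, 256, 16" reserves 256 bytes of LDS for
///  lds_buf with 16-byte alignment (illustrative; alignment defaults to 4).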
4128 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4129   if (getParser().checkForValidSection())
4130     return true;
4131 
4132   StringRef Name;
4133   SMLoc NameLoc = getLexer().getLoc();
4134   if (getParser().parseIdentifier(Name))
4135     return TokError("expected identifier in directive");
4136 
4137   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4138   if (parseToken(AsmToken::Comma, "expected ','"))
4139     return true;
4140 
4141   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4142 
4143   int64_t Size;
4144   SMLoc SizeLoc = getLexer().getLoc();
4145   if (getParser().parseAbsoluteExpression(Size))
4146     return true;
4147   if (Size < 0)
4148     return Error(SizeLoc, "size must be non-negative");
4149   if (Size > LocalMemorySize)
4150     return Error(SizeLoc, "size is too large");
4151 
4152   int64_t Align = 4;
4153   if (getLexer().is(AsmToken::Comma)) {
4154     Lex();
4155     SMLoc AlignLoc = getLexer().getLoc();
4156     if (getParser().parseAbsoluteExpression(Align))
4157       return true;
4158     if (Align < 0 || !isPowerOf2_64(Align))
4159       return Error(AlignLoc, "alignment must be a power of two");
4160 
4161     // Alignment larger than the size of LDS is possible in theory, as long
4162     // as the linker manages to place the symbol at address 0, but we do want
4163     // to make sure the alignment fits nicely into a 32-bit integer.
4164     if (Align >= 1u << 31)
4165       return Error(AlignLoc, "alignment is too large");
4166   }
4167 
4168   if (parseToken(AsmToken::EndOfStatement,
4169                  "unexpected token in '.amdgpu_lds' directive"))
4170     return true;
4171 
4172   Symbol->redefineIfPossible();
4173   if (!Symbol->isUndefined())
4174     return Error(NameLoc, "invalid symbol redefinition");
4175 
4176   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4177   return false;
4178 }
4179 
4180 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4181   StringRef IDVal = DirectiveID.getString();
4182 
4183   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4184     if (IDVal == ".amdgcn_target")
4185       return ParseDirectiveAMDGCNTarget();
4186 
4187     if (IDVal == ".amdhsa_kernel")
4188       return ParseDirectiveAMDHSAKernel();
4189 
4190     // TODO: Restructure/combine with PAL metadata directive.
4191     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4192       return ParseDirectiveHSAMetadata();
4193   } else {
4194     if (IDVal == ".hsa_code_object_version")
4195       return ParseDirectiveHSACodeObjectVersion();
4196 
4197     if (IDVal == ".hsa_code_object_isa")
4198       return ParseDirectiveHSACodeObjectISA();
4199 
4200     if (IDVal == ".amd_kernel_code_t")
4201       return ParseDirectiveAMDKernelCodeT();
4202 
4203     if (IDVal == ".amdgpu_hsa_kernel")
4204       return ParseDirectiveAMDGPUHsaKernel();
4205 
4206     if (IDVal == ".amd_amdgpu_isa")
4207       return ParseDirectiveISAVersion();
4208 
4209     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4210       return ParseDirectiveHSAMetadata();
4211   }
4212 
4213   if (IDVal == ".amdgpu_lds")
4214     return ParseDirectiveAMDGPULDS();
4215 
4216   if (IDVal == PALMD::AssemblerDirectiveBegin)
4217     return ParseDirectivePALMetadataBegin();
4218 
4219   if (IDVal == PALMD::AssemblerDirective)
4220     return ParseDirectivePALMetadata();
4221 
4222   return true;
4223 }
4224 
4225 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4226                                            unsigned RegNo) const {
4227 
4228   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4229        R.isValid(); ++R) {
4230     if (*R == RegNo)
4231       return isGFX9() || isGFX10();
4232   }
4233 
4234   // GFX10 has 2 more SGPRs 104 and 105.
4235   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4236        R.isValid(); ++R) {
4237     if (*R == RegNo)
4238       return hasSGPR104_SGPR105();
4239   }
4240 
4241   switch (RegNo) {
4242   case AMDGPU::SRC_SHARED_BASE:
4243   case AMDGPU::SRC_SHARED_LIMIT:
4244   case AMDGPU::SRC_PRIVATE_BASE:
4245   case AMDGPU::SRC_PRIVATE_LIMIT:
4246   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4247     return !isCI() && !isSI() && !isVI();
4248   case AMDGPU::TBA:
4249   case AMDGPU::TBA_LO:
4250   case AMDGPU::TBA_HI:
4251   case AMDGPU::TMA:
4252   case AMDGPU::TMA_LO:
4253   case AMDGPU::TMA_HI:
4254     return !isGFX9() && !isGFX10();
4255   case AMDGPU::XNACK_MASK:
4256   case AMDGPU::XNACK_MASK_LO:
4257   case AMDGPU::XNACK_MASK_HI:
4258     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4259   case AMDGPU::SGPR_NULL:
4260     return isGFX10();
4261   default:
4262     break;
4263   }
4264 
4265   if (isCI())
4266     return true;
4267 
4268   if (isSI() || isGFX10()) {
4269     // No flat_scr on SI.
4270     // On GFX10 flat scratch is not a valid register operand and can only be
4271     // accessed with s_setreg/s_getreg.
4272     switch (RegNo) {
4273     case AMDGPU::FLAT_SCR:
4274     case AMDGPU::FLAT_SCR_LO:
4275     case AMDGPU::FLAT_SCR_HI:
4276       return false;
4277     default:
4278       return true;
4279     }
4280   }
4281 
4282   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4283   // SI/CI have.
4284   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4285        R.isValid(); ++R) {
4286     if (*R == RegNo)
4287       return hasSGPR102_SGPR103();
4288   }
4289 
4290   return true;
4291 }
4292 
4293 OperandMatchResultTy
4294 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4295                               OperandMode Mode) {
4296   // Try to parse with a custom parser
4297   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4298 
4299   // If we successfully parsed the operand or if there was an error while
4300   // parsing it, we are done.
4301   //
4302   // If we are parsing after we reach EndOfStatement then this means we
4303   // are appending default values to the Operands list.  This is only done
4304   // by custom parser, so we shouldn't continue on to the generic parsing.
4305   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4306       getLexer().is(AsmToken::EndOfStatement))
4307     return ResTy;
4308 
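  // In NSA (non-sequential address) mode a MIMG address operand is written as
  // a bracketed register list, e.g. "[v0, v2, v5]". The registers are parsed
  // individually and re-wrapped in '[' and ']' tokens when more than one
  // register was given; a single bracketed register is left unwrapped.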
4309   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4310     unsigned Prefix = Operands.size();
4311     SMLoc LBraceLoc = getTok().getLoc();
4312     Parser.Lex(); // eat the '['
4313 
4314     for (;;) {
4315       ResTy = parseReg(Operands);
4316       if (ResTy != MatchOperand_Success)
4317         return ResTy;
4318 
4319       if (getLexer().is(AsmToken::RBrac))
4320         break;
4321 
4322       if (getLexer().isNot(AsmToken::Comma))
4323         return MatchOperand_ParseFail;
4324       Parser.Lex();
4325     }
4326 
4327     if (Operands.size() - Prefix > 1) {
4328       Operands.insert(Operands.begin() + Prefix,
4329                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4330       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4331                                                     getTok().getLoc()));
4332     }
4333 
4334     Parser.Lex(); // eat the ']'
4335     return MatchOperand_Success;
4336   }
4337 
4338   return parseRegOrImm(Operands);
4339 }
4340 
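// Strip a forced-encoding suffix from the mnemonic and remember the choice,
// e.g. "v_add_f32_e64" is matched as "v_add_f32" with a forced 64-bit (VOP3)
// encoding.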
4341 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4342   // Clear any forced encodings from the previous instruction.
4343   setForcedEncodingSize(0);
4344   setForcedDPP(false);
4345   setForcedSDWA(false);
4346 
4347   if (Name.endswith("_e64")) {
4348     setForcedEncodingSize(64);
4349     return Name.substr(0, Name.size() - 4);
4350   } else if (Name.endswith("_e32")) {
4351     setForcedEncodingSize(32);
4352     return Name.substr(0, Name.size() - 4);
4353   } else if (Name.endswith("_dpp")) {
4354     setForcedDPP(true);
4355     return Name.substr(0, Name.size() - 4);
4356   } else if (Name.endswith("_sdwa")) {
4357     setForcedSDWA(true);
4358     return Name.substr(0, Name.size() - 5);
4359   }
4360   return Name;
4361 }
4362 
4363 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4364                                        StringRef Name,
4365                                        SMLoc NameLoc, OperandVector &Operands) {
4366   // Add the instruction mnemonic
4367   Name = parseMnemonicSuffix(Name);
4368   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4369 
4370   bool IsMIMG = Name.startswith("image_");
4371 
4372   while (!getLexer().is(AsmToken::EndOfStatement)) {
4373     OperandMode Mode = OperandMode_Default;
4374     if (IsMIMG && isGFX10() && Operands.size() == 2)
4375       Mode = OperandMode_NSA;
4376     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4377 
4378     // Eat the comma or space if there is one.
4379     if (getLexer().is(AsmToken::Comma))
4380       Parser.Lex();
4381 
4382     switch (Res) {
4383       case MatchOperand_Success: break;
4384       case MatchOperand_ParseFail:
4385         // FIXME: use real operand location rather than the current location.
4386         Error(getLexer().getLoc(), "failed parsing operand.");
4387         while (!getLexer().is(AsmToken::EndOfStatement)) {
4388           Parser.Lex();
4389         }
4390         return true;
4391       case MatchOperand_NoMatch:
4392         // FIXME: use real operand location rather than the current location.
4393         Error(getLexer().getLoc(), "not a valid operand.");
4394         while (!getLexer().is(AsmToken::EndOfStatement)) {
4395           Parser.Lex();
4396         }
4397         return true;
4398     }
4399   }
4400 
4401   return false;
4402 }
4403 
4404 //===----------------------------------------------------------------------===//
4405 // Utility functions
4406 //===----------------------------------------------------------------------===//
4407 
4408 OperandMatchResultTy
4409 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4410 
4411   if (!trySkipId(Prefix, AsmToken::Colon))
4412     return MatchOperand_NoMatch;
4413 
4414   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4415 }
4416 
4417 OperandMatchResultTy
4418 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4419                                     AMDGPUOperand::ImmTy ImmTy,
4420                                     bool (*ConvertResult)(int64_t&)) {
4421   SMLoc S = getLoc();
4422   int64_t Value = 0;
4423 
4424   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4425   if (Res != MatchOperand_Success)
4426     return Res;
4427 
4428   if (ConvertResult && !ConvertResult(Value)) {
4429     Error(S, "invalid " + StringRef(Prefix) + " value.");
4430   }
4431 
4432   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4433   return MatchOperand_Success;
4434 }
4435 
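// Parses "Prefix:[b0,b1,...]" where each element must be 0 or 1; element I is
// packed into bit I of the resulting immediate, so e.g. "op_sel:[0,1]"
// (an illustrative prefix) yields the value 2.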
4436 OperandMatchResultTy
4437 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4438                                              OperandVector &Operands,
4439                                              AMDGPUOperand::ImmTy ImmTy,
4440                                              bool (*ConvertResult)(int64_t&)) {
4441   SMLoc S = getLoc();
4442   if (!trySkipId(Prefix, AsmToken::Colon))
4443     return MatchOperand_NoMatch;
4444 
4445   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4446     return MatchOperand_ParseFail;
4447 
4448   unsigned Val = 0;
4449   const unsigned MaxSize = 4;
4450 
4451   // FIXME: How to verify the number of elements matches the number of src
4452   // operands?
4453   for (int I = 0; ; ++I) {
4454     int64_t Op;
4455     SMLoc Loc = getLoc();
4456     if (!parseExpr(Op))
4457       return MatchOperand_ParseFail;
4458 
4459     if (Op != 0 && Op != 1) {
4460       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4461       return MatchOperand_ParseFail;
4462     }
4463 
4464     Val |= (Op << I);
4465 
4466     if (trySkipToken(AsmToken::RBrac))
4467       break;
4468 
4469     if (I + 1 == MaxSize) {
4470       Error(getLoc(), "expected a closing square bracket");
4471       return MatchOperand_ParseFail;
4472     }
4473 
4474     if (!skipToken(AsmToken::Comma, "expected a comma"))
4475       return MatchOperand_ParseFail;
4476   }
4477 
4478   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4479   return MatchOperand_Success;
4480 }
4481 
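// Parse a named single-bit operand such as "glc": the bare name sets the bit
// to 1 and the "no"-prefixed form ("noglc") sets it to 0 (illustrative names).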
4482 OperandMatchResultTy
4483 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4484                                AMDGPUOperand::ImmTy ImmTy) {
4485   int64_t Bit = 0;
4486   SMLoc S = Parser.getTok().getLoc();
4487 
4488   // If we are already at the end of the statement, this is a default
4489   // argument, so keep the default value of 0.
4490   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4491     switch(getLexer().getKind()) {
4492       case AsmToken::Identifier: {
4493         StringRef Tok = Parser.getTok().getString();
4494         if (Tok == Name) {
4495           if (Tok == "r128" && isGFX9())
4496             Error(S, "r128 modifier is not supported on this GPU");
4497           if (Tok == "a16" && !isGFX9() && !isGFX10())
4498             Error(S, "a16 modifier is not supported on this GPU");
4499           Bit = 1;
4500           Parser.Lex();
4501         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4502           Bit = 0;
4503           Parser.Lex();
4504         } else {
4505           return MatchOperand_NoMatch;
4506         }
4507         break;
4508       }
4509       default:
4510         return MatchOperand_NoMatch;
4511     }
4512   }
4513 
4514   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4515     return MatchOperand_ParseFail;
4516 
4517   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4518   return MatchOperand_Success;
4519 }
4520 
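// If an operand of type ImmT was recorded in OptionalIdx, append it to Inst;
// otherwise append the given Default immediate.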
4521 static void addOptionalImmOperand(
4522   MCInst& Inst, const OperandVector& Operands,
4523   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4524   AMDGPUOperand::ImmTy ImmT,
4525   int64_t Default = 0) {
4526   auto i = OptionalIdx.find(ImmT);
4527   if (i != OptionalIdx.end()) {
4528     unsigned Idx = i->second;
4529     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4530   } else {
4531     Inst.addOperand(MCOperand::createImm(Default));
4532   }
4533 }
4534 
4535 OperandMatchResultTy
4536 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4537   if (getLexer().isNot(AsmToken::Identifier)) {
4538     return MatchOperand_NoMatch;
4539   }
4540   StringRef Tok = Parser.getTok().getString();
4541   if (Tok != Prefix) {
4542     return MatchOperand_NoMatch;
4543   }
4544 
4545   Parser.Lex();
4546   if (getLexer().isNot(AsmToken::Colon)) {
4547     return MatchOperand_ParseFail;
4548   }
4549 
4550   Parser.Lex();
4551   if (getLexer().isNot(AsmToken::Identifier)) {
4552     return MatchOperand_ParseFail;
4553   }
4554 
4555   Value = Parser.getTok().getString();
4556   return MatchOperand_Success;
4557 }
4558 
4559 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4560 // values to live in a joint format operand in the MCInst encoding.
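// E.g. "dfmt:4, nfmt:2" and "nfmt:2, dfmt:4" both produce the same packed
// value, Dfmt | (Nfmt << 4).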
4561 OperandMatchResultTy
4562 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4563   SMLoc S = Parser.getTok().getLoc();
4564   int64_t Dfmt = 0, Nfmt = 0;
4565   // dfmt and nfmt can appear in either order, and each is optional.
4566   bool GotDfmt = false, GotNfmt = false;
4567   while (!GotDfmt || !GotNfmt) {
4568     if (!GotDfmt) {
4569       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4570       if (Res != MatchOperand_NoMatch) {
4571         if (Res != MatchOperand_Success)
4572           return Res;
4573         if (Dfmt >= 16) {
4574           Error(Parser.getTok().getLoc(), "out of range dfmt");
4575           return MatchOperand_ParseFail;
4576         }
4577         GotDfmt = true;
4578         Parser.Lex();
4579         continue;
4580       }
4581     }
4582     if (!GotNfmt) {
4583       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4584       if (Res != MatchOperand_NoMatch) {
4585         if (Res != MatchOperand_Success)
4586           return Res;
4587         if (Nfmt >= 8) {
4588           Error(Parser.getTok().getLoc(), "out of range nfmt");
4589           return MatchOperand_ParseFail;
4590         }
4591         GotNfmt = true;
4592         Parser.Lex();
4593         continue;
4594       }
4595     }
4596     break;
4597   }
4598   if (!GotDfmt && !GotNfmt)
4599     return MatchOperand_NoMatch;
4600   auto Format = Dfmt | (Nfmt << 4);
4601   Operands.push_back(
4602       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4603   return MatchOperand_Success;
4604 }
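// Example: "dfmt:5 nfmt:2" and "nfmt:2 dfmt:5" both produce
// Format == 5 | (2 << 4) == 0x25 in the joint ImmTyFORMAT operand.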
4605 
4606 //===----------------------------------------------------------------------===//
4607 // ds
4608 //===----------------------------------------------------------------------===//
4609 
4610 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4611                                     const OperandVector &Operands) {
4612   OptionalImmIndexMap OptionalIdx;
4613 
4614   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4615     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4616 
4617     // Add the register arguments
4618     if (Op.isReg()) {
4619       Op.addRegOperands(Inst, 1);
4620       continue;
4621     }
4622 
4623     // Handle optional arguments
4624     OptionalIdx[Op.getImmTy()] = i;
4625   }
4626 
4627   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4628   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4629   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4630 
4631   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4632 }
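// Example: "ds_write2_b32 v1, v2, v3 offset0:4 offset1:8" fills both offset
// fields; an omitted offset or "gds" defaults to 0 via addOptionalImmOperand.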
4633 
4634 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4635                                 bool IsGdsHardcoded) {
4636   OptionalImmIndexMap OptionalIdx;
4637 
4638   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4639     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4640 
4641     // Add the register arguments
4642     if (Op.isReg()) {
4643       Op.addRegOperands(Inst, 1);
4644       continue;
4645     }
4646 
4647     if (Op.isToken() && Op.getToken() == "gds") {
4648       IsGdsHardcoded = true;
4649       continue;
4650     }
4651 
4652     // Handle optional arguments
4653     OptionalIdx[Op.getImmTy()] = i;
4654   }
4655 
4656   AMDGPUOperand::ImmTy OffsetType =
4657     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4658      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4659      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4660                                                       AMDGPUOperand::ImmTyOffset;
4661 
4662   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4663 
4664   if (!IsGdsHardcoded) {
4665     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4666   }
4667   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4668 }
4669 
4670 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4671   OptionalImmIndexMap OptionalIdx;
4672 
4673   unsigned OperandIdx[4];
4674   unsigned EnMask = 0;
4675   int SrcIdx = 0;
4676 
4677   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4678     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4679 
4680     // Add the register arguments
4681     if (Op.isReg()) {
4682       assert(SrcIdx < 4);
4683       OperandIdx[SrcIdx] = Inst.size();
4684       Op.addRegOperands(Inst, 1);
4685       ++SrcIdx;
4686       continue;
4687     }
4688 
4689     if (Op.isOff()) {
4690       assert(SrcIdx < 4);
4691       OperandIdx[SrcIdx] = Inst.size();
4692       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4693       ++SrcIdx;
4694       continue;
4695     }
4696 
4697     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4698       Op.addImmOperands(Inst, 1);
4699       continue;
4700     }
4701 
4702     if (Op.isToken() && Op.getToken() == "done")
4703       continue;
4704 
4705     // Handle optional arguments
4706     OptionalIdx[Op.getImmTy()] = i;
4707   }
4708 
4709   assert(SrcIdx == 4);
4710 
4711   bool Compr = false;
4712   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4713     Compr = true;
4714     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4715     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4716     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4717   }
4718 
4719   for (auto i = 0; i < SrcIdx; ++i) {
4720     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4721       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4722     }
4723   }
4724 
4725   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4726   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4727 
4728   Inst.addOperand(MCOperand::createImm(EnMask));
4729 }
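// Example: "exp mrt0 v0, v1, off, off" yields EnMask == 0b0011 (channels 0
// and 1 enabled); with the "compr" modifier each remaining defined register
// carries a packed pair and enables two mask bits (0x3 << i * 2).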
4730 
4731 //===----------------------------------------------------------------------===//
4732 // s_waitcnt
4733 //===----------------------------------------------------------------------===//
4734 
4735 static bool
4736 encodeCnt(
4737   const AMDGPU::IsaVersion ISA,
4738   int64_t &IntVal,
4739   int64_t CntVal,
4740   bool Saturate,
4741   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4742   unsigned (*decode)(const IsaVersion &Version, unsigned))
4743 {
4744   bool Failed = false;
4745 
4746   IntVal = encode(ISA, IntVal, CntVal);
4747   if (CntVal != decode(ISA, IntVal)) {
4748     if (Saturate) {
4749       IntVal = encode(ISA, IntVal, -1);
4750     } else {
4751       Failed = true;
4752     }
4753   }
4754   return Failed;
4755 }
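// For example, a count that exceeds the width of the target's vmcnt field
// fails the decode round-trip check above; the "_sat" counter variants
// saturate to the field maximum (by encoding -1) instead of failing.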
4756 
4757 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4758 
4759   SMLoc CntLoc = getLoc();
4760   StringRef CntName = getTokenStr();
4761 
4762   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4763       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4764     return false;
4765 
4766   int64_t CntVal;
4767   SMLoc ValLoc = getLoc();
4768   if (!parseExpr(CntVal))
4769     return false;
4770 
4771   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4772 
4773   bool Failed = true;
4774   bool Sat = CntName.endswith("_sat");
4775 
4776   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4777     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4778   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4779     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4780   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4781     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4782   } else {
4783     Error(CntLoc, "invalid counter name " + CntName);
4784     return false;
4785   }
4786 
4787   if (Failed) {
4788     Error(ValLoc, "value is too large for " + CntName);
4789     return false;
4790   }
4791 
4792   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4793     return false;
4794 
4795   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4796     if (isToken(AsmToken::EndOfStatement)) {
4797       Error(getLoc(), "expected a counter name");
4798       return false;
4799     }
4800   }
4801 
4802   return true;
4803 }
4804 
4805 OperandMatchResultTy
4806 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4807   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4808   int64_t Waitcnt = getWaitcntBitMask(ISA);
4809   SMLoc S = getLoc();
4810 
4811   // If parsing failed, do not return an error code
4812   // to avoid excessive error messages.
4813   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4814     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4815   } else {
4816     parseExpr(Waitcnt);
4817   }
4818 
4819   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4820   return MatchOperand_Success;
4821 }
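// Accepted forms include "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)", with
// counters optionally separated by '&' or ',', as well as a plain integer
// expression such as "s_waitcnt 0"; counters that are not mentioned keep
// their all-ones default from getWaitcntBitMask().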
4822 
4823 bool
4824 AMDGPUOperand::isSWaitCnt() const {
4825   return isImm();
4826 }
4827 
4828 //===----------------------------------------------------------------------===//
4829 // hwreg
4830 //===----------------------------------------------------------------------===//
4831 
4832 bool
4833 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4834                                 int64_t &Offset,
4835                                 int64_t &Width) {
4836   using namespace llvm::AMDGPU::Hwreg;
4837 
4838   // The register may be specified by name or using a numeric code
4839   if (isToken(AsmToken::Identifier) &&
4840       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4841     HwReg.IsSymbolic = true;
4842     lex(); // skip message name
4843   } else if (!parseExpr(HwReg.Id)) {
4844     return false;
4845   }
4846 
4847   if (trySkipToken(AsmToken::RParen))
4848     return true;
4849 
4850   // parse optional params
4851   return
4852     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4853     parseExpr(Offset) &&
4854     skipToken(AsmToken::Comma, "expected a comma") &&
4855     parseExpr(Width) &&
4856     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4857 }
4858 
4859 bool
4860 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4861                                const int64_t Offset,
4862                                const int64_t Width,
4863                                const SMLoc Loc) {
4864 
4865   using namespace llvm::AMDGPU::Hwreg;
4866 
4867   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4868     Error(Loc, "specified hardware register is not supported on this GPU");
4869     return false;
4870   } else if (!isValidHwreg(HwReg.Id)) {
4871     Error(Loc, "invalid hardware register code: only 6-bit values are legal");
4872     return false;
4873   } else if (!isValidHwregOffset(Offset)) {
4874     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4875     return false;
4876   } else if (!isValidHwregWidth(Width)) {
4877     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4878     return false;
4879   }
4880   return true;
4881 }
4882 
4883 OperandMatchResultTy
4884 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4885   using namespace llvm::AMDGPU::Hwreg;
4886 
4887   int64_t ImmVal = 0;
4888   SMLoc Loc = getLoc();
4889 
4890   // If parsing failed, do not return an error code
4891   // to avoid excessive error messages.
4892   if (trySkipId("hwreg", AsmToken::LParen)) {
4893     OperandInfoTy HwReg(ID_UNKNOWN_);
4894     int64_t Offset = OFFSET_DEFAULT_;
4895     int64_t Width = WIDTH_DEFAULT_;
4896     if (parseHwregBody(HwReg, Offset, Width) &&
4897         validateHwreg(HwReg, Offset, Width, Loc)) {
4898       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4899     }
4900   } else if (parseExpr(ImmVal)) {
4901     if (ImmVal < 0 || !isUInt<16>(ImmVal))
4902       Error(Loc, "invalid immediate: only 16-bit values are legal");
4903   }
4904 
4905   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4906   return MatchOperand_Success;
4907 }
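// Example: "s_setreg_b32 hwreg(HW_REG_TRAPSTS, 3, 5), s0" selects a 5-bit
// field starting at bit offset 3; a plain 16-bit immediate operand is also
// accepted and only range-checked.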
4908 
4909 bool AMDGPUOperand::isHwreg() const {
4910   return isImmTy(ImmTyHwreg);
4911 }
4912 
4913 //===----------------------------------------------------------------------===//
4914 // sendmsg
4915 //===----------------------------------------------------------------------===//
4916 
4917 bool
4918 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4919                                   OperandInfoTy &Op,
4920                                   OperandInfoTy &Stream) {
4921   using namespace llvm::AMDGPU::SendMsg;
4922 
4923   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4924     Msg.IsSymbolic = true;
4925     lex(); // skip message name
4926   } else if (!parseExpr(Msg.Id)) {
4927     return false;
4928   }
4929 
4930   if (trySkipToken(AsmToken::Comma)) {
4931     Op.IsDefined = true;
4932     if (isToken(AsmToken::Identifier) &&
4933         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4934       lex(); // skip operation name
4935     } else if (!parseExpr(Op.Id)) {
4936       return false;
4937     }
4938 
4939     if (trySkipToken(AsmToken::Comma)) {
4940       Stream.IsDefined = true;
4941       if (!parseExpr(Stream.Id))
4942         return false;
4943     }
4944   }
4945 
4946   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4947 }
4948 
4949 bool
4950 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4951                                  const OperandInfoTy &Op,
4952                                  const OperandInfoTy &Stream,
4953                                  const SMLoc S) {
4954   using namespace llvm::AMDGPU::SendMsg;
4955 
4956   // Validation strictness depends on whether the message is specified
4957   // in a symbolic or in a numeric form. In the latter case,
4958   // only the possibility of encoding is checked.
4959   bool Strict = Msg.IsSymbolic;
4960 
4961   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
4962     Error(S, "invalid message id");
4963     return false;
4964   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
4965     Error(S, Op.IsDefined ?
4966              "message does not support operations" :
4967              "missing message operation");
4968     return false;
4969   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
4970     Error(S, "invalid operation id");
4971     return false;
4972   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
4973     Error(S, "message operation does not support streams");
4974     return false;
4975   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
4976     Error(S, "invalid message stream id");
4977     return false;
4978   }
4979   return true;
4980 }
4981 
4982 OperandMatchResultTy
4983 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4984   using namespace llvm::AMDGPU::SendMsg;
4985 
4986   int64_t ImmVal = 0;
4987   SMLoc Loc = getLoc();
4988 
4989   // If parsing failed, do not return an error code
4990   // to avoid excessive error messages.
4991   if (trySkipId("sendmsg", AsmToken::LParen)) {
4992     OperandInfoTy Msg(ID_UNKNOWN_);
4993     OperandInfoTy Op(OP_NONE_);
4994     OperandInfoTy Stream(STREAM_ID_NONE_);
4995     if (parseSendMsgBody(Msg, Op, Stream) &&
4996         validateSendMsg(Msg, Op, Stream, Loc)) {
4997       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
4998     }
4999   } else if (parseExpr(ImmVal)) {
5000     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5001       Error(Loc, "invalid immediate: only 16-bit values are legal");
5002   }
5003 
5004   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5005   return MatchOperand_Success;
5006 }
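// Example: "s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)" encodes the message
// id, operation id and stream id into one 16-bit immediate; a numeric form
// such as "sendmsg(2, 1)" is checked only for encodability.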
5007 
5008 bool AMDGPUOperand::isSendMsg() const {
5009   return isImmTy(ImmTySendMsg);
5010 }
5011 
5012 //===----------------------------------------------------------------------===//
5013 // v_interp
5014 //===----------------------------------------------------------------------===//
5015 
5016 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5017   if (getLexer().getKind() != AsmToken::Identifier)
5018     return MatchOperand_NoMatch;
5019 
5020   StringRef Str = Parser.getTok().getString();
5021   int Slot = StringSwitch<int>(Str)
5022     .Case("p10", 0)
5023     .Case("p20", 1)
5024     .Case("p0", 2)
5025     .Default(-1);
5026 
5027   SMLoc S = Parser.getTok().getLoc();
5028   if (Slot == -1)
5029     return MatchOperand_ParseFail;
5030 
5031   Parser.Lex();
5032   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5033                                               AMDGPUOperand::ImmTyInterpSlot));
5034   return MatchOperand_Success;
5035 }
5036 
5037 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5038   if (getLexer().getKind() != AsmToken::Identifier)
5039     return MatchOperand_NoMatch;
5040 
5041   StringRef Str = Parser.getTok().getString();
5042   if (!Str.startswith("attr"))
5043     return MatchOperand_NoMatch;
5044 
5045   StringRef Chan = Str.take_back(2);
5046   int AttrChan = StringSwitch<int>(Chan)
5047     .Case(".x", 0)
5048     .Case(".y", 1)
5049     .Case(".z", 2)
5050     .Case(".w", 3)
5051     .Default(-1);
5052   if (AttrChan == -1)
5053     return MatchOperand_ParseFail;
5054 
5055   Str = Str.drop_back(2).drop_front(4);
5056 
5057   uint8_t Attr;
5058   if (Str.getAsInteger(10, Attr))
5059     return MatchOperand_ParseFail;
5060 
5061   SMLoc S = Parser.getTok().getLoc();
5062   Parser.Lex();
5063   if (Attr > 63) {
5064     Error(S, "out of bounds attr");
5065     return MatchOperand_Success;
5066   }
5067 
5068   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5069 
5070   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5071                                               AMDGPUOperand::ImmTyInterpAttr));
5072   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5073                                               AMDGPUOperand::ImmTyAttrChan));
5074   return MatchOperand_Success;
5075 }
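// Example: "attr12.x" produces Attr == 12 and AttrChan == 0, while
// "attr64.y" is rejected with "out of bounds attr" because only attributes
// 0..63 exist.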
5076 
5077 //===----------------------------------------------------------------------===//
5078 // exp
5079 //===----------------------------------------------------------------------===//
5080 
5081 void AMDGPUAsmParser::errorExpTgt() {
5082   Error(Parser.getTok().getLoc(), "invalid exp target");
5083 }
5084 
5085 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5086                                                       uint8_t &Val) {
5087   if (Str == "null") {
5088     Val = 9;
5089     return MatchOperand_Success;
5090   }
5091 
5092   if (Str.startswith("mrt")) {
5093     Str = Str.drop_front(3);
5094     if (Str == "z") { // == mrtz
5095       Val = 8;
5096       return MatchOperand_Success;
5097     }
5098 
5099     if (Str.getAsInteger(10, Val))
5100       return MatchOperand_ParseFail;
5101 
5102     if (Val > 7)
5103       errorExpTgt();
5104 
5105     return MatchOperand_Success;
5106   }
5107 
5108   if (Str.startswith("pos")) {
5109     Str = Str.drop_front(3);
5110     if (Str.getAsInteger(10, Val))
5111       return MatchOperand_ParseFail;
5112 
5113     if (Val > 4 || (Val == 4 && !isGFX10()))
5114       errorExpTgt();
5115 
5116     Val += 12;
5117     return MatchOperand_Success;
5118   }
5119 
5120   if (isGFX10() && Str == "prim") {
5121     Val = 20;
5122     return MatchOperand_Success;
5123   }
5124 
5125   if (Str.startswith("param")) {
5126     Str = Str.drop_front(5);
5127     if (Str.getAsInteger(10, Val))
5128       return MatchOperand_ParseFail;
5129 
5130     if (Val >= 32)
5131       errorExpTgt();
5132 
5133     Val += 32;
5134     return MatchOperand_Success;
5135   }
5136 
5137   if (Str.startswith("invalid_target_")) {
5138     Str = Str.drop_front(15);
5139     if (Str.getAsInteger(10, Val))
5140       return MatchOperand_ParseFail;
5141 
5142     errorExpTgt();
5143     return MatchOperand_Success;
5144   }
5145 
5146   return MatchOperand_NoMatch;
5147 }
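// Summary of the mapping above: "mrt0".."mrt7" -> 0..7, "mrtz" -> 8,
// "null" -> 9, "pos0".."pos3" -> 12..15 ("pos4" only on GFX10),
// "prim" -> 20 (GFX10 only), and "param0".."param31" -> 32..63.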
5148 
5149 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5150   uint8_t Val;
5151   StringRef Str = Parser.getTok().getString();
5152 
5153   auto Res = parseExpTgtImpl(Str, Val);
5154   if (Res != MatchOperand_Success)
5155     return Res;
5156 
5157   SMLoc S = Parser.getTok().getLoc();
5158   Parser.Lex();
5159 
5160   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5161                                               AMDGPUOperand::ImmTyExpTgt));
5162   return MatchOperand_Success;
5163 }
5164 
5165 //===----------------------------------------------------------------------===//
5166 // parser helpers
5167 //===----------------------------------------------------------------------===//
5168 
5169 bool
5170 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5171   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5172 }
5173 
5174 bool
5175 AMDGPUAsmParser::isId(const StringRef Id) const {
5176   return isId(getToken(), Id);
5177 }
5178 
5179 bool
5180 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5181   return getTokenKind() == Kind;
5182 }
5183 
5184 bool
5185 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5186   if (isId(Id)) {
5187     lex();
5188     return true;
5189   }
5190   return false;
5191 }
5192 
5193 bool
5194 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5195   if (isId(Id) && peekToken().is(Kind)) {
5196     lex();
5197     lex();
5198     return true;
5199   }
5200   return false;
5201 }
5202 
5203 bool
5204 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5205   if (isToken(Kind)) {
5206     lex();
5207     return true;
5208   }
5209   return false;
5210 }
5211 
5212 bool
5213 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5214                            const StringRef ErrMsg) {
5215   if (!trySkipToken(Kind)) {
5216     Error(getLoc(), ErrMsg);
5217     return false;
5218   }
5219   return true;
5220 }
5221 
5222 bool
5223 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5224   return !getParser().parseAbsoluteExpression(Imm);
5225 }
5226 
5227 bool
5228 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5229   if (isToken(AsmToken::String)) {
5230     Val = getToken().getStringContents();
5231     lex();
5232     return true;
5233   } else {
5234     Error(getLoc(), ErrMsg);
5235     return false;
5236   }
5237 }
5238 
5239 AsmToken
5240 AMDGPUAsmParser::getToken() const {
5241   return Parser.getTok();
5242 }
5243 
5244 AsmToken
5245 AMDGPUAsmParser::peekToken() {
5246   return getLexer().peekTok();
5247 }
5248 
5249 void
5250 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5251   auto TokCount = getLexer().peekTokens(Tokens);
5252 
5253   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5254     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5255 }
5256 
5257 AsmToken::TokenKind
5258 AMDGPUAsmParser::getTokenKind() const {
5259   return getLexer().getKind();
5260 }
5261 
5262 SMLoc
5263 AMDGPUAsmParser::getLoc() const {
5264   return getToken().getLoc();
5265 }
5266 
5267 StringRef
5268 AMDGPUAsmParser::getTokenStr() const {
5269   return getToken().getString();
5270 }
5271 
5272 void
5273 AMDGPUAsmParser::lex() {
5274   Parser.Lex();
5275 }
5276 
5277 //===----------------------------------------------------------------------===//
5278 // swizzle
5279 //===----------------------------------------------------------------------===//
5280 
5281 LLVM_READNONE
5282 static unsigned
5283 encodeBitmaskPerm(const unsigned AndMask,
5284                   const unsigned OrMask,
5285                   const unsigned XorMask) {
5286   using namespace llvm::AMDGPU::Swizzle;
5287 
5288   return BITMASK_PERM_ENC |
5289          (AndMask << BITMASK_AND_SHIFT) |
5290          (OrMask  << BITMASK_OR_SHIFT)  |
5291          (XorMask << BITMASK_XOR_SHIFT);
5292 }
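// Example: parseSwizzleBroadcast() with group size 4 and lane 3 calls
// encodeBitmaskPerm(0x1C, 0x3, 0): the AndMask keeps the group-selecting
// bits of a lane id, the OrMask forces the low bits to lane 3, and no bits
// are inverted.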
5293 
5294 bool
5295 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5296                                       const unsigned MinVal,
5297                                       const unsigned MaxVal,
5298                                       const StringRef ErrMsg) {
5299   for (unsigned i = 0; i < OpNum; ++i) {
5300     if (!skipToken(AsmToken::Comma, "expected a comma")) {
5301       return false;
5302     }
5303     SMLoc ExprLoc = Parser.getTok().getLoc();
5304     if (!parseExpr(Op[i])) {
5305       return false;
5306     }
5307     if (Op[i] < MinVal || Op[i] > MaxVal) {
5308       Error(ExprLoc, ErrMsg);
5309       return false;
5310     }
5311   }
5312 
5313   return true;
5314 }
5315 
5316 bool
5317 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5318   using namespace llvm::AMDGPU::Swizzle;
5319 
5320   int64_t Lane[LANE_NUM];
5321   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5322                            "expected a 2-bit lane id")) {
5323     Imm = QUAD_PERM_ENC;
5324     for (unsigned I = 0; I < LANE_NUM; ++I) {
5325       Imm |= Lane[I] << (LANE_SHIFT * I);
5326     }
5327     return true;
5328   }
5329   return false;
5330 }
5331 
5332 bool
5333 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5334   using namespace llvm::AMDGPU::Swizzle;
5335 
5336   SMLoc S = Parser.getTok().getLoc();
5337   int64_t GroupSize;
5338   int64_t LaneIdx;
5339 
5340   if (!parseSwizzleOperands(1, &GroupSize,
5341                             2, 32,
5342                             "group size must be in the interval [2,32]")) {
5343     return false;
5344   }
5345   if (!isPowerOf2_64(GroupSize)) {
5346     Error(S, "group size must be a power of two");
5347     return false;
5348   }
5349   if (parseSwizzleOperands(1, &LaneIdx,
5350                            0, GroupSize - 1,
5351                            "lane id must be in the interval [0,group size - 1]")) {
5352     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5353     return true;
5354   }
5355   return false;
5356 }
5357 
5358 bool
5359 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5360   using namespace llvm::AMDGPU::Swizzle;
5361 
5362   SMLoc S = Parser.getTok().getLoc();
5363   int64_t GroupSize;
5364 
5365   if (!parseSwizzleOperands(1, &GroupSize,
5366       2, 32, "group size must be in the interval [2,32]")) {
5367     return false;
5368   }
5369   if (!isPowerOf2_64(GroupSize)) {
5370     Error(S, "group size must be a power of two");
5371     return false;
5372   }
5373 
5374   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5375   return true;
5376 }
5377 
5378 bool
5379 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5380   using namespace llvm::AMDGPU::Swizzle;
5381 
5382   SMLoc S = Parser.getTok().getLoc();
5383   int64_t GroupSize;
5384 
5385   if (!parseSwizzleOperands(1, &GroupSize,
5386       1, 16, "group size must be in the interval [1,16]")) {
5387     return false;
5388   }
5389   if (!isPowerOf2_64(GroupSize)) {
5390     Error(S, "group size must be a power of two");
5391     return false;
5392   }
5393 
5394   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5395   return true;
5396 }
5397 
5398 bool
5399 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5400   using namespace llvm::AMDGPU::Swizzle;
5401 
5402   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5403     return false;
5404   }
5405 
5406   StringRef Ctl;
5407   SMLoc StrLoc = Parser.getTok().getLoc();
5408   if (!parseString(Ctl)) {
5409     return false;
5410   }
5411   if (Ctl.size() != BITMASK_WIDTH) {
5412     Error(StrLoc, "expected a 5-character mask");
5413     return false;
5414   }
5415 
5416   unsigned AndMask = 0;
5417   unsigned OrMask = 0;
5418   unsigned XorMask = 0;
5419 
5420   for (size_t i = 0; i < Ctl.size(); ++i) {
5421     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5422     switch(Ctl[i]) {
5423     default:
5424       Error(StrLoc, "invalid mask");
5425       return false;
5426     case '0':
5427       break;
5428     case '1':
5429       OrMask |= Mask;
5430       break;
5431     case 'p':
5432       AndMask |= Mask;
5433       break;
5434     case 'i':
5435       AndMask |= Mask;
5436       XorMask |= Mask;
5437       break;
5438     }
5439   }
5440 
5441   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5442   return true;
5443 }
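// Example: the control string "01pip" is applied MSB-first to the 5 lane-id
// bits: '0' forces a bit to 0, '1' forces it to 1, 'p' preserves it, and
// 'i' inverts it.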
5444 
5445 bool
5446 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5447 
5448   SMLoc OffsetLoc = Parser.getTok().getLoc();
5449 
5450   if (!parseExpr(Imm)) {
5451     return false;
5452   }
5453   if (!isUInt<16>(Imm)) {
5454     Error(OffsetLoc, "expected a 16-bit offset");
5455     return false;
5456   }
5457   return true;
5458 }
5459 
5460 bool
5461 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5462   using namespace llvm::AMDGPU::Swizzle;
5463 
5464   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5465 
5466     SMLoc ModeLoc = Parser.getTok().getLoc();
5467     bool Ok = false;
5468 
5469     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5470       Ok = parseSwizzleQuadPerm(Imm);
5471     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5472       Ok = parseSwizzleBitmaskPerm(Imm);
5473     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5474       Ok = parseSwizzleBroadcast(Imm);
5475     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5476       Ok = parseSwizzleSwap(Imm);
5477     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5478       Ok = parseSwizzleReverse(Imm);
5479     } else {
5480       Error(ModeLoc, "expected a swizzle mode");
5481     }
5482 
5483     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5484   }
5485 
5486   return false;
5487 }
5488 
5489 OperandMatchResultTy
5490 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5491   SMLoc S = Parser.getTok().getLoc();
5492   int64_t Imm = 0;
5493 
5494   if (trySkipId("offset")) {
5495 
5496     bool Ok = false;
5497     if (skipToken(AsmToken::Colon, "expected a colon")) {
5498       if (trySkipId("swizzle")) {
5499         Ok = parseSwizzleMacro(Imm);
5500       } else {
5501         Ok = parseSwizzleOffset(Imm);
5502       }
5503     }
5504 
5505     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5506 
5507     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5508   } else {
5509     // Swizzle "offset" operand is optional.
5510     // If it is omitted, try parsing other optional operands.
5511     return parseOptionalOpr(Operands);
5512   }
5513 }
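// Example: "ds_swizzle_b32 v0, v1 offset:swizzle(SWAP,2)" uses the macro
// form, while "ds_swizzle_b32 v0, v1 offset:0x8055" supplies the raw 16-bit
// encoding directly.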
5514 
5515 bool
5516 AMDGPUOperand::isSwizzle() const {
5517   return isImmTy(ImmTySwizzle);
5518 }
5519 
5520 //===----------------------------------------------------------------------===//
5521 // VGPR Index Mode
5522 //===----------------------------------------------------------------------===//
5523 
5524 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5525 
5526   using namespace llvm::AMDGPU::VGPRIndexMode;
5527 
5528   if (trySkipToken(AsmToken::RParen)) {
5529     return OFF;
5530   }
5531 
5532   int64_t Imm = 0;
5533 
5534   while (true) {
5535     unsigned Mode = 0;
5536     SMLoc S = Parser.getTok().getLoc();
5537 
5538     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5539       if (trySkipId(IdSymbolic[ModeId])) {
5540         Mode = 1 << ModeId;
5541         break;
5542       }
5543     }
5544 
5545     if (Mode == 0) {
5546       Error(S, (Imm == 0)?
5547                "expected a VGPR index mode or a closing parenthesis" :
5548                "expected a VGPR index mode");
5549       break;
5550     }
5551 
5552     if (Imm & Mode) {
5553       Error(S, "duplicate VGPR index mode");
5554       break;
5555     }
5556     Imm |= Mode;
5557 
5558     if (trySkipToken(AsmToken::RParen))
5559       break;
5560     if (!skipToken(AsmToken::Comma,
5561                    "expected a comma or a closing parenthesis"))
5562       break;
5563   }
5564 
5565   return Imm;
5566 }
5567 
5568 OperandMatchResultTy
5569 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5570 
5571   int64_t Imm = 0;
5572   SMLoc S = Parser.getTok().getLoc();
5573 
5574   if (getLexer().getKind() == AsmToken::Identifier &&
5575       Parser.getTok().getString() == "gpr_idx" &&
5576       getLexer().peekTok().is(AsmToken::LParen)) {
5577 
5578     Parser.Lex();
5579     Parser.Lex();
5580 
5581     // If parsing failed, trigger an error but do not return an error code
5582     // to avoid excessive error messages.
5583     Imm = parseGPRIdxMacro();
5584 
5585   } else {
5586     if (getParser().parseAbsoluteExpression(Imm))
5587       return MatchOperand_NoMatch;
5588     if (Imm < 0 || !isUInt<4>(Imm)) {
5589       Error(S, "invalid immediate: only 4-bit values are legal");
5590     }
5591   }
5592 
5593   Operands.push_back(
5594       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5595   return MatchOperand_Success;
5596 }
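// Example: "s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)" sets one mode bit per
// listed name; a bare immediate is accepted instead if it fits in 4 bits.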
5597 
5598 bool AMDGPUOperand::isGPRIdxMode() const {
5599   return isImmTy(ImmTyGprIdxMode);
5600 }
5601 
5602 //===----------------------------------------------------------------------===//
5603 // sopp branch targets
5604 //===----------------------------------------------------------------------===//
5605 
5606 OperandMatchResultTy
5607 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5608   SMLoc S = Parser.getTok().getLoc();
5609 
5610   switch (getLexer().getKind()) {
5611     default: return MatchOperand_ParseFail;
5612     case AsmToken::Integer: {
5613       int64_t Imm;
5614       if (getParser().parseAbsoluteExpression(Imm))
5615         return MatchOperand_ParseFail;
5616       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5617       return MatchOperand_Success;
5618     }
5619 
5620     case AsmToken::Identifier:
5621       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5622           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5623                                   Parser.getTok().getString()), getContext()), S));
5624       Parser.Lex();
5625       return MatchOperand_Success;
5626   }
5627 }
5628 
5629 //===----------------------------------------------------------------------===//
5630 // Boolean holding registers
5631 //===----------------------------------------------------------------------===//
5632 
5633 OperandMatchResultTy
5634 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5635   return parseReg(Operands);
5636 }
5637 
5638 //===----------------------------------------------------------------------===//
5639 // mubuf
5640 //===----------------------------------------------------------------------===//
5641 
5642 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5643   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5644 }
5645 
5646 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5647   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5648 }
5649 
5650 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5651   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5652 }
5653 
5654 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5655                                const OperandVector &Operands,
5656                                bool IsAtomic,
5657                                bool IsAtomicReturn,
5658                                bool IsLds) {
5659   bool IsLdsOpcode = IsLds;
5660   bool HasLdsModifier = false;
5661   OptionalImmIndexMap OptionalIdx;
5662   assert(IsAtomicReturn ? IsAtomic : true);
5663   unsigned FirstOperandIdx = 1;
5664 
5665   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5666     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5667 
5668     // Add the register arguments
5669     if (Op.isReg()) {
5670       Op.addRegOperands(Inst, 1);
5671       // Insert a tied src for atomic return dst.
5672       // This cannot be postponed as subsequent calls to
5673       // addImmOperands rely on the correct number of MC operands.
5674       if (IsAtomicReturn && i == FirstOperandIdx)
5675         Op.addRegOperands(Inst, 1);
5676       continue;
5677     }
5678 
5679     // Handle the case where soffset is an immediate
5680     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5681       Op.addImmOperands(Inst, 1);
5682       continue;
5683     }
5684 
5685     HasLdsModifier |= Op.isLDS();
5686 
5687     // Handle tokens like 'offen' which are sometimes hard-coded into the
5688     // asm string.  There are no MCInst operands for these.
5689     if (Op.isToken()) {
5690       continue;
5691     }
5692     assert(Op.isImm());
5693 
5694     // Handle optional arguments
5695     OptionalIdx[Op.getImmTy()] = i;
5696   }
5697 
5698   // This is a workaround for an llvm quirk which may result in an
5699   // incorrect instruction selection. Lds and non-lds versions of
5700   // MUBUF instructions are identical except that lds versions
5701   // have a mandatory 'lds' modifier. However, this modifier follows
5702   // the optional modifiers, and the llvm asm matcher regards this
5703   // 'lds' modifier as an optional one. As a result, an lds version
5704   // of an opcode may be selected even if it has no 'lds' modifier.
5705   if (IsLdsOpcode && !HasLdsModifier) {
5706     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5707     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5708       Inst.setOpcode(NoLdsOpcode);
5709       IsLdsOpcode = false;
5710     }
5711   }
5712 
5713   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5714   if (!IsAtomic) { // glc is hard-coded.
5715     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5716   }
5717   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5718 
5719   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5720     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5721   }
5722 
5723   if (isGFX10())
5724     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5725 }
5726 
5727 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5728   OptionalImmIndexMap OptionalIdx;
5729 
5730   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5731     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5732 
5733     // Add the register arguments
5734     if (Op.isReg()) {
5735       Op.addRegOperands(Inst, 1);
5736       continue;
5737     }
5738 
5739     // Handle the case where soffset is an immediate
5740     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5741       Op.addImmOperands(Inst, 1);
5742       continue;
5743     }
5744 
5745     // Handle tokens like 'offen' which are sometimes hard-coded into the
5746     // asm string.  There are no MCInst operands for these.
5747     if (Op.isToken()) {
5748       continue;
5749     }
5750     assert(Op.isImm());
5751 
5752     // Handle optional arguments
5753     OptionalIdx[Op.getImmTy()] = i;
5754   }
5755 
5756   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5757                         AMDGPUOperand::ImmTyOffset);
5758   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5759   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5760   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5761   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5762 
5763   if (isGFX10())
5764     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5765 }
5766 
5767 //===----------------------------------------------------------------------===//
5768 // mimg
5769 //===----------------------------------------------------------------------===//
5770 
5771 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5772                               bool IsAtomic) {
5773   unsigned I = 1;
5774   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5775   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5776     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5777   }
5778 
5779   if (IsAtomic) {
5780     // Add src, same as dst
5781     assert(Desc.getNumDefs() == 1);
5782     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5783   }
5784 
5785   OptionalImmIndexMap OptionalIdx;
5786 
5787   for (unsigned E = Operands.size(); I != E; ++I) {
5788     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5789 
5790     // Add the register arguments
5791     if (Op.isReg()) {
5792       Op.addRegOperands(Inst, 1);
5793     } else if (Op.isImmModifier()) {
5794       OptionalIdx[Op.getImmTy()] = I;
5795     } else if (!Op.isToken()) {
5796       llvm_unreachable("unexpected operand type");
5797     }
5798   }
5799 
5800   bool IsGFX10 = isGFX10();
5801 
5802   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5803   if (IsGFX10)
5804     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5805   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5806   if (IsGFX10)
5807     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5808   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5809   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5810   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5811   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5812   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5813   if (!IsGFX10)
5814     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5815   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5816 }
5817 
5818 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5819   cvtMIMG(Inst, Operands, true);
5820 }
5821 
5822 //===----------------------------------------------------------------------===//
5823 // smrd
5824 //===----------------------------------------------------------------------===//
5825 
5826 bool AMDGPUOperand::isSMRDOffset8() const {
5827   return isImm() && isUInt<8>(getImm());
5828 }
5829 
5830 bool AMDGPUOperand::isSMRDOffset20() const {
5831   return isImm() && isUInt<20>(getImm());
5832 }
5833 
5834 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5835   // 32-bit literals are only supported on CI and we only want to use them
5836   // when the offset is > 8 bits.
5837   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5838 }
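// For example, on CI "s_load_dword s0, s[0:1], 0x1234" must use the 32-bit
// literal form because the offset does not fit in 8 bits.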
5839 
5840 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5841   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5842 }
5843 
5844 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5845   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5846 }
5847 
5848 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5849   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5850 }
5851 
5852 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5853   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5854 }
5855 
5856 //===----------------------------------------------------------------------===//
5857 // vop3
5858 //===----------------------------------------------------------------------===//
5859 
5860 static bool ConvertOmodMul(int64_t &Mul) {
5861   if (Mul != 1 && Mul != 2 && Mul != 4)
5862     return false;
5863 
5864   Mul >>= 1;
5865   return true;
5866 }
5867 
5868 static bool ConvertOmodDiv(int64_t &Div) {
5869   if (Div == 1) {
5870     Div = 0;
5871     return true;
5872   }
5873 
5874   if (Div == 2) {
5875     Div = 3;
5876     return true;
5877   }
5878 
5879   return false;
5880 }
5881 
5882 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5883   if (BoundCtrl == 0) {
5884     BoundCtrl = 1;
5885     return true;
5886   }
5887 
5888   if (BoundCtrl == -1) {
5889     BoundCtrl = 0;
5890     return true;
5891   }
5892 
5893   return false;
5894 }
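// Examples of the conversions above: "mul:2" -> omod 1 and "mul:4" ->
// omod 2 (Mul >> 1); "div:2" -> omod 3; "bound_ctrl:0" -> 1.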
5895 
5896 // Note: the order in this table matches the order of operands in AsmString.
5897 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5898   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5899   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5900   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5901   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5902   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5903   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5904   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5905   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5906   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5907   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
5908   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5909   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5910   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5911   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5912   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5913   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5914   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5915   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5916   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5917   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5918   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5919   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5920   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5921   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5922   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5923   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
5924   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5925   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5926   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5927   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
5928   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5929   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5930   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5931   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5932   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5933   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5934   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5935   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5936   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5937   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
5938   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
5939   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
5940   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
5941 };
5942 
5943 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5944   unsigned size = Operands.size();
5945   assert(size > 0);
5946 
5947   OperandMatchResultTy res = parseOptionalOpr(Operands);
5948 
5949   // This is a hack to enable hardcoded mandatory operands which follow
5950   // optional operands.
5951   //
5952   // The current design assumes that all operands after the first optional
5953   // operand are also optional. However, the implementation of some
5954   // instructions violates this rule (see e.g. flat/global atomics, which
5955   // have hardcoded 'glc' operands).
5956   //
5957   // To alleviate this problem, we have to (implicitly) parse extra operands
5958   // to make sure the autogenerated parser never hits a hardcoded mandatory operand.
5959 
5960   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5961 
5962     // We have parsed the first optional operand.
5963     // Parse as many operands as necessary to skip all mandatory operands.
5964 
5965     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5966       if (res != MatchOperand_Success ||
5967           getLexer().is(AsmToken::EndOfStatement)) break;
5968       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5969       res = parseOptionalOpr(Operands);
5970     }
5971   }
5972 
5973   return res;
5974 }
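// For instance, in "flat_atomic_add v0, v[1:2], v3 glc slc" the "glc" token
// is hardcoded into the asm string, so the lookahead loop above keeps
// parsing past the first optional operand until it is consumed.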
5975 
5976 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5977   OperandMatchResultTy res;
5978   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5979     // try to parse any optional operand here
5980     if (Op.IsBit) {
5981       res = parseNamedBit(Op.Name, Operands, Op.Type);
5982     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5983       res = parseOModOperand(Operands);
5984     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5985                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5986                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5987       res = parseSDWASel(Operands, Op.Name, Op.Type);
5988     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5989       res = parseSDWADstUnused(Operands);
5990     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5991                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5992                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5993                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5994       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5995                                         Op.ConvertResult);
5996     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
5997       res = parseDim(Operands);
5998     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
5999       res = parseDfmtNfmt(Operands);
6000     } else {
6001       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6002     }
6003     if (res != MatchOperand_NoMatch) {
6004       return res;
6005     }
6006   }
6007   return MatchOperand_NoMatch;
6008 }
6009 
6010 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6011   StringRef Name = Parser.getTok().getString();
6012   if (Name == "mul") {
6013     return parseIntWithPrefix("mul", Operands,
6014                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6015   }
6016 
6017   if (Name == "div") {
6018     return parseIntWithPrefix("div", Operands,
6019                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6020   }
6021 
6022   return MatchOperand_NoMatch;
6023 }
6024 
6025 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6026   cvtVOP3P(Inst, Operands);
6027 
6028   int Opc = Inst.getOpcode();
6029 
6030   int SrcNum;
6031   const int Ops[] = { AMDGPU::OpName::src0,
6032                       AMDGPU::OpName::src1,
6033                       AMDGPU::OpName::src2 };
6034   for (SrcNum = 0;
6035        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6036        ++SrcNum);
6037   assert(SrcNum > 0);
6038 
6039   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6040   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6041 
6042   if ((OpSel & (1 << SrcNum)) != 0) {
6043     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6044     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6045     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6046   }
6047 }
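// A note on the fix-up above: for an instruction with N sources, bit N of
// op_sel describes the destination; since there is no dst modifiers
// operand, the DST_OP_SEL flag is carried in src0_modifiers.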
6048 
6049 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6050       // 1. This operand is an input modifiers operand
6051   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6052       // 2. This is not the last operand
6053       && Desc.NumOperands > (OpNum + 1)
6054       // 3. The next operand has a register class
6055       && Desc.OpInfo[OpNum + 1].RegClass != -1
6056       // 4. The next operand is not tied to any other operand
6057       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6058 }
6059 
6060 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6061 {
6062   OptionalImmIndexMap OptionalIdx;
6063   unsigned Opc = Inst.getOpcode();
6064 
6065   unsigned I = 1;
6066   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6067   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6068     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6069   }
6070 
6071   for (unsigned E = Operands.size(); I != E; ++I) {
6072     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6073     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6074       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6075     } else if (Op.isInterpSlot() ||
6076                Op.isInterpAttr() ||
6077                Op.isAttrChan()) {
6078       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6079     } else if (Op.isImmModifier()) {
6080       OptionalIdx[Op.getImmTy()] = I;
6081     } else {
6082       llvm_unreachable("unhandled operand type");
6083     }
6084   }
6085 
6086   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6087     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6088   }
6089 
6090   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6091     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6092   }
6093 
6094   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6095     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6096   }
6097 }
6098 
6099 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6100                               OptionalImmIndexMap &OptionalIdx) {
6101   unsigned Opc = Inst.getOpcode();
6102 
6103   unsigned I = 1;
6104   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6105   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6106     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6107   }
6108 
6109   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6110     // This instruction has src modifiers
6111     for (unsigned E = Operands.size(); I != E; ++I) {
6112       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6113       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6114         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6115       } else if (Op.isImmModifier()) {
6116         OptionalIdx[Op.getImmTy()] = I;
6117       } else if (Op.isRegOrImm()) {
6118         Op.addRegOrImmOperands(Inst, 1);
6119       } else {
6120         llvm_unreachable("unhandled operand type");
6121       }
6122     }
6123   } else {
6124     // No src modifiers
6125     for (unsigned E = Operands.size(); I != E; ++I) {
6126       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6127       if (Op.isMod()) {
6128         OptionalIdx[Op.getImmTy()] = I;
6129       } else {
6130         Op.addRegOrImmOperands(Inst, 1);
6131       }
6132     }
6133   }
6134 
6135   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6136     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6137   }
6138 
6139   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6140     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6141   }
6142 
6143   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6144   // they have a src2 register operand that is tied to the dst operand.
6145   // We don't allow modifiers for this operand in the assembler, so
6146   // src2_modifiers should be 0.
6147   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6148       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6149       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6150       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6151       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6152       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6153       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6154     auto it = Inst.begin();
6155     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6156     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6157     ++it;
6158     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6159   }
6160 }

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
  // instruction, then figure out where to actually put the modifiers.

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
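
// Illustrative sketch (assumed VOP3P syntax): for "v_pk_add_f16 v0, v1, v2
// op_sel:[1,0]", bit 0 of the parsed op_sel immediate sets
// SISrcMods::OP_SEL_0 in src0_modifiers; the bits of the standalone op_sel /
// op_sel_hi / neg_lo / neg_hi immediates are OR'ed into the per-source
// modifier operands by the loop above.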

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool Result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (Result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10())
    return MatchOperand_NoMatch;

  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_NoMatch;
  if (getLexer().getTok().getString() != "dim")
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (getLexer().is(AsmToken::Integer)) {
    SMLoc Loc = getLexer().getTok().getEndLoc();
    Token = getLexer().getTok().getString();
    Parser.Lex();
    if (getLexer().getTok().getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getLexer().getTok().getString();

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}
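
// Illustrative accepted spellings (derived from the parsing above): "dim:1D"
// (the leading digit is lexed as an integer token and rejoined with the
// following identifier) and "dim:SQ_RSRC_IMG_2D_ARRAY" (the SQ_RSRC_IMG_
// prefix is stripped before the asm-suffix lookup).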

OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix != "dpp8")
    return parseDPPCtrl(Operands);
  if (!isGFX10())
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getParser().parseAbsoluteExpression(Sels[0]))
    return MatchOperand_ParseFail;
  if (0 > Sels[0] || 7 < Sels[0])
    return MatchOperand_ParseFail;

  for (size_t i = 1; i < 8; ++i) {
    if (getLexer().isNot(AsmToken::Comma))
      return MatchOperand_ParseFail;

    Parser.Lex();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i])
      return MatchOperand_ParseFail;
  }

  if (getLexer().isNot(AsmToken::RBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}
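
// Illustrative encoding (derived from the packing loop above): each of the
// eight lane selects occupies three bits, so an assumed operand such as
// "dpp8:[1,0,3,2,5,4,7,6]" packs to
// 1 | (0 << 3) | (3 << 6) | (2 << 9) | (5 << 12) | (4 << 15) | (7 << 18) | (6 << 21).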

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrl from eating invalid tokens.
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int Shift = i * 2 + 2;
        Int += (Temp << Shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
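
// Illustrative encoding (derived from the quad_perm parsing above): two bits
// per lane, so the identity swizzle "quad_perm:[0,1,2,3]" encodes as
// 0 | (1 << 2) | (2 << 4) | (3 << 6) = 0xE4, which falls within the
// QUAD_PERM_FIRST..QUAD_PERM_LAST range accepted by isDPPCtrl().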

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied "old" or src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}
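
// Illustrative default handling (assumed syntax): for
// "v_mov_b32_dpp v0, v1 row_shl:1", no masks are written in the source, so
// the conversion above appends row_mask:0xf and bank_mask:0xf (all rows and
// banks enabled) plus a zero bound_ctrl.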

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}
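
// Illustrative uses (assumed SDWA syntax): "dst_sel:WORD_1" or
// "src0_sel:BYTE_2"; the prefix is supplied by the caller and the value is
// matched against the SdwaSel names above.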

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
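
// Illustrative use (assumed SDWA syntax): "dst_unused:UNUSED_PRESERVE",
// which keeps the destination bits that the narrow write does not touch.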

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we didn't skip it on the previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
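
// Illustrative default handling (assumed syntax): for
// "v_mov_b32_sdwa v0, v1" with no explicit SDWA modifiers, the VOP1 path
// above appends clamp 0, dst_sel:DWORD, dst_unused:UNUSED_PRESERVE and
// src0_sel:DWORD.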

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that
// we have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() always tries to match
    // an operand as a token when isToken returns true, and when the name of
    // the expression is not a valid token the match fails, so we need to
    // handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
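
// Illustrative case (assumed syntax, derived from the comments above): in
// "buffer_load_dword v0, off, s[0:3], s4 glc", the trailing "glc" is parsed
// as an immediate operand, and MCK_glc re-validates it here rather than
// requiring a literal token match.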

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}
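
// Illustrative uses (assumed syntax): "s_endpgm" (the optional immediate
// defaults to 0) or "s_endpgm 1"; values that do not fit in 16 bits are
// rejected above.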

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6881