1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUInstPrinter.h"
11 #include "MCTargetDesc/AMDGPUMCAsmInfo.h"
12 #include "MCTargetDesc/AMDGPUMCExpr.h"
13 #include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
14 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
15 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
16 #include "SIDefines.h"
17 #include "SIInstrInfo.h"
18 #include "TargetInfo/AMDGPUTargetInfo.h"
19 #include "Utils/AMDGPUAsmUtils.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "Utils/AMDKernelCodeTUtils.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/StringSet.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/BinaryFormat/ELF.h"
27 #include "llvm/CodeGenTypes/MachineValueType.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCParser/AsmLexer.h"
34 #include "llvm/MC/MCParser/MCAsmParser.h"
35 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
36 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
37 #include "llvm/MC/MCRegisterInfo.h"
38 #include "llvm/MC/MCSymbol.h"
39 #include "llvm/MC/TargetRegistry.h"
40 #include "llvm/Support/AMDGPUMetadata.h"
41 #include "llvm/Support/AMDHSAKernelDescriptor.h"
42 #include "llvm/Support/Casting.h"
43 #include "llvm/Support/Compiler.h"
44 #include "llvm/Support/MathExtras.h"
45 #include "llvm/TargetParser/TargetParser.h"
46 #include <optional>
47
48 using namespace llvm;
49 using namespace llvm::AMDGPU;
50 using namespace llvm::amdhsa;
51
52 namespace {
53
54 class AMDGPUAsmParser;
55
56 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58 //===----------------------------------------------------------------------===//
59 // Operand
60 //===----------------------------------------------------------------------===//
61
62 class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73 public:
AMDGPUOperand(KindTy Kind_,const AMDGPUAsmParser * AsmParser_)74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 bool Lit = false;
84 bool Lit64 = false;
85
hasFPModifiers__anon6862249c0111::AMDGPUOperand::Modifiers86 bool hasFPModifiers() const { return Abs || Neg; }
hasIntModifiers__anon6862249c0111::AMDGPUOperand::Modifiers87 bool hasIntModifiers() const { return Sext; }
hasModifiers__anon6862249c0111::AMDGPUOperand::Modifiers88 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
89
getFPModifiersOperand__anon6862249c0111::AMDGPUOperand::Modifiers90 int64_t getFPModifiersOperand() const {
91 int64_t Operand = 0;
92 Operand |= Abs ? SISrcMods::ABS : 0u;
93 Operand |= Neg ? SISrcMods::NEG : 0u;
94 return Operand;
95 }
96
getIntModifiersOperand__anon6862249c0111::AMDGPUOperand::Modifiers97 int64_t getIntModifiersOperand() const {
98 int64_t Operand = 0;
99 Operand |= Sext ? SISrcMods::SEXT : 0u;
100 return Operand;
101 }
102
getModifiersOperand__anon6862249c0111::AMDGPUOperand::Modifiers103 int64_t getModifiersOperand() const {
104 assert(!(hasFPModifiers() && hasIntModifiers())
105 && "fp and int modifiers should not be used simultaneously");
106 if (hasFPModifiers())
107 return getFPModifiersOperand();
108 if (hasIntModifiers())
109 return getIntModifiersOperand();
110 return 0;
111 }
112
113 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
114 };
115
116 enum ImmTy {
117 ImmTyNone,
118 ImmTyGDS,
119 ImmTyLDS,
120 ImmTyOffen,
121 ImmTyIdxen,
122 ImmTyAddr64,
123 ImmTyOffset,
124 ImmTyInstOffset,
125 ImmTyOffset0,
126 ImmTyOffset1,
127 ImmTySMEMOffsetMod,
128 ImmTyCPol,
129 ImmTyTFE,
130 ImmTyD16,
131 ImmTyClamp,
132 ImmTyOModSI,
133 ImmTySDWADstSel,
134 ImmTySDWASrc0Sel,
135 ImmTySDWASrc1Sel,
136 ImmTySDWADstUnused,
137 ImmTyDMask,
138 ImmTyDim,
139 ImmTyUNorm,
140 ImmTyDA,
141 ImmTyR128A16,
142 ImmTyA16,
143 ImmTyLWE,
144 ImmTyExpTgt,
145 ImmTyExpCompr,
146 ImmTyExpVM,
147 ImmTyFORMAT,
148 ImmTyHwreg,
149 ImmTyOff,
150 ImmTySendMsg,
151 ImmTyInterpSlot,
152 ImmTyInterpAttr,
153 ImmTyInterpAttrChan,
154 ImmTyOpSel,
155 ImmTyOpSelHi,
156 ImmTyNegLo,
157 ImmTyNegHi,
158 ImmTyIndexKey8bit,
159 ImmTyIndexKey16bit,
160 ImmTyIndexKey32bit,
161 ImmTyDPP8,
162 ImmTyDppCtrl,
163 ImmTyDppRowMask,
164 ImmTyDppBankMask,
165 ImmTyDppBoundCtrl,
166 ImmTyDppFI,
167 ImmTySwizzle,
168 ImmTyGprIdxMode,
169 ImmTyHigh,
170 ImmTyBLGP,
171 ImmTyCBSZ,
172 ImmTyABID,
173 ImmTyEndpgm,
174 ImmTyWaitVDST,
175 ImmTyWaitEXP,
176 ImmTyWaitVAVDst,
177 ImmTyWaitVMVSrc,
178 ImmTyBitOp3,
179 ImmTyMatrixAReuse,
180 ImmTyMatrixBReuse,
181 ImmTyByteSel,
182 };
183
184 // Immediate operand kind.
185 // It helps to identify the location of an offending operand after an error.
186 // Note that regular literals and mandatory literals (KImm) must be handled
187 // differently. When looking for an offending operand, we should usually
188 // ignore mandatory literals because they are part of the instruction and
189 // cannot be changed. Report location of mandatory operands only for VOPD,
190 // when both OpX and OpY have a KImm and there are no other literals.
191 enum ImmKindTy {
192 ImmKindTyNone,
193 ImmKindTyLiteral,
194 ImmKindTyMandatoryLiteral,
195 ImmKindTyConst,
196 };
197
198 private:
199 struct TokOp {
200 const char *Data;
201 unsigned Length;
202 };
203
204 struct ImmOp {
205 int64_t Val;
206 ImmTy Type;
207 bool IsFPImm;
208 mutable ImmKindTy Kind;
209 Modifiers Mods;
210 };
211
212 struct RegOp {
213 MCRegister RegNo;
214 Modifiers Mods;
215 };
216
217 union {
218 TokOp Tok;
219 ImmOp Imm;
220 RegOp Reg;
221 const MCExpr *Expr;
222 };
223
224 public:
isToken() const225 bool isToken() const override { return Kind == Token; }
226
isSymbolRefExpr() const227 bool isSymbolRefExpr() const {
228 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
229 }
230
isImm() const231 bool isImm() const override {
232 return Kind == Immediate;
233 }
234
setImmKindNone() const235 void setImmKindNone() const {
236 assert(isImm());
237 Imm.Kind = ImmKindTyNone;
238 }
239
setImmKindLiteral() const240 void setImmKindLiteral() const {
241 assert(isImm());
242 Imm.Kind = ImmKindTyLiteral;
243 }
244
setImmKindMandatoryLiteral() const245 void setImmKindMandatoryLiteral() const {
246 assert(isImm());
247 Imm.Kind = ImmKindTyMandatoryLiteral;
248 }
249
setImmKindConst() const250 void setImmKindConst() const {
251 assert(isImm());
252 Imm.Kind = ImmKindTyConst;
253 }
254
IsImmKindLiteral() const255 bool IsImmKindLiteral() const {
256 return isImm() && Imm.Kind == ImmKindTyLiteral;
257 }
258
IsImmKindMandatoryLiteral() const259 bool IsImmKindMandatoryLiteral() const {
260 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
261 }
262
isImmKindConst() const263 bool isImmKindConst() const {
264 return isImm() && Imm.Kind == ImmKindTyConst;
265 }
266
267 bool isInlinableImm(MVT type) const;
268 bool isLiteralImm(MVT type) const;
269
isRegKind() const270 bool isRegKind() const {
271 return Kind == Register;
272 }
273
isReg() const274 bool isReg() const override {
275 return isRegKind() && !hasModifiers();
276 }
277
isRegOrInline(unsigned RCID,MVT type) const278 bool isRegOrInline(unsigned RCID, MVT type) const {
279 return isRegClass(RCID) || isInlinableImm(type);
280 }
281
isRegOrImmWithInputMods(unsigned RCID,MVT type) const282 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
283 return isRegOrInline(RCID, type) || isLiteralImm(type);
284 }
285
isRegOrImmWithInt16InputMods() const286 bool isRegOrImmWithInt16InputMods() const {
287 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
288 }
289
isRegOrImmWithIntT16InputMods() const290 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
291 return isRegOrImmWithInputMods(
292 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
293 }
294
isRegOrImmWithInt32InputMods() const295 bool isRegOrImmWithInt32InputMods() const {
296 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
297 }
298
isRegOrInlineImmWithInt16InputMods() const299 bool isRegOrInlineImmWithInt16InputMods() const {
300 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
301 }
302
isRegOrInlineImmWithIntT16InputMods() const303 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
304 return isRegOrInline(
305 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
306 }
307
isRegOrInlineImmWithInt32InputMods() const308 bool isRegOrInlineImmWithInt32InputMods() const {
309 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
310 }
311
isRegOrImmWithInt64InputMods() const312 bool isRegOrImmWithInt64InputMods() const {
313 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
314 }
315
isRegOrImmWithFP16InputMods() const316 bool isRegOrImmWithFP16InputMods() const {
317 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
318 }
319
isRegOrImmWithFPT16InputMods() const320 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
321 return isRegOrImmWithInputMods(
322 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
323 }
324
isRegOrImmWithFP32InputMods() const325 bool isRegOrImmWithFP32InputMods() const {
326 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
327 }
328
isRegOrImmWithFP64InputMods() const329 bool isRegOrImmWithFP64InputMods() const {
330 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
331 }
332
isRegOrInlineImmWithFP16InputMods() const333 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
334 return isRegOrInline(
335 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
336 }
337
isRegOrInlineImmWithFP32InputMods() const338 bool isRegOrInlineImmWithFP32InputMods() const {
339 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
340 }
341
isRegOrInlineImmWithFP64InputMods() const342 bool isRegOrInlineImmWithFP64InputMods() const {
343 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
344 }
345
isVRegWithInputMods(unsigned RCID) const346 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
347
isVRegWithFP32InputMods() const348 bool isVRegWithFP32InputMods() const {
349 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
350 }
351
isVRegWithFP64InputMods() const352 bool isVRegWithFP64InputMods() const {
353 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
354 }
355
isPackedFP16InputMods() const356 bool isPackedFP16InputMods() const {
357 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
358 }
359
isPackedVGPRFP32InputMods() const360 bool isPackedVGPRFP32InputMods() const {
361 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
362 }
363
isVReg() const364 bool isVReg() const {
365 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
366 isRegClass(AMDGPU::VReg_64RegClassID) ||
367 isRegClass(AMDGPU::VReg_96RegClassID) ||
368 isRegClass(AMDGPU::VReg_128RegClassID) ||
369 isRegClass(AMDGPU::VReg_160RegClassID) ||
370 isRegClass(AMDGPU::VReg_192RegClassID) ||
371 isRegClass(AMDGPU::VReg_256RegClassID) ||
372 isRegClass(AMDGPU::VReg_512RegClassID) ||
373 isRegClass(AMDGPU::VReg_1024RegClassID);
374 }
375
isVReg32() const376 bool isVReg32() const {
377 return isRegClass(AMDGPU::VGPR_32RegClassID);
378 }
379
isVReg32OrOff() const380 bool isVReg32OrOff() const {
381 return isOff() || isVReg32();
382 }
383
isNull() const384 bool isNull() const {
385 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
386 }
387
388 bool isVRegWithInputMods() const;
389 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
390 template <bool IsFake16> bool isT16VRegWithInputMods() const;
391
392 bool isSDWAOperand(MVT type) const;
393 bool isSDWAFP16Operand() const;
394 bool isSDWAFP32Operand() const;
395 bool isSDWAInt16Operand() const;
396 bool isSDWAInt32Operand() const;
397
isImmTy(ImmTy ImmT) const398 bool isImmTy(ImmTy ImmT) const {
399 return isImm() && Imm.Type == ImmT;
400 }
401
isImmTy() const402 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
403
isImmLiteral() const404 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
405
isImmModifier() const406 bool isImmModifier() const {
407 return isImm() && Imm.Type != ImmTyNone;
408 }
409
isOModSI() const410 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
isDim() const411 bool isDim() const { return isImmTy(ImmTyDim); }
isR128A16() const412 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
isOff() const413 bool isOff() const { return isImmTy(ImmTyOff); }
isExpTgt() const414 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
isOffen() const415 bool isOffen() const { return isImmTy(ImmTyOffen); }
isIdxen() const416 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
isAddr64() const417 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
isSMEMOffsetMod() const418 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
isFlatOffset() const419 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
isGDS() const420 bool isGDS() const { return isImmTy(ImmTyGDS); }
isLDS() const421 bool isLDS() const { return isImmTy(ImmTyLDS); }
isCPol() const422 bool isCPol() const { return isImmTy(ImmTyCPol); }
isIndexKey8bit() const423 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
isIndexKey16bit() const424 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
isIndexKey32bit() const425 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
isMatrixAReuse() const426 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
isMatrixBReuse() const427 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
isTFE() const428 bool isTFE() const { return isImmTy(ImmTyTFE); }
isFORMAT() const429 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
isDppFI() const430 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
isSDWADstSel() const431 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
isSDWASrc0Sel() const432 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
isSDWASrc1Sel() const433 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
isSDWADstUnused() const434 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
isInterpSlot() const435 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
isInterpAttr() const436 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
isInterpAttrChan() const437 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
isOpSel() const438 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
isOpSelHi() const439 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
isNegLo() const440 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
isNegHi() const441 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
isBitOp3() const442 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
443
isRegOrImm() const444 bool isRegOrImm() const {
445 return isReg() || isImm();
446 }
447
448 bool isRegClass(unsigned RCID) const;
449
450 bool isInlineValue() const;
451
isRegOrInlineNoMods(unsigned RCID,MVT type) const452 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
453 return isRegOrInline(RCID, type) && !hasModifiers();
454 }
455
isSCSrcB16() const456 bool isSCSrcB16() const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
458 }
459
isSCSrcV2B16() const460 bool isSCSrcV2B16() const {
461 return isSCSrcB16();
462 }
463
isSCSrc_b32() const464 bool isSCSrc_b32() const {
465 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
466 }
467
isSCSrc_b64() const468 bool isSCSrc_b64() const {
469 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
470 }
471
472 bool isBoolReg() const;
473
isSCSrcF16() const474 bool isSCSrcF16() const {
475 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
476 }
477
isSCSrcV2F16() const478 bool isSCSrcV2F16() const {
479 return isSCSrcF16();
480 }
481
isSCSrcF32() const482 bool isSCSrcF32() const {
483 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
484 }
485
isSCSrcF64() const486 bool isSCSrcF64() const {
487 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
488 }
489
isSSrc_b32() const490 bool isSSrc_b32() const {
491 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
492 }
493
isSSrc_b16() const494 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
495
isSSrcV2B16() const496 bool isSSrcV2B16() const {
497 llvm_unreachable("cannot happen");
498 return isSSrc_b16();
499 }
500
isSSrc_b64() const501 bool isSSrc_b64() const {
502 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
503 // See isVSrc64().
504 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
505 (((const MCTargetAsmParser *)AsmParser)
506 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
507 isExpr());
508 }
509
isSSrc_f32() const510 bool isSSrc_f32() const {
511 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
512 }
513
isSSrcF64() const514 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
515
isSSrc_bf16() const516 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
517
isSSrc_f16() const518 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
519
isSSrcV2F16() const520 bool isSSrcV2F16() const {
521 llvm_unreachable("cannot happen");
522 return isSSrc_f16();
523 }
524
isSSrcV2FP32() const525 bool isSSrcV2FP32() const {
526 llvm_unreachable("cannot happen");
527 return isSSrc_f32();
528 }
529
isSCSrcV2FP32() const530 bool isSCSrcV2FP32() const {
531 llvm_unreachable("cannot happen");
532 return isSCSrcF32();
533 }
534
isSSrcV2INT32() const535 bool isSSrcV2INT32() const {
536 llvm_unreachable("cannot happen");
537 return isSSrc_b32();
538 }
539
isSCSrcV2INT32() const540 bool isSCSrcV2INT32() const {
541 llvm_unreachable("cannot happen");
542 return isSCSrc_b32();
543 }
544
isSSrcOrLds_b32() const545 bool isSSrcOrLds_b32() const {
546 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
547 isLiteralImm(MVT::i32) || isExpr();
548 }
549
isVCSrc_b32() const550 bool isVCSrc_b32() const {
551 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
552 }
553
isVCSrc_b64() const554 bool isVCSrc_b64() const {
555 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
556 }
557
isVCSrcT_b16() const558 bool isVCSrcT_b16() const {
559 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
560 }
561
isVCSrcTB16_Lo128() const562 bool isVCSrcTB16_Lo128() const {
563 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
564 }
565
isVCSrcFake16B16_Lo128() const566 bool isVCSrcFake16B16_Lo128() const {
567 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
568 }
569
isVCSrc_b16() const570 bool isVCSrc_b16() const {
571 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
572 }
573
isVCSrc_v2b16() const574 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
575
isVCSrc_f32() const576 bool isVCSrc_f32() const {
577 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
578 }
579
isVCSrc_f64() const580 bool isVCSrc_f64() const {
581 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
582 }
583
isVCSrcTBF16() const584 bool isVCSrcTBF16() const {
585 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
586 }
587
isVCSrcT_f16() const588 bool isVCSrcT_f16() const {
589 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
590 }
591
isVCSrcT_bf16() const592 bool isVCSrcT_bf16() const {
593 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
594 }
595
isVCSrcTBF16_Lo128() const596 bool isVCSrcTBF16_Lo128() const {
597 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
598 }
599
isVCSrcTF16_Lo128() const600 bool isVCSrcTF16_Lo128() const {
601 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
602 }
603
isVCSrcFake16BF16_Lo128() const604 bool isVCSrcFake16BF16_Lo128() const {
605 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
606 }
607
isVCSrcFake16F16_Lo128() const608 bool isVCSrcFake16F16_Lo128() const {
609 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
610 }
611
isVCSrc_bf16() const612 bool isVCSrc_bf16() const {
613 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
614 }
615
isVCSrc_f16() const616 bool isVCSrc_f16() const {
617 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
618 }
619
isVCSrc_v2bf16() const620 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
621
isVCSrc_v2f16() const622 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
623
isVSrc_b32() const624 bool isVSrc_b32() const {
625 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
626 }
627
isVSrc_b64() const628 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
629
isVSrcT_b16() const630 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
631
isVSrcT_b16_Lo128() const632 bool isVSrcT_b16_Lo128() const {
633 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
634 }
635
isVSrcFake16_b16_Lo128() const636 bool isVSrcFake16_b16_Lo128() const {
637 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
638 }
639
isVSrc_b16() const640 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
641
isVSrc_v2b16() const642 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
643
isVCSrcV2FP32() const644 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
645
isVSrc_v2f32() const646 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
647
isVCSrc_v2b32() const648 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
649
isVSrc_v2b32() const650 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
651
isVSrc_f32() const652 bool isVSrc_f32() const {
653 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
654 }
655
isVSrc_f64() const656 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
657
isVSrcT_bf16() const658 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
659
isVSrcT_f16() const660 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
661
isVSrcT_bf16_Lo128() const662 bool isVSrcT_bf16_Lo128() const {
663 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
664 }
665
isVSrcT_f16_Lo128() const666 bool isVSrcT_f16_Lo128() const {
667 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
668 }
669
isVSrcFake16_bf16_Lo128() const670 bool isVSrcFake16_bf16_Lo128() const {
671 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
672 }
673
isVSrcFake16_f16_Lo128() const674 bool isVSrcFake16_f16_Lo128() const {
675 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
676 }
677
isVSrc_bf16() const678 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
679
isVSrc_f16() const680 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
681
isVSrc_v2bf16() const682 bool isVSrc_v2bf16() const {
683 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
684 }
685
isVSrc_v2f16() const686 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
687
isVISrcB32() const688 bool isVISrcB32() const {
689 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
690 }
691
isVISrcB16() const692 bool isVISrcB16() const {
693 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
694 }
695
isVISrcV2B16() const696 bool isVISrcV2B16() const {
697 return isVISrcB16();
698 }
699
isVISrcF32() const700 bool isVISrcF32() const {
701 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
702 }
703
isVISrcF16() const704 bool isVISrcF16() const {
705 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
706 }
707
isVISrcV2F16() const708 bool isVISrcV2F16() const {
709 return isVISrcF16() || isVISrcB32();
710 }
711
isVISrc_64_bf16() const712 bool isVISrc_64_bf16() const {
713 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
714 }
715
isVISrc_64_f16() const716 bool isVISrc_64_f16() const {
717 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
718 }
719
isVISrc_64_b32() const720 bool isVISrc_64_b32() const {
721 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
722 }
723
isVISrc_64B64() const724 bool isVISrc_64B64() const {
725 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
726 }
727
isVISrc_64_f64() const728 bool isVISrc_64_f64() const {
729 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
730 }
731
isVISrc_64V2FP32() const732 bool isVISrc_64V2FP32() const {
733 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
734 }
735
isVISrc_64V2INT32() const736 bool isVISrc_64V2INT32() const {
737 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
738 }
739
isVISrc_256_b32() const740 bool isVISrc_256_b32() const {
741 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
742 }
743
isVISrc_256_f32() const744 bool isVISrc_256_f32() const {
745 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
746 }
747
isVISrc_256B64() const748 bool isVISrc_256B64() const {
749 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
750 }
751
isVISrc_256_f64() const752 bool isVISrc_256_f64() const {
753 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
754 }
755
isVISrc_512_f64() const756 bool isVISrc_512_f64() const {
757 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
758 }
759
isVISrc_128B16() const760 bool isVISrc_128B16() const {
761 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
762 }
763
isVISrc_128V2B16() const764 bool isVISrc_128V2B16() const {
765 return isVISrc_128B16();
766 }
767
isVISrc_128_b32() const768 bool isVISrc_128_b32() const {
769 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
770 }
771
isVISrc_128_f32() const772 bool isVISrc_128_f32() const {
773 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
774 }
775
isVISrc_256V2FP32() const776 bool isVISrc_256V2FP32() const {
777 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
778 }
779
isVISrc_256V2INT32() const780 bool isVISrc_256V2INT32() const {
781 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
782 }
783
isVISrc_512_b32() const784 bool isVISrc_512_b32() const {
785 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
786 }
787
isVISrc_512B16() const788 bool isVISrc_512B16() const {
789 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
790 }
791
isVISrc_512V2B16() const792 bool isVISrc_512V2B16() const {
793 return isVISrc_512B16();
794 }
795
isVISrc_512_f32() const796 bool isVISrc_512_f32() const {
797 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
798 }
799
isVISrc_512F16() const800 bool isVISrc_512F16() const {
801 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
802 }
803
isVISrc_512V2F16() const804 bool isVISrc_512V2F16() const {
805 return isVISrc_512F16() || isVISrc_512_b32();
806 }
807
isVISrc_1024_b32() const808 bool isVISrc_1024_b32() const {
809 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
810 }
811
isVISrc_1024B16() const812 bool isVISrc_1024B16() const {
813 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
814 }
815
isVISrc_1024V2B16() const816 bool isVISrc_1024V2B16() const {
817 return isVISrc_1024B16();
818 }
819
isVISrc_1024_f32() const820 bool isVISrc_1024_f32() const {
821 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
822 }
823
isVISrc_1024F16() const824 bool isVISrc_1024F16() const {
825 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
826 }
827
isVISrc_1024V2F16() const828 bool isVISrc_1024V2F16() const {
829 return isVISrc_1024F16() || isVISrc_1024_b32();
830 }
831
isAISrcB32() const832 bool isAISrcB32() const {
833 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
834 }
835
isAISrcB16() const836 bool isAISrcB16() const {
837 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
838 }
839
isAISrcV2B16() const840 bool isAISrcV2B16() const {
841 return isAISrcB16();
842 }
843
isAISrcF32() const844 bool isAISrcF32() const {
845 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
846 }
847
isAISrcF16() const848 bool isAISrcF16() const {
849 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
850 }
851
isAISrcV2F16() const852 bool isAISrcV2F16() const {
853 return isAISrcF16() || isAISrcB32();
854 }
855
isAISrc_64B64() const856 bool isAISrc_64B64() const {
857 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
858 }
859
isAISrc_64_f64() const860 bool isAISrc_64_f64() const {
861 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
862 }
863
isAISrc_128_b32() const864 bool isAISrc_128_b32() const {
865 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
866 }
867
  // --- AGPR ("AISrc") / VGPR ("VISrc") inline-source predicates -------------
  // Each predicate accepts either a register of the named class or an inline
  // constant (no modifiers) encodable for the given element type.  Packed-f16
  // ("V2F16") sources additionally accept plain 32-bit inline values, while
  // packed-i16 ("V2B16") sources do not — presumably because only FP packed
  // operands reuse the 32-bit inline-constant encoding; TODO confirm against
  // the operand definitions in the .td files.

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }
955
  // KImm operands: literal constants carried in the instruction stream
  // (as opposed to inline constants), checked against the given FP type.
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isKImmFP64() const { return isLiteralImm(MVT::f64); }

  // AMDGPU assembly has no memory operands in the MC sense.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  // SOPP branch targets may be written as a symbolic expression or a
  // plain immediate offset.
  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  // Predicates whose implementations live after the class body.
  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // Bind predicate P to this operand, yielding a nullary closure.
  // NOTE(review): [=] captures `this` by pointer (C++17 semantics), so the
  // returned closure must not outlive the operand it was created from.
  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [=](){ return P(*this); };
  }
997
  // --- Accessors (each asserts the operand is of the matching kind) ---------

  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers are stored on the register or the (plain) immediate payload;
  // only those two operand forms may carry them.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  // Implemented out of line.
  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;
1067
  // Append this operand to Inst as whichever of register/immediate it is.
  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  // Append the modifier bitmask operand first, then the operand itself.
  // For immediates the modifiers are NOT folded into the value here
  // (ApplyModifiers = false): they are already encoded in the separate
  // modifier operand just emitted.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  // FP-modifier variant: caller guarantees no integer modifiers (sext).
  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  // Int-modifier variant: caller guarantees no FP modifiers (abs/neg).
  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  // Register-only counterpart of addRegOrImmWithInputModsOperands.
  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
1111
printImmTy(raw_ostream & OS,ImmTy Type)1112 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1113 // clang-format off
1114 switch (Type) {
1115 case ImmTyNone: OS << "None"; break;
1116 case ImmTyGDS: OS << "GDS"; break;
1117 case ImmTyLDS: OS << "LDS"; break;
1118 case ImmTyOffen: OS << "Offen"; break;
1119 case ImmTyIdxen: OS << "Idxen"; break;
1120 case ImmTyAddr64: OS << "Addr64"; break;
1121 case ImmTyOffset: OS << "Offset"; break;
1122 case ImmTyInstOffset: OS << "InstOffset"; break;
1123 case ImmTyOffset0: OS << "Offset0"; break;
1124 case ImmTyOffset1: OS << "Offset1"; break;
1125 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1126 case ImmTyCPol: OS << "CPol"; break;
1127 case ImmTyIndexKey8bit: OS << "index_key"; break;
1128 case ImmTyIndexKey16bit: OS << "index_key"; break;
1129 case ImmTyIndexKey32bit: OS << "index_key"; break;
1130 case ImmTyTFE: OS << "TFE"; break;
1131 case ImmTyD16: OS << "D16"; break;
1132 case ImmTyFORMAT: OS << "FORMAT"; break;
1133 case ImmTyClamp: OS << "Clamp"; break;
1134 case ImmTyOModSI: OS << "OModSI"; break;
1135 case ImmTyDPP8: OS << "DPP8"; break;
1136 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1137 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1138 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1139 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1140 case ImmTyDppFI: OS << "DppFI"; break;
1141 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1142 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1143 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1144 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1145 case ImmTyDMask: OS << "DMask"; break;
1146 case ImmTyDim: OS << "Dim"; break;
1147 case ImmTyUNorm: OS << "UNorm"; break;
1148 case ImmTyDA: OS << "DA"; break;
1149 case ImmTyR128A16: OS << "R128A16"; break;
1150 case ImmTyA16: OS << "A16"; break;
1151 case ImmTyLWE: OS << "LWE"; break;
1152 case ImmTyOff: OS << "Off"; break;
1153 case ImmTyExpTgt: OS << "ExpTgt"; break;
1154 case ImmTyExpCompr: OS << "ExpCompr"; break;
1155 case ImmTyExpVM: OS << "ExpVM"; break;
1156 case ImmTyHwreg: OS << "Hwreg"; break;
1157 case ImmTySendMsg: OS << "SendMsg"; break;
1158 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1159 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1160 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1161 case ImmTyOpSel: OS << "OpSel"; break;
1162 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1163 case ImmTyNegLo: OS << "NegLo"; break;
1164 case ImmTyNegHi: OS << "NegHi"; break;
1165 case ImmTySwizzle: OS << "Swizzle"; break;
1166 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1167 case ImmTyHigh: OS << "High"; break;
1168 case ImmTyBLGP: OS << "BLGP"; break;
1169 case ImmTyCBSZ: OS << "CBSZ"; break;
1170 case ImmTyABID: OS << "ABID"; break;
1171 case ImmTyEndpgm: OS << "Endpgm"; break;
1172 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1173 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1174 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1175 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1176 case ImmTyBitOp3: OS << "BitOp3"; break;
1177 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1178 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1179 case ImmTyByteSel: OS << "ByteSel" ; break;
1180 }
1181 // clang-format on
1182 }
1183
  // Debug dump of this operand: register (with modifiers), immediate
  // (with optional type and modifiers), token text, or expression.
  void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
         << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr ";
      MAI.printExpr(OS, *Expr);
      OS << '>';
      break;
    }
  }
1207
CreateImm(const AMDGPUAsmParser * AsmParser,int64_t Val,SMLoc Loc,ImmTy Type=ImmTyNone,bool IsFPImm=false)1208 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1209 int64_t Val, SMLoc Loc,
1210 ImmTy Type = ImmTyNone,
1211 bool IsFPImm = false) {
1212 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1213 Op->Imm.Val = Val;
1214 Op->Imm.IsFPImm = IsFPImm;
1215 Op->Imm.Kind = ImmKindTyNone;
1216 Op->Imm.Type = Type;
1217 Op->Imm.Mods = Modifiers();
1218 Op->StartLoc = Loc;
1219 Op->EndLoc = Loc;
1220 return Op;
1221 }
1222
  // Factory: build a token operand referencing (not copying) Str's bytes,
  // so Str must outlive the operand.
  // NOTE(review): HasExplicitEncodingSize is currently unused in this body;
  // kept for call-site compatibility — confirm whether callers still need it.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }
1233
CreateReg(const AMDGPUAsmParser * AsmParser,MCRegister Reg,SMLoc S,SMLoc E)1234 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1235 MCRegister Reg, SMLoc S, SMLoc E) {
1236 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1237 Op->Reg.RegNo = Reg;
1238 Op->Reg.Mods = Modifiers();
1239 Op->StartLoc = S;
1240 Op->EndLoc = E;
1241 return Op;
1242 }
1243
CreateExpr(const AMDGPUAsmParser * AsmParser,const class MCExpr * Expr,SMLoc S)1244 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1245 const class MCExpr *Expr, SMLoc S) {
1246 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1247 Op->Expr = Expr;
1248 Op->StartLoc = S;
1249 Op->EndLoc = S;
1250 return Op;
1251 }
1252 };
1253
operator <<(raw_ostream & OS,AMDGPUOperand::Modifiers Mods)1254 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1255 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1256 return OS;
1257 }
1258
1259 //===----------------------------------------------------------------------===//
1260 // AsmParser
1261 //===----------------------------------------------------------------------===//
1262
1263 // Holds info related to the current kernel, e.g. count of SGPRs used.
1264 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1265 // .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  // One past the highest register index seen so far, per register file;
  // -1 means "tracker not yet reset by initialize()".
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  // Record that SGPR index `i` is used. Tracks max-index+1 (note the ++i)
  // and mirrors the count into the .kernel.sgpr_count symbol.
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  // Record that VGPR index `i` is used. The published .kernel.vgpr_count
  // folds in the AGPR usage via getTotalNumVGPRs (AGPRs share the VGPR
  // budget on gfx90a).
  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  // Record that AGPR index `i` is used; updates both .kernel.agpr_count and
  // the combined .kernel.vgpr_count. Dereferences MSTI, so this must only
  // run after initialize().
  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  // Reset the trackers for a new kernel scope and (re)publish the count
  // symbols. usesXgprAt(-1) with the tracker at -1 satisfies i >= min, so
  // each count symbol is re-created with value 0.
  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  // Mark a register reference: the last 32-bit dword it occupies is
  // DwordRegIndex + ceil(RegWidth/32) - 1. Non-S/V/AGPR kinds (TTMPs etc.)
  // do not affect the counts.
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};
1350
1351 class AMDGPUAsmParser : public MCTargetAsmParser {
1352 MCAsmParser &Parser;
1353
1354 unsigned ForcedEncodingSize = 0;
1355 bool ForcedDPP = false;
1356 bool ForcedSDWA = false;
1357 KernelScopeInfo KernelScope;
1358
1359 /// @name Auto-generated Match Functions
1360 /// {
1361
1362 #define GET_ASSEMBLER_HEADER
1363 #include "AMDGPUGenAsmMatcher.inc"
1364
1365 /// }
1366
1367 private:
1368 void createConstantSymbol(StringRef Id, int64_t Val);
1369
1370 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1371 bool OutOfRangeError(SMRange Range);
1372 /// Calculate VGPR/SGPR blocks required for given target, reserved
1373 /// registers, and user-specified NextFreeXGPR values.
1374 ///
1375 /// \param Features [in] Target features, used for bug corrections.
1376 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1377 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1378 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1379 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1380 /// descriptor field, if valid.
1381 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1382 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1383 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1384 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1385 /// \param VGPRBlocks [out] Result VGPR block count.
1386 /// \param SGPRBlocks [out] Result SGPR block count.
1387 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1388 const MCExpr *FlatScrUsed, bool XNACKUsed,
1389 std::optional<bool> EnableWavefrontSize32,
1390 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1391 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1392 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1393 bool ParseDirectiveAMDGCNTarget();
1394 bool ParseDirectiveAMDHSACodeObjectVersion();
1395 bool ParseDirectiveAMDHSAKernel();
1396 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1397 bool ParseDirectiveAMDKernelCodeT();
1398 // TODO: Possibly make subtargetHasRegister const.
1399 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1400 bool ParseDirectiveAMDGPUHsaKernel();
1401
1402 bool ParseDirectiveISAVersion();
1403 bool ParseDirectiveHSAMetadata();
1404 bool ParseDirectivePALMetadataBegin();
1405 bool ParseDirectivePALMetadata();
1406 bool ParseDirectiveAMDGPULDS();
1407
1408 /// Common code to parse out a block of text (typically YAML) between start and
1409 /// end directives.
1410 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1411 const char *AssemblerDirectiveEnd,
1412 std::string &CollectString);
1413
1414 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1415 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1416 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1417 unsigned &RegNum, unsigned &RegWidth,
1418 bool RestoreOnFailure = false);
1419 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1420 unsigned &RegNum, unsigned &RegWidth,
1421 SmallVectorImpl<AsmToken> &Tokens);
1422 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1423 unsigned &RegWidth,
1424 SmallVectorImpl<AsmToken> &Tokens);
1425 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1426 unsigned &RegWidth,
1427 SmallVectorImpl<AsmToken> &Tokens);
1428 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1429 unsigned &RegWidth,
1430 SmallVectorImpl<AsmToken> &Tokens);
1431 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1432 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1433 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1434
1435 bool isRegister();
1436 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1437 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1438 void initializeGprCountSymbol(RegisterKind RegKind);
1439 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1440 unsigned RegWidth);
1441 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1442 bool IsAtomic);
1443
1444 public:
1445 enum OperandMode {
1446 OperandMode_Default,
1447 OperandMode_NSA,
1448 };
1449
1450 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1451
  // Construct the parser: pick default subtarget features, set up the
  // feature-derived available-feature mask, and pre-define the assembler
  // symbols (ISA version, microcode versions, GPR counters) callers expect.
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    FeatureBitset FB = getFeatureBits();
    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {
      // If there is no default wave size it must be a generation before gfx10,
      // these have FeatureWavefrontSize64 in their definition already. For
      // gfx10+ set wave32 as a default.
      copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    // HSA ABI targets expose .amdgcn.* version symbols; others get the
    // legacy .option.machine_version_* names.
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    // GPR usage tracking: HSA uses per-kind count symbols, otherwise the
    // kernel-scope tracker maintains the .kernel.*_count symbols.
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());

    // Microcode version symbols plus feature-bit constants used by them.
    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }
1497
  // --- Subtarget feature / generation queries -------------------------------
  // Thin member wrappers over AMDGPU:: helpers and feature bits, so parser
  // code can query the current subtarget tersely.

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool has64BitLiterals() const {
    return getFeatureBits()[AMDGPU::Feature64BitLiterals];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasTrue16Insts() const {
    return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  // SGPR102/103 are usable on all generations except VI and GFX9.
  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1610
  // The streamer attached to this parser is always an AMDGPU target
  // streamer, so the downcast is safe.
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // Forced-encoding state: set from mnemonic suffixes (e.g. _e64, _dpp,
  // _sdwa) to constrain instruction matching.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  // A forced 64-bit encoding size selects the VOP3 form.
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
1638 ArrayRef<unsigned> getMatchedVariants() const;
1639 StringRef getMatchedVariantName() const;
1640
1641 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1642 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1643 bool RestoreOnFailure);
1644 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1645 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1646 SMLoc &EndLoc) override;
1647 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1648 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1649 unsigned Kind) override;
1650 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1651 OperandVector &Operands, MCStreamer &Out,
1652 uint64_t &ErrorInfo,
1653 bool MatchingInlineAsm) override;
1654 bool ParseDirective(AsmToken DirectiveID) override;
1655 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1656 OperandMode Mode = OperandMode_Default);
1657 StringRef parseMnemonicSuffix(StringRef Name);
1658 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1659 SMLoc NameLoc, OperandVector &Operands) override;
1660 //bool ProcessInstruction(MCInst &Inst);
1661
1662 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1663
1664 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1665
1666 ParseStatus
1667 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1668 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1669 std::function<bool(int64_t &)> ConvertResult = nullptr);
1670
1671 ParseStatus parseOperandArrayWithPrefix(
1672 const char *Prefix, OperandVector &Operands,
1673 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1674 bool (*ConvertResult)(int64_t &) = nullptr);
1675
1676 ParseStatus
1677 parseNamedBit(StringRef Name, OperandVector &Operands,
1678 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1679 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1680 ParseStatus parseCPol(OperandVector &Operands);
1681 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1682 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1683 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1684 SMLoc &StringLoc);
1685 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1686 StringRef Name,
1687 ArrayRef<const char *> Ids,
1688 int64_t &IntVal);
1689 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1690 StringRef Name,
1691 ArrayRef<const char *> Ids,
1692 AMDGPUOperand::ImmTy Type);
1693
1694 bool isModifier();
1695 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1696 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1697 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1698 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1699 bool parseSP3NegModifier();
1700 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1701 bool HasLit = false, bool HasLit64 = false);
1702 ParseStatus parseReg(OperandVector &Operands);
1703 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1704 bool HasLit = false, bool HasLit64 = false);
1705 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1706 bool AllowImm = true);
1707 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1708 bool AllowImm = true);
1709 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1710 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1711 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1712 ParseStatus tryParseIndexKey(OperandVector &Operands,
1713 AMDGPUOperand::ImmTy ImmTy);
1714 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1715 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1716 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1717
1718 ParseStatus parseDfmtNfmt(int64_t &Format);
1719 ParseStatus parseUfmt(int64_t &Format);
1720 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1721 int64_t &Format);
1722 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1723 int64_t &Format);
1724 ParseStatus parseFORMAT(OperandVector &Operands);
1725 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1726 ParseStatus parseNumericFormat(int64_t &Format);
1727 ParseStatus parseFlatOffset(OperandVector &Operands);
1728 ParseStatus parseR128A16(OperandVector &Operands);
1729 ParseStatus parseBLGP(OperandVector &Operands);
1730 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1731 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1732
1733 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1734
1735 bool parseCnt(int64_t &IntVal);
1736 ParseStatus parseSWaitCnt(OperandVector &Operands);
1737
1738 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1739 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1740 ParseStatus parseDepCtr(OperandVector &Operands);
1741
1742 bool parseDelay(int64_t &Delay);
1743 ParseStatus parseSDelayALU(OperandVector &Operands);
1744
1745 ParseStatus parseHwreg(OperandVector &Operands);
1746
1747 private:
  // Scratch record for one parsed operand field (e.g. a hwreg or sendmsg
  // component): source location, value, and how it was written.
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Val;
    bool IsSymbolic = false; // written as a symbolic name, not a number
    bool IsDefined = false;  // explicitly present in the source

    OperandInfoTy(int64_t Val) : Val(Val) {}
  };

  // A named, fixed-width field of a structured operand, with per-field
  // diagnostics and validation.
  struct StructuredOpField : OperandInfoTy {
    StringLiteral Id;   // field name as written in assembly
    StringLiteral Desc; // human-readable name used in diagnostics
    unsigned Width;     // bit width the value must fit in
    // NOTE(review): shadows OperandInfoTy::IsDefined — code accessing the
    // flag through the base type sees a different member than code using
    // the derived type. Confirm the duplication is intentional.
    bool IsDefined = false;

    StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
                      int64_t Default)
        : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
    virtual ~StructuredOpField() = default;

    // Emit "invalid <Desc>: <Err>" at this field's location; always returns
    // false so callers can write `return Error(...)`.
    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);
      return false;
    }

    // Default validation: reject values the current GPU does not support
    // (symbolic OPR_ID_UNSUPPORTED) and values wider than Width bits.
    virtual bool validate(AMDGPUAsmParser &Parser) const {
      if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
        return Error(Parser, "not supported on this GPU");
      if (!isUIntN(Width, Val))
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
      return true;
    }
  };
1781
ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);

// sendmsg operand parsing and validation.
bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
bool validateSendMsg(const OperandInfoTy &Msg,
                     const OperandInfoTy &Op,
                     const OperandInfoTy &Stream);

ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
                           OperandInfoTy &Width);

// Helpers recovering the source location of particular operand kinds so
// validators can point diagnostics at the offending token.
SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
SMLoc getBLGPLoc(const OperandVector &Operands) const;

SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                    const OperandVector &Operands) const;
SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
SMLoc getRegLoc(MCRegister Reg, const OperandVector &Operands) const;
SMLoc getLitLoc(const OperandVector &Operands,
                bool SearchMandatoryLiterals = false) const;
SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
SMLoc getConstLoc(const OperandVector &Operands) const;
SMLoc getInstLoc(const OperandVector &Operands) const;

// Post-match semantic checks on the built MCInst. validateInstruction is
// the entry point; the rest check individual instruction classes/fields.
bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateSOPLiteral(const MCInst &Inst) const;
bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
                                                   bool AsVOPD3);
bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
bool tryVOPD(const MCInst &Inst);
bool tryVOPD3(const MCInst &Inst);
bool tryAnotherVOPDEncoding(const MCInst &Inst);

bool validateIntClampSupported(const MCInst &Inst);
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
bool validateMIMGD16(const MCInst &Inst);
bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
bool validateTensorR128(const MCInst &Inst);
bool validateMIMGMSAA(const MCInst &Inst);
bool validateOpSel(const MCInst &Inst);
bool validateTrue16OpSel(const MCInst &Inst);
bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
bool validateVccOperand(MCRegister Reg) const;
bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
bool validateAGPRLdSt(const MCInst &Inst) const;
bool validateVGPRAlign(const MCInst &Inst) const;
bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
bool validateDS(const MCInst &Inst, const OperandVector &Operands);
bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
bool validateDivScale(const MCInst &Inst);
bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                           const SMLoc &IDLoc);
bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
                            const unsigned CPol);
bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
unsigned getConstantBusLimit(unsigned Opcode) const;
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

// Mnemonic feature-availability queries used for "not supported on this
// GPU" diagnostics.
bool isSupportedMnemo(StringRef Mnemo,
                      const FeatureBitset &FBS);
bool isSupportedMnemo(StringRef Mnemo,
                      const FeatureBitset &FBS,
                      ArrayRef<unsigned> Variants);
bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

// Small lexer utilities over the token stream.
bool isId(const StringRef Id) const;
bool isId(const AsmToken &Token, const StringRef Id) const;
bool isToken(const AsmToken::TokenKind Kind) const;
StringRef getId() const;
bool trySkipId(const StringRef Id);
bool trySkipId(const StringRef Pref, const StringRef Id);
bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
bool trySkipToken(const AsmToken::TokenKind Kind);
bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
bool parseId(StringRef &Val, const StringRef ErrMsg = "");

void peekTokens(MutableArrayRef<AsmToken> Tokens);
AsmToken::TokenKind getTokenKind() const;
bool parseExpr(int64_t &Imm, StringRef Expected = "");
bool parseExpr(OperandVector &Operands);
StringRef getTokenStr() const;
AsmToken peekToken(bool ShouldSkipSpace = true);
AsmToken getToken() const;
SMLoc getLoc() const;
void lex();
1885
public:
void onBeginOfFile() override;
bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);

// Parsers for special operand syntaxes (export target, message, interp,
// branch target, boolean register).
ParseStatus parseExpTgt(OperandVector &Operands);
ParseStatus parseSendMsg(OperandVector &Operands);
ParseStatus parseInterpSlot(OperandVector &Operands);
ParseStatus parseInterpAttr(OperandVector &Operands);
ParseStatus parseSOPPBrTarget(OperandVector &Operands);
ParseStatus parseBoolReg(OperandVector &Operands);

// swizzle() operand parsing: a numeric offset or one of the named macros
// (QUAD_PERM, BITMASK_PERM, BROADCAST, SWAP, REVERSE, FFT, ROTATE).
bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                         const unsigned MaxVal, const Twine &ErrMsg,
                         SMLoc &Loc);
bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                          const unsigned MinVal,
                          const unsigned MaxVal,
                          const StringRef ErrMsg);
ParseStatus parseSwizzle(OperandVector &Operands);
bool parseSwizzleOffset(int64_t &Imm);
bool parseSwizzleMacro(int64_t &Imm);
bool parseSwizzleQuadPerm(int64_t &Imm);
bool parseSwizzleBitmaskPerm(int64_t &Imm);
bool parseSwizzleBroadcast(int64_t &Imm);
bool parseSwizzleSwap(int64_t &Imm);
bool parseSwizzleReverse(int64_t &Imm);
bool parseSwizzleFFT(int64_t &Imm);
bool parseSwizzleRotate(int64_t &Imm);

// gpr_idx mode operand parsing.
ParseStatus parseGPRIdxMode(OperandVector &Operands);
int64_t parseGPRIdxMacro();
cvtMubuf(MCInst & Inst,const OperandVector & Operands)1920 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
cvtMubufAtomic(MCInst & Inst,const OperandVector & Operands)1921 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1922
1923 ParseStatus parseOModSI(OperandVector &Operands);
1924
1925 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1926 OptionalImmIndexMap &OptionalIdx);
1927 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1928 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1929 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1930 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1931 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1932
1933 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1934 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1935 OptionalImmIndexMap &OptionalIdx);
1936 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1937 OptionalImmIndexMap &OptionalIdx);
1938
1939 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1940 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1941
1942 bool parseDimId(unsigned &Encoding);
1943 ParseStatus parseDim(OperandVector &Operands);
1944 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1945 ParseStatus parseDPP8(OperandVector &Operands);
1946 ParseStatus parseDPPCtrl(OperandVector &Operands);
1947 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1948 int64_t parseDPPCtrlSel(StringRef Ctrl);
1949 int64_t parseDPPCtrlPerm();
1950 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
cvtDPP8(MCInst & Inst,const OperandVector & Operands)1951 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1952 cvtDPP(Inst, Operands, true);
1953 }
// Shared converter for VOP3 instructions in DPP form.
void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                bool IsDPP8 = false);
cvtVOP3DPP8(MCInst & Inst,const OperandVector & Operands)1956 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1957 cvtVOP3DPP(Inst, Operands, true);
1958 }
1959
// SDWA operand parsing and per-encoding converters.
ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
                         AMDGPUOperand::ImmTy Type);
ParseStatus parseSDWADstUnused(OperandVector &Operands);
void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
// Common SDWA conversion; SkipDstVcc/SkipSrcVcc control whether implicit
// VCC operands are omitted.
void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
             uint64_t BasicInstType,
             bool SkipDstVcc = false,
             bool SkipSrcVcc = false);

ParseStatus parseEndpgm(OperandVector &Operands);

ParseStatus parseVOPD(OperandVector &Operands);
1976 };
1977
1978 } // end anonymous namespace
1979
1980 // May be called with integer type with equivalent bitwidth.
getFltSemantics(unsigned Size)1981 static const fltSemantics *getFltSemantics(unsigned Size) {
1982 switch (Size) {
1983 case 4:
1984 return &APFloat::IEEEsingle();
1985 case 8:
1986 return &APFloat::IEEEdouble();
1987 case 2:
1988 return &APFloat::IEEEhalf();
1989 default:
1990 llvm_unreachable("unsupported fp type");
1991 }
1992 }
1993
getFltSemantics(MVT VT)1994 static const fltSemantics *getFltSemantics(MVT VT) {
1995 return getFltSemantics(VT.getSizeInBits() / 8);
1996 }
1997
// Maps an MCOI operand type to the FP semantics used when converting a
// floating-point literal written for that operand.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  // When floating-point immediate is used as operand of type i16, the 32-bit
  // representation of the constant truncated to the 16 LSBs should be used.
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    return &APFloat::IEEEsingle();
  // 64-bit operand types use double precision.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
  case AMDGPU::OPERAND_KIMM64:
    return &APFloat::IEEEdouble();
  // fp16 operand types use half precision.
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
  // bf16 operand types use bfloat semantics.
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
    return &APFloat::BFloat();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
2039
2040 //===----------------------------------------------------------------------===//
2041 // Operand
2042 //===----------------------------------------------------------------------===//
2043
canLosslesslyConvertToFPType(APFloat & FPLiteral,MVT VT)2044 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2045 bool Lost;
2046
2047 // Convert literal to single precision
2048 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2049 APFloat::rmNearestTiesToEven,
2050 &Lost);
2051 // We allow precision lost but not overflow or underflow
2052 if (Status != APFloat::opOK &&
2053 Lost &&
2054 ((Status & APFloat::opOverflow) != 0 ||
2055 (Status & APFloat::opUnderflow) != 0)) {
2056 return false;
2057 }
2058
2059 return true;
2060 }
2061
isSafeTruncation(int64_t Val,unsigned Size)2062 static bool isSafeTruncation(int64_t Val, unsigned Size) {
2063 return isUIntN(Size, Val) || isIntN(Size, Val);
2064 }
2065
isInlineableLiteralOp16(int64_t Val,MVT VT,bool HasInv2Pi)2066 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2067 if (VT.getScalarType() == MVT::i16)
2068 return isInlinableLiteral32(Val, HasInv2Pi);
2069
2070 if (VT.getScalarType() == MVT::f16)
2071 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2072
2073 assert(VT.getScalarType() == MVT::bf16);
2074
2075 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2076 }
2077
// Returns true when this immediate can be encoded as an inline constant for
// an operand of machine type 'type' (i.e. no literal slot is needed).
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrower operand: the literal must first convert to the operand's FP
    // type without overflow/underflow.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      bool Lost = false;
      switch (type.getScalarType().SimpleTy) {
      default:
        llvm_unreachable("unknown 16-bit type");
      case MVT::bf16:
        FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
                          &Lost);
        break;
      case MVT::f16:
        FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
                          &Lost);
        break;
      case MVT::i16:
        FPLiteral.convert(APFloatBase::IEEEsingle(),
                          APFloat::rmNearestTiesToEven, &Lost);
        break;
      }
      // We need to use 32-bit representation here because when a floating-point
      // inline constant is used as an i16 operand, its 32-bit representation
      // will be used. We will need the 32-bit value to check if it is an FP
      // inline constant.
      uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      return isInlineableLiteralOp16(ImmVal, type,
                                     AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  // Values that do not fit the operand width (signed or unsigned) can never
  // be inline constants.
  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
}
2161
// Returns true when this immediate can be encoded as a literal for an
// operand of machine type 'type' (as opposed to an inline constant).
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  bool Allow64Bit =
      (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64) {
      // A 64-bit value that is not a valid 32-bit literal needs (and, with
      // target support, gets) a full 64-bit literal; otherwise it is checked
      // against the 32-bit rule below.
      if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
        return true;
      Size = 32;
    }

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  //
  // For i16x2 operands, we assume that the specified literal is encoded as a
  // single-precision float. This is pretty odd, but it matches SP3 and what
  // happens in hardware.
  MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32
                                            : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}
2220
isRegClass(unsigned RCID) const2221 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2222 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2223 }
2224
isVRegWithInputMods() const2225 bool AMDGPUOperand::isVRegWithInputMods() const {
2226 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2227 // GFX90A allows DPP on 64-bit operands.
2228 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2229 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2230 }
2231
2232 template <bool IsFake16>
isT16_Lo128VRegWithInputMods() const2233 bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2234 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2235 : AMDGPU::VGPR_16_Lo128RegClassID);
2236 }
2237
isT16VRegWithInputMods() const2238 template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2239 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2240 : AMDGPU::VGPR_16RegClassID);
2241 }
2242
// SDWA source operand check: VI accepts 32-bit VGPRs only; GFX9+ also
// accepts VS_32 registers and inlinable immediates. Other targets have no
// SDWA.
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  return false;
}
2250
// Typed SDWA operand predicates; all dispatch to isSDWAOperand() with the
// matching machine value type.
bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}
2266
isBoolReg() const2267 bool AMDGPUOperand::isBoolReg() const {
2268 auto FB = AsmParser->getFeatureBits();
2269 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2270 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2271 }
2272
applyInputFPModifiers(uint64_t Val,unsigned Size) const2273 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2274 {
2275 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2276 assert(Size == 2 || Size == 4 || Size == 8);
2277
2278 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2279
2280 if (Imm.Mods.Abs) {
2281 Val &= ~FpSignMask;
2282 }
2283 if (Imm.Mods.Neg) {
2284 Val ^= FpSignMask;
2285 }
2286
2287 return Val;
2288 }
2289
addImmOperands(MCInst & Inst,unsigned N,bool ApplyModifiers) const2290 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2291 if (isExpr()) {
2292 Inst.addOperand(MCOperand::createExpr(Expr));
2293 return;
2294 }
2295
2296 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2297 Inst.getNumOperands())) {
2298 addLiteralImmOperand(Inst, Imm.Val,
2299 ApplyModifiers &
2300 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2301 } else {
2302 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2303 Inst.addOperand(MCOperand::createImm(Imm.Val));
2304 setImmKindNone();
2305 }
2306 }
2307
// Encodes Val into Inst for the current literal-capable source operand,
// choosing between inline-constant and literal encodings based on the
// operand type, and recording the chosen kind via setImmKind*().
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      // 64-bit operand: use the inline encoding when possible.
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc,
                                   OpNum)) { // Expected 64-bit fp operand
        bool HasMandatoryLiteral =
            AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0 &&
            (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
            !HasMandatoryLiteral) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
              Inst.getLoc(),
              "Can't encode literal as exact 64-bit floating-point operand. "
              "Low 32-bits will be set to zero");
          Val &= 0xffffffff00000000u;
        }

        Inst.addOperand(MCOperand::createImm(Val));
        setImmKindLiteral();
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_KIMM64:
      // Mandatory 64-bit literal: emit as-is.
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindMandatoryLiteral();
      return;

    case AMDGPU::OPERAND_REG_IMM_BF16:
    case AMDGPU::OPERAND_REG_INLINE_C_BF16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
    case AMDGPU::OPERAND_REG_IMM_V2BF16:
      if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
        // This is the 1/(2*pi) which is going to be truncated to bf16 with the
        // loss of precision. The constant represents ideomatic fp32 value of
        // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
        // bits. Prevent rounding below.
        Inst.addOperand(MCOperand::createImm(0x3e22));
        setImmKindLiteral();
        return;
      }
      [[fallthrough]];

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32:
    case AMDGPU::OPERAND_KIMM32:
    case AMDGPU::OPERAND_KIMM16:
    case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
        setImmKindMandatoryLiteral();
      } else {
        setImmKindLiteral();
      }
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    // 32-bit operand: inline encoding when the value fits and is inlinable,
    // otherwise a 32-bit literal (low 32 bits of Val).
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    // When the 32 MSBs are not zero (effectively means it can't be safely
    // truncated to uint32_t), if the target doesn't support 64-bit literals, or
    // the lit modifier is explicitly used, we need to truncate it to the 32
    // LSBs.
    if (!AsmParser->has64BitLiterals() || getModifiers().Lit)
      Val = Lo_32(Val);

    Inst.addOperand(MCOperand::createImm(Val));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    // If the target doesn't support 64-bit literals, we need to use the
    // constant as the high 32 MSBs of a double-precision floating point value.
    if (!AsmParser->has64BitLiterals()) {
      Val = static_cast<uint64_t>(Val) << 32;
    } else {
      // Now the target does support 64-bit literals, there are two cases
      // where we still want to use src_literal encoding:
      // 1) explicitly forced by using lit modifier;
      // 2) the value is a valid 32-bit representation (signed or unsigned),
      // meanwhile not forced by lit64 modifier.
      if (getModifiers().Lit ||
          (!getModifiers().Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
        Val = static_cast<uint64_t>(Val) << 32;
    }

    Inst.addOperand(MCOperand::createImm(Val));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
      Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
                                       AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
                                       AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  // The packed inline-constant-only operand types below must already have
  // been validated as inlinable; the asserts restate that contract.
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: {
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
                                          AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }

  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: {
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
                                          AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }

  case AMDGPU::OPERAND_KIMM32:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
    setImmKindMandatoryLiteral();
    return;
  case AMDGPU::OPERAND_KIMM16:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
    setImmKindMandatoryLiteral();
    return;
  case AMDGPU::OPERAND_KIMM64:
    // 32-bit-representable values not forced to lit64 are carried in the
    // high half.
    if ((isInt<32>(Val) || isUInt<32>(Val)) && !getModifiers().Lit64)
      Val <<= 32;

    Inst.addOperand(MCOperand::createImm(Val));
    setImmKindMandatoryLiteral();
    return;
  default:
    llvm_unreachable("invalid operand size");
  }
}
2584
addRegOperands(MCInst & Inst,unsigned N) const2585 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2586 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2587 }
2588
isInlineValue() const2589 bool AMDGPUOperand::isInlineValue() const {
2590 return isRegKind() && ::isInlineValue(getReg());
2591 }
2592
2593 //===----------------------------------------------------------------------===//
2594 // AsmParser
2595 //===----------------------------------------------------------------------===//
2596
createConstantSymbol(StringRef Id,int64_t Val)2597 void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2598 // TODO: make those pre-defined variables read-only.
2599 // Currently there is none suitable machinery in the core llvm-mc for this.
2600 // MCSymbol::isRedefinable is intended for another purpose, and
2601 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
2602 MCContext &Ctx = getContext();
2603 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2604 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2605 }
2606
getRegClass(RegisterKind Is,unsigned RegWidth)2607 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2608 if (Is == IS_VGPR) {
2609 switch (RegWidth) {
2610 default: return -1;
2611 case 32:
2612 return AMDGPU::VGPR_32RegClassID;
2613 case 64:
2614 return AMDGPU::VReg_64RegClassID;
2615 case 96:
2616 return AMDGPU::VReg_96RegClassID;
2617 case 128:
2618 return AMDGPU::VReg_128RegClassID;
2619 case 160:
2620 return AMDGPU::VReg_160RegClassID;
2621 case 192:
2622 return AMDGPU::VReg_192RegClassID;
2623 case 224:
2624 return AMDGPU::VReg_224RegClassID;
2625 case 256:
2626 return AMDGPU::VReg_256RegClassID;
2627 case 288:
2628 return AMDGPU::VReg_288RegClassID;
2629 case 320:
2630 return AMDGPU::VReg_320RegClassID;
2631 case 352:
2632 return AMDGPU::VReg_352RegClassID;
2633 case 384:
2634 return AMDGPU::VReg_384RegClassID;
2635 case 512:
2636 return AMDGPU::VReg_512RegClassID;
2637 case 1024:
2638 return AMDGPU::VReg_1024RegClassID;
2639 }
2640 } else if (Is == IS_TTMP) {
2641 switch (RegWidth) {
2642 default: return -1;
2643 case 32:
2644 return AMDGPU::TTMP_32RegClassID;
2645 case 64:
2646 return AMDGPU::TTMP_64RegClassID;
2647 case 128:
2648 return AMDGPU::TTMP_128RegClassID;
2649 case 256:
2650 return AMDGPU::TTMP_256RegClassID;
2651 case 512:
2652 return AMDGPU::TTMP_512RegClassID;
2653 }
2654 } else if (Is == IS_SGPR) {
2655 switch (RegWidth) {
2656 default: return -1;
2657 case 32:
2658 return AMDGPU::SGPR_32RegClassID;
2659 case 64:
2660 return AMDGPU::SGPR_64RegClassID;
2661 case 96:
2662 return AMDGPU::SGPR_96RegClassID;
2663 case 128:
2664 return AMDGPU::SGPR_128RegClassID;
2665 case 160:
2666 return AMDGPU::SGPR_160RegClassID;
2667 case 192:
2668 return AMDGPU::SGPR_192RegClassID;
2669 case 224:
2670 return AMDGPU::SGPR_224RegClassID;
2671 case 256:
2672 return AMDGPU::SGPR_256RegClassID;
2673 case 288:
2674 return AMDGPU::SGPR_288RegClassID;
2675 case 320:
2676 return AMDGPU::SGPR_320RegClassID;
2677 case 352:
2678 return AMDGPU::SGPR_352RegClassID;
2679 case 384:
2680 return AMDGPU::SGPR_384RegClassID;
2681 case 512:
2682 return AMDGPU::SGPR_512RegClassID;
2683 }
2684 } else if (Is == IS_AGPR) {
2685 switch (RegWidth) {
2686 default: return -1;
2687 case 32:
2688 return AMDGPU::AGPR_32RegClassID;
2689 case 64:
2690 return AMDGPU::AReg_64RegClassID;
2691 case 96:
2692 return AMDGPU::AReg_96RegClassID;
2693 case 128:
2694 return AMDGPU::AReg_128RegClassID;
2695 case 160:
2696 return AMDGPU::AReg_160RegClassID;
2697 case 192:
2698 return AMDGPU::AReg_192RegClassID;
2699 case 224:
2700 return AMDGPU::AReg_224RegClassID;
2701 case 256:
2702 return AMDGPU::AReg_256RegClassID;
2703 case 288:
2704 return AMDGPU::AReg_288RegClassID;
2705 case 320:
2706 return AMDGPU::AReg_320RegClassID;
2707 case 352:
2708 return AMDGPU::AReg_352RegClassID;
2709 case 384:
2710 return AMDGPU::AReg_384RegClassID;
2711 case 512:
2712 return AMDGPU::AReg_512RegClassID;
2713 case 1024:
2714 return AMDGPU::AReg_1024RegClassID;
2715 }
2716 }
2717 return -1;
2718 }
2719
getSpecialRegForName(StringRef RegName)2720 static MCRegister getSpecialRegForName(StringRef RegName) {
2721 return StringSwitch<unsigned>(RegName)
2722 .Case("exec", AMDGPU::EXEC)
2723 .Case("vcc", AMDGPU::VCC)
2724 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2725 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2726 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2727 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2728 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2729 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2730 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2731 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2732 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2733 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2734 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2735 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2736 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2737 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2738 .Case("m0", AMDGPU::M0)
2739 .Case("vccz", AMDGPU::SRC_VCCZ)
2740 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2741 .Case("execz", AMDGPU::SRC_EXECZ)
2742 .Case("src_execz", AMDGPU::SRC_EXECZ)
2743 .Case("scc", AMDGPU::SRC_SCC)
2744 .Case("src_scc", AMDGPU::SRC_SCC)
2745 .Case("tba", AMDGPU::TBA)
2746 .Case("tma", AMDGPU::TMA)
2747 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2748 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2749 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2750 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2751 .Case("vcc_lo", AMDGPU::VCC_LO)
2752 .Case("vcc_hi", AMDGPU::VCC_HI)
2753 .Case("exec_lo", AMDGPU::EXEC_LO)
2754 .Case("exec_hi", AMDGPU::EXEC_HI)
2755 .Case("tma_lo", AMDGPU::TMA_LO)
2756 .Case("tma_hi", AMDGPU::TMA_HI)
2757 .Case("tba_lo", AMDGPU::TBA_LO)
2758 .Case("tba_hi", AMDGPU::TBA_HI)
2759 .Case("pc", AMDGPU::PC_REG)
2760 .Case("null", AMDGPU::SGPR_NULL)
2761 .Default(AMDGPU::NoRegister);
2762 }
2763
// MCTargetAsmParser entry point: parse a register, filling RegNo and the
// source range. Returns true on failure (no register parsed).
// NOTE(review): RestoreOnFailure is accepted but not forwarded to
// parseRegister() here — confirm whether lexer restoration is intended
// on this path (tryParseRegister passes true through this function).
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}
2774
parseRegister(MCRegister & Reg,SMLoc & StartLoc,SMLoc & EndLoc)2775 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2776 SMLoc &EndLoc) {
2777 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2778 }
2779
tryParseRegister(MCRegister & Reg,SMLoc & StartLoc,SMLoc & EndLoc)2780 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2781 SMLoc &EndLoc) {
2782 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2783 bool PendingErrors = getParser().hasPendingError();
2784 getParser().clearPendingErrors();
2785 if (PendingErrors)
2786 return ParseStatus::Failure;
2787 if (Result)
2788 return ParseStatus::NoMatch;
2789 return ParseStatus::Success;
2790 }
2791
AddNextRegisterToList(MCRegister & Reg,unsigned & RegWidth,RegisterKind RegKind,MCRegister Reg1,SMLoc Loc)2792 bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2793 RegisterKind RegKind,
2794 MCRegister Reg1, SMLoc Loc) {
2795 switch (RegKind) {
2796 case IS_SPECIAL:
2797 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2798 Reg = AMDGPU::EXEC;
2799 RegWidth = 64;
2800 return true;
2801 }
2802 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2803 Reg = AMDGPU::FLAT_SCR;
2804 RegWidth = 64;
2805 return true;
2806 }
2807 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2808 Reg = AMDGPU::XNACK_MASK;
2809 RegWidth = 64;
2810 return true;
2811 }
2812 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2813 Reg = AMDGPU::VCC;
2814 RegWidth = 64;
2815 return true;
2816 }
2817 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2818 Reg = AMDGPU::TBA;
2819 RegWidth = 64;
2820 return true;
2821 }
2822 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2823 Reg = AMDGPU::TMA;
2824 RegWidth = 64;
2825 return true;
2826 }
2827 Error(Loc, "register does not fit in the list");
2828 return false;
2829 case IS_VGPR:
2830 case IS_SGPR:
2831 case IS_AGPR:
2832 case IS_TTMP:
2833 if (Reg1 != Reg + RegWidth / 32) {
2834 Error(Loc, "registers in a list must have consecutive indices");
2835 return false;
2836 }
2837 RegWidth += 32;
2838 return true;
2839 default:
2840 llvm_unreachable("unexpected register kind");
2841 }
2842 }
2843
// A regular (numbered) register file: name prefix and register kind.
struct RegInfo {
  StringLiteral Name;
  RegisterKind Kind;
};

// Recognized register-name prefixes. Lookup matches by starts_with, so
// the longer "acc" spelling must come before its one-letter alias "a".
static constexpr RegInfo RegularRegisters[] = {
  {{"v"}, IS_VGPR},
  {{"s"}, IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"}, IS_AGPR},
  {{"a"}, IS_AGPR},
};
2856
isRegularReg(RegisterKind Kind)2857 static bool isRegularReg(RegisterKind Kind) {
2858 return Kind == IS_VGPR ||
2859 Kind == IS_SGPR ||
2860 Kind == IS_TTMP ||
2861 Kind == IS_AGPR;
2862 }
2863
getRegularRegInfo(StringRef Str)2864 static const RegInfo* getRegularRegInfo(StringRef Str) {
2865 for (const RegInfo &Reg : RegularRegisters)
2866 if (Str.starts_with(Reg.Name))
2867 return &Reg;
2868 return nullptr;
2869 }
2870
// Parse \p Str as a base-10 register index into \p Num. Returns true on
// success; StringRef::getAsInteger() returns true on error, hence the
// negation.
static bool getRegNum(StringRef Str, unsigned& Num) {
  return !Str.getAsInteger(10, Num);
}
2874
2875 bool
isRegister(const AsmToken & Token,const AsmToken & NextToken) const2876 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2877 const AsmToken &NextToken) const {
2878
2879 // A list of consecutive registers: [s0,s1,s2,s3]
2880 if (Token.is(AsmToken::LBrac))
2881 return true;
2882
2883 if (!Token.is(AsmToken::Identifier))
2884 return false;
2885
2886 // A single register like s0 or a range of registers like s[0:1]
2887
2888 StringRef Str = Token.getString();
2889 const RegInfo *Reg = getRegularRegInfo(Str);
2890 if (Reg) {
2891 StringRef RegName = Reg->Name;
2892 StringRef RegSuffix = Str.substr(RegName.size());
2893 if (!RegSuffix.empty()) {
2894 RegSuffix.consume_back(".l");
2895 RegSuffix.consume_back(".h");
2896 unsigned Num;
2897 // A single register with an index: rXX
2898 if (getRegNum(RegSuffix, Num))
2899 return true;
2900 } else {
2901 // A range of registers: r[XX:YY].
2902 if (NextToken.is(AsmToken::LBrac))
2903 return true;
2904 }
2905 }
2906
2907 return getSpecialRegForName(Str).isValid();
2908 }
2909
// Check whether the token at the current lexer position starts a
// register reference (uses one extra token of lookahead).
bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}
2915
getRegularReg(RegisterKind RegKind,unsigned RegNum,unsigned SubReg,unsigned RegWidth,SMLoc Loc)2916 MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2917 unsigned SubReg, unsigned RegWidth,
2918 SMLoc Loc) {
2919 assert(isRegularReg(RegKind));
2920
2921 unsigned AlignSize = 1;
2922 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2923 // SGPR and TTMP registers must be aligned.
2924 // Max required alignment is 4 dwords.
2925 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2926 }
2927
2928 if (RegNum % AlignSize != 0) {
2929 Error(Loc, "invalid register alignment");
2930 return MCRegister();
2931 }
2932
2933 unsigned RegIdx = RegNum / AlignSize;
2934 int RCID = getRegClass(RegKind, RegWidth);
2935 if (RCID == -1) {
2936 Error(Loc, "invalid or unsupported register size");
2937 return MCRegister();
2938 }
2939
2940 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2941 const MCRegisterClass RC = TRI->getRegClass(RCID);
2942 if (RegIdx >= RC.getNumRegs()) {
2943 Error(Loc, "register index is out of range");
2944 return MCRegister();
2945 }
2946
2947 MCRegister Reg = RC.getRegister(RegIdx);
2948
2949 if (SubReg) {
2950 Reg = TRI->getSubReg(Reg, SubReg);
2951
2952 // Currently all regular registers have their .l and .h subregisters, so
2953 // we should never need to generate an error here.
2954 assert(Reg && "Invalid subregister!");
2955 }
2956
2957 return Reg;
2958 }
2959
// Parse a bracketed register index or range: "[XX]" or "[XX:YY]", with
// an optional ".l"/".h" half selector after a single-index form. On
// success sets Num to the first index and RegWidth to the covered width
// in bits (32 per register). Emits a diagnostic and returns false on
// failure.
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
                                    unsigned &SubReg) {
  int64_t RegLo, RegHi;
  if (!skipToken(AsmToken::LBrac, "missing register index"))
    return false;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(RegLo))
    return false;

  // ":YY" is optional; a single index denotes a one-register range.
  if (trySkipToken(AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return false;

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
    return false;
  }

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");
    return false;
  }

  // The .l/.h half selector is only accepted for a single register.
  if (RegHi == RegLo) {
    StringRef RegSuffix = getTokenStr();
    if (RegSuffix == ".l") {
      SubReg = AMDGPU::lo16;
      lex();
    } else if (RegSuffix == ".h") {
      SubReg = AMDGPU::hi16;
      lex();
    }
  }

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);

  return true;
}
3014
ParseSpecialReg(RegisterKind & RegKind,unsigned & RegNum,unsigned & RegWidth,SmallVectorImpl<AsmToken> & Tokens)3015 MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3016 unsigned &RegNum,
3017 unsigned &RegWidth,
3018 SmallVectorImpl<AsmToken> &Tokens) {
3019 assert(isToken(AsmToken::Identifier));
3020 MCRegister Reg = getSpecialRegForName(getTokenStr());
3021 if (Reg) {
3022 RegNum = 0;
3023 RegWidth = 32;
3024 RegKind = IS_SPECIAL;
3025 Tokens.push_back(getToken());
3026 lex(); // skip register name
3027 }
3028 return Reg;
3029 }
3030
// Parse a regular register reference: a known prefix (v, s, ttmp, acc,
// a) followed either by a bare index (optionally with a .l/.h half
// selector) or a bracketed range. Emits a diagnostic and returns a null
// register on failure.
MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                            unsigned &RegNum,
                                            unsigned &RegWidth,
                                            SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI) {
    Error(Loc, "invalid register name");
    return MCRegister();
  }

  // Record the consumed token so the caller can un-lex it on failure.
  Tokens.push_back(getToken());
  lex(); // skip register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(RI->Name.size());
  unsigned SubReg = NoSubRegister;
  if (!RegSuffix.empty()) {
    // Optional 16-bit half selector: vXX.l / vXX.h.
    if (RegSuffix.consume_back(".l"))
      SubReg = AMDGPU::lo16;
    else if (RegSuffix.consume_back(".h"))
      SubReg = AMDGPU::hi16;

    // Single 32-bit register: vXX.
    if (!getRegNum(RegSuffix, RegNum)) {
      Error(Loc, "invalid register index");
      return MCRegister();
    }
    RegWidth = 32;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth, SubReg))
      return MCRegister();
  }

  return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
}
3071
// Parse a bracketed list of consecutive 32-bit registers, e.g.
// [s0,s1,s2,s3]. All elements must be single 32-bit registers of the
// same kind with consecutive indices; the combined register is returned.
// Emits a diagnostic and returns a null register on failure.
MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
                                         unsigned &RegNum, unsigned &RegWidth,
                                         SmallVectorImpl<AsmToken> &Tokens) {
  MCRegister Reg;
  auto ListLoc = getLoc();

  if (!skipToken(AsmToken::LBrac,
                 "expected a register or a list of registers")) {
    return MCRegister();
  }

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return MCRegister();
  if (RegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");
    return MCRegister();
  }

  for (; trySkipToken(AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    MCRegister NextReg;
    unsigned NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
                             Tokens)) {
      return MCRegister();
    }
    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return MCRegister();
    }
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return MCRegister();
    }
    // Fold the next element into the accumulated (Reg, RegWidth) pair.
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return MCRegister();
  }

  if (!skipToken(AsmToken::RBrac,
                 "expected a comma or a closing square bracket")) {
    return MCRegister();
  }

  // For regular registers, re-resolve the accumulated index/width into
  // the wide register of the proper class (also validates alignment).
  if (isRegularReg(RegKind))
    Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);

  return Reg;
}
3126
// Dispatch to the appropriate register parser (special name, regular
// register, or bracketed list) and verify the result is available on the
// current subtarget. Returns false with a pending diagnostic on failure.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
                                          MCRegister &Reg, unsigned &RegNum,
                                          unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = MCRegister();

  if (isToken(AsmToken::Identifier)) {
    // Special names ("vcc", "exec", ...) take precedence; fall back to a
    // regular register reference ("v0", "s[0:1]", ...).
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (!Reg)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (!Reg) {
    // The sub-parsers emit their own diagnostics.
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, Twine(AMDGPUInstPrinter::getRegisterName(Reg)) +
                " register not available on this GPU");
    }
    return false;
  }

  return true;
}
3160
ParseAMDGPURegister(RegisterKind & RegKind,MCRegister & Reg,unsigned & RegNum,unsigned & RegWidth,bool RestoreOnFailure)3161 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3162 MCRegister &Reg, unsigned &RegNum,
3163 unsigned &RegWidth,
3164 bool RestoreOnFailure /*=false*/) {
3165 Reg = MCRegister();
3166
3167 SmallVector<AsmToken, 1> Tokens;
3168 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3169 if (RestoreOnFailure) {
3170 while (!Tokens.empty()) {
3171 getLexer().UnLex(Tokens.pop_back_val());
3172 }
3173 }
3174 return true;
3175 }
3176 return false;
3177 }
3178
3179 std::optional<StringRef>
getGprCountSymbolName(RegisterKind RegKind)3180 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3181 switch (RegKind) {
3182 case IS_VGPR:
3183 return StringRef(".amdgcn.next_free_vgpr");
3184 case IS_SGPR:
3185 return StringRef(".amdgcn.next_free_sgpr");
3186 default:
3187 return std::nullopt;
3188 }
3189 }
3190
initializeGprCountSymbol(RegisterKind RegKind)3191 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3192 auto SymbolName = getGprCountSymbolName(RegKind);
3193 assert(SymbolName && "initializing invalid register kind");
3194 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3195 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3196 Sym->setRedefinable(true);
3197 }
3198
// Grow the .amdgcn.next_free_{v,s}gpr tracking symbol to cover a
// register use starting at DwordRegIndex with the given bit width.
// Returns false (after reporting) only when the symbol exists but is not
// an absolute variable; all other cases (including untracked kinds)
// return true.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  // Only VGPR/SGPR usage is tracked; other kinds are silently ignored.
  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  // Highest dword index touched by this register use.
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
    return !Error(
        getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  // The symbol records the next free register, i.e. highest used + 1.
  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
3227
// Parse a register reference into an AMDGPUOperand, updating GPR usage
// tracking (HSA count symbols, or the legacy kernel scope) as a side
// effect. Returns nullptr on failure.
// NOTE(review): RestoreOnFailure is accepted but not forwarded to
// ParseAMDGPURegister here — confirm whether token restoration is
// intended on this path.
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  MCRegister Reg;
  unsigned RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  // HSA ABI tracks GPR counts via symbols; the legacy flow records the
  // use in the kernel scope instead.
  if (isHsaAbi(getSTI())) {
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
3247
// Parse an immediate operand: an optional lit()/lit64() wrapper around a
// floating-point literal (with optional leading '-') or an integer/MC
// expression. Returns NoMatch if the input starts a register or a
// modifier instead.
ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
                                      bool HasSP3AbsModifier, bool HasLit,
                                      bool HasLit64) {
  // TODO: add syntactic sugar for 1/(2*PI)

  if (isRegister() || isModifier())
    return ParseStatus::NoMatch;

  // Handle an enclosing lit(...) / lit64(...) wrapper by recursing on
  // the inner immediate with the corresponding flag set.
  if (!HasLit && !HasLit64) {
    HasLit64 = trySkipId("lit64");
    HasLit = !HasLit64 && trySkipId("lit");
    if (HasLit || HasLit64) {
      if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
        return ParseStatus::Failure;
      ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit, HasLit64);
      if (S.isSuccess() &&
          !skipToken(AsmToken::RParen, "expected closing parentheses"))
        return ParseStatus::Failure;
      return S;
    }
  }

  const auto& Tok = getToken();
  const auto& NextTok = peekToken();
  bool IsReal = Tok.is(AsmToken::Real);
  SMLoc S = getLoc();
  bool Negate = false;

  // "-1.0" arrives as Minus followed by Real; fold the sign in here.
  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
    lex();
    IsReal = true;
    Negate = true;
  }

  AMDGPUOperand::Modifiers Mods;
  Mods.Lit = HasLit;
  Mods.Lit64 = HasLit64;

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.

    StringRef Num = getTokenStr();
    lex();

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
      return ParseStatus::Failure;
    if (Negate)
      RealVal.changeSign();

    // The immediate carries the raw IEEE-754 double bit pattern; the
    // trailing 'true' marks it as a floating-point modifier candidate.
    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);

    return ParseStatus::Success;

  } else {
    int64_t IntVal;
    const MCExpr *Expr;
    SMLoc S = getLoc(); // NOTE: shadows the outer S; location of the expr.

    if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // as arguments of SP3 'abs' modifier, for example:
      //     |1.0|
      //     |-1|
      //     |1+x|
      // This syntax is not compatible with syntax of standard
      // MC expressions (due to the trailing '|').
      SMLoc EndLoc;
      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
        return ParseStatus::Failure;
    } else {
      if (Parser.parseExpression(Expr))
        return ParseStatus::Failure;
    }

    // Absolute expressions become plain immediates; anything else is
    // kept symbolic — but symbolic values are not allowed inside lit().
    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      Op.setModifiers(Mods);
    } else {
      if (HasLit || HasLit64)
        return ParseStatus::NoMatch;
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    }

    return ParseStatus::Success;
  }

  // Unreachable: both branches above return.
  return ParseStatus::NoMatch;
}
3345
parseReg(OperandVector & Operands)3346 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3347 if (!isRegister())
3348 return ParseStatus::NoMatch;
3349
3350 if (auto R = parseRegister()) {
3351 assert(R->isReg());
3352 Operands.push_back(std::move(R));
3353 return ParseStatus::Success;
3354 }
3355 return ParseStatus::Failure;
3356 }
3357
parseRegOrImm(OperandVector & Operands,bool HasSP3AbsMod,bool HasLit,bool HasLit64)3358 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3359 bool HasSP3AbsMod, bool HasLit,
3360 bool HasLit64) {
3361 ParseStatus Res = parseReg(Operands);
3362 if (!Res.isNoMatch())
3363 return Res;
3364 if (isModifier())
3365 return ParseStatus::NoMatch;
3366 return parseImm(Operands, HasSP3AbsMod, HasLit, HasLit64);
3367 }
3368
3369 bool
isNamedOperandModifier(const AsmToken & Token,const AsmToken & NextToken) const3370 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3371 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3372 const auto &str = Token.getString();
3373 return str == "abs" || str == "neg" || str == "sext";
3374 }
3375 return false;
3376 }
3377
// An opcode modifier with a value has the shape "name:...", e.g.
// "row_shl:1".
bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
}
3382
// An operand modifier is either a named one (abs/neg/sext followed by
// "(") or the SP3 "|...|" abs syntax, which starts with a pipe.
bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}
3387
// True when the token pair starts either a register reference or an
// operand modifier.
bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}
3392
3393 // Check if this is an operand modifier or an opcode modifier
3394 // which may look like an expression but it is not. We should
3395 // avoid parsing these modifiers as expressions. Currently
3396 // recognized sequences are:
3397 // |...|
3398 // abs(...)
3399 // neg(...)
3400 // sext(...)
3401 // -reg
3402 // -|...|
3403 // -abs(...)
3404 // name:...
3405 //
3406 bool
isModifier()3407 AMDGPUAsmParser::isModifier() {
3408
3409 AsmToken Tok = getToken();
3410 AsmToken NextToken[2];
3411 peekTokens(NextToken);
3412
3413 return isOperandModifier(Tok, NextToken[0]) ||
3414 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3415 isOpcodeModifierWithVal(Tok, NextToken[0]);
3416 }
3417
3418 // Check if the current token is an SP3 'neg' modifier.
3419 // Currently this modifier is allowed in the following context:
3420 //
3421 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3422 // 2. Before an 'abs' modifier: -abs(...)
3423 // 3. Before an SP3 'abs' modifier: -|...|
3424 //
3425 // In all other cases "-" is handled as a part
3426 // of an expression that follows the sign.
3427 //
3428 // Note: When "-" is followed by an integer literal,
3429 // this is interpreted as integer negation rather
3430 // than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of floating-point
3432 // NEG modifier would have resulted in different meaning
3433 // of integer literals used with VOP1/2/C and VOP3,
3434 // for example:
3435 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3436 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3437 // Negative fp literals with preceding "-" are
3438 // handled likewise for uniformity
3439 //
3440 bool
parseSP3NegModifier()3441 AMDGPUAsmParser::parseSP3NegModifier() {
3442
3443 AsmToken NextToken[2];
3444 peekTokens(NextToken);
3445
3446 if (isToken(AsmToken::Minus) &&
3447 (isRegister(NextToken[0], NextToken[1]) ||
3448 NextToken[0].is(AsmToken::Pipe) ||
3449 isId(NextToken[0], "abs"))) {
3450 lex();
3451 return true;
3452 }
3453
3454 return false;
3455 }
3456
// Parse a register or immediate with optional floating-point input
// modifiers: SP3 "-"/"|...|", named neg(...)/abs(...), and the
// lit/lit64 literal markers. Modifier keywords are consumed first, then
// the operand, then the matching closers.
ParseStatus
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  bool Lit64, Lit;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  // SP3 "-" and named "neg(" are mutually exclusive.
  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg)
    return Error(Loc, "expected register or immediate");
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return ParseStatus::Failure;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return ParseStatus::Failure;

  Lit64 = trySkipId("lit64");
  if (Lit64) {
    if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
      return ParseStatus::Failure;
    if (!has64BitLiterals())
      return Error(Loc, "lit64 is not supported on this GPU");
  }

  Lit = !Lit64 && trySkipId("lit");
  if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
    return ParseStatus::Failure;

  // Named "abs(" and SP3 "|" are mutually exclusive.
  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs)
    return Error(Loc, "expected register or immediate");

  ParseStatus Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs, Lit, Lit64);
  } else {
    Res = parseReg(Operands);
  }
  if (!Res.isSuccess())
    // Once any modifier has been consumed, a failed operand parse is a
    // hard failure (the stream cannot be restored).
    return (SP3Neg || Neg || SP3Abs || Abs || Lit || Lit64)
               ? ParseStatus::Failure
               : Res;

  // NOTE(review): no early return after this Error — parsing continues
  // (closers below are still consumed); presumably deliberate error
  // recovery. Confirm intended.
  if ((Lit || Lit64) && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");

  // Consume the closing delimiters in the reverse order of the openers.
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return ParseStatus::Failure;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return ParseStatus::Failure;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return ParseStatus::Failure;
  if ((Lit || Lit64) &&
      !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return ParseStatus::Failure;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;
  Mods.Lit = Lit;
  Mods.Lit64 = Lit64;

  if (Mods.hasFPModifiers() || Lit || Lit64) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr())
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  return ParseStatus::Success;
}
3537
3538 ParseStatus
parseRegOrImmWithIntInputMods(OperandVector & Operands,bool AllowImm)3539 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3540 bool AllowImm) {
3541 bool Sext = trySkipId("sext");
3542 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3543 return ParseStatus::Failure;
3544
3545 ParseStatus Res;
3546 if (AllowImm) {
3547 Res = parseRegOrImm(Operands);
3548 } else {
3549 Res = parseReg(Operands);
3550 }
3551 if (!Res.isSuccess())
3552 return Sext ? ParseStatus::Failure : Res;
3553
3554 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3555 return ParseStatus::Failure;
3556
3557 AMDGPUOperand::Modifiers Mods;
3558 Mods.Sext = Sext;
3559
3560 if (Mods.hasIntModifiers()) {
3561 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3562 if (Op.isExpr())
3563 return Error(Op.getStartLoc(), "expected an absolute expression");
3564 Op.setModifiers(Mods);
3565 }
3566
3567 return ParseStatus::Success;
3568 }
3569
parseRegWithFPInputMods(OperandVector & Operands)3570 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3571 return parseRegOrImmWithFPInputMods(Operands, false);
3572 }
3573
parseRegWithIntInputMods(OperandVector & Operands)3574 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3575 return parseRegOrImmWithIntInputMods(Operands, false);
3576 }
3577
parseVReg32OrOff(OperandVector & Operands)3578 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3579 auto Loc = getLoc();
3580 if (trySkipId("off")) {
3581 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3582 AMDGPUOperand::ImmTyOff, false));
3583 return ParseStatus::Success;
3584 }
3585
3586 if (!isRegister())
3587 return ParseStatus::NoMatch;
3588
3589 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3590 if (Reg) {
3591 Operands.push_back(std::move(Reg));
3592 return ParseStatus::Success;
3593 }
3594
3595 return ParseStatus::Failure;
3596 }
3597
/// Post-match hook: reject a matched instruction whose encoding disagrees
/// with the encoding the user forced via a mnemonic suffix (_e32/_e64/
/// _sdwa/_dpp), and force a re-match for VOPD/VOPD3 alternatives.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  // A forced 32-bit encoding must not match a VOP3 opcode (and vice versa);
  // forced DPP/SDWA must match an opcode carrying the corresponding flag.
  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  // Asm can first try to match VOPD or VOPD3. By failing early here with
  // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
  // Checking later during validateInstruction does not give a chance to retry
  // parsing as a different encoding.
  if (tryAnotherVOPDEncoding(Inst))
    return Match_InvalidOperand;

  return Match_Success;
}
3627
getAllVariants()3628 static ArrayRef<unsigned> getAllVariants() {
3629 static const unsigned Variants[] = {
3630 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3631 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3632 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3633 };
3634
3635 return ArrayRef(Variants);
3636 }
3637
3638 // What asm variants we should check
getMatchedVariants() const3639 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3640 if (isForcedDPP() && isForcedVOP3()) {
3641 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3642 return ArrayRef(Variants);
3643 }
3644 if (getForcedEncodingSize() == 32) {
3645 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3646 return ArrayRef(Variants);
3647 }
3648
3649 if (isForcedVOP3()) {
3650 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3651 return ArrayRef(Variants);
3652 }
3653
3654 if (isForcedSDWA()) {
3655 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3656 AMDGPUAsmVariants::SDWA9};
3657 return ArrayRef(Variants);
3658 }
3659
3660 if (isForcedDPP()) {
3661 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3662 return ArrayRef(Variants);
3663 }
3664
3665 return getAllVariants();
3666 }
3667
getMatchedVariantName() const3668 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3669 if (isForcedDPP() && isForcedVOP3())
3670 return "e64_dpp";
3671
3672 if (getForcedEncodingSize() == 32)
3673 return "e32";
3674
3675 if (isForcedVOP3())
3676 return "e64";
3677
3678 if (isForcedSDWA())
3679 return "sdwa";
3680
3681 if (isForcedDPP())
3682 return "dpp";
3683
3684 return "";
3685 }
3686
findImplicitSGPRReadInVOP(const MCInst & Inst) const3687 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3688 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3689 for (MCPhysReg Reg : Desc.implicit_uses()) {
3690 switch (Reg) {
3691 case AMDGPU::FLAT_SCR:
3692 case AMDGPU::VCC:
3693 case AMDGPU::VCC_LO:
3694 case AMDGPU::VCC_HI:
3695 case AMDGPU::M0:
3696 return Reg;
3697 default:
3698 break;
3699 }
3700 }
3701 return AMDGPU::NoRegister;
3702 }
3703
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 support no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // Only SI source operands can be encoded inline; KImm operands are always
  // literals.
  if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
      AMDGPU::isKImmOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    // For 16-bit operands the inlinable set depends on the exact operand
    // type: integer vs fp vs bf16, scalar vs packed.
    const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16)
      return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
      return AMDGPU::isInlinableLiteralV2I16(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return AMDGPU::isInlinableLiteralV2F16(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
      return AMDGPU::isInlinableLiteralV2BF16(Val);

    if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16)
      return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());

    if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16)
      return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());

    llvm_unreachable("invalid operand type");
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
3759
/// \returns how many distinct scalar values \p Opcode may read through the
/// constant bus. Pre-GFX10 targets allow one; GFX10+ generally allows two,
/// with the listed 64-bit shifts restricted to one.
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}
3787
// Inline capacity for OperandIndices; getSrcOperandIndices may append
// mandatory-literal indices beyond the regular source operands.
constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3790
3791 // Get regular operand indices in the same order as specified
3792 // in the instruction (but append mandatory literals to the end).
getSrcOperandIndices(unsigned Opcode,bool AddMandatoryLiterals=false)3793 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3794 bool AddMandatoryLiterals = false) {
3795
3796 int16_t ImmIdx =
3797 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3798
3799 if (isVOPD(Opcode)) {
3800 int16_t ImmXIdx =
3801 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3802
3803 return {getNamedOperandIdx(Opcode, OpName::src0X),
3804 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3805 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3806 getNamedOperandIdx(Opcode, OpName::src0Y),
3807 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3808 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3809 ImmXIdx,
3810 ImmIdx};
3811 }
3812
3813 return {getNamedOperandIdx(Opcode, OpName::src0),
3814 getNamedOperandIdx(Opcode, OpName::src1),
3815 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3816 }
3817
usesConstantBus(const MCInst & Inst,unsigned OpIdx)3818 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3819 const MCOperand &MO = Inst.getOperand(OpIdx);
3820 if (MO.isImm())
3821 return !isInlineConstant(Inst, OpIdx);
3822 if (MO.isReg()) {
3823 auto Reg = MO.getReg();
3824 if (!Reg)
3825 return false;
3826 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3827 auto PReg = mc2PseudoReg(Reg);
3828 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3829 }
3830 return true;
3831 }
3832
3833 // Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3834 // Writelane is special in that it can use SGPR and M0 (which would normally
3835 // count as using the constant bus twice - but in this case it is allowed since
3836 // the lane selector doesn't count as a use of the constant bus). However, it is
3837 // still required to abide by the 1 SGPR rule.
checkWriteLane(const MCInst & Inst)3838 static bool checkWriteLane(const MCInst &Inst) {
3839 const unsigned Opcode = Inst.getOpcode();
3840 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3841 return false;
3842 const MCOperand &LaneSelOp = Inst.getOperand(2);
3843 if (!LaneSelOp.isReg())
3844 return false;
3845 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3846 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3847 }
3848
/// Verify that \p Inst does not read more distinct scalar values through the
/// constant bus than getConstantBusLimit() allows; emits a diagnostic and
/// returns false on violation.
bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  MCRegister LastSGPR;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  // Only VALU encodings (and VOPD) are subject to the limit.
  if (!(Desc.TSFlags &
        (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
         SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
      !isVOPD(Opcode))
    return true;

  // v_writelane with an M0 lane selector is exempt (see checkWriteLane).
  if (checkWriteLane(Inst))
    return true;

  // Check special imm operands (used by madmk, etc)
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
    ++NumLiterals;
    LiteralSize = 4;
  }

  SmallDenseSet<unsigned> SGPRsUsed;
  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }

  OperandIndices OpIndices = getSrcOperandIndices(Opcode);

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (usesConstantBus(Inst, OpIdx)) {
      if (MO.isReg()) {
        LastSGPR = mc2PseudoReg(MO.getReg());
        // Pairs of registers with a partial intersections like these
        // s0, s[0:1]
        // flat_scratch_lo, flat_scratch
        // flat_scratch_lo, flat_scratch_hi
        // are theoretically valid but they are disabled anyway.
        // Note that this code mimics SIInstrInfo::verifyInstruction
        if (SGPRsUsed.insert(LastSGPR).second) {
          ++ConstantBusUseCount;
        }
      } else { // Expression or a literal

        if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
          continue; // special operand like VINTERP attr_chan

        // An instruction may use only one literal.
        // This has been validated on the previous step.
        // See validateVOPLiteral.
        // This literal may be used as more than one operand.
        // If all these operands are of the same size,
        // this literal counts as one scalar value.
        // Otherwise it counts as 2 scalar values.
        // See "GFX10 Shader Programming", section 3.6.2.3.

        unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
        if (Size < 4)
          Size = 4;

        if (NumLiterals == 0) {
          NumLiterals = 1;
          LiteralSize = Size;
        } else if (LiteralSize != Size) {
          NumLiterals = 2;
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  // Point the diagnostic at whichever offender appears later in the source:
  // the literal or the last SGPR seen.
  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  return false;
}
3937
/// Check VOPD-specific operand constraints: VGPR bank conflicts between the
/// X and Y components and, when \p AsVOPD3 is set, the ban on literal
/// constants. \returns the index of the first offending operand, or
/// std::nullopt when the instruction is valid (or is not a VOPD).
std::optional<unsigned>
AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {

  const unsigned Opcode = Inst.getOpcode();
  if (!isVOPD(Opcode))
    return {};

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  // Map an operand index to its VGPR; SGPRs and non-register operands map to
  // an invalid register and are ignored by the bank check.
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    const MCOperand &Opr = Inst.getOperand(OperandIdx);
    return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
               ? Opr.getReg()
               : MCRegister();
  };

  // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
  // source-cache.
  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
  bool AllowSameVGPR = isGFX1250();

  if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
    // src0 of each component may still be an inline constant.
    for (auto OpName : {OpName::src0X, OpName::src0Y}) {
      int I = getNamedOperandIdx(Opcode, OpName);
      const MCOperand &Op = Inst.getOperand(I);
      if (!Op.isImm())
        continue;
      int64_t Imm = Op.getImm();
      if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
          !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
        return (unsigned)I;
    }

    // The remaining sources must not be immediates at all.
    for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
                        OpName::vsrc2Y, OpName::imm}) {
      int I = getNamedOperandIdx(Opcode, OpName);
      if (I == -1)
        continue;
      const MCOperand &Op = Inst.getOperand(I);
      if (Op.isImm())
        return (unsigned)I;
    }
  }

  const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
      getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);

  return InvalidCompOprIdx;
}
3990
/// Validate a matched VOPD/VOPD3 instruction, emitting a diagnostic at the
/// offending parsed operand on failure.
bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
                                   const OperandVector &Operands) {

  unsigned Opcode = Inst.getOpcode();
  bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;

  if (AsVOPD3) {
    // VOPD3 operands must not carry the ABS floating-point modifier.
    for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
          (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
        Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
    }
  }

  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
  if (!InvalidCompOprIdx.has_value())
    return true;

  // Map the failing component operand back to its parsed-operand location.
  auto CompOprIdx = *InvalidCompOprIdx;
  const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
  auto ParsedIdx =
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  assert(ParsedIdx > 0 && ParsedIdx < Operands.size());

  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    if (AsVOPD3)
      Error(Loc, "dst registers must be distinct");
    else
      Error(Loc, "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    Error(Loc, Twine("src") + Twine(CompSrcIdx) +
                   " operands must use different VGPR banks");
  }

  return false;
}
4031
// \returns true if \p Inst does not satisfy VOPD constraints, but can be
// potentially used as VOPD3 with the same operands.
bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
  // First check if it fits VOPD
  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
  if (!InvalidCompOprIdx.has_value())
    return false;

  // Then if it fits VOPD3
  InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
  if (InvalidCompOprIdx.has_value()) {
    // If failed operand is dst it is better to show error about VOPD3
    // instruction as it has more capabilities and error message will be
    // more informative. If the dst is not legal for VOPD3, then it is not
    // legal for VOPD either.
    if (*InvalidCompOprIdx == VOPD::Component::DST)
      return true;

    // Otherwise prefer VOPD as we may find ourselves in an awkward situation
    // with a conflict in tied implicit src2 of fmac and no asm operand to
    // point to.
    return false;
  }
  return true;
}
4057
// \returns true if a VOPD3 instruction can be also represented as a shorter
// VOPD encoding.
bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const auto &II = getVOPDInstInfo(Opcode, &MII);
  unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
  // Both component opcodes must be encodable as plain VOPD.
  if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
      !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
    return false;

  // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
  // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
  // be parsed as VOPD which does not accept src2.
  if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
      II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
    return false;

  // If any modifiers are set this cannot be VOPD.
  for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
                      OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
                      OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    if (I == -1)
      continue;
    if (Inst.getOperand(I).getImm())
      return false;
  }

  // Finally, the operands must also satisfy the plain-VOPD bank constraints.
  return !tryVOPD3(Inst);
}
4088
4089 // VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4090 // form but switch to VOPD3 otherwise.
tryAnotherVOPDEncoding(const MCInst & Inst)4091 bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4092 const unsigned Opcode = Inst.getOpcode();
4093 if (!isGFX1250() || !isVOPD(Opcode))
4094 return false;
4095
4096 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4097 return tryVOPD(Inst);
4098 return tryVOPD3(Inst);
4099 }
4100
validateIntClampSupported(const MCInst & Inst)4101 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4102
4103 const unsigned Opc = Inst.getOpcode();
4104 const MCInstrDesc &Desc = MII.get(Opc);
4105
4106 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4107 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4108 assert(ClampIdx != -1);
4109 return Inst.getOperand(ClampIdx).getImm() == 0;
4110 }
4111
4112 return true;
4113 }
4114
// TSFlags mask covering every image-instruction encoding (MIMG plus the
// VIMAGE/VSAMPLE encodings).
constexpr uint64_t MIMGFlags =
    SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4117
/// Check that the vdata register tuple size of an image instruction matches
/// the dword count implied by dmask, tfe and (packed) d16.
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
    return true;

  if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
    return true;

  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  // An enabled tfe adds one extra result dword.
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  // Treat dmask == 0 as a single enabled channel for the size computation.
  if (DMask == 0)
    DMask = 1;

  bool IsPackedD16 = false;
  // Gather4 returns four channels regardless of dmask.
  unsigned DataSize =
      (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
  if (hasPackedD16()) {
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    IsPackedD16 = D16Idx >= 0;
    // With packed d16, two 16-bit channels share one result dword.
    if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  if ((VDataSize / 4) == DataSize + TFESize)
    return true;

  // Tailor the message to the modifiers that exist on this subtarget.
  StringRef Modifiers;
  if (isGFX90A())
    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
  else
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
  return false;
}
4165
/// Check that the number of address registers of a GFX10+ image instruction
/// matches what dim, a16 and the (partial-)NSA encoding require.
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  // MIMG names the resource operand srsrc; VIMAGE/VSAMPLE name it rsrc.
  AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
  if (BaseOpcode->BVH) {
    // BVH instructions only require that a16 matches the base opcode.
    if (IsA16 == BaseOpcode->A16)
      return true;
    Error(IDLoc, "image address size does not match a16");
    return false;
  }

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
  // NSA: one address operand per register between vaddr0 and srsrc.
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;

  unsigned ExpectedAddrSize =
      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());

  if (IsNSA) {
    if (hasPartialNSAEncoding() &&
        ExpectedAddrSize >
            getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
      // Partial NSA: the trailing addresses are packed into the last operand.
      int VAddrLastIdx = SrsrcIdx - 1;
      unsigned VAddrLastSize =
          AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;

      ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
    }
  } else {
    if (ExpectedAddrSize > 12)
      ExpectedAddrSize = 16;

    // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
    // This provides backward compatibility for assembly created
    // before 160b/192b/224b types were directly supported.
    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
      return true;
  }

  if (ActualAddrSize == ExpectedAddrSize)
    return true;

  Error(IDLoc, "image address size does not match dim and a16");
  return false;
}
4235
validateMIMGAtomicDMask(const MCInst & Inst)4236 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4237
4238 const unsigned Opc = Inst.getOpcode();
4239 const MCInstrDesc &Desc = MII.get(Opc);
4240
4241 if ((Desc.TSFlags & MIMGFlags) == 0)
4242 return true;
4243 if (!Desc.mayLoad() || !Desc.mayStore())
4244 return true; // Not atomic
4245
4246 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4247 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4248
4249 // This is an incomplete check because image_atomic_cmpswap
4250 // may only use 0x3 and 0xf while other atomic operations
4251 // may use 0x1 and 0x3. However these limitations are
4252 // verified when we check that dmask matches dst size.
4253 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4254 }
4255
validateMIMGGatherDMask(const MCInst & Inst)4256 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4257
4258 const unsigned Opc = Inst.getOpcode();
4259 const MCInstrDesc &Desc = MII.get(Opc);
4260
4261 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4262 return true;
4263
4264 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4265 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4266
4267 // GATHER4 instructions use dmask in a different fashion compared to
4268 // other MIMG instructions. The only useful DMASK values are
4269 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4270 // (red,red,red,red) etc.) The ISA document doesn't mention
4271 // this.
4272 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4273 }
4274
validateMIMGDim(const MCInst & Inst,const OperandVector & Operands)4275 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4276 const OperandVector &Operands) {
4277 if (!isGFX10Plus())
4278 return true;
4279
4280 const unsigned Opc = Inst.getOpcode();
4281 const MCInstrDesc &Desc = MII.get(Opc);
4282
4283 if ((Desc.TSFlags & MIMGFlags) == 0)
4284 return true;
4285
4286 // image_bvh_intersect_ray instructions do not have dim
4287 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4288 return true;
4289
4290 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4291 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4292 if (Op.isDim())
4293 return true;
4294 }
4295 return false;
4296 }
4297
validateMIMGMSAA(const MCInst & Inst)4298 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4299 const unsigned Opc = Inst.getOpcode();
4300 const MCInstrDesc &Desc = MII.get(Opc);
4301
4302 if ((Desc.TSFlags & MIMGFlags) == 0)
4303 return true;
4304
4305 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4306 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4307 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4308
4309 if (!BaseOpcode->MSAA)
4310 return true;
4311
4312 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4313 assert(DimIdx != -1);
4314
4315 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4316 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4317
4318 return DimInfo->MSAA;
4319 }
4320
IsMovrelsSDWAOpcode(const unsigned Opcode)4321 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4322 {
4323 switch (Opcode) {
4324 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4325 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4326 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4327 return true;
4328 default:
4329 return false;
4330 }
4331 }
4332
4333 // movrels* opcodes should only allow VGPRS as src0.
4334 // This is specified in .td description for vop1/vop3,
4335 // but sdwa is handled differently. See isSDWAOperand.
validateMovrels(const MCInst & Inst,const OperandVector & Operands)4336 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4337 const OperandVector &Operands) {
4338
4339 const unsigned Opc = Inst.getOpcode();
4340 const MCInstrDesc &Desc = MII.get(Opc);
4341
4342 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4343 return true;
4344
4345 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4346 assert(Src0Idx != -1);
4347
4348 SMLoc ErrLoc;
4349 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4350 if (Src0.isReg()) {
4351 auto Reg = mc2PseudoReg(Src0.getReg());
4352 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4353 if (!isSGPR(Reg, TRI))
4354 return true;
4355 ErrLoc = getRegLoc(Reg, Operands);
4356 } else {
4357 ErrLoc = getConstLoc(Operands);
4358 }
4359
4360 Error(ErrLoc, "source operand must be a VGPR");
4361 return false;
4362 }
4363
validateMAIAccWrite(const MCInst & Inst,const OperandVector & Operands)4364 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4365 const OperandVector &Operands) {
4366
4367 const unsigned Opc = Inst.getOpcode();
4368
4369 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4370 return true;
4371
4372 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4373 assert(Src0Idx != -1);
4374
4375 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4376 if (!Src0.isReg())
4377 return true;
4378
4379 auto Reg = mc2PseudoReg(Src0.getReg());
4380 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4381 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4382 Error(getRegLoc(Reg, Operands),
4383 "source operand must be either a VGPR or an inline constant");
4384 return false;
4385 }
4386
4387 return true;
4388 }
4389
validateMAISrc2(const MCInst & Inst,const OperandVector & Operands)4390 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4391 const OperandVector &Operands) {
4392 unsigned Opcode = Inst.getOpcode();
4393 const MCInstrDesc &Desc = MII.get(Opcode);
4394
4395 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4396 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4397 return true;
4398
4399 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4400 if (Src2Idx == -1)
4401 return true;
4402
4403 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4404 Error(getConstLoc(Operands),
4405 "inline constants are not allowed for this operand");
4406 return false;
4407 }
4408
4409 return true;
4410 }
4411
// Validates MAI (MFMA) instructions: for the scaled F8F6F4 variants, that the
// src0/src1 register tuple sizes match the cbsz/blgp format selectors; for all
// other MFMAs, that src2 does not partially overlap the destination tuple.
bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
    return true;

  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  if (BlgpIdx != -1) {
    if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
      int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);

      // cbsz selects src0's FP format, blgp selects src1's.
      unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
      unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();

      // Validate the correct register size was used for the floating point
      // format operands

      // Report both mismatches (src0 and src1) before returning, rather than
      // stopping at the first error.
      bool Success = true;
      if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
        int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
        Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()),
                        Operands),
              "wrong register tuple size for cbsz value " + Twine(CBSZ));
        Success = false;
      }

      if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
        int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
        Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()),
                        Operands),
              "wrong register tuple size for blgp value " + Twine(BLGP));
        Success = false;
      }

      // F8F6F4 forms get only the tuple-size check; the overlap check below
      // does not apply to them.
      return Success;
    }
  }

  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
  if (Src2Idx == -1)
    return true;

  const MCOperand &Src2 = Inst.getOperand(Src2Idx);
  if (!Src2.isReg())
    return true;

  // src2 exactly equal to dst is allowed; only a *partial* overlap is bad.
  MCRegister Src2Reg = Src2.getReg();
  MCRegister DstReg = Inst.getOperand(0).getReg();
  if (Src2Reg == DstReg)
    return true;

  // Small (<=128-bit) destination tuples are exempt from the overlap check.
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
    return true;

  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
          "source 2 operand must not partially overlap with dst");
    return false;
  }

  return true;
}
4477
validateDivScale(const MCInst & Inst)4478 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4479 switch (Inst.getOpcode()) {
4480 default:
4481 return true;
4482 case V_DIV_SCALE_F32_gfx6_gfx7:
4483 case V_DIV_SCALE_F32_vi:
4484 case V_DIV_SCALE_F32_gfx10:
4485 case V_DIV_SCALE_F64_gfx6_gfx7:
4486 case V_DIV_SCALE_F64_vi:
4487 case V_DIV_SCALE_F64_gfx10:
4488 break;
4489 }
4490
4491 // TODO: Check that src0 = src1 or src2.
4492
4493 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4494 AMDGPU::OpName::src2_modifiers,
4495 AMDGPU::OpName::src2_modifiers}) {
4496 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4497 .getImm() &
4498 SISrcMods::ABS) {
4499 return false;
4500 }
4501 }
4502
4503 return true;
4504 }
4505
validateMIMGD16(const MCInst & Inst)4506 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4507
4508 const unsigned Opc = Inst.getOpcode();
4509 const MCInstrDesc &Desc = MII.get(Opc);
4510
4511 if ((Desc.TSFlags & MIMGFlags) == 0)
4512 return true;
4513
4514 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4515 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4516 if (isCI() || isSI())
4517 return false;
4518 }
4519
4520 return true;
4521 }
4522
validateTensorR128(const MCInst & Inst)4523 bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4524 const unsigned Opc = Inst.getOpcode();
4525 const MCInstrDesc &Desc = MII.get(Opc);
4526
4527 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4528 return true;
4529
4530 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4531
4532 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4533 }
4534
// Returns true for "reversed" VALU opcodes (v_subrev*, v_subbrev*,
// v_lshlrev*, v_lshrrev*, v_ashrrev* and their packed variants), i.e. the
// forms whose mnemonic indicates swapped source operands. Pure opcode table.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  // Floating-point subtract-reverse.
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  // Integer subtract-reverse (with and without carry).
  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  // Shift-reverse (logical and arithmetic, 16/32/64-bit).
  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  // Packed (VOP3P) shift-reverse.
  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
4663
4664 std::optional<StringRef>
validateLdsDirect(const MCInst & Inst)4665 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4666
4667 using namespace SIInstrFlags;
4668 const unsigned Opcode = Inst.getOpcode();
4669 const MCInstrDesc &Desc = MII.get(Opcode);
4670
4671 // lds_direct register is defined so that it can be used
4672 // with 9-bit operands only. Ignore encodings which do not accept these.
4673 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4674 if ((Desc.TSFlags & Enc) == 0)
4675 return std::nullopt;
4676
4677 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4678 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4679 if (SrcIdx == -1)
4680 break;
4681 const auto &Src = Inst.getOperand(SrcIdx);
4682 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4683
4684 if (isGFX90A() || isGFX11Plus())
4685 return StringRef("lds_direct is not supported on this GPU");
4686
4687 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4688 return StringRef("lds_direct cannot be used with this instruction");
4689
4690 if (SrcName != OpName::src0)
4691 return StringRef("lds_direct may be used as src0 only");
4692 }
4693 }
4694
4695 return std::nullopt;
4696 }
4697
getFlatOffsetLoc(const OperandVector & Operands) const4698 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4699 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4700 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4701 if (Op.isFlatOffset())
4702 return Op.getStartLoc();
4703 }
4704 return getLoc();
4705 }
4706
validateOffset(const MCInst & Inst,const OperandVector & Operands)4707 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4708 const OperandVector &Operands) {
4709 auto Opcode = Inst.getOpcode();
4710 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4711 if (OpNum == -1)
4712 return true;
4713
4714 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4715 if ((TSFlags & SIInstrFlags::FLAT))
4716 return validateFlatOffset(Inst, Operands);
4717
4718 if ((TSFlags & SIInstrFlags::SMRD))
4719 return validateSMEMOffset(Inst, Operands);
4720
4721 const auto &Op = Inst.getOperand(OpNum);
4722 if (isGFX12Plus() &&
4723 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4724 const unsigned OffsetSize = 24;
4725 if (!isIntN(OffsetSize, Op.getImm())) {
4726 Error(getFlatOffsetLoc(Operands),
4727 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4728 return false;
4729 }
4730 } else {
4731 const unsigned OffsetSize = 16;
4732 if (!isUIntN(OffsetSize, Op.getImm())) {
4733 Error(getFlatOffsetLoc(Operands),
4734 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4735 return false;
4736 }
4737 }
4738 return true;
4739 }
4740
validateFlatOffset(const MCInst & Inst,const OperandVector & Operands)4741 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4742 const OperandVector &Operands) {
4743 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4744 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4745 return true;
4746
4747 auto Opcode = Inst.getOpcode();
4748 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4749 assert(OpNum != -1);
4750
4751 const auto &Op = Inst.getOperand(OpNum);
4752 if (!hasFlatOffsets() && Op.getImm() != 0) {
4753 Error(getFlatOffsetLoc(Operands),
4754 "flat offset modifier is not supported on this GPU");
4755 return false;
4756 }
4757
4758 // For pre-GFX12 FLAT instructions the offset must be positive;
4759 // MSB is ignored and forced to zero.
4760 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4761 bool AllowNegative =
4762 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4763 isGFX12Plus();
4764 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4765 Error(getFlatOffsetLoc(Operands),
4766 Twine("expected a ") +
4767 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4768 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4769 return false;
4770 }
4771
4772 return true;
4773 }
4774
getSMEMOffsetLoc(const OperandVector & Operands) const4775 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4776 // Start with second operand because SMEM Offset cannot be dst or src0.
4777 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4778 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4779 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4780 return Op.getStartLoc();
4781 }
4782 return getLoc();
4783 }
4784
validateSMEMOffset(const MCInst & Inst,const OperandVector & Operands)4785 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4786 const OperandVector &Operands) {
4787 if (isCI() || isSI())
4788 return true;
4789
4790 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4791 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4792 return true;
4793
4794 auto Opcode = Inst.getOpcode();
4795 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4796 if (OpNum == -1)
4797 return true;
4798
4799 const auto &Op = Inst.getOperand(OpNum);
4800 if (!Op.isImm())
4801 return true;
4802
4803 uint64_t Offset = Op.getImm();
4804 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4805 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4806 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4807 return true;
4808
4809 Error(getSMEMOffsetLoc(Operands),
4810 isGFX12Plus() ? "expected a 24-bit signed offset"
4811 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4812 : "expected a 21-bit signed offset");
4813
4814 return false;
4815 }
4816
validateSOPLiteral(const MCInst & Inst) const4817 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4818 unsigned Opcode = Inst.getOpcode();
4819 const MCInstrDesc &Desc = MII.get(Opcode);
4820 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4821 return true;
4822
4823 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4824 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4825
4826 const int OpIndices[] = { Src0Idx, Src1Idx };
4827
4828 unsigned NumExprs = 0;
4829 unsigned NumLiterals = 0;
4830 uint64_t LiteralValue;
4831
4832 for (int OpIdx : OpIndices) {
4833 if (OpIdx == -1) break;
4834
4835 const MCOperand &MO = Inst.getOperand(OpIdx);
4836 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4837 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4838 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4839 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4840 if (NumLiterals == 0 || LiteralValue != Value) {
4841 LiteralValue = Value;
4842 ++NumLiterals;
4843 }
4844 } else if (MO.isExpr()) {
4845 ++NumExprs;
4846 }
4847 }
4848 }
4849
4850 return NumLiterals + NumExprs <= 1;
4851 }
4852
validateOpSel(const MCInst & Inst)4853 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4854 const unsigned Opc = Inst.getOpcode();
4855 if (isPermlane16(Opc)) {
4856 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4857 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4858
4859 if (OpSel & ~3)
4860 return false;
4861 }
4862
4863 uint64_t TSFlags = MII.get(Opc).TSFlags;
4864
4865 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4866 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4867 if (OpSelIdx != -1) {
4868 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4869 return false;
4870 }
4871 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4872 if (OpSelHiIdx != -1) {
4873 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4874 return false;
4875 }
4876 }
4877
4878 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4879 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4880 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4881 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4882 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4883 if (OpSel & 3)
4884 return false;
4885 }
4886
4887 return true;
4888 }
4889
validateTrue16OpSel(const MCInst & Inst)4890 bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4891 if (!hasTrue16Insts())
4892 return true;
4893 const MCRegisterInfo *MRI = getMRI();
4894 const unsigned Opc = Inst.getOpcode();
4895 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4896 if (OpSelIdx == -1)
4897 return true;
4898 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4899 // If the value is 0 we could have a default OpSel Operand, so conservatively
4900 // allow it.
4901 if (OpSelOpValue == 0)
4902 return true;
4903 unsigned OpCount = 0;
4904 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4905 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4906 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4907 if (OpIdx == -1)
4908 continue;
4909 const MCOperand &Op = Inst.getOperand(OpIdx);
4910 if (Op.isReg() &&
4911 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4912 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4913 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4914 if (OpSelOpIsHi != VGPRSuffixIsHi)
4915 return false;
4916 }
4917 ++OpCount;
4918 }
4919
4920 return true;
4921 }
4922
validateNeg(const MCInst & Inst,AMDGPU::OpName OpName)4923 bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
4924 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4925
4926 const unsigned Opc = Inst.getOpcode();
4927 uint64_t TSFlags = MII.get(Opc).TSFlags;
4928
4929 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4930 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4931 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4932 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4933 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4934 !(TSFlags & SIInstrFlags::IsSWMMAC))
4935 return true;
4936
4937 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4938 if (NegIdx == -1)
4939 return true;
4940
4941 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4942
4943 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
4944 // on some src operands but not allowed on other.
4945 // It is convenient that such instructions don't have src_modifiers operand
4946 // for src operands that don't allow neg because they also don't allow opsel.
4947
4948 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4949 AMDGPU::OpName::src1_modifiers,
4950 AMDGPU::OpName::src2_modifiers};
4951
4952 for (unsigned i = 0; i < 3; ++i) {
4953 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4954 if (Neg & (1 << i))
4955 return false;
4956 }
4957 }
4958
4959 return true;
4960 }
4961
validateDPP(const MCInst & Inst,const OperandVector & Operands)4962 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4963 const OperandVector &Operands) {
4964 const unsigned Opc = Inst.getOpcode();
4965 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4966 if (DppCtrlIdx >= 0) {
4967 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4968
4969 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4970 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4971 // DP ALU DPP is supported for row_newbcast only on GFX9*
4972 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4973 Error(S, "DP ALU dpp only supports row_newbcast");
4974 return false;
4975 }
4976 }
4977
4978 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4979 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4980
4981 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4982 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4983 if (Src1Idx >= 0) {
4984 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4985 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4986 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4987 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4988 SMLoc S = getRegLoc(Reg, Operands);
4989 Error(S, "invalid operand for instruction");
4990 return false;
4991 }
4992 if (Src1.isImm()) {
4993 Error(getInstLoc(Operands),
4994 "src1 immediate operand invalid for instruction");
4995 return false;
4996 }
4997 }
4998 }
4999
5000 return true;
5001 }
5002
5003 // Check if VCC register matches wavefront size
validateVccOperand(MCRegister Reg) const5004 bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5005 auto FB = getFeatureBits();
5006 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
5007 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
5008 }
5009
5010 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  // A named 'imm' operand marks opcodes whose encoding always carries a
  // literal slot.
  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
  // Only VOP3/VOP3P, mandatory-literal forms, and VOPD are checked here.
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
      !HasMandatoryLiteral && !isVOPD(Opcode))
    return true;

  OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // Only read once NumLiterals > 0 (set on the first literal seen).
  uint64_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!isSISrcOperand(Desc, OpIdx))
      continue;

    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint64_t Value = static_cast<uint64_t>(MO.getImm());
      // KIMM64 operands, and FP64 immediates on mandatory-literal forms, are
      // always treated as full 64-bit literals.
      bool IsForcedFP64 =
          Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
          (Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_REG_IMM_FP64 &&
           HasMandatoryLiteral);
      bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
                    AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
      bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);

      // Reject values that fit neither a 32-bit literal nor (where the
      // subtarget and encoding allow it) a 64-bit one.
      if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
          !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
        Error(getLitLoc(Operands), "invalid operand for instruction");
        return false;
      }

      // For an FP64 operand expressible as a 32-bit literal, compare using
      // the high half — presumably the half that is actually encoded; verify
      // against the encoder if this is touched.
      if (IsFP64 && IsValid32Op && !IsForcedFP64)
        Value = Hi_32(Value);

      // Count distinct literal values only; repeats share the slot.
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    } else if (MO.isExpr()) {
      // Relocatable expressions each occupy a literal slot.
      ++NumExprs;
    }
  }
  NumLiterals += NumExprs;

  if (!NumLiterals)
    return true;

  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
    return false;
  }

  return true;
}
5080
5081 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
IsAGPROperand(const MCInst & Inst,AMDGPU::OpName Name,const MCRegisterInfo * MRI)5082 static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5083 const MCRegisterInfo *MRI) {
5084 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5085 if (OpIdx < 0)
5086 return -1;
5087
5088 const MCOperand &Op = Inst.getOperand(OpIdx);
5089 if (!Op.isReg())
5090 return -1;
5091
5092 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5093 auto Reg = Sub ? Sub : Op.getReg();
5094 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5095 return AGPR32.contains(Reg) ? 1 : 0;
5096 }
5097
validateAGPRLdSt(const MCInst & Inst) const5098 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5099 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5100 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5101 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5102 SIInstrFlags::DS)) == 0)
5103 return true;
5104
5105 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5106 ? AMDGPU::OpName::data0
5107 : AMDGPU::OpName::vdata;
5108
5109 const MCRegisterInfo *MRI = getMRI();
5110 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5111 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5112
5113 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5114 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5115 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5116 return false;
5117 }
5118
5119 auto FB = getFeatureBits();
5120 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5121 if (DataAreg < 0 || DstAreg < 0)
5122 return true;
5123 return DstAreg == DataAreg;
5124 }
5125
5126 return DstAreg < 1 && DataAreg < 1;
5127 }
5128
validateVGPRAlign(const MCInst & Inst) const5129 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5130 auto FB = getFeatureBits();
5131 unsigned Opc = Inst.getOpcode();
5132 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950, that allows
5133 // unaligned VGPR. All others only allow even aligned VGPRs.
5134 if (!(FB[AMDGPU::FeatureGFX90AInsts]) || Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5135 return true;
5136
5137 const MCRegisterInfo *MRI = getMRI();
5138 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5139 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5140 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5141 const MCOperand &Op = Inst.getOperand(I);
5142 if (!Op.isReg())
5143 continue;
5144
5145 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5146 if (!Sub)
5147 continue;
5148
5149 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5150 return false;
5151 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5152 return false;
5153 }
5154
5155 return true;
5156 }
5157
getBLGPLoc(const OperandVector & Operands) const5158 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5159 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5160 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5161 if (Op.isBLGP())
5162 return Op.getStartLoc();
5163 }
5164 return SMLoc();
5165 }
5166
validateBLGP(const MCInst & Inst,const OperandVector & Operands)5167 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5168 const OperandVector &Operands) {
5169 unsigned Opc = Inst.getOpcode();
5170 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5171 if (BlgpIdx == -1)
5172 return true;
5173 SMLoc BLGPLoc = getBLGPLoc(Operands);
5174 if (!BLGPLoc.isValid())
5175 return true;
5176 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5177 auto FB = getFeatureBits();
5178 bool UsesNeg = false;
5179 if (FB[AMDGPU::FeatureGFX940Insts]) {
5180 switch (Opc) {
5181 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5182 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5183 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5184 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5185 UsesNeg = true;
5186 }
5187 }
5188
5189 if (IsNeg == UsesNeg)
5190 return true;
5191
5192 Error(BLGPLoc,
5193 UsesNeg ? "invalid modifier: blgp is not supported"
5194 : "invalid modifier: neg is not supported");
5195
5196 return false;
5197 }
5198
validateWaitCnt(const MCInst & Inst,const OperandVector & Operands)5199 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5200 const OperandVector &Operands) {
5201 if (!isGFX11Plus())
5202 return true;
5203
5204 unsigned Opc = Inst.getOpcode();
5205 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5206 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5207 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5208 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5209 return true;
5210
5211 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5212 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5213 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5214 if (Reg == AMDGPU::SGPR_NULL)
5215 return true;
5216
5217 SMLoc RegLoc = getRegLoc(Reg, Operands);
5218 Error(RegLoc, "src0 must be null");
5219 return false;
5220 }
5221
validateDS(const MCInst & Inst,const OperandVector & Operands)5222 bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5223 const OperandVector &Operands) {
5224 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5225 if ((TSFlags & SIInstrFlags::DS) == 0)
5226 return true;
5227 if (TSFlags & SIInstrFlags::GWS)
5228 return validateGWS(Inst, Operands);
5229 // Only validate GDS for non-GWS instructions.
5230 if (hasGDS())
5231 return true;
5232 int GDSIdx =
5233 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5234 if (GDSIdx < 0)
5235 return true;
5236 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5237 if (GDS) {
5238 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5239 Error(S, "gds modifier is not supported on this GPU");
5240 return false;
5241 }
5242 return true;
5243 }
5244
5245 // gfx90a has an undocumented limitation:
5246 // DS_GWS opcodes must use even aligned registers.
validateGWS(const MCInst & Inst,const OperandVector & Operands)5247 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5248 const OperandVector &Operands) {
5249 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5250 return true;
5251
5252 int Opc = Inst.getOpcode();
5253 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5254 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5255 return true;
5256
5257 const MCRegisterInfo *MRI = getMRI();
5258 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5259 int Data0Pos =
5260 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5261 assert(Data0Pos != -1);
5262 auto Reg = Inst.getOperand(Data0Pos).getReg();
5263 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5264 if (RegIdx & 1) {
5265 SMLoc RegLoc = getRegLoc(Reg, Operands);
5266 Error(RegLoc, "vgpr must be even aligned");
5267 return false;
5268 }
5269
5270 return true;
5271 }
5272
// Validates the cpol (cache policy) operand against the subtarget: SMEM
// restrictions, the gfx90a 'scc' bit, and glc/sc0 requirements on atomics.
// GFX12+ policies are delegated to validateTHAndScopeBits.
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(CPolPos).getImm();

  // GFX12+ uses th/scope semantics instead of glc/slc/dlc bits.
  if (isGFX12Plus())
    return validateTHAndScopeBits(Inst, Operands, CPol);

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if (TSFlags & SIInstrFlags::SMRD) {
    // SI/CI SMRD takes no cache policy at all; later targets allow only
    // GLC/DLC.
    if (CPol && (isSI() || isCI())) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      Error(S, "cache policy is not supported for SMRD instructions");
      return false;
    }
    if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
      Error(IDLoc, "invalid cache policy for SMEM instruction");
      return false;
    }
  }

  // gfx90a (but not gfx940) limits the scc bit to buffer/image/flat memory
  // instructions.
  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
    const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
                                      SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
                                      SIInstrFlags::FLAT;
    if (!(TSFlags & AllowSCCModifier)) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      // Point the diagnostic at the literal "scc" token within the modifier.
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
      Error(S,
            "scc modifier is not supported for this instruction on this GPU");
      return false;
    }
  }

  // Remaining checks apply to atomics only.
  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
    return true;

  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    // Returning atomics (except MIMG) must request the returned value via
    // glc (named sc0 on gfx940).
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
      Error(IDLoc, isGFX940() ? "instruction must use sc0"
                              : "instruction must use glc");
      return false;
    }
  } else {
    // Non-returning atomics must not set glc/sc0.
    if (CPol & CPol::GLC) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(
          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
      Error(S, isGFX940() ? "instruction must not use sc0"
                          : "instruction must not use glc");
      return false;
    }
  }

  return true;
}
5336
validateTHAndScopeBits(const MCInst & Inst,const OperandVector & Operands,const unsigned CPol)5337 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5338 const OperandVector &Operands,
5339 const unsigned CPol) {
5340 const unsigned TH = CPol & AMDGPU::CPol::TH;
5341 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5342
5343 const unsigned Opcode = Inst.getOpcode();
5344 const MCInstrDesc &TID = MII.get(Opcode);
5345
5346 auto PrintError = [&](StringRef Msg) {
5347 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5348 Error(S, Msg);
5349 return false;
5350 };
5351
5352 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5353 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
5354 (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
5355 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5356
5357 if (TH == 0)
5358 return true;
5359
5360 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5361 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5362 (TH == AMDGPU::CPol::TH_NT_HT)))
5363 return PrintError("invalid th value for SMEM instruction");
5364
5365 if (TH == AMDGPU::CPol::TH_BYPASS) {
5366 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5367 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5368 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5369 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5370 return PrintError("scope and th combination is not valid");
5371 }
5372
5373 unsigned THType = AMDGPU::getTemporalHintType(TID);
5374 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5375 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5376 return PrintError("invalid th value for atomic instructions");
5377 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5378 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5379 return PrintError("invalid th value for store instructions");
5380 } else {
5381 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5382 return PrintError("invalid th value for load instructions");
5383 }
5384
5385 return true;
5386 }
5387
validateTFE(const MCInst & Inst,const OperandVector & Operands)5388 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5389 const OperandVector &Operands) {
5390 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5391 if (Desc.mayStore() &&
5392 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5393 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5394 if (Loc != getInstLoc(Operands)) {
5395 Error(Loc, "TFE modifier has no meaning for store instructions");
5396 return false;
5397 }
5398 }
5399
5400 return true;
5401 }
5402
// Top-level semantic validation of a successfully matched instruction.
// Runs every target-specific checker in sequence; each checker either emits
// its own diagnostic or the error is emitted here with a location derived
// from Operands. Returns false (after reporting) on the first failure.
// NOTE(review): the checkers run in a fixed order, so the first applicable
// diagnostic wins when an instruction violates several rules.
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc,
                                          const OperandVector &Operands) {
  if (auto ErrMsg = validateLdsDirect(Inst)) {
    Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
    return false;
  }
  if (!validateTrue16OpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "op_sel operand conflicts with 16-bit operand suffix");
    return false;
  }
  // Literal-constant and constant-bus restrictions.
  if (!validateSOPLiteral(Inst)) {
    Error(getLitLoc(Operands),
          "only one unique literal operand is allowed");
    return false;
  }
  if (!validateVOPLiteral(Inst, Operands)) {
    return false;
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    return false;
  }
  if (!validateVOPD(Inst, Operands)) {
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
          "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "invalid op_sel operand");
    return false;
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
          "invalid neg_lo operand");
    return false;
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
          "invalid neg_hi operand");
    return false;
  }
  if (!validateDPP(Inst, Operands)) {
    return false;
  }
  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDim(Inst, Operands)) {
    Error(IDLoc, "missing dim operand");
    return false;
  }
  if (!validateTensorR128(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "instruction must set modifier r128=0");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
          "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst, IDLoc)) {
    return false;
  }
  if (!validateMIMGAddrSize(Inst, IDLoc))
    return false;
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  if (!validateMovrels(Inst, Operands)) {
    return false;
  }
  if (!validateOffset(Inst, Operands)) {
    return false;
  }
  // MAI/MFMA operand restrictions.
  if (!validateMAIAccWrite(Inst, Operands)) {
    return false;
  }
  if (!validateMAISrc2(Inst, Operands)) {
    return false;
  }
  if (!validateMFMA(Inst, Operands)) {
    return false;
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    return false;
  }

  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
    ? "invalid register class: data and dst should be all VGPR or AGPR"
    : "invalid register class: agpr loads and stores not supported on this GPU"
    );
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(IDLoc,
      "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  if (!validateDS(Inst, Operands)) {
    return false;
  }

  if (!validateBLGP(Inst, Operands)) {
    return false;
  }

  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    return false;
  }
  if (!validateWaitCnt(Inst, Operands)) {
    return false;
  }
  if (!validateTFE(Inst, Operands)) {
    return false;
  }

  return true;
}
5539
// Forward declarations; the definitions are presumably provided by the
// tablegen'd asm-matcher include further down the file — confirm there.
// Suggests the closest known mnemonic for a misspelled one (may return an
// empty suggestion string).
static std::string AMDGPUMnemonicSpellCheck(StringRef S,
                                            const FeatureBitset &FBS,
                                            unsigned VariantID = 0);

// Returns true if Mnemonic is a valid instruction for the given feature set
// and assembler variant.
static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
                                const FeatureBitset &AvailableFeatures,
                                unsigned VariantID);
5547
isSupportedMnemo(StringRef Mnemo,const FeatureBitset & FBS)5548 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5549 const FeatureBitset &FBS) {
5550 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5551 }
5552
isSupportedMnemo(StringRef Mnemo,const FeatureBitset & FBS,ArrayRef<unsigned> Variants)5553 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5554 const FeatureBitset &FBS,
5555 ArrayRef<unsigned> Variants) {
5556 for (auto Variant : Variants) {
5557 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5558 return true;
5559 }
5560
5561 return false;
5562 }
5563
// Diagnose why a mnemonic failed to match, from most to least specific:
// wrong variant, wrong wavesize, wrong GPU, or not an instruction at all.
// Returns false if the mnemonic is actually supported (no diagnostic),
// true after emitting the most precise error available.
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
  // Check if any other variants are supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Check if this instruction may be used with a different wavesize.
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {

    // Re-evaluate availability with wavesize flipped from 64 to 32.
    FeatureBitset FeaturesWS32 = getFeatureBits();
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    FeatureBitset AvailableFeaturesWS32 =
        ComputeAvailableFeatures(FeaturesWS32);

    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
5608
isInvalidVOPDY(const OperandVector & Operands,uint64_t InvalidOprIdx)5609 static bool isInvalidVOPDY(const OperandVector &Operands,
5610 uint64_t InvalidOprIdx) {
5611 assert(InvalidOprIdx < Operands.size());
5612 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5613 if (Op.isToken() && InvalidOprIdx > 1) {
5614 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5615 return PrevOp.isToken() && PrevOp.getToken() == "::";
5616 }
5617 return false;
5618 }
5619
// Match the parsed operands against every eligible assembler variant, keeping
// the most specific failure status, then either emit the matched instruction
// or report the best available diagnostic. Returns true on error (after
// reporting), false on successful emission.
bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
    if (R == Match_Success || R == Match_MissingFeature ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
         Result != Match_MissingFeature)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    // Tablegen matching succeeded; run target-specific semantic checks
    // (which emit their own diagnostics) before emitting.
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  // Prefer a precise "not supported on this GPU/variant" diagnostic over the
  // generic match-failure messages below.
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    // ErrorInfo, when valid, is the index of the offending operand.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;

      // Special-case a bad second half of a VOPD dual-issue instruction.
      if (isInvalidVOPDY(Operands, ErrorInfo))
        return Error(ErrorLoc, "invalid VOPDY instruction");
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}
5688
ParseAsAbsoluteExpression(uint32_t & Ret)5689 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5690 int64_t Tmp = -1;
5691 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5692 return true;
5693 }
5694 if (getParser().parseAbsoluteExpression(Tmp)) {
5695 return true;
5696 }
5697 Ret = static_cast<uint32_t>(Tmp);
5698 return false;
5699 }
5700
// Handle the .amdgcn_target directive: parse the quoted target-id string and
// verify it matches the target id the streamer was configured with.
// Returns true on error (after reporting), false otherwise.
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (!getSTI().getTargetTriple().isAMDGCN())
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    return true;

  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
  // The directive must agree exactly with the target id already in effect.
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());

  return false;
}
5720
// Report a "value out of range" diagnostic covering the given source range.
// Always returns true (the MC parser's error convention).
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
5724
// Compute the VGPR/SGPR "block" counts for the kernel descriptor as MCExprs,
// so that counts given as unresolved symbolic expressions still work.
// Range-checks SGPR counts that can be evaluated now; fully symbolic inputs
// are deferred. Returns true on error (after reporting), false on success.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, const MCExpr *VCCUsed,
    const MCExpr *FlatScrUsed, bool XNACKUsed,
    std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
    SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
    const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());
  MCContext &Ctx = getContext();

  const MCExpr *NumSGPRs = NextFreeSGPR;
  int64_t EvaluatedSGPRs;

  // GFX10+ does not encode an SGPR count in the descriptor.
  if (Version.Major >= 10)
    NumSGPRs = MCConstantExpr::create(0, Ctx);
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    // gfx8+ (without the SGPR-init bug): the user-specified count alone must
    // fit, before the implicit extra SGPRs are added.
    if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
        !Features.test(FeatureSGPRInitBug) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // Add the implicitly reserved SGPRs (VCC, flat scratch, XNACK mask).
    const MCExpr *ExtraSGPRs =
        AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
    NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);

    // gfx7 and earlier (or with the init bug): the total including extras
    // must fit.
    if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
        (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // Affected parts always allocate a fixed SGPR count regardless of use.
    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs =
          MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
  }

  // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
  // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
  auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
                                unsigned Granule) -> const MCExpr * {
    const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
    const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
    const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
    const MCExpr *AlignToGPR =
        AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
    const MCExpr *DivGPR =
        MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
    const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
    return SubGPR;
  };

  VGPRBlocks = GetNumGPRBlocks(
      NextFreeVGPR,
      IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
  SGPRBlocks =
      GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));

  return false;
}
5787
ParseDirectiveAMDHSAKernel()5788 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5789 if (!getSTI().getTargetTriple().isAMDGCN())
5790 return TokError("directive only supported for amdgcn architecture");
5791
5792 if (!isHsaAbi(getSTI()))
5793 return TokError("directive only supported for amdhsa OS");
5794
5795 StringRef KernelName;
5796 if (getParser().parseIdentifier(KernelName))
5797 return true;
5798
5799 AMDGPU::MCKernelDescriptor KD =
5800 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5801 &getSTI(), getContext());
5802
5803 StringSet<> Seen;
5804
5805 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5806
5807 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5808 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5809
5810 SMRange VGPRRange;
5811 const MCExpr *NextFreeVGPR = ZeroExpr;
5812 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5813 uint64_t SharedVGPRCount = 0;
5814 uint64_t PreloadLength = 0;
5815 uint64_t PreloadOffset = 0;
5816 SMRange SGPRRange;
5817 const MCExpr *NextFreeSGPR = ZeroExpr;
5818
5819 // Count the number of user SGPRs implied from the enabled feature bits.
5820 unsigned ImpliedUserSGPRCount = 0;
5821
5822 // Track if the asm explicitly contains the directive for the user SGPR
5823 // count.
5824 std::optional<unsigned> ExplicitUserSGPRCount;
5825 const MCExpr *ReserveVCC = OneExpr;
5826 const MCExpr *ReserveFlatScr = OneExpr;
5827 std::optional<bool> EnableWavefrontSize32;
5828
5829 while (true) {
5830 while (trySkipToken(AsmToken::EndOfStatement));
5831
5832 StringRef ID;
5833 SMRange IDRange = getTok().getLocRange();
5834 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5835 return true;
5836
5837 if (ID == ".end_amdhsa_kernel")
5838 break;
5839
5840 if (!Seen.insert(ID).second)
5841 return TokError(".amdhsa_ directives cannot be repeated");
5842
5843 SMLoc ValStart = getLoc();
5844 const MCExpr *ExprVal;
5845 if (getParser().parseExpression(ExprVal))
5846 return true;
5847 SMLoc ValEnd = getLoc();
5848 SMRange ValRange = SMRange(ValStart, ValEnd);
5849
5850 int64_t IVal = 0;
5851 uint64_t Val = IVal;
5852 bool EvaluatableExpr;
5853 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5854 if (IVal < 0)
5855 return OutOfRangeError(ValRange);
5856 Val = IVal;
5857 }
5858
5859 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5860 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5861 return OutOfRangeError(RANGE); \
5862 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5863 getContext());
5864
5865 // Some fields use the parsed value immediately which requires the expression to
5866 // be solvable.
5867 #define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5868 if (!(RESOLVED)) \
5869 return Error(IDRange.Start, "directive should have resolvable expression", \
5870 IDRange);
5871
5872 if (ID == ".amdhsa_group_segment_fixed_size") {
5873 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5874 CHAR_BIT>(Val))
5875 return OutOfRangeError(ValRange);
5876 KD.group_segment_fixed_size = ExprVal;
5877 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5878 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5879 CHAR_BIT>(Val))
5880 return OutOfRangeError(ValRange);
5881 KD.private_segment_fixed_size = ExprVal;
5882 } else if (ID == ".amdhsa_kernarg_size") {
5883 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5884 return OutOfRangeError(ValRange);
5885 KD.kernarg_size = ExprVal;
5886 } else if (ID == ".amdhsa_user_sgpr_count") {
5887 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5888 ExplicitUserSGPRCount = Val;
5889 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5890 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5891 if (hasArchitectedFlatScratch())
5892 return Error(IDRange.Start,
5893 "directive is not supported with architected flat scratch",
5894 IDRange);
5895 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5896 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5897 ExprVal, ValRange);
5898 if (Val)
5899 ImpliedUserSGPRCount += 4;
5900 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5901 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5902 if (!hasKernargPreload())
5903 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5904
5905 if (Val > getMaxNumUserSGPRs())
5906 return OutOfRangeError(ValRange);
5907 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5908 ValRange);
5909 if (Val) {
5910 ImpliedUserSGPRCount += Val;
5911 PreloadLength = Val;
5912 }
5913 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5914 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5915 if (!hasKernargPreload())
5916 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5917
5918 if (Val >= 1024)
5919 return OutOfRangeError(ValRange);
5920 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5921 ValRange);
5922 if (Val)
5923 PreloadOffset = Val;
5924 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5925 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5926 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5927 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5928 ValRange);
5929 if (Val)
5930 ImpliedUserSGPRCount += 2;
5931 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5932 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5933 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5934 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5935 ValRange);
5936 if (Val)
5937 ImpliedUserSGPRCount += 2;
5938 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5939 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5940 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5941 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5942 ExprVal, ValRange);
5943 if (Val)
5944 ImpliedUserSGPRCount += 2;
5945 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5946 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5947 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5948 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5949 ValRange);
5950 if (Val)
5951 ImpliedUserSGPRCount += 2;
5952 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5953 if (hasArchitectedFlatScratch())
5954 return Error(IDRange.Start,
5955 "directive is not supported with architected flat scratch",
5956 IDRange);
5957 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5958 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5959 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5960 ExprVal, ValRange);
5961 if (Val)
5962 ImpliedUserSGPRCount += 2;
5963 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5964 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5965 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5966 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5967 ExprVal, ValRange);
5968 if (Val)
5969 ImpliedUserSGPRCount += 1;
5970 } else if (ID == ".amdhsa_wavefront_size32") {
5971 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5972 if (IVersion.Major < 10)
5973 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5974 EnableWavefrontSize32 = Val;
5975 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5976 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5977 ValRange);
5978 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5979 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5980 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5981 ValRange);
5982 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5983 if (hasArchitectedFlatScratch())
5984 return Error(IDRange.Start,
5985 "directive is not supported with architected flat scratch",
5986 IDRange);
5987 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5988 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5989 ValRange);
5990 } else if (ID == ".amdhsa_enable_private_segment") {
5991 if (!hasArchitectedFlatScratch())
5992 return Error(
5993 IDRange.Start,
5994 "directive is not supported without architected flat scratch",
5995 IDRange);
5996 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5997 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5998 ValRange);
5999 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6000 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6001 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6002 ValRange);
6003 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6004 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6005 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6006 ValRange);
6007 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6008 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6009 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6010 ValRange);
6011 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6012 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6013 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6014 ValRange);
6015 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6016 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6017 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6018 ValRange);
6019 } else if (ID == ".amdhsa_next_free_vgpr") {
6020 VGPRRange = ValRange;
6021 NextFreeVGPR = ExprVal;
6022 } else if (ID == ".amdhsa_next_free_sgpr") {
6023 SGPRRange = ValRange;
6024 NextFreeSGPR = ExprVal;
6025 } else if (ID == ".amdhsa_accum_offset") {
6026 if (!isGFX90A())
6027 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6028 AccumOffset = ExprVal;
6029 } else if (ID == ".amdhsa_reserve_vcc") {
6030 if (EvaluatableExpr && !isUInt<1>(Val))
6031 return OutOfRangeError(ValRange);
6032 ReserveVCC = ExprVal;
6033 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6034 if (IVersion.Major < 7)
6035 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6036 if (hasArchitectedFlatScratch())
6037 return Error(IDRange.Start,
6038 "directive is not supported with architected flat scratch",
6039 IDRange);
6040 if (EvaluatableExpr && !isUInt<1>(Val))
6041 return OutOfRangeError(ValRange);
6042 ReserveFlatScr = ExprVal;
6043 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6044 if (IVersion.Major < 8)
6045 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6046 if (!isUInt<1>(Val))
6047 return OutOfRangeError(ValRange);
6048 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6049 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6050 IDRange);
6051 } else if (ID == ".amdhsa_float_round_mode_32") {
6052 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6053 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6054 ValRange);
6055 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6056 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6057 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6058 ValRange);
6059 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6060 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6061 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6062 ValRange);
6063 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6064 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6065 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6066 ValRange);
6067 } else if (ID == ".amdhsa_dx10_clamp") {
6068 if (IVersion.Major >= 12)
6069 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6070 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6071 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6072 ValRange);
6073 } else if (ID == ".amdhsa_ieee_mode") {
6074 if (IVersion.Major >= 12)
6075 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6076 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6077 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6078 ValRange);
6079 } else if (ID == ".amdhsa_fp16_overflow") {
6080 if (IVersion.Major < 9)
6081 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6082 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6083 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6084 ValRange);
6085 } else if (ID == ".amdhsa_tg_split") {
6086 if (!isGFX90A())
6087 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6088 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6089 ExprVal, ValRange);
6090 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6091 if (IVersion.Major < 10)
6092 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6093 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6094 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6095 ValRange);
6096 } else if (ID == ".amdhsa_memory_ordered") {
6097 if (IVersion.Major < 10)
6098 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6099 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6100 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6101 ValRange);
6102 } else if (ID == ".amdhsa_forward_progress") {
6103 if (IVersion.Major < 10)
6104 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6105 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6106 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6107 ValRange);
6108 } else if (ID == ".amdhsa_shared_vgpr_count") {
6109 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6110 if (IVersion.Major < 10 || IVersion.Major >= 12)
6111 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6112 IDRange);
6113 SharedVGPRCount = Val;
6114 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6115 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6116 ValRange);
6117 } else if (ID == ".amdhsa_inst_pref_size") {
6118 if (IVersion.Major < 11)
6119 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6120 if (IVersion.Major == 11) {
6121 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6122 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6123 ValRange);
6124 } else {
6125 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6126 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6127 ValRange);
6128 }
6129 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6130 PARSE_BITS_ENTRY(
6131 KD.compute_pgm_rsrc2,
6132 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6133 ExprVal, ValRange);
6134 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6135 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6136 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6137 ExprVal, ValRange);
6138 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6139 PARSE_BITS_ENTRY(
6140 KD.compute_pgm_rsrc2,
6141 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6142 ExprVal, ValRange);
6143 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6144 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6145 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6146 ExprVal, ValRange);
6147 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6148 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6149 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6150 ExprVal, ValRange);
6151 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6152 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6153 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6154 ExprVal, ValRange);
6155 } else if (ID == ".amdhsa_exception_int_div_zero") {
6156 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6157 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6158 ExprVal, ValRange);
6159 } else if (ID == ".amdhsa_round_robin_scheduling") {
6160 if (IVersion.Major < 12)
6161 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6162 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6163 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6164 ValRange);
6165 } else {
6166 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6167 }
6168
6169 #undef PARSE_BITS_ENTRY
6170 }
6171
6172 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6173 return TokError(".amdhsa_next_free_vgpr directive is required");
6174
6175 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6176 return TokError(".amdhsa_next_free_sgpr directive is required");
6177
6178 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6179
6180 // Consider the case where the total number of UserSGPRs with trailing
6181 // allocated preload SGPRs, is greater than the number of explicitly
6182 // referenced SGPRs.
6183 if (PreloadLength) {
6184 MCContext &Ctx = getContext();
6185 NextFreeSGPR = AMDGPUMCExpr::createMax(
6186 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6187 }
6188
6189 const MCExpr *VGPRBlocks;
6190 const MCExpr *SGPRBlocks;
6191 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6192 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6193 EnableWavefrontSize32, NextFreeVGPR,
6194 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6195 SGPRBlocks))
6196 return true;
6197
6198 int64_t EvaluatedVGPRBlocks;
6199 bool VGPRBlocksEvaluatable =
6200 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6201 if (VGPRBlocksEvaluatable &&
6202 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6203 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6204 return OutOfRangeError(VGPRRange);
6205 }
6206 AMDGPU::MCKernelDescriptor::bits_set(
6207 KD.compute_pgm_rsrc1, VGPRBlocks,
6208 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6209 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6210
6211 int64_t EvaluatedSGPRBlocks;
6212 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6213 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6214 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6215 return OutOfRangeError(SGPRRange);
6216 AMDGPU::MCKernelDescriptor::bits_set(
6217 KD.compute_pgm_rsrc1, SGPRBlocks,
6218 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6219 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6220
6221 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6222 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
6223 "enabled user SGPRs");
6224
6225 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
6226 return TokError("too many user SGPRs enabled");
6227 AMDGPU::MCKernelDescriptor::bits_set(
6228 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
6229 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
6230 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
6231
6232 int64_t IVal = 0;
6233 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6234 return TokError("Kernarg size should be resolvable");
6235 uint64_t kernarg_size = IVal;
6236 if (PreloadLength && kernarg_size &&
6237 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6238 return TokError("Kernarg preload length + offset is larger than the "
6239 "kernarg segment size");
6240
6241 if (isGFX90A()) {
6242 if (!Seen.contains(".amdhsa_accum_offset"))
6243 return TokError(".amdhsa_accum_offset directive is required");
6244 int64_t EvaluatedAccum;
6245 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6246 uint64_t UEvaluatedAccum = EvaluatedAccum;
6247 if (AccumEvaluatable &&
6248 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6249 return TokError("accum_offset should be in range [4..256] in "
6250 "increments of 4");
6251
6252 int64_t EvaluatedNumVGPR;
6253 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6254 AccumEvaluatable &&
6255 UEvaluatedAccum >
6256 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6257 return TokError("accum_offset exceeds total VGPR allocation");
6258 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6259 MCBinaryExpr::createDiv(
6260 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6261 MCConstantExpr::create(1, getContext()), getContext());
6262 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
6263 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6264 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6265 getContext());
6266 }
6267
6268 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6269 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
6270 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6271 return TokError("shared_vgpr_count directive not valid on "
6272 "wavefront size 32");
6273 }
6274
6275 if (VGPRBlocksEvaluatable &&
6276 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6277 63)) {
6278 return TokError("shared_vgpr_count*2 + "
6279 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6280 "exceed 63\n");
6281 }
6282 }
6283
6284 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6285 NextFreeVGPR, NextFreeSGPR,
6286 ReserveVCC, ReserveFlatScr);
6287 return false;
6288 }
6289
ParseDirectiveAMDHSACodeObjectVersion()6290 bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6291 uint32_t Version;
6292 if (ParseAsAbsoluteExpression(Version))
6293 return true;
6294
6295 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6296 return false;
6297 }
6298
/// Parse a single "key = value" entry of an .amd_kernel_code_t block into
/// \p C, then cross-check wavefront-size-related values against the
/// subtarget's feature bits.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               AMDGPUMCKernelCodeT &C) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  // ParseKernelCodeT reports its own diagnostic text through Err; surface it
  // as a token error at the current location.
  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
    return TokError(Err.str());
  }
  Lex();

  // Validate the recorded wave-size flag against the subtarget: the flag may
  // only be set on GFX10+ and must agree with the WavefrontSize32/64 features.
  if (ID == "enable_wavefront_size32") {
    if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10Plus())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  // wavefront_size is validated the same way; the accepted values here are
  // 5 and 6 (presumably log2 of the wave size — 32 and 64 respectively).
  if (ID == "wavefront_size") {
    if (C.wavefront_size == 5) {
      if (!isGFX10Plus())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (C.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  return false;
}
6341
ParseDirectiveAMDKernelCodeT()6342 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6343 AMDGPUMCKernelCodeT KernelCode;
6344 KernelCode.initDefault(&getSTI(), getContext());
6345
6346 while (true) {
6347 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6348 // will set the current token to EndOfStatement.
6349 while(trySkipToken(AsmToken::EndOfStatement));
6350
6351 StringRef ID;
6352 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6353 return true;
6354
6355 if (ID == ".end_amd_kernel_code_t")
6356 break;
6357
6358 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6359 return true;
6360 }
6361
6362 KernelCode.validate(&getSTI(), getContext());
6363 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6364
6365 return false;
6366 }
6367
ParseDirectiveAMDGPUHsaKernel()6368 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6369 StringRef KernelName;
6370 if (!parseId(KernelName, "expected symbol name"))
6371 return true;
6372
6373 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6374 ELF::STT_AMDGPU_HSA_KERNEL);
6375
6376 KernelScope.initialize(getContext());
6377 return false;
6378 }
6379
/// Parse .amd_amdgpu_isa "<target-id>": the quoted target id must match the
/// target id built from the subtarget/options before the version is emitted.
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (!getSTI().getTargetTriple().isAMDGCN()) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  // Read the string token's contents without consuming it yet.
  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();
  // Consume the string token only after its contents have been checked.
  Lex();

  return false;
}
6396
ParseDirectiveHSAMetadata()6397 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6398 assert(isHsaAbi(getSTI()));
6399
6400 std::string HSAMetadataString;
6401 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6402 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6403 return true;
6404
6405 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6406 return Error(getLoc(), "invalid HSA metadata");
6407
6408 return false;
6409 }
6410
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Collect whitespace verbatim; skipping is re-enabled before returning.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(AsmToken::Eof)) {
    // Copy leading whitespace through to the collected text.
    while (isToken(AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    if (trySkipId(AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    // Append the rest of the line plus the target's statement separator.
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  // Reaching EOF without the end directive is an error.
  if (isToken(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  return false;
}
6448
6449 /// Parse the assembler directive for new MsgPack-format PAL metadata.
ParseDirectivePALMetadataBegin()6450 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6451 std::string String;
6452 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6453 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6454 return true;
6455
6456 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6457 if (!PALMetadata->setFromString(String))
6458 return Error(getLoc(), "invalid PAL metadata");
6459 return false;
6460 }
6461
/// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  // The directive body is a comma-separated list of key/value pairs:
  //   key0, value0, key1, value1, ...
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    // A key without a following comma means the list ended mid-pair.
    if (!trySkipToken(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    // No trailing comma terminates the list.
    if (!trySkipToken(AsmToken::Comma))
      break;
  }
  return false;
}
6492
/// ParseDirectiveAMDGPULDS
///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (getParser().parseComma())
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());

  // The size must be non-negative and fit in the subtarget's LDS.
  int64_t Size;
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  // Alignment is optional and defaults to 4 bytes.
  int64_t Alignment = 4;
  if (trySkipToken(AsmToken::Comma)) {
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place to symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseEOL())
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}
6544
/// Top-level directive dispatcher: route each AMDGPU-specific directive to
/// its dedicated parser. Returns true for directives not handled here so the
/// generic parser can take over.
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  // HSA-ABI directives and legacy (non-HSA) directives are only recognized
  // under the corresponding ABI.
  if (isHsaAbi(getSTI())) {
    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    if (IDVal == ".amdhsa_code_object_version")
      return ParseDirectiveAMDHSACodeObjectVersion();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    // The V3 metadata begin directive is diagnosed explicitly when the ABI
    // does not support it, rather than falling through as unknown.
    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
      return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
                              Twine(" directive is "
                                    "not available on non-amdhsa OSes"))
                                 .str());
    }
  }

  // Directives accepted regardless of ABI.
  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  // Not an AMDGPU directive.
  return true;
}
6590
/// Return true if \p Reg exists on the current subtarget; used to reject
/// registers that are only present on some GPU generations.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           MCRegister Reg) {
  // TTMP12..TTMP15 require GFX9+.
  if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
    return isGFX9Plus();

  // GFX10+ has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
    return hasSGPR104_SGPR105();

  switch (Reg.id()) {
  // Aperture registers: GFX9+ only.
  case SRC_SHARED_BASE_LO:
  case SRC_SHARED_BASE:
  case SRC_SHARED_LIMIT_LO:
  case SRC_SHARED_LIMIT:
  case SRC_PRIVATE_BASE_LO:
  case SRC_PRIVATE_BASE:
  case SRC_PRIVATE_LIMIT_LO:
  case SRC_PRIVATE_LIMIT:
    return isGFX9Plus();
  case SRC_POPS_EXITING_WAVE_ID:
    // Present on GFX9/GFX10 only; removed in GFX11.
    return isGFX9Plus() && !isGFX11Plus();
  case TBA:
  case TBA_LO:
  case TBA_HI:
  case TMA:
  case TMA_LO:
  case TMA_HI:
    // Trap base/memory address registers: pre-GFX9 only.
    return !isGFX9Plus();
  case XNACK_MASK:
  case XNACK_MASK_LO:
  case XNACK_MASK_HI:
    // Only VI/GFX9, and only when the target id supports XNACK.
    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
  case SGPR_NULL:
    return isGFX10Plus();
  case SRC_EXECZ:
  case SRC_VCCZ:
    return !isGFX11Plus();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10Plus flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (Reg.id()) {
    case FLAT_SCR:
    case FLAT_SCR_LO:
    case FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
    return hasSGPR102_SGPR103();

  return true;
}
6656
/// Parse a single instruction operand, trying VOPD, then a mnemonic-specific
/// custom parser, then (in NSA mode) a bracketed register list, and finally
/// the generic register-or-immediate parser.
ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
                                          StringRef Mnemonic,
                                          OperandMode Mode) {
  // A definitive VOPD result (success or failure) or end-of-statement ends
  // operand parsing immediately.
  ParseStatus Res = parseVOPD(Operands);
  if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
    return Res;

  // Try to parse with a custom parser
  Res = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there as an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
    return Res;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  // NSA mode: "[reg, reg, ...]" register list.
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      Res = parseReg(Operands);
      if (Res.isNoMatch())
        Error(Loc, "expected a register");
      if (!Res.isSuccess())
        return ParseStatus::Failure;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket"))
        return ParseStatus::Failure;
    }

    // Keep the bracket tokens only when more than one register was parsed;
    // a single register is treated as an ordinary operand.
    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return ParseStatus::Success;
  }

  // Fall back to generic register-or-immediate parsing.
  return parseRegOrImm(Operands);
}
6709
parseMnemonicSuffix(StringRef Name)6710 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6711 // Clear any forced encodings from the previous instruction.
6712 setForcedEncodingSize(0);
6713 setForcedDPP(false);
6714 setForcedSDWA(false);
6715
6716 if (Name.consume_back("_e64_dpp")) {
6717 setForcedDPP(true);
6718 setForcedEncodingSize(64);
6719 return Name;
6720 }
6721 if (Name.consume_back("_e64")) {
6722 setForcedEncodingSize(64);
6723 return Name;
6724 }
6725 if (Name.consume_back("_e32")) {
6726 setForcedEncodingSize(32);
6727 return Name;
6728 }
6729 if (Name.consume_back("_dpp")) {
6730 setForcedDPP(true);
6731 return Name;
6732 }
6733 if (Name.consume_back("_sdwa")) {
6734 setForcedSDWA(true);
6735 return Name;
6736 }
6737 return Name;
6738 }
6739
6740 static void applyMnemonicAliases(StringRef &Mnemonic,
6741 const FeatureBitset &Features,
6742 unsigned VariantID);
6743
/// Parse one instruction: the (suffix-stripped) mnemonic followed by a
/// comma-separated operand list up to end of statement.
bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name, SMLoc NameLoc,
                                       OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);

  // If the target architecture uses MnemonicAlias, call it here to parse
  // operands correctly.
  applyMnemonicAliases(Name, getAvailableFeatures(), 0);

  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.starts_with("image_");

  while (!trySkipToken(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    // On GFX10+, the second operand of an image instruction may be an NSA
    // register list.
    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    ParseStatus Res = parseOperand(Operands, Name, Mode);

    if (!Res.isSuccess()) {
      checkUnsupportedInstruction(Name, NameLoc);
      if (!Parser.hasPendingError()) {
        // FIXME: use real operand location rather than the current location.
        StringRef Msg = Res.isFailure() ? "failed parsing operand."
                                        : "not a valid operand.";
        Error(getLoc(), Msg);
      }
      // Consume the remainder of the statement so parsing can resynchronize.
      while (!trySkipToken(AsmToken::EndOfStatement)) {
        lex();
      }
      return true;
    }

    // Eat the comma or space if there is one.
    trySkipToken(AsmToken::Comma);
  }

  return false;
}
6784
6785 //===----------------------------------------------------------------------===//
6786 // Utility functions
6787 //===----------------------------------------------------------------------===//
6788
parseTokenOp(StringRef Name,OperandVector & Operands)6789 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6790 OperandVector &Operands) {
6791 SMLoc S = getLoc();
6792 if (!trySkipId(Name))
6793 return ParseStatus::NoMatch;
6794
6795 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6796 return ParseStatus::Success;
6797 }
6798
parseIntWithPrefix(const char * Prefix,int64_t & IntVal)6799 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6800 int64_t &IntVal) {
6801
6802 if (!trySkipId(Prefix, AsmToken::Colon))
6803 return ParseStatus::NoMatch;
6804
6805 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6806 }
6807
/// Parse "<Prefix>:<expr>" and push it as an immediate operand of type
/// \p ImmTy, optionally transformed/validated by \p ConvertResult.
ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    std::function<bool(int64_t &)> ConvertResult) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  ParseStatus Res = parseIntWithPrefix(Prefix, Value);
  if (!Res.isSuccess())
    return Res;

  // ConvertResult may rewrite Value in place. On rejection the error is
  // reported but the operand is still pushed and Success returned, so
  // parsing of the remaining operands can continue.
  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return ParseStatus::Success;
}
6825
/// Parse "<Prefix>:[b0,b1,...]" where each element must be 0 or 1; element I
/// is packed into bit I of a single immediate operand. At most 4 elements
/// are accepted.
/// NOTE(review): the ConvertResult parameter is not used in this body.
ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    bool (*ConvertResult)(int64_t &)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return ParseStatus::Failure;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return ParseStatus::Failure;

    if (Op != 0 && Op != 1)
      return Error(Loc, "invalid " + StringRef(Prefix) + " value.");

    // Pack element I into bit I.
    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    // Reject a fifth element before consuming the separator.
    if (I + 1 == MaxSize)
      return Error(getLoc(), "expected a closing square bracket");

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return ParseStatus::Success;
}
6865
parseNamedBit(StringRef Name,OperandVector & Operands,AMDGPUOperand::ImmTy ImmTy)6866 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6867 OperandVector &Operands,
6868 AMDGPUOperand::ImmTy ImmTy) {
6869 int64_t Bit;
6870 SMLoc S = getLoc();
6871
6872 if (trySkipId(Name)) {
6873 Bit = 1;
6874 } else if (trySkipId("no", Name)) {
6875 Bit = 0;
6876 } else {
6877 return ParseStatus::NoMatch;
6878 }
6879
6880 if (Name == "r128" && !hasMIMG_R128())
6881 return Error(S, "r128 modifier is not supported on this GPU");
6882 if (Name == "a16" && !hasA16())
6883 return Error(S, "a16 modifier is not supported on this GPU");
6884
6885 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6886 ImmTy = AMDGPUOperand::ImmTyR128A16;
6887
6888 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6889 return ParseStatus::Success;
6890 }
6891
getCPolKind(StringRef Id,StringRef Mnemo,bool & Disabling) const6892 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6893 bool &Disabling) const {
6894 Disabling = Id.consume_front("no");
6895
6896 if (isGFX940() && !Mnemo.starts_with("s_")) {
6897 return StringSwitch<unsigned>(Id)
6898 .Case("nt", AMDGPU::CPol::NT)
6899 .Case("sc0", AMDGPU::CPol::SC0)
6900 .Case("sc1", AMDGPU::CPol::SC1)
6901 .Default(0);
6902 }
6903
6904 return StringSwitch<unsigned>(Id)
6905 .Case("dlc", AMDGPU::CPol::DLC)
6906 .Case("glc", AMDGPU::CPol::GLC)
6907 .Case("scc", AMDGPU::CPol::SCC)
6908 .Case("slc", AMDGPU::CPol::SLC)
6909 .Default(0);
6910 }
6911
/// Parse cache-policy modifiers into a single CPol immediate operand.
/// GFX12+ accepts "th:..." and "scope:..." specifiers in either order (each
/// at most once); older targets accept flag keywords (glc/slc/dlc/scc and
/// their "no"-prefixed forms).
ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  if (isGFX12Plus()) {
    SMLoc StringLoc = getLoc();

    int64_t CPolVal = 0;
    ParseStatus ResTH = ParseStatus::NoMatch;
    ParseStatus ResScope = ParseStatus::NoMatch;

    // Keep alternating between the two specifiers until neither matches;
    // a specifier that already succeeded is not tried again.
    for (;;) {
      if (ResTH.isNoMatch()) {
        int64_t TH;
        ResTH = parseTH(Operands, TH);
        if (ResTH.isFailure())
          return ResTH;
        if (ResTH.isSuccess()) {
          CPolVal |= TH;
          continue;
        }
      }

      if (ResScope.isNoMatch()) {
        int64_t Scope;
        ResScope = parseScope(Operands, Scope);
        if (ResScope.isFailure())
          return ResScope;
        if (ResScope.isSuccess()) {
          CPolVal |= Scope;
          continue;
        }
      }

      break;
    }

    if (ResTH.isNoMatch() && ResScope.isNoMatch())
      return ParseStatus::NoMatch;

    Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
                                                AMDGPUOperand::ImmTyCPol));
    return ParseStatus::Success;
  }

  // Pre-GFX12 path: consume flag keywords one at a time.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  for (;;) {
    SMLoc S = getLoc();
    bool Disabling;
    unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
    if (!CPol)
      break;

    lex();

    if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
      return Error(S, "dlc modifier is not supported on this GPU");

    if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
      return Error(S, "scc modifier is not supported on this GPU");

    // Seen tracks every mention (enabled or disabled) so that e.g.
    // "glc noglc" is rejected as a duplicate.
    if (Seen & CPol)
      return Error(S, "duplicate cache policy modifier");

    if (!Disabling)
      Enabled |= CPol;

    Seen |= CPol;
  }

  if (!Seen)
    return ParseStatus::NoMatch;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
  return ParseStatus::Success;
}
6988
parseScope(OperandVector & Operands,int64_t & Scope)6989 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6990 int64_t &Scope) {
6991 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
6992 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
6993
6994 ParseStatus Res = parseStringOrIntWithPrefix(
6995 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
6996 Scope);
6997
6998 if (Res.isSuccess())
6999 Scope = Scopes[Scope];
7000
7001 return Res;
7002 }
7003
/// Parse a "th:TH_*" temporal-hint specifier into its CPol encoding.
ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
  TH = AMDGPU::CPol::TH_RT; // default

  StringRef Value;
  SMLoc StringLoc;
  ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  // First classify the hint by its prefix; the TH_STORE_LU / TH_LOAD_WB /
  // TH_LOAD_NT_WB spellings are rejected before prefix stripping.
  if (Value == "TH_DEFAULT")
    TH = AMDGPU::CPol::TH_RT;
  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
           Value == "TH_LOAD_NT_WB") {
    return Error(StringLoc, "invalid th value");
  } else if (Value.consume_front("TH_ATOMIC_")) {
    TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
  } else if (Value.consume_front("TH_LOAD_")) {
    TH = AMDGPU::CPol::TH_TYPE_LOAD;
  } else if (Value.consume_front("TH_STORE_")) {
    TH = AMDGPU::CPol::TH_TYPE_STORE;
  } else {
    return Error(StringLoc, "invalid th value");
  }

  // After consume_front above, Value holds only the per-type suffix.
  if (Value == "BYPASS")
    TH |= AMDGPU::CPol::TH_REAL_BYPASS;

  if (TH != 0) {
    // Map the suffix to its encoding; atomics use a different suffix set
    // than loads/stores. 0xffffffff marks an unrecognized suffix.
    if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
      TH |= StringSwitch<int64_t>(Value)
                .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
                .Case("RT", AMDGPU::CPol::TH_RT)
                .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
                .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
                .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
                                       AMDGPU::CPol::TH_ATOMIC_RETURN)
                .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
                .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
                                        AMDGPU::CPol::TH_ATOMIC_NT)
                .Default(0xffffffff);
    else
      TH |= StringSwitch<int64_t>(Value)
                .Case("RT", AMDGPU::CPol::TH_RT)
                .Case("NT", AMDGPU::CPol::TH_NT)
                .Case("HT", AMDGPU::CPol::TH_HT)
                .Case("LU", AMDGPU::CPol::TH_LU)
                .Case("WB", AMDGPU::CPol::TH_WB)
                .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
                .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
                .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
                .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
                .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
                .Default(0xffffffff);
  }

  if (TH == 0xffffffff)
    return Error(StringLoc, "invalid th value");

  return ParseStatus::Success;
}
7064
7065 static void
addOptionalImmOperand(MCInst & Inst,const OperandVector & Operands,AMDGPUAsmParser::OptionalImmIndexMap & OptionalIdx,AMDGPUOperand::ImmTy ImmT,int64_t Default=0,std::optional<unsigned> InsertAt=std::nullopt)7066 addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7067 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7068 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7069 std::optional<unsigned> InsertAt = std::nullopt) {
7070 auto i = OptionalIdx.find(ImmT);
7071 if (i != OptionalIdx.end()) {
7072 unsigned Idx = i->second;
7073 const AMDGPUOperand &Op =
7074 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7075 if (InsertAt)
7076 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7077 else
7078 Op.addImmOperands(Inst, 1);
7079 } else {
7080 if (InsertAt.has_value())
7081 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7082 else
7083 Inst.addOperand(MCOperand::createImm(Default));
7084 }
7085 }
7086
parseStringWithPrefix(StringRef Prefix,StringRef & Value,SMLoc & StringLoc)7087 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7088 StringRef &Value,
7089 SMLoc &StringLoc) {
7090 if (!trySkipId(Prefix, AsmToken::Colon))
7091 return ParseStatus::NoMatch;
7092
7093 StringLoc = getLoc();
7094 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7095 : ParseStatus::Failure;
7096 }
7097
// Parse "<Name>:<value>" where <value> is either one of the symbolic names
// in Ids (mapped to its index) or an integer expression. On success IntVal
// holds the resulting index/value; anything outside [0, Ids.size()) is
// rejected with an error.
ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
    OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
    int64_t &IntVal) {
  if (!trySkipId(Name, AsmToken::Colon))
    return ParseStatus::NoMatch;

  SMLoc StringLoc = getLoc();

  StringRef Value;
  if (isToken(AsmToken::Identifier)) {
    Value = getTokenStr();
    lex();

    // Map the symbolic name to its index in Ids. If it is not found,
    // IntVal ends up equal to Ids.size() and the range check below fires.
    for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
      if (Value == Ids[IntVal])
        break;
  } else if (!parseExpr(IntVal))
    return ParseStatus::Failure;

  if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
    return Error(StringLoc, "invalid " + Twine(Name) + " value");

  return ParseStatus::Success;
}
7122
parseStringOrIntWithPrefix(OperandVector & Operands,StringRef Name,ArrayRef<const char * > Ids,AMDGPUOperand::ImmTy Type)7123 ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7124 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7125 AMDGPUOperand::ImmTy Type) {
7126 SMLoc S = getLoc();
7127 int64_t IntVal;
7128
7129 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7130 if (Res.isSuccess())
7131 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7132
7133 return Res;
7134 }
7135
//===----------------------------------------------------------------------===//
// MTBUF format
//===----------------------------------------------------------------------===//
7139
tryParseFmt(const char * Pref,int64_t MaxVal,int64_t & Fmt)7140 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7141 int64_t MaxVal,
7142 int64_t &Fmt) {
7143 int64_t Val;
7144 SMLoc Loc = getLoc();
7145
7146 auto Res = parseIntWithPrefix(Pref, Val);
7147 if (Res.isFailure())
7148 return false;
7149 if (Res.isNoMatch())
7150 return true;
7151
7152 if (Val < 0 || Val > MaxVal) {
7153 Error(Loc, Twine("out of range ", StringRef(Pref)));
7154 return false;
7155 }
7156
7157 Fmt = Val;
7158 return true;
7159 }
7160
tryParseIndexKey(OperandVector & Operands,AMDGPUOperand::ImmTy ImmTy)7161 ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7162 AMDGPUOperand::ImmTy ImmTy) {
7163 const char *Pref = "index_key";
7164 int64_t ImmVal = 0;
7165 SMLoc Loc = getLoc();
7166 auto Res = parseIntWithPrefix(Pref, ImmVal);
7167 if (!Res.isSuccess())
7168 return Res;
7169
7170 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7171 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7172 (ImmVal < 0 || ImmVal > 1))
7173 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7174
7175 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7176 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7177
7178 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7179 return ParseStatus::Success;
7180 }
7181
parseIndexKey8bit(OperandVector & Operands)7182 ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7183 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7184 }
7185
parseIndexKey16bit(OperandVector & Operands)7186 ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7187 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7188 }
7189
parseIndexKey32bit(OperandVector & Operands)7190 ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7191 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7192 }
7193
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
// Returns NoMatch when neither is present; on success Format holds the
// combined encoding with defaults substituted for any omitted half.
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return ParseStatus::Failure;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
      return ParseStatus::Failure;

    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return ParseStatus::NoMatch;

  // Substitute the default encoding for whichever half was omitted.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return ParseStatus::Success;
}
7227
parseUfmt(int64_t & Format)7228 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7229 using namespace llvm::AMDGPU::MTBUFFormat;
7230
7231 int64_t Fmt = UFMT_UNDEF;
7232
7233 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7234 return ParseStatus::Failure;
7235
7236 if (Fmt == UFMT_UNDEF)
7237 return ParseStatus::NoMatch;
7238
7239 Format = Fmt;
7240 return ParseStatus::Success;
7241 }
7242
matchDfmtNfmt(int64_t & Dfmt,int64_t & Nfmt,StringRef FormatStr,SMLoc Loc)7243 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7244 int64_t &Nfmt,
7245 StringRef FormatStr,
7246 SMLoc Loc) {
7247 using namespace llvm::AMDGPU::MTBUFFormat;
7248 int64_t Format;
7249
7250 Format = getDfmt(FormatStr);
7251 if (Format != DFMT_UNDEF) {
7252 Dfmt = Format;
7253 return true;
7254 }
7255
7256 Format = getNfmt(FormatStr, getSTI());
7257 if (Format != NFMT_UNDEF) {
7258 Nfmt = Format;
7259 return true;
7260 }
7261
7262 Error(Loc, "unsupported format");
7263 return false;
7264 }
7265
// Parse a "split" symbolic format: one or two comma-separated names, each
// naming either a data format (dfmt) or a numeric format (nfmt), in either
// order. FormatStr is the first name, already consumed by the caller.
ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                                      SMLoc FormatLoc,
                                                      int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return ParseStatus::Failure;

  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
      return ParseStatus::Failure;
    // After two names both halves must be defined; if one is still UNDEF,
    // the same kind of format was given twice.
    if (Dfmt == DFMT_UNDEF)
      return Error(Loc, "duplicate numeric format");
    if (Nfmt == NFMT_UNDEF)
      return Error(Loc, "duplicate data format");
  }

  // Fill in the default encoding for whichever half was not specified.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (isGFX10Plus()) {
    // GFX10+ encodes a single unified format; translate the pair.
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
    if (Ufmt == UFMT_UNDEF)
      return Error(FormatLoc, "unsupported format");
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return ParseStatus::Success;
}
7302
parseSymbolicUnifiedFormat(StringRef FormatStr,SMLoc Loc,int64_t & Format)7303 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7304 SMLoc Loc,
7305 int64_t &Format) {
7306 using namespace llvm::AMDGPU::MTBUFFormat;
7307
7308 auto Id = getUnifiedFormat(FormatStr, getSTI());
7309 if (Id == UFMT_UNDEF)
7310 return ParseStatus::NoMatch;
7311
7312 if (!isGFX10Plus())
7313 return Error(Loc, "unified format is not supported on this GPU");
7314
7315 Format = Id;
7316 return ParseStatus::Success;
7317 }
7318
parseNumericFormat(int64_t & Format)7319 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7320 using namespace llvm::AMDGPU::MTBUFFormat;
7321 SMLoc Loc = getLoc();
7322
7323 if (!parseExpr(Format))
7324 return ParseStatus::Failure;
7325 if (!isValidFormatEncoding(Format, getSTI()))
7326 return Error(Loc, "out of range format");
7327
7328 return ParseStatus::Success;
7329 }
7330
// Parse "format:" followed by either a bracketed symbolic form
// ([name] or [name,name]) or a plain numeric expression.
ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  if (!trySkipId("format", AsmToken::Colon))
    return ParseStatus::NoMatch;

  if (trySkipToken(AsmToken::LBrac)) {
    StringRef FormatStr;
    SMLoc Loc = getLoc();
    if (!parseId(FormatStr, "expected a format string"))
      return ParseStatus::Failure;

    // Try a unified format name first, then fall back to the split
    // dfmt/nfmt names.
    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    if (Res.isNoMatch())
      Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
    if (!Res.isSuccess())
      return Res;

    if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
      return ParseStatus::Failure;

    return ParseStatus::Success;
  }

  return parseNumericFormat(Format);
}
7357
// Parse the MTBUF format operand together with the following soffset
// operand. The format may appear either before soffset (legacy syntax)
// or after it ("format:..."); in the latter case the placeholder format
// operand added earlier is patched in place.
ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(getSTI());
  ParseStatus Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res.isFailure())
    return Res;

  bool FormatFound = Res.isSuccess();

  // Always add a format operand (with the default encoding if none was
  // parsed) so it occupies a fixed position in the operand list.
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return ParseStatus::Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (!Res.isSuccess())
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res.isFailure())
      return Res;
    if (Res.isSuccess()) {
      // Patch the placeholder format operand (second-to-last, before the
      // soffset just parsed) with the actual value.
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return ParseStatus::Success;
  }

  // A second format specifier is a user error.
  if (isId("format") && peekToken().is(AsmToken::Colon))
    return Error(getLoc(), "duplicate format");
  return ParseStatus::Success;
}
7408
parseFlatOffset(OperandVector & Operands)7409 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7410 ParseStatus Res =
7411 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7412 if (Res.isNoMatch()) {
7413 Res = parseIntWithPrefix("inst_offset", Operands,
7414 AMDGPUOperand::ImmTyInstOffset);
7415 }
7416 return Res;
7417 }
7418
parseR128A16(OperandVector & Operands)7419 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7420 ParseStatus Res =
7421 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7422 if (Res.isNoMatch())
7423 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7424 return Res;
7425 }
7426
parseBLGP(OperandVector & Operands)7427 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7428 ParseStatus Res =
7429 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7430 if (Res.isNoMatch()) {
7431 Res =
7432 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7433 }
7434 return Res;
7435 }
7436
//===----------------------------------------------------------------------===//
// Exp
//===----------------------------------------------------------------------===//
7440
// Convert parsed EXP instruction operands into an MCInst: record the four
// source operands (registers or 'off'), handle the compr modifier, and
// compute the trailing 'en' enable mask from which sources are live.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4]; // MCInst operand index of each of the 4 sources.
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // An 'off' source becomes a null register operand.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(MCRegister()));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // 'done' and 'row_en' tokens are not materialized as MCInst operands.
    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // compr: copy the third source into the second slot and null out the
    // upper two source slots.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
    Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
  }

  // Enable mask: one bit per live source, or two bits per live source pair
  // when compressed.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg()) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
7501
//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//
7505
7506 static bool
encodeCnt(const AMDGPU::IsaVersion ISA,int64_t & IntVal,int64_t CntVal,bool Saturate,unsigned (* encode)(const IsaVersion & Version,unsigned,unsigned),unsigned (* decode)(const IsaVersion & Version,unsigned))7507 encodeCnt(
7508 const AMDGPU::IsaVersion ISA,
7509 int64_t &IntVal,
7510 int64_t CntVal,
7511 bool Saturate,
7512 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7513 unsigned (*decode)(const IsaVersion &Version, unsigned))
7514 {
7515 bool Failed = false;
7516
7517 IntVal = encode(ISA, IntVal, CntVal);
7518 if (CntVal != decode(ISA, IntVal)) {
7519 if (Saturate) {
7520 IntVal = encode(ISA, IntVal, -1);
7521 } else {
7522 Failed = true;
7523 }
7524 }
7525 return Failed;
7526 }
7527
// Parse one "name(value)" counter group of an s_waitcnt operand and merge
// the value into IntVal. Accepts vmcnt/expcnt/lgkmcnt and their "_sat"
// variants, which clamp to the field maximum instead of erroring on
// overflow. Returns false after emitting a diagnostic on any error.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.ends_with("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Counter groups may be separated by '&' or ','; a trailing separator
  // with no following counter is an error.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}
7575
parseSWaitCnt(OperandVector & Operands)7576 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7577 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7578 int64_t Waitcnt = getWaitcntBitMask(ISA);
7579 SMLoc S = getLoc();
7580
7581 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7582 while (!isToken(AsmToken::EndOfStatement)) {
7583 if (!parseCnt(Waitcnt))
7584 return ParseStatus::Failure;
7585 }
7586 } else {
7587 if (!parseExpr(Waitcnt))
7588 return ParseStatus::Failure;
7589 }
7590
7591 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7592 return ParseStatus::Success;
7593 }
7594
// Parse one "field(VALUE)" group of an s_delay_alu operand and OR the
// encoded value into Delay. Valid fields are instid0, instid1 and instskip,
// each occupying a fixed bit position within the encoding.
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
  SMLoc FieldLoc = getLoc();
  StringRef FieldName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a field name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  SMLoc ValueLoc = getLoc();
  StringRef ValueName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a value name") ||
      !skipToken(AsmToken::RParen, "expected a right parenthesis"))
    return false;

  // Bit offset of the selected field within the delay encoding.
  unsigned Shift;
  if (FieldName == "instid0") {
    Shift = 0;
  } else if (FieldName == "instskip") {
    Shift = 4;
  } else if (FieldName == "instid1") {
    Shift = 7;
  } else {
    Error(FieldLoc, "invalid field name " + FieldName);
    return false;
  }

  int Value;
  if (Shift == 4) {
    // Parse values for instskip.
    Value = StringSwitch<int>(ValueName)
                .Case("SAME", 0)
                .Case("NEXT", 1)
                .Case("SKIP_1", 2)
                .Case("SKIP_2", 3)
                .Case("SKIP_3", 4)
                .Case("SKIP_4", 5)
                .Default(-1);
  } else {
    // Parse values for instid0 and instid1.
    Value = StringSwitch<int>(ValueName)
                .Case("NO_DEP", 0)
                .Case("VALU_DEP_1", 1)
                .Case("VALU_DEP_2", 2)
                .Case("VALU_DEP_3", 3)
                .Case("VALU_DEP_4", 4)
                .Case("TRANS32_DEP_1", 5)
                .Case("TRANS32_DEP_2", 6)
                .Case("TRANS32_DEP_3", 7)
                .Case("FMA_ACCUM_CYCLE_1", 8)
                .Case("SALU_CYCLE_1", 9)
                .Case("SALU_CYCLE_2", 10)
                .Case("SALU_CYCLE_3", 11)
                .Default(-1);
  }
  if (Value < 0) {
    Error(ValueLoc, "invalid value name " + ValueName);
    return false;
  }

  Delay |= Value << Shift;
  return true;
}
7656
parseSDelayALU(OperandVector & Operands)7657 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7658 int64_t Delay = 0;
7659 SMLoc S = getLoc();
7660
7661 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7662 do {
7663 if (!parseDelay(Delay))
7664 return ParseStatus::Failure;
7665 } while (trySkipToken(AsmToken::Pipe));
7666 } else {
7667 if (!parseExpr(Delay))
7668 return ParseStatus::Failure;
7669 }
7670
7671 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7672 return ParseStatus::Success;
7673 }
7674
7675 bool
isSWaitCnt() const7676 AMDGPUOperand::isSWaitCnt() const {
7677 return isImm();
7678 }
7679
isSDelayALU() const7680 bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7681
//===----------------------------------------------------------------------===//
// DepCtr
//===----------------------------------------------------------------------===//
7685
depCtrError(SMLoc Loc,int ErrorId,StringRef DepCtrName)7686 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7687 StringRef DepCtrName) {
7688 switch (ErrorId) {
7689 case OPR_ID_UNKNOWN:
7690 Error(Loc, Twine("invalid counter name ", DepCtrName));
7691 return;
7692 case OPR_ID_UNSUPPORTED:
7693 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7694 return;
7695 case OPR_ID_DUPLICATE:
7696 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7697 return;
7698 case OPR_VAL_INVALID:
7699 Error(Loc, Twine("invalid value for ", DepCtrName));
7700 return;
7701 default:
7702 assert(false);
7703 }
7704 }
7705
// Parse one "name(value)" group of a depctr operand and merge the encoded
// bits into DepCtr. UsedOprMask accumulates the bit-fields specified so
// far, letting encodeDepCtr diagnose duplicates.
bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {

  using namespace llvm::AMDGPU::DepCtr;

  SMLoc DepCtrLoc = getLoc();
  StringRef DepCtrName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t ExprVal;
  if (!parseExpr(ExprVal))
    return false;

  unsigned PrevOprMask = UsedOprMask;
  int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());

  // Negative results are error codes; report and bail out.
  if (CntVal < 0) {
    depCtrError(DepCtrLoc, CntVal, DepCtrName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Counter groups may be separated by '&' or ','; a trailing separator
  // with no following counter is an error.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  // Replace only the bits of the field that was just parsed.
  unsigned CntValMask = PrevOprMask ^ UsedOprMask;
  DepCtr = (DepCtr & ~CntValMask) | CntVal;
  return true;
}
7743
parseDepCtr(OperandVector & Operands)7744 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7745 using namespace llvm::AMDGPU::DepCtr;
7746
7747 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7748 SMLoc Loc = getLoc();
7749
7750 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7751 unsigned UsedOprMask = 0;
7752 while (!isToken(AsmToken::EndOfStatement)) {
7753 if (!parseDepCtr(DepCtr, UsedOprMask))
7754 return ParseStatus::Failure;
7755 }
7756 } else {
7757 if (!parseExpr(DepCtr))
7758 return ParseStatus::Failure;
7759 }
7760
7761 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7762 return ParseStatus::Success;
7763 }
7764
isDepCtr() const7765 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7766
//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//
7770
// Parse the "hwreg(reg[, offset, size])" functional form. The register may
// be given by symbolic name or by a numeric expression; offset and size are
// optional but must appear together. Consumes the closing parenthesis.
ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
                                            OperandInfoTy &Offset,
                                            OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (!trySkipId("hwreg", AsmToken::LParen))
    return ParseStatus::NoMatch;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Val, "a register name")) {
    return ParseStatus::Failure;
  }

  // A closing parenthesis right away means only the register was given.
  if (trySkipToken(AsmToken::RParen))
    return ParseStatus::Success;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return ParseStatus::Failure;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Val))
    return ParseStatus::Failure;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return ParseStatus::Failure;

  Width.Loc = getLoc();
  if (!parseExpr(Width.Val) ||
      !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return ParseStatus::Failure;

  return ParseStatus::Success;
}
7810
// Parse a hwreg operand in any of three forms: structured fields
// (id/offset/size), the hwreg(...) macro, or a plain integer expression.
// The result is encoded into a 16-bit immediate operand.
ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  StructuredOpField HwReg("id", "hardware register", HwregId::Width,
                          HwregId::Default);
  StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
                           HwregOffset::Default);
  // The size field accepts 1..32 (validated via Val - 1), hence the custom
  // validator.
  struct : StructuredOpField {
    using StructuredOpField::StructuredOpField;
    bool validate(AMDGPUAsmParser &Parser) const override {
      if (!isUIntN(Width, Val - 1))
        return Error(Parser, "only values from 1 to 32 are legal");
      return true;
    }
  } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
  ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});

  // Fall back to the hwreg(...) macro form.
  if (Res.isNoMatch())
    Res = parseHwregFunc(HwReg, Offset, Width);

  if (Res.isSuccess()) {
    if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
      return ParseStatus::Failure;
    ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
  }

  // Last resort: a raw integer expression.
  if (Res.isNoMatch() &&
      parseExpr(ImmVal, "a hwreg macro, structured immediate"))
    Res = ParseStatus::Success;

  if (!Res.isSuccess())
    return ParseStatus::Failure;

  if (!isUInt<16>(ImmVal))
    return Error(Loc, "invalid immediate: only 16-bit values are legal");
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return ParseStatus::Success;
}
7853
isHwreg() const7854 bool AMDGPUOperand::isHwreg() const {
7855 return isImmTy(ImmTyHwreg);
7856 }
7857
//===----------------------------------------------------------------------===//
// sendmsg
//===----------------------------------------------------------------------===//
7861
// Parse the body of a sendmsg(...) macro after the opening parenthesis: a
// message (name or expression), optionally followed by an operation and a
// stream id expression. Consumes the closing parenthesis. Returns false
// after emitting a diagnostic on error.
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Val, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
            OPR_ID_UNKNOWN) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Val, "an operation name")) {
      return false;
    }

    // The stream id may only follow an operation.
    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Val))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
7898
// Validate a parsed sendmsg triple. Returns false after emitting a
// diagnostic when the combination is invalid.
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (Strict) {
    if (Msg.Val == OPR_ID_UNSUPPORTED) {
      Error(Msg.Loc, "specified message id is not supported on this GPU");
      return false;
    }
  } else {
    if (!isValidMsgId(Msg.Val, getSTI())) {
      Error(Msg.Loc, "invalid message id");
      return false;
    }
  }
  // In strict mode the presence of an operation must match whether the
  // message requires one.
  if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
    if (Op.Val == OPR_ID_UNSUPPORTED)
      Error(Op.Loc, "specified operation id is not supported on this GPU");
    else
      Error(Op.Loc, "invalid operation id");
    return false;
  }
  if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
      Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}
7947
// Parse a sendmsg operand: either the sendmsg(msg[, op[, stream]]) macro,
// validated and encoded, or a plain 16-bit immediate expression.
ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(OPR_ID_UNKNOWN);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
    } else {
      return ParseStatus::Failure;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      return Error(Loc, "invalid immediate: only 16-bit values are legal");
  } else {
    return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return ParseStatus::Success;
}
7974
isSendMsg() const7975 bool AMDGPUOperand::isSendMsg() const {
7976 return isImmTy(ImmTySendMsg);
7977 }
7978
//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//
7982
parseInterpSlot(OperandVector & Operands)7983 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7984 StringRef Str;
7985 SMLoc S = getLoc();
7986
7987 if (!parseId(Str))
7988 return ParseStatus::NoMatch;
7989
7990 int Slot = StringSwitch<int>(Str)
7991 .Case("p10", 0)
7992 .Case("p20", 1)
7993 .Case("p0", 2)
7994 .Default(-1);
7995
7996 if (Slot == -1)
7997 return Error(S, "invalid interpolation slot");
7998
7999 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8000 AMDGPUOperand::ImmTyInterpSlot));
8001 return ParseStatus::Success;
8002 }
8003
parseInterpAttr(OperandVector & Operands)8004 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8005 StringRef Str;
8006 SMLoc S = getLoc();
8007
8008 if (!parseId(Str))
8009 return ParseStatus::NoMatch;
8010
8011 if (!Str.starts_with("attr"))
8012 return Error(S, "invalid interpolation attribute");
8013
8014 StringRef Chan = Str.take_back(2);
8015 int AttrChan = StringSwitch<int>(Chan)
8016 .Case(".x", 0)
8017 .Case(".y", 1)
8018 .Case(".z", 2)
8019 .Case(".w", 3)
8020 .Default(-1);
8021 if (AttrChan == -1)
8022 return Error(S, "invalid or missing interpolation attribute channel");
8023
8024 Str = Str.drop_back(2).drop_front(4);
8025
8026 uint8_t Attr;
8027 if (Str.getAsInteger(10, Attr))
8028 return Error(S, "invalid or missing interpolation attribute number");
8029
8030 if (Attr > 32)
8031 return Error(S, "out of bounds interpolation attribute number");
8032
8033 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8034
8035 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8036 AMDGPUOperand::ImmTyInterpAttr));
8037 Operands.push_back(AMDGPUOperand::CreateImm(
8038 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8039 return ParseStatus::Success;
8040 }
8041
//===----------------------------------------------------------------------===//
// exp
//===----------------------------------------------------------------------===//
8045
/// Parse an export target operand (e.g. mrt0, pos0, param0) and push it as
/// an ImmTyExpTgt immediate. Rejects targets unknown to getTgtId or not
/// supported by the current subtarget.
ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Exp;

  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  unsigned Id = getTgtId(Str);
  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
    return Error(S, (Id == ET_INVALID)
                        ? "invalid exp target"
                        : "exp target is not supported on this GPU");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return ParseStatus::Success;
}
8065
8066 //===----------------------------------------------------------------------===//
8067 // parser helpers
8068 //===----------------------------------------------------------------------===//
8069
8070 bool
isId(const AsmToken & Token,const StringRef Id) const8071 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8072 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8073 }
8074
/// True when the current token is the identifier \p Id.
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}
8079
/// True when the current token has kind \p Kind.
bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}
8084
getId() const8085 StringRef AMDGPUAsmParser::getId() const {
8086 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8087 }
8088
8089 bool
trySkipId(const StringRef Id)8090 AMDGPUAsmParser::trySkipId(const StringRef Id) {
8091 if (isId(Id)) {
8092 lex();
8093 return true;
8094 }
8095 return false;
8096 }
8097
8098 bool
trySkipId(const StringRef Pref,const StringRef Id)8099 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8100 if (isToken(AsmToken::Identifier)) {
8101 StringRef Tok = getTokenStr();
8102 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8103 lex();
8104 return true;
8105 }
8106 }
8107 return false;
8108 }
8109
8110 bool
trySkipId(const StringRef Id,const AsmToken::TokenKind Kind)8111 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8112 if (isId(Id) && peekToken().is(Kind)) {
8113 lex();
8114 lex();
8115 return true;
8116 }
8117 return false;
8118 }
8119
8120 bool
trySkipToken(const AsmToken::TokenKind Kind)8121 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8122 if (isToken(Kind)) {
8123 lex();
8124 return true;
8125 }
8126 return false;
8127 }
8128
8129 bool
skipToken(const AsmToken::TokenKind Kind,const StringRef ErrMsg)8130 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8131 const StringRef ErrMsg) {
8132 if (!trySkipToken(Kind)) {
8133 Error(getLoc(), ErrMsg);
8134 return false;
8135 }
8136 return true;
8137 }
8138
/// Parse an expression that must evaluate to an absolute value, storing it
/// in \p Imm. On failure an error is reported; \p Expected, when non-empty,
/// names the alternative construct mentioned in the diagnostic.
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  if (Expr->evaluateAsAbsolute(Imm))
    return true;

  if (Expected.empty()) {
    Error(S, "expected absolute expression");
  } else {
    Error(S, Twine("expected ", Expected) +
             Twine(" or an absolute expression"));
  }
  return false;
}
8158
8159 bool
parseExpr(OperandVector & Operands)8160 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8161 SMLoc S = getLoc();
8162
8163 const MCExpr *Expr;
8164 if (Parser.parseExpression(Expr))
8165 return false;
8166
8167 int64_t IntVal;
8168 if (Expr->evaluateAsAbsolute(IntVal)) {
8169 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8170 } else {
8171 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8172 }
8173 return true;
8174 }
8175
8176 bool
parseString(StringRef & Val,const StringRef ErrMsg)8177 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8178 if (isToken(AsmToken::String)) {
8179 Val = getToken().getStringContents();
8180 lex();
8181 return true;
8182 }
8183 Error(getLoc(), ErrMsg);
8184 return false;
8185 }
8186
8187 bool
parseId(StringRef & Val,const StringRef ErrMsg)8188 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8189 if (isToken(AsmToken::Identifier)) {
8190 Val = getTokenStr();
8191 lex();
8192 return true;
8193 }
8194 if (!ErrMsg.empty())
8195 Error(getLoc(), ErrMsg);
8196 return false;
8197 }
8198
/// Current (not yet consumed) token.
AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}
8203
peekToken(bool ShouldSkipSpace)8204 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8205 return isToken(AsmToken::EndOfStatement)
8206 ? getToken()
8207 : getLexer().peekTok(ShouldSkipSpace);
8208 }
8209
8210 void
peekTokens(MutableArrayRef<AsmToken> Tokens)8211 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8212 auto TokCount = getLexer().peekTokens(Tokens);
8213
8214 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8215 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8216 }
8217
/// Kind of the current token.
AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}
8222
/// Source location of the current token.
SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}
8227
/// Spelling of the current token.
StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}
8232
/// Consume the current token.
void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
8237
/// Location of the instruction mnemonic (operand 0 is the mnemonic token).
SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
}
8241
8242 SMLoc
getOperandLoc(std::function<bool (const AMDGPUOperand &)> Test,const OperandVector & Operands) const8243 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8244 const OperandVector &Operands) const {
8245 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8246 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8247 if (Test(Op))
8248 return Op.getStartLoc();
8249 }
8250 return getInstLoc(Operands);
8251 }
8252
8253 SMLoc
getImmLoc(AMDGPUOperand::ImmTy Type,const OperandVector & Operands) const8254 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8255 const OperandVector &Operands) const {
8256 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8257 return getOperandLoc(Test, Operands);
8258 }
8259
getRegLoc(MCRegister Reg,const OperandVector & Operands) const8260 SMLoc AMDGPUAsmParser::getRegLoc(MCRegister Reg,
8261 const OperandVector &Operands) const {
8262 auto Test = [=](const AMDGPUOperand& Op) {
8263 return Op.isRegKind() && Op.getReg() == Reg;
8264 };
8265 return getOperandLoc(Test, Operands);
8266 }
8267
getLitLoc(const OperandVector & Operands,bool SearchMandatoryLiterals) const8268 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
8269 bool SearchMandatoryLiterals) const {
8270 auto Test = [](const AMDGPUOperand& Op) {
8271 return Op.IsImmKindLiteral() || Op.isExpr();
8272 };
8273 SMLoc Loc = getOperandLoc(Test, Operands);
8274 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
8275 Loc = getMandatoryLitLoc(Operands);
8276 return Loc;
8277 }
8278
getMandatoryLitLoc(const OperandVector & Operands) const8279 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
8280 auto Test = [](const AMDGPUOperand &Op) {
8281 return Op.IsImmKindMandatoryLiteral();
8282 };
8283 return getOperandLoc(Test, Operands);
8284 }
8285
8286 SMLoc
getConstLoc(const OperandVector & Operands) const8287 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
8288 auto Test = [](const AMDGPUOperand& Op) {
8289 return Op.isImmKindConst();
8290 };
8291 return getOperandLoc(Test, Operands);
8292 }
8293
/// Parse a brace-enclosed, comma-separated list of "name: value" pairs into
/// \p Fields. Each field may appear at most once; unknown names are errors.
/// Returns NoMatch when there is no opening brace.
ParseStatus
AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
  if (!trySkipToken(AsmToken::LCurly))
    return ParseStatus::NoMatch;

  bool First = true;
  while (!trySkipToken(AsmToken::RCurly)) {
    // Fields after the first must be preceded by a comma.
    if (!First &&
        !skipToken(AsmToken::Comma, "comma or closing brace expected"))
      return ParseStatus::Failure;

    // Capture the name before consuming it so diagnostics point at it.
    StringRef Id = getTokenStr();
    SMLoc IdLoc = getLoc();
    if (!skipToken(AsmToken::Identifier, "field name expected") ||
        !skipToken(AsmToken::Colon, "colon expected"))
      return ParseStatus::Failure;

    const auto *I =
        find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
    if (I == Fields.end())
      return Error(IdLoc, "unknown field");
    if ((*I)->IsDefined)
      return Error(IdLoc, "duplicate field");

    // TODO: Support symbolic values.
    (*I)->Loc = getLoc();
    if (!parseExpr((*I)->Val))
      return ParseStatus::Failure;
    (*I)->IsDefined = true;

    First = false;
  }
  return ParseStatus::Success;
}
8328
validateStructuredOpFields(ArrayRef<const StructuredOpField * > Fields)8329 bool AMDGPUAsmParser::validateStructuredOpFields(
8330 ArrayRef<const StructuredOpField *> Fields) {
8331 return all_of(Fields, [this](const StructuredOpField *F) {
8332 return F->validate(*this);
8333 });
8334 }
8335
8336 //===----------------------------------------------------------------------===//
8337 // swizzle
8338 //===----------------------------------------------------------------------===//
8339
8340 LLVM_READNONE
8341 static unsigned
encodeBitmaskPerm(const unsigned AndMask,const unsigned OrMask,const unsigned XorMask)8342 encodeBitmaskPerm(const unsigned AndMask,
8343 const unsigned OrMask,
8344 const unsigned XorMask) {
8345 using namespace llvm::AMDGPU::Swizzle;
8346
8347 return BITMASK_PERM_ENC |
8348 (AndMask << BITMASK_AND_SHIFT) |
8349 (OrMask << BITMASK_OR_SHIFT) |
8350 (XorMask << BITMASK_XOR_SHIFT);
8351 }
8352
parseSwizzleOperand(int64_t & Op,const unsigned MinVal,const unsigned MaxVal,const Twine & ErrMsg,SMLoc & Loc)8353 bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8354 const unsigned MaxVal,
8355 const Twine &ErrMsg, SMLoc &Loc) {
8356 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8357 return false;
8358 }
8359 Loc = getLoc();
8360 if (!parseExpr(Op)) {
8361 return false;
8362 }
8363 if (Op < MinVal || Op > MaxVal) {
8364 Error(Loc, ErrMsg);
8365 return false;
8366 }
8367
8368 return true;
8369 }
8370
8371 bool
parseSwizzleOperands(const unsigned OpNum,int64_t * Op,const unsigned MinVal,const unsigned MaxVal,const StringRef ErrMsg)8372 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8373 const unsigned MinVal,
8374 const unsigned MaxVal,
8375 const StringRef ErrMsg) {
8376 SMLoc Loc;
8377 for (unsigned i = 0; i < OpNum; ++i) {
8378 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8379 return false;
8380 }
8381
8382 return true;
8383 }
8384
8385 bool
parseSwizzleQuadPerm(int64_t & Imm)8386 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8387 using namespace llvm::AMDGPU::Swizzle;
8388
8389 int64_t Lane[LANE_NUM];
8390 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8391 "expected a 2-bit lane id")) {
8392 Imm = QUAD_PERM_ENC;
8393 for (unsigned I = 0; I < LANE_NUM; ++I) {
8394 Imm |= Lane[I] << (LANE_SHIFT * I);
8395 }
8396 return true;
8397 }
8398 return false;
8399 }
8400
/// Parse the BROADCAST swizzle macro: a power-of-two group size in [2,32]
/// and a lane id within the group. Encoded as a bitmask perm whose AND mask
/// keeps the bits above the group and whose OR mask selects the lane.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx,
                          0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    // AND mask clears the low log2(GroupSize) bits; OR mask injects the lane.
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
8428
/// Parse the REVERSE swizzle macro: a power-of-two group size in [2,32].
/// Encoded as a bitmask perm that XORs the low lanes (XorMask of
/// GroupSize - 1 flips all bits within a group).
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}
8450
/// Parse the SWAP swizzle macro: a power-of-two group size in [1,16].
/// Unlike REVERSE, the XOR mask is GroupSize itself (a single bit), which
/// swaps adjacent groups rather than reversing within a group.
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           1, 16,
                           "group size must be in the interval [1,16]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}
8472
/// Parse the BITMASK_PERM swizzle macro: a 5-character mask string whose
/// characters (MSB first) are one of:
///   '0' - force the bit to 0, '1' - force it to 1,
///   'p' - preserve the bit,   'i' - invert the bit.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    // The first character controls the most significant bit.
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
8519
/// Parse the FFT swizzle macro (GFX9+ only): a single operand in
/// [0, FFT_SWIZZLE_MAX], OR-ed onto the FFT_MODE_ENC base encoding.
bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!AMDGPU::isGFX9Plus(getSTI())) {
    Error(getLoc(), "FFT mode swizzle not supported on this GPU");
    return false;
  }

  int64_t Swizzle;
  SMLoc Loc;
  if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
                           "FFT swizzle must be in the interval [0," +
                               Twine(FFT_SWIZZLE_MAX) + Twine(']'),
                           Loc))
    return false;

  Imm = FFT_MODE_ENC | Swizzle;
  return true;
}
8539
/// Parse the ROTATE swizzle macro (GFX9+ only): a direction (0 = left,
/// 1 = right) and a rotation size in [0, ROTATE_MAX_SIZE], packed onto the
/// ROTATE_MODE_ENC base encoding.
bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!AMDGPU::isGFX9Plus(getSTI())) {
    Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
    return false;
  }

  SMLoc Loc;
  int64_t Direction;

  if (!parseSwizzleOperand(Direction, 0, 1,
                           "direction must be 0 (left) or 1 (right)", Loc))
    return false;

  int64_t RotateSize;
  if (!parseSwizzleOperand(
          RotateSize, 0, ROTATE_MAX_SIZE,
          "number of threads to rotate must be in the interval [0," +
              Twine(ROTATE_MAX_SIZE) + Twine(']'),
          Loc))
    return false;

  Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
        (RotateSize << ROTATE_SIZE_SHIFT);
  return true;
}
8567
8568 bool
parseSwizzleOffset(int64_t & Imm)8569 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8570
8571 SMLoc OffsetLoc = getLoc();
8572
8573 if (!parseExpr(Imm, "a swizzle macro")) {
8574 return false;
8575 }
8576 if (!isUInt<16>(Imm)) {
8577 Error(OffsetLoc, "expected a 16-bit offset");
8578 return false;
8579 }
8580 return true;
8581 }
8582
/// Parse a "swizzle(MODE, args...)" macro after the "swizzle" identifier
/// has been consumed; dispatches on the mode name to the per-mode parsers.
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else if (trySkipId(IdSymbolic[ID_FFT])) {
      Ok = parseSwizzleFFT(Imm);
    } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
      Ok = parseSwizzleRotate(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}
8615
/// Parse the ds_swizzle offset operand: "offset:" followed by either a
/// swizzle(...) macro or a raw 16-bit value. An ImmTySwizzle operand is
/// pushed even on failure so subsequent diagnostics have an operand to
/// attach to.
ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? ParseStatus::Success : ParseStatus::Failure;
  }
  return ParseStatus::NoMatch;
}
8637
/// True when this operand is a ds_swizzle offset immediate.
bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
8642
8643 //===----------------------------------------------------------------------===//
8644 // VGPR Index Mode
8645 //===----------------------------------------------------------------------===//
8646
/// Parse the body of a gpr_idx(...) macro after the opening parenthesis:
/// a (possibly empty) comma-separated list of distinct VGPR index modes.
/// Returns the combined mode bitmask, OFF for an empty list, or UNDEF after
/// reporting an error.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  // "gpr_idx()" means all modes off.
  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Try each symbolic mode name; each maps to one bit of the mask.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}
8690
/// Parse a VGPR index mode operand: either the symbolic gpr_idx(...) macro
/// or a raw 4-bit immediate mask. Pushes an ImmTyGprIdxMode operand.
ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    if (Imm == UNDEF)
      return ParseStatus::Failure;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return ParseStatus::Failure;
    if (Imm < 0 || !isUInt<4>(Imm))
      return Error(S, "invalid immediate: only 4-bit values are legal");
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return ParseStatus::Success;
}
8713
/// True when this operand is a VGPR index mode immediate.
bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}
8717
8718 //===----------------------------------------------------------------------===//
8719 // sopp branch targets
8720 //===----------------------------------------------------------------------===//
8721
/// Parse a SOPP branch target: a label (symbol reference) or an absolute
/// 16-bit signed offset. Errors are reported but Success is still returned
/// once an operand has been pushed.
ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return ParseStatus::NoMatch;

  if (!parseExpr(Operands))
    return ParseStatus::Failure;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return ParseStatus::Success;
}
8747
8748 //===----------------------------------------------------------------------===//
8749 // Boolean holding registers
8750 //===----------------------------------------------------------------------===//
8751
/// Parse a boolean-holding register operand; delegates to the generic
/// register parser (register-class checking happens later).
ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}
8755
8756 //===----------------------------------------------------------------------===//
8757 // mubuf
8758 //===----------------------------------------------------------------------===//
8759
/// Convert parsed MUBUF operands into MCInst operands. For atomic-with-
/// return instructions the destination register is duplicated as a tied
/// source. Optional immediates (offset, cache policy) are appended last.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic) {
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
}
8806
8807 //===----------------------------------------------------------------------===//
8808 // smrd
8809 //===----------------------------------------------------------------------===//
8810
/// True for an immediate literal that fits the 8-bit SMRD offset field.
bool AMDGPUOperand::isSMRDOffset8() const {
  return isImmLiteral() && isUInt<8>(getImm());
}
8814
/// True for any immediate literal usable as an SMEM offset.
bool AMDGPUOperand::isSMEMOffset() const {
  // Offset range is checked later by validator.
  return isImmLiteral();
}
8819
/// True for an SMRD offset that needs a 32-bit literal encoding.
bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}
8825
8826 //===----------------------------------------------------------------------===//
8827 // vop3
8828 //===----------------------------------------------------------------------===//
8829
ConvertOmodMul(int64_t & Mul)8830 static bool ConvertOmodMul(int64_t &Mul) {
8831 if (Mul != 1 && Mul != 2 && Mul != 4)
8832 return false;
8833
8834 Mul >>= 1;
8835 return true;
8836 }
8837
ConvertOmodDiv(int64_t & Div)8838 static bool ConvertOmodDiv(int64_t &Div) {
8839 if (Div == 1) {
8840 Div = 0;
8841 return true;
8842 }
8843
8844 if (Div == 2) {
8845 Div = 3;
8846 return true;
8847 }
8848
8849 return false;
8850 }
8851
8852 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8853 // This is intentional and ensures compatibility with sp3.
8854 // See bug 35397 for details.
convertDppBoundCtrl(int64_t & BoundCtrl)8855 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8856 if (BoundCtrl == 0 || BoundCtrl == 1) {
8857 if (!isGFX11Plus())
8858 BoundCtrl = 1;
8859 return true;
8860 }
8861 return false;
8862 }
8863
/// Hook run when parsing of a file begins: initializes the target ID from
/// the subtarget feature string (if not already set) and emits the
/// .amdgcn_target directive for HSA ABIs. Does nothing for r600 or when no
/// target streamer is registered.
void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(),
                                           getSTI().getFeatureString());

  if (isHsaAbi(getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
8876
8877 /// Parse AMDGPU specific expressions.
8878 ///
8879 /// expr ::= or(expr, ...) |
8880 /// max(expr, ...)
8881 ///
// Recognizes the AMDGPU function-style expressions or(...), max(...),
// extrasgprs(...), totalnumvgprs(...), alignto(...), occupancy(...) and
// builds an AMDGPUMCExpr for them; anything else falls through to the
// generic primary-expression parser. Returns true on error (MCAsmParser
// convention).
bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  using AGVK = AMDGPUMCExpr::VariantKind;

  if (isToken(AsmToken::Identifier)) {
    StringRef TokenId = getTokenStr();
    AGVK VK = StringSwitch<AGVK>(TokenId)
                  .Case("max", AGVK::AGVK_Max)
                  .Case("or", AGVK::AGVK_Or)
                  .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
                  .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
                  .Case("alignto", AGVK::AGVK_AlignTo)
                  .Case("occupancy", AGVK::AGVK_Occupancy)
                  .Default(AGVK::AGVK_None);

    // Only treat the identifier as a function when a '(' follows.
    if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
      SmallVector<const MCExpr *, 4> Exprs;
      uint64_t CommaCount = 0;
      lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
      lex(); // Eat '('
      while (true) {
        if (trySkipToken(AsmToken::RParen)) {
          if (Exprs.empty()) {
            Error(getToken().getLoc(),
                  "empty " + Twine(TokenId) + " expression");
            return true;
          }
          // N arguments require exactly N-1 commas.
          if (CommaCount + 1 != Exprs.size()) {
            Error(getToken().getLoc(),
                  "mismatch of commas in " + Twine(TokenId) + " expression");
            return true;
          }
          Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
          return false;
        }
        const MCExpr *Expr;
        if (getParser().parseExpression(Expr, EndLoc))
          return true;
        Exprs.push_back(Expr);
        bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
        if (LastTokenWasComma)
          CommaCount++;
        if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
          Error(getToken().getLoc(),
                "unexpected token in " + Twine(TokenId) + " expression");
          return true;
        }
      }
    }
  }
  return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
}
8933
parseOModSI(OperandVector & Operands)8934 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8935 StringRef Name = getTokenStr();
8936 if (Name == "mul") {
8937 return parseIntWithPrefix("mul", Operands,
8938 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8939 }
8940
8941 if (Name == "div") {
8942 return parseIntWithPrefix("div", Operands,
8943 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8944 }
8945
8946 return ParseStatus::NoMatch;
8947 }
8948
8949 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8950 // the number of src operands present, then copies that bit into src0_modifiers.
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
  int Opc = Inst.getOpcode();
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx == -1)
    return;

  // Count how many src operands exist; DST_OP_SEL is the op_sel bit just
  // past the last src bit.
  int SrcNum;
  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
       ++SrcNum)
    ;
  assert(SrcNum > 0);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
  if (DstIdx == -1)
    return;

  const MCOperand &DstOp = Inst.getOperand(DstIdx);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
  if (DstOp.isReg() &&
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
    // 16-bit destination register: the hi/lo half is implied by the
    // register itself rather than by the op_sel immediate.
    if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
      ModVal |= SISrcMods::DST_OP_SEL;
  } else {
    // Otherwise copy the DST_OP_SEL bit out of the op_sel immediate.
    if ((OpSel & (1 << SrcNum)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;
  }
  Inst.getOperand(ModIdx).setImm(ModVal);
}
8984
/// VOP3 conversion that additionally mirrors DST_OP_SEL into
/// src0_modifiers (see cvtVOP3DstOpSelOnly).
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}
8990
/// Overload that reuses a caller-provided optional-operand index map.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}
8996
/// True when MCInst operand \p OpNum is an input-modifiers operand paired
/// with a following, untied register operand.
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  return
      // 1. This operand is input modifiers
      Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.operands()[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1,
                                   MCOI::OperandConstraint::TIED_TO) == -1;
}
9009
/// Convert parsed VOP3 interpolation operands into MCInst operands. Interp
/// slot/attr/channel immediates are added inline; other immediate
/// modifiers (high, clamp, omod) are recorded and appended in a fixed
/// order when the opcode has the corresponding named operand.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operand 0 is the mnemonic; defs come first among the rest.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Source with FP input modifiers: emits modifier + value operands.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
               Op.isInterpAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyHigh);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);
}
9047
/// Convert parsed VINTERP operands into MCInst operands, then distribute
/// the op_sel immediate into the per-source modifier operands: bit J sets
/// OP_SEL_0 on srcJ_modifiers, and bit 3 sets DST_OP_SEL on
/// src0_modifiers.
void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operand 0 is the mnemonic; defs come first among the rest.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Source with FP input modifiers: emits modifier + value operands.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);

  // Without an op_sel operand there is nothing to propagate.
  if (OpSelIdx == -1)
    return;

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;
    if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
        (OpSel & (1 << 3)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;

    Inst.getOperand(ModIdx).setImm(ModVal);
  }
}
// Convert parsed scaled-MFMA operands into MCInst operands. The MCInst
// operand order differs from the parsed order, so placeholder cbsz/blgp
// immediates are inserted at their MCInst positions first and patched from
// the parsed values afterwards; op_sel/op_sel_hi are folded into the
// src0/src1 modifier operands at the end.
void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  unsigned I = 1;
  int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);

  const MCInstrDesc &Desc = MII.get(Opc);

  // Operands[0] is the mnemonic; copy the def registers first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
    static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
    int NumOperands = Inst.getNumOperands();
    // The order of operands in MCInst and parsed operands are different.
    // Adding dummy cbsz and blgp operands at corresponding MCInst operand
    // indices for parsing scale values correctly.
    if (NumOperands == CbszOpIdx) {
      Inst.addOperand(MCOperand::createImm(0));
      Inst.addOperand(MCOperand::createImm(0));
    }
    if (isRegOrImmWithInputMods(Desc, NumOperands)) {
      // Emits two MCInst operands: the modifier mask and the value.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, 1);
    }
  }

  // Insert CBSZ and BLGP operands for F8F6F4 variants
  auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
  if (CbszIdx != OptionalIdx.end()) {
    int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
    Inst.getOperand(CbszOpIdx).setImm(CbszVal);
  }

  int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
  if (BlgpIdx != OptionalIdx.end()) {
    int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
    Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
  }

  // Add dummy src_modifiers
  Inst.addOperand(MCOperand::createImm(0));
  Inst.addOperand(MCOperand::createImm(0));

  // Handle op_sel fields

  unsigned OpSel = 0;
  auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
  if (OpselIdx != OptionalIdx.end()) {
    OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
                .getImm();
  }

  unsigned OpSelHi = 0;
  auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
  if (OpselHiIdx != OptionalIdx.end()) {
    OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
                  .getImm();
  }
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers};

  // Fold op_sel/op_sel_hi bit J into the modifiers of source J.
  for (unsigned J = 0; J < 2; ++J) {
    unsigned ModVal = 0;
    if (OpSel & (1 << J))
      ModVal |= SISrcMods::OP_SEL_0;
    if (OpSelHi & (1 << J))
      ModVal |= SISrcMods::OP_SEL_1;

    const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    Inst.getOperand(ModIdx).setImm(ModVal);
  }
}
9184
// Common VOP3 conversion: copy defs, then sources (with FP input modifiers
// where allowed), record optional immediate modifiers into OptionalIdx, and
// append byte_sel/clamp/omod in encoding order. For MAC/FMAC opcodes the
// tied src2 (and its zeroed modifiers) is materialized at the end.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // Operands[0] is the mnemonic; copy the def registers first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Emits two MCInst operands: the modifier mask and the value.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, 1);
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
    // vdst_in (when present) precedes byte_sel and mirrors the destination.
    if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
      Inst.addOperand(Inst.getOperand(0));
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyByteSel);
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (isMAC(Opc)) {
    auto *it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}
9234
cvtVOP3(MCInst & Inst,const OperandVector & Operands)9235 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9236 OptionalImmIndexMap OptionalIdx;
9237 cvtVOP3(Inst, Operands, OptionalIdx);
9238 }
9239
// VOP3P-specific conversion tail: appends implicit/placeholder operands for
// the special conversion opcodes, adds the optional packed-math modifiers
// (bitop3, op_sel, op_sel_hi, matrix reuse, neg_lo, neg_hi), and then folds
// the op_sel/op_sel_hi/neg bits into each source's modifier operand.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  // These opcodes get a zero src2_modifiers placeholder plus a copy of the
  // destination appended here.
  if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
      Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
    Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
    Inst.addOperand(Inst.getOperand(0));
  }

  // Adding vdst_in operand is already covered for these DPP instructions in
  // cvtVOP3DPP.
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
    // vdst_in mirrors the destination register.
    Inst.addOperand(Inst.getOperand(0));
  }

  int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed instructions default op_sel_hi to all-ones (-1).
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAReuse, 0);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBReuse, 0);

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);

  int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
  if (NegHiIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1)
    NegLo = Inst.getOperand(NegLoIdx).getImm();

  if (NegHiIdx != -1)
    NegHi = Inst.getOperand(NegHiIdx).getImm();

  // Fold bit J of each packed modifier into the modifier operand of srcJ.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    if (ModIdx == -1)
      continue;

    uint32_t ModVal = 0;

    const MCOperand &SrcOp = Inst.getOperand(OpIdx);
    if (SrcOp.isReg() && getMRI()
                             ->getRegClass(AMDGPU::VGPR_16RegClassID)
                             .contains(SrcOp.getReg())) {
      // For true-16 VGPR sources the half-register suffix (.h/.l) decides
      // the op_sel bit, not the parsed op_sel immediate.
      bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
      if (VGPRSuffixIsHi)
        ModVal |= SISrcMods::OP_SEL_0;
    } else {
      if ((OpSel & (1 << J)) != 0)
        ModVal |= SISrcMods::OP_SEL_0;
    }

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
9370
cvtVOP3P(MCInst & Inst,const OperandVector & Operands)9371 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9372 OptionalImmIndexMap OptIdx;
9373 cvtVOP3(Inst, Operands, OptIdx);
9374 cvtVOP3P(Inst, Operands, OptIdx);
9375 }
9376
addSrcModifiersAndSrc(MCInst & Inst,const OperandVector & Operands,unsigned i,unsigned Opc,AMDGPU::OpName OpName)9377 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9378 unsigned i, unsigned Opc,
9379 AMDGPU::OpName OpName) {
9380 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9381 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9382 else
9383 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9384 }
9385
// Convert parsed SWMMAC operands. The MCInst layout is fixed: vdst, src0
// (+modifiers if the opcode has them), src1 (+modifiers), a second copy of
// vdst as the tied input, then src2; trailing parsed operands are optional
// immediates appended in encoding order.
void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
  addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
  addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2

  // Index the remaining (optional immediate) parsed operands by type.
  OptionalImmIndexMap OptIdx;
  for (unsigned i = 5; i < Operands.size(); ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    OptIdx[Op.getImmTy()] = i;
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey8bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey16bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey32bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);

  // Shared VOP3P tail handles op_sel/neg folding etc.
  cvtVOP3P(Inst, Operands, OptIdx);
}
9418
9419 //===----------------------------------------------------------------------===//
9420 // VOPD
9421 //===----------------------------------------------------------------------===//
9422
// Parse the "::" separator of a dual-issue VOPD instruction followed by the
// VOPDY mnemonic, e.g. "v_dual_mov_b32 ... :: v_dual_add_f32 ...". Returns
// NoMatch when VOPD is unsupported or no "::" is present.
ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
  if (!hasVOPD(getSTI()))
    return ParseStatus::NoMatch;

  // The separator is two consecutive ':' tokens.
  if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
    SMLoc S = getLoc();
    lex();
    lex();
    Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
    SMLoc OpYLoc = getLoc();
    StringRef OpYName;
    if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
      Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
      return ParseStatus::Success;
    }
    return Error(OpYLoc, "expected a VOPDY instruction after ::");
  }
  return ParseStatus::NoMatch;
}
9442
// Create VOPD MCInst operands using parsed assembler operands.
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // Append the parsed operand at ParsedOprIdx to Inst, honoring FP input
  // modifiers when the next MCInst slot allows them.
  auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      return;
    }
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      return;
    }
    if (Op.isImm()) {
      Op.addImmOperands(Inst, 1);
      return;
    }
    llvm_unreachable("Unhandled operand type in cvtVOPD");
  };

  const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);

  // MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]

  for (auto CompIdx : VOPD::COMPONENTS) {
    addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
  }

  for (auto CompIdx : VOPD::COMPONENTS) {
    const auto &CInfo = InstInfo[CompIdx];
    auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
    for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
      addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
    // src2 accumulator reuses the component's destination.
    if (CInfo.hasSrc2Acc())
      addOp(CInfo.getIndexOfDstInParsedOperands());
  }

  // If the opcode has a bitop3 operand, it was parsed last; append it (or
  // its default) via the optional-immediate mechanism.
  int BitOp3Idx =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    OptionalImmIndexMap OptIdx;
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
    if (Op.isImm())
      OptIdx[Op.getImmTy()] = Operands.size() - 1;

    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
  }
}
9493
9494 //===----------------------------------------------------------------------===//
9495 // dpp
9496 //===----------------------------------------------------------------------===//
9497
isDPP8() const9498 bool AMDGPUOperand::isDPP8() const {
9499 return isImmTy(ImmTyDPP8);
9500 }
9501
isDPPCtrl() const9502 bool AMDGPUOperand::isDPPCtrl() const {
9503 using namespace AMDGPU::DPP;
9504
9505 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9506 if (result) {
9507 int64_t Imm = getImm();
9508 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9509 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9510 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9511 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9512 (Imm == DppCtrl::WAVE_SHL1) ||
9513 (Imm == DppCtrl::WAVE_ROL1) ||
9514 (Imm == DppCtrl::WAVE_SHR1) ||
9515 (Imm == DppCtrl::WAVE_ROR1) ||
9516 (Imm == DppCtrl::ROW_MIRROR) ||
9517 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9518 (Imm == DppCtrl::BCAST15) ||
9519 (Imm == DppCtrl::BCAST31) ||
9520 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9521 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9522 }
9523 return false;
9524 }
9525
9526 //===----------------------------------------------------------------------===//
9527 // mAI
9528 //===----------------------------------------------------------------------===//
9529
isBLGP() const9530 bool AMDGPUOperand::isBLGP() const {
9531 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9532 }
9533
isS16Imm() const9534 bool AMDGPUOperand::isS16Imm() const {
9535 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9536 }
9537
isU16Imm() const9538 bool AMDGPUOperand::isU16Imm() const {
9539 return isImmLiteral() && isUInt<16>(getImm());
9540 }
9541
9542 //===----------------------------------------------------------------------===//
9543 // dim
9544 //===----------------------------------------------------------------------===//
9545
// Parse a MIMG dimension name (e.g. "1D", "2D_ARRAY", optionally prefixed
// with "SQ_RSRC_IMG_") into its hardware encoding. Returns false if the
// upcoming tokens do not form a known dimension name; no error is reported.
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    // The identifier must immediately follow the integer (no gap between
    // the integer's end and the next token's start).
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  // Allow the optional "SQ_RSRC_IMG_" prefix.
  DimId.consume_front("SQ_RSRC_IMG_");

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}
9573
// Parse a "dim:<name>" MIMG dimension modifier (gfx10+ only) and push it as
// an ImmTyDim operand.
ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return ParseStatus::NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return ParseStatus::NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding))
    return Error(Loc, "invalid dim value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return ParseStatus::Success;
}
9592
9593 //===----------------------------------------------------------------------===//
9594 // dpp
9595 //===----------------------------------------------------------------------===//
9596
// Parse a dpp8:[s0,s1,...,s7] selector list (gfx10+). Each of the eight
// selectors is a 3-bit lane index; they are packed, 3 bits per lane, into a
// single ImmTyDPP8 immediate.
ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return ParseStatus::NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return ParseStatus::Failure;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return ParseStatus::Failure;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return ParseStatus::Failure;
    if (0 > Sels[i] || 7 < Sels[i])
      return Error(Loc, "expected a 3-bit value");
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return ParseStatus::Failure;

  // Pack the eight selectors, 3 bits each, lane 0 in the low bits.
  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return ParseStatus::Success;
}
9631
9632 bool
isSupportedDPPCtrl(StringRef Ctrl,const OperandVector & Operands)9633 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9634 const OperandVector &Operands) {
9635 if (Ctrl == "row_newbcast")
9636 return isGFX90A();
9637
9638 if (Ctrl == "row_share" ||
9639 Ctrl == "row_xmask")
9640 return isGFX10Plus();
9641
9642 if (Ctrl == "wave_shl" ||
9643 Ctrl == "wave_shr" ||
9644 Ctrl == "wave_rol" ||
9645 Ctrl == "wave_ror" ||
9646 Ctrl == "row_bcast")
9647 return isVI() || isGFX9();
9648
9649 return Ctrl == "row_mirror" ||
9650 Ctrl == "row_half_mirror" ||
9651 Ctrl == "quad_perm" ||
9652 Ctrl == "row_shl" ||
9653 Ctrl == "row_shr" ||
9654 Ctrl == "row_ror";
9655 }
9656
// Parse the bracketed lane list of quad_perm:[a,b,c,d]. Each element is a
// 2-bit lane select; the four selects are packed, 2 bits per lane, into the
// control value. Returns -1 on malformed input (an error may have been
// reported by a skipped-token helper or explicitly below).
int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    // Pack lane i's select into bits [2*i+1 : 2*i].
    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}
9686
// Parse the integer argument of a "<ctrl>:<value>" DPP control and map it to
// the control's encoding, validating the per-control value range. Returns -1
// (after reporting an error) when the value is out of range.
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  // Base encoding plus the inclusive range of accepted values for one
  // control keyword.
  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,         1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,         1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,         1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,         1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,          1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,          1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,          1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST,   0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST,   0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    // Only row_bcast falls through the table; it accepts exactly 15 or 31.
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    // Single-valued controls use the encoding directly; ranged controls OR
    // the value into the base encoding.
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}
9734
// Parse a DPP control operand (row_mirror, quad_perm:[...], row_shl:N, ...)
// and push it as an ImmTyDppCtrl immediate.
ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return ParseStatus::NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    // All remaining controls take a ':' followed by either a lane list
    // (quad_perm) or an integer argument.
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  // The helpers return -1 (after reporting an error) on malformed input.
  if (Val == -1)
    return ParseStatus::Failure;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return ParseStatus::Success;
}
9769
// Convert parsed operands of a VOP3-encoded instruction carrying a DPP or
// DPP8 control into MCInst operands. Handles the irregular layouts: the
// untied 'old' operand and dummy src2_modifiers of MAC opcodes, vdst_in
// copies of the destination, the dummy src2 pair of the CVT_SR conversions,
// tied operands, and finally the DPP control fields themselves.
void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                                 bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // MAC instructions are special because they have 'old'
  // operand which is not tied to dst (but assumed to be).
  // They also have dummy unused src2_modifiers.
  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
  int Src2ModIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
               Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;

  unsigned I = 1;
  // Operands[0] is the mnemonic; copy the def registers first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
  bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;

  for (unsigned E = Operands.size(); I != E; ++I) {

    if (IsMAC) {
      int NumOperands = Inst.getNumOperands();
      if (OldIdx == NumOperands) {
        // Handle old operand
        constexpr int DST_IDX = 0;
        Inst.addOperand(Inst.getOperand(DST_IDX));
      } else if (Src2ModIdx == NumOperands) {
        // Add unused dummy src2_modifiers
        Inst.addOperand(MCOperand::createImm(0));
      }
    }

    // vdst_in mirrors the destination register.
    if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
      Inst.addOperand(Inst.getOperand(0));
    }

    if (IsVOP3CvtSrDpp) {
      // These conversions get a zero src2_modifiers and an empty src2.
      if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
        Inst.addOperand(MCOperand::createImm(0));
        Inst.addOperand(MCOperand::createReg(MCRegister()));
      }
    }

    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (IsDPP8 && Op.isDppFI()) {
      // FI is appended after the dpp8 selector at the end.
      Fi = Op.getImm();
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
      assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyByteSel);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);

  // Delegate the modifier tail to the matching encoding family.
  if (Desc.TSFlags & SIInstrFlags::VOP3P)
    cvtVOP3P(Inst, Operands, OptionalIdx);
  else if (Desc.TSFlags & SIInstrFlags::VOP3)
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);
  else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
  }

  // Finally append the DPP control fields in encoding order.
  if (IsDPP8) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // 0xe4 is quad_perm:[0,1,2,3], the identity permutation.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);

    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
  }
}
9882
// Convert parsed operands of a (non-VOP3) DPP or DPP8 instruction into
// MCInst operands: defs first, then sources (with FP modifiers where
// allowed), then the DPP control fields in encoding order.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // Operands[0] is the mnemonic; copy the def registers first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDppFI()) {
        // FI is appended after the selector below.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // Row/bank masks default to all lanes enabled (0xf).
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
    }
  }
}
9950
9951 //===----------------------------------------------------------------------===//
9952 // sdwa
9953 //===----------------------------------------------------------------------===//
9954
/// Parse an SDWA sub-dword selector operand such as "dst_sel:BYTE_0".
/// \p Prefix is the operand keyword (e.g. "dst_sel", "src0_sel") and \p Type
/// tags the resulting immediate. Accepts the symbolic names below or, per
/// parseStringOrIntWithPrefix, an integer value.
ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
                                          StringRef Prefix,
                                          AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      Operands, Prefix,
      {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
      Type);
}
9963
/// Parse the SDWA "dst_unused:UNUSED_*" operand, which selects how the
/// unwritten portions of the destination are treated.
ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  return parseStringOrIntWithPrefix(
      Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
      AMDGPUOperand::ImmTySDWADstUnused);
}
9969
// SDWA VOP1: no vcc operands need skipping.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
9973
// SDWA VOP2: no vcc operands need skipping.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
9977
// SDWA VOP2b (carry-out variants): skip both the "vcc" dst and "vcc" src
// tokens (SkipDstVcc = SkipSrcVcc = true).
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}
9981
// SDWA VOP2e: skip only the "vcc" src token (SkipDstVcc = false,
// SkipSrcVcc = true).
void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}
9985
// SDWA VOPC: the "vcc" dst token is only present (and skipped) on VI.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
9989
/// Convert the parsed operand list of an SDWA instruction into \p Inst.
/// \p BasicInstType (VOP1/VOP2/VOPC) selects which optional sdwa operands
/// (clamp, omod, dst_sel, dst_unused, src0_sel, src1_sel) are appended with
/// defaults when the user omitted them. \p SkipDstVcc / \p SkipSrcVcc drop
/// the textual "vcc" dst/src operand of VOP2b-style instructions, which must
/// not be emitted as an explicit MCInst operand.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  // Operands[0] is the mnemonic token; emit the destination register(s) first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      }
      if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClamp, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstUnused,
                              DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClamp, 0);

      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // VOPC has no dst_sel/dst_unused: the destination is VCC/SGPR.
      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClamp, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto *it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
10099
10100 /// Force static initialization.
/// Force static initialization.
/// Registers this asm parser with both targets served by the AMDGPU backend
/// (R600 and GCN).
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
10106
10107 #define GET_REGISTER_MATCHER
10108 #define GET_MATCHER_IMPLEMENTATION
10109 #define GET_MNEMONIC_SPELL_CHECKER
10110 #define GET_MNEMONIC_CHECKER
10111 #include "AMDGPUGenAsmMatcher.inc"
10112
parseCustomOperand(OperandVector & Operands,unsigned MCK)10113 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10114 unsigned MCK) {
10115 switch (MCK) {
10116 case MCK_addr64:
10117 return parseTokenOp("addr64", Operands);
10118 case MCK_done:
10119 return parseTokenOp("done", Operands);
10120 case MCK_idxen:
10121 return parseTokenOp("idxen", Operands);
10122 case MCK_lds:
10123 return parseTokenOp("lds", Operands);
10124 case MCK_offen:
10125 return parseTokenOp("offen", Operands);
10126 case MCK_off:
10127 return parseTokenOp("off", Operands);
10128 case MCK_row_95_en:
10129 return parseTokenOp("row_en", Operands);
10130 case MCK_gds:
10131 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10132 case MCK_tfe:
10133 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10134 }
10135 return tryCustomParseOperand(Operands, MCK);
10136 }
10137
10138 // This function should be defined after auto-generated include so that we have
10139 // MatchClassKind enum defined
validateTargetOperandClass(MCParsedAsmOperand & Op,unsigned Kind)10140 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10141 unsigned Kind) {
10142 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10143 // But MatchInstructionImpl() expects to meet token and fails to validate
10144 // operand. This method checks if we are given immediate operand but expect to
10145 // get corresponding token.
10146 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10147 switch (Kind) {
10148 case MCK_addr64:
10149 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10150 case MCK_gds:
10151 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10152 case MCK_lds:
10153 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10154 case MCK_idxen:
10155 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10156 case MCK_offen:
10157 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10158 case MCK_tfe:
10159 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10160 case MCK_SSrc_b32:
10161 // When operands have expression values, they will return true for isToken,
10162 // because it is not possible to distinguish between a token and an
10163 // expression at parse time. MatchInstructionImpl() will always try to
10164 // match an operand as a token, when isToken returns true, and when the
10165 // name of the expression is not a valid token, the match will fail,
10166 // so we need to handle it here.
10167 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10168 case MCK_SSrc_f32:
10169 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10170 case MCK_SOPPBrTarget:
10171 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10172 case MCK_VReg32OrOff:
10173 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10174 case MCK_InterpSlot:
10175 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10176 case MCK_InterpAttr:
10177 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10178 case MCK_InterpAttrChan:
10179 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10180 case MCK_SReg_64:
10181 case MCK_SReg_64_XEXEC:
10182 // Null is defined as a 32-bit register but
10183 // it should also be enabled with 64-bit operands or larger.
10184 // The following code enables it for SReg_64 and larger operands
10185 // used as source and destination. Remaining source
10186 // operands are handled in isInlinableImm.
10187 case MCK_SReg_96:
10188 case MCK_SReg_128:
10189 case MCK_SReg_256:
10190 case MCK_SReg_512:
10191 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10192 default:
10193 return Match_InvalidOperand;
10194 }
10195 }
10196
10197 //===----------------------------------------------------------------------===//
10198 // endpgm
10199 //===----------------------------------------------------------------------===//
10200
parseEndpgm(OperandVector & Operands)10201 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10202 SMLoc S = getLoc();
10203 int64_t Imm = 0;
10204
10205 if (!parseExpr(Imm)) {
10206 // The operand is optional, if not present default to 0
10207 Imm = 0;
10208 }
10209
10210 if (!isUInt<16>(Imm))
10211 return Error(S, "expected a 16-bit value");
10212
10213 Operands.push_back(
10214 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10215 return ParseStatus::Success;
10216 }
10217
// True iff this operand is an immediate tagged ImmTyEndpgm by parseEndpgm().
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10219
10220 //===----------------------------------------------------------------------===//
10221 // Split Barrier
10222 //===----------------------------------------------------------------------===//
10223
// A split-barrier operand accepts any 32-bit inlinable immediate.
bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
10225