//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

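  // Source operand modifiers as written in assembly, e.g. abs(v0) or |v0|,
  // neg(v1) or -v1, sext(v2), and the explicit lit() literal wrapper.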
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;
    bool Lit = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
      return 0;
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

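  // Kinds of immediate operands. ImmTyNone denotes a plain literal; the other
  // values identify named instruction fields and modifiers (offset, dmask,
  // clamp, DPP controls, etc.).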
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClamp,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
    ImmTyByteSel,
  };

  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report location of mandatory operands only for VOPD,
  // when both OpX and OpY have a KImm and there are no other literals.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyMandatoryLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

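  // Value storage; the active member is selected by Kind.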
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }

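  // The predicates below classify operands for the auto-generated matcher
  // (AMDGPUGenAsmMatcher.inc); their names mirror the operand classes defined
  // in the TableGen files.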
  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrc_b64() || isLiteralImm(MVT::i64);
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcTB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }

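  // KImm operands are mandatory literals encoded directly in the instruction
  // word (e.g. the immediates of the madmk/madak-style VOP2 variants).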
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [=](){ return P(*this); };
  }

  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;
    }
    // clang-format on
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

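  // Track the highest register index seen so far and publish the counts via
  // the .kernel.sgpr_count / .kernel.vgpr_count / .kernel.agpr_count symbols.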
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          const MCExpr *NextFreeVGPR, SMRange VGPRRange,
                          const MCExpr *NextFreeSGPR, SMRange SGPRRange,
                          const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
                         unsigned RegWidth, SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

public:
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

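  // Maps an optional immediate's type to its position in the parsed operand
  // list, used when converting parsed operands into an MCInst.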
  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    FeatureBitset FB = getFeatureBits();
    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {
      // If there is no default wave size it must be a generation before gfx10,
      // these have FeatureWavefrontSize64 in their definition already. For
      // gfx10+ set wave32 as a default.
      copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());

    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
                           OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);

  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);

  ParseStatus
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     std::function<bool(int64_t &)> ConvertResult = nullptr);

  ParseStatus parseOperandArrayWithPrefix(
      const char *Prefix, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);

  ParseStatus
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
  ParseStatus parseCPol(OperandVector &Operands);
  ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
  ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
  ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
                                    SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
                       bool HasLit = false);
  ParseStatus parseReg(OperandVector &Operands);
  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
                            bool HasLit = false);
  ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                           bool AllowImm = true);
  ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                            bool AllowImm = true);
  ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
  ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
  ParseStatus parseVReg32OrOff(OperandVector &Operands);
  ParseStatus tryParseIndexKey(OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy);
  ParseStatus parseIndexKey8bit(OperandVector &Operands);
  ParseStatus parseIndexKey16bit(OperandVector &Operands);

  ParseStatus parseDfmtNfmt(int64_t &Format);
  ParseStatus parseUfmt(int64_t &Format);
  ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
                                       int64_t &Format);
  ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
                                         int64_t &Format);
  ParseStatus parseFORMAT(OperandVector &Operands);
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  ParseStatus parseNumericFormat(int64_t &Format);
  ParseStatus parseFlatOffset(OperandVector &Operands);
  ParseStatus parseR128A16(OperandVector &Operands);
  ParseStatus parseBLGP(OperandVector &Operands);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  ParseStatus parseSWaitCnt(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  ParseStatus parseDepCtr(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  ParseStatus parseSDelayALU(OperandVector &Operands);

  ParseStatus parseHwreg(OperandVector &Operands);

private:
1675 struct OperandInfoTy {
1676 SMLoc Loc;
1677 int64_t Val;
1678 bool IsSymbolic = false;
1679 bool IsDefined = false;
1680
1681     OperandInfoTy(int64_t Val) : Val(Val) {}
1682 };
1683
1684 struct StructuredOpField : OperandInfoTy {
1685 StringLiteral Id;
1686 StringLiteral Desc;
1687 unsigned Width;
1688 bool IsDefined = false;
1689
1690     StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1691 int64_t Default)
1692 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1693 virtual ~StructuredOpField() = default;
1694
1695     bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1696 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1697 return false;
1698 }
1699
1700     virtual bool validate(AMDGPUAsmParser &Parser) const {
1701 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1702 return Error(Parser, "not supported on this GPU");
1703 if (!isUIntN(Width, Val))
1704 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1705 return true;
1706 }
1707 };
1708
1709 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1710 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1711
1712 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1713 bool validateSendMsg(const OperandInfoTy &Msg,
1714 const OperandInfoTy &Op,
1715 const OperandInfoTy &Stream);
1716
1717 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1718 OperandInfoTy &Width);
1719
1720 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1721 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1722 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1723
1724 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1725 const OperandVector &Operands) const;
1726 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1727 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1728 SMLoc getLitLoc(const OperandVector &Operands,
1729 bool SearchMandatoryLiterals = false) const;
1730 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1731 SMLoc getConstLoc(const OperandVector &Operands) const;
1732 SMLoc getInstLoc(const OperandVector &Operands) const;
1733
1734 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1735 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1736 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1737 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1738 bool validateSOPLiteral(const MCInst &Inst) const;
1739 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1740 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1741 const OperandVector &Operands);
1742 bool validateIntClampSupported(const MCInst &Inst);
1743 bool validateMIMGAtomicDMask(const MCInst &Inst);
1744 bool validateMIMGGatherDMask(const MCInst &Inst);
1745 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1746 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1747 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1748 bool validateMIMGD16(const MCInst &Inst);
1749 bool validateMIMGMSAA(const MCInst &Inst);
1750 bool validateOpSel(const MCInst &Inst);
1751 bool validateNeg(const MCInst &Inst, int OpName);
1752 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1753 bool validateVccOperand(unsigned Reg) const;
1754 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1755 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1756 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1757 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1758 bool validateAGPRLdSt(const MCInst &Inst) const;
1759 bool validateVGPRAlign(const MCInst &Inst) const;
1760 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1761 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1762 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1763 bool validateDivScale(const MCInst &Inst);
1764 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1765 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1766 const SMLoc &IDLoc);
1767 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1768 const unsigned CPol);
1769 bool validateExeczVcczOperands(const OperandVector &Operands);
1770 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1771 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1772 unsigned getConstantBusLimit(unsigned Opcode) const;
1773 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1774 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1775 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1776
1777 bool isSupportedMnemo(StringRef Mnemo,
1778 const FeatureBitset &FBS);
1779 bool isSupportedMnemo(StringRef Mnemo,
1780 const FeatureBitset &FBS,
1781 ArrayRef<unsigned> Variants);
1782 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1783
1784 bool isId(const StringRef Id) const;
1785 bool isId(const AsmToken &Token, const StringRef Id) const;
1786 bool isToken(const AsmToken::TokenKind Kind) const;
1787 StringRef getId() const;
1788 bool trySkipId(const StringRef Id);
1789 bool trySkipId(const StringRef Pref, const StringRef Id);
1790 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1791 bool trySkipToken(const AsmToken::TokenKind Kind);
1792 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1793 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1794 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1795
1796 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1797 AsmToken::TokenKind getTokenKind() const;
1798 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1799 bool parseExpr(OperandVector &Operands);
1800 StringRef getTokenStr() const;
1801 AsmToken peekToken(bool ShouldSkipSpace = true);
1802 AsmToken getToken() const;
1803 SMLoc getLoc() const;
1804 void lex();
1805
1806 public:
1807 void onBeginOfFile() override;
1808 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1809
1810 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1811
1812 ParseStatus parseExpTgt(OperandVector &Operands);
1813 ParseStatus parseSendMsg(OperandVector &Operands);
1814 ParseStatus parseInterpSlot(OperandVector &Operands);
1815 ParseStatus parseInterpAttr(OperandVector &Operands);
1816 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1817 ParseStatus parseBoolReg(OperandVector &Operands);
1818
1819 bool parseSwizzleOperand(int64_t &Op,
1820 const unsigned MinVal,
1821 const unsigned MaxVal,
1822 const StringRef ErrMsg,
1823 SMLoc &Loc);
1824 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1825 const unsigned MinVal,
1826 const unsigned MaxVal,
1827 const StringRef ErrMsg);
1828 ParseStatus parseSwizzle(OperandVector &Operands);
1829 bool parseSwizzleOffset(int64_t &Imm);
1830 bool parseSwizzleMacro(int64_t &Imm);
1831 bool parseSwizzleQuadPerm(int64_t &Imm);
1832 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1833 bool parseSwizzleBroadcast(int64_t &Imm);
1834 bool parseSwizzleSwap(int64_t &Imm);
1835 bool parseSwizzleReverse(int64_t &Imm);
1836
1837 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1838 int64_t parseGPRIdxMacro();
1839
1840   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1841   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1842
1843 ParseStatus parseOModSI(OperandVector &Operands);
1844
1845 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1846 OptionalImmIndexMap &OptionalIdx);
1847 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1848 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1849 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1850 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1851
1852 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1853 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1854 OptionalImmIndexMap &OptionalIdx);
1855 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1856 OptionalImmIndexMap &OptionalIdx);
1857
1858 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1859 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1860
1861 bool parseDimId(unsigned &Encoding);
1862 ParseStatus parseDim(OperandVector &Operands);
1863 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1864 ParseStatus parseDPP8(OperandVector &Operands);
1865 ParseStatus parseDPPCtrl(OperandVector &Operands);
1866 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1867 int64_t parseDPPCtrlSel(StringRef Ctrl);
1868 int64_t parseDPPCtrlPerm();
1869 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1870   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1871 cvtDPP(Inst, Operands, true);
1872 }
1873 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1874 bool IsDPP8 = false);
1875   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1876 cvtVOP3DPP(Inst, Operands, true);
1877 }
1878
1879 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1880 AMDGPUOperand::ImmTy Type);
1881 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1882 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1883 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1884 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1885 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1886 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1887 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1888 uint64_t BasicInstType,
1889 bool SkipDstVcc = false,
1890 bool SkipSrcVcc = false);
1891
1892 ParseStatus parseEndpgm(OperandVector &Operands);
1893
1894 ParseStatus parseVOPD(OperandVector &Operands);
1895 };
1896
1897 } // end anonymous namespace
1898
1899 // May be called with integer type with equivalent bitwidth.
1900 static const fltSemantics *getFltSemantics(unsigned Size) {
1901 switch (Size) {
1902 case 4:
1903 return &APFloat::IEEEsingle();
1904 case 8:
1905 return &APFloat::IEEEdouble();
1906 case 2:
1907 return &APFloat::IEEEhalf();
1908 default:
1909 llvm_unreachable("unsupported fp type");
1910 }
1911 }
1912
1913 static const fltSemantics *getFltSemantics(MVT VT) {
1914 return getFltSemantics(VT.getSizeInBits() / 8);
1915 }
1916
1917 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1918 switch (OperandType) {
1919   // When a floating-point immediate is used as an operand of type i16, the
1920   // 32-bit representation of the constant truncated to the 16 LSBs should be used.
1921 case AMDGPU::OPERAND_REG_IMM_INT16:
1922 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1923 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1924 case AMDGPU::OPERAND_REG_IMM_INT32:
1925 case AMDGPU::OPERAND_REG_IMM_FP32:
1926 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1927 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1928 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1929 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1930 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1931 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1932 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1933 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1934 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1935 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1936 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1937 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1938 case AMDGPU::OPERAND_KIMM32:
1939 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1940 return &APFloat::IEEEsingle();
1941 case AMDGPU::OPERAND_REG_IMM_INT64:
1942 case AMDGPU::OPERAND_REG_IMM_FP64:
1943 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1944 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1945 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1946 return &APFloat::IEEEdouble();
1947 case AMDGPU::OPERAND_REG_IMM_FP16:
1948 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1949 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1950 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1951 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1952 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1953 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1954 case AMDGPU::OPERAND_KIMM16:
1955 return &APFloat::IEEEhalf();
1956 case AMDGPU::OPERAND_REG_IMM_BF16:
1957 case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
1958 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1959 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1960 case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
1961 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
1962 case AMDGPU::OPERAND_REG_IMM_V2BF16:
1963 return &APFloat::BFloat();
1964 default:
1965 llvm_unreachable("unsupported fp type");
1966 }
1967 }
1968
1969 //===----------------------------------------------------------------------===//
1970 // Operand
1971 //===----------------------------------------------------------------------===//
1972
1973 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1974 bool Lost;
1975
1976   // Convert the literal to the target floating-point type.
1977 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1978 APFloat::rmNearestTiesToEven,
1979 &Lost);
1980   // We allow precision loss but not overflow or underflow.
1981 if (Status != APFloat::opOK &&
1982 Lost &&
1983 ((Status & APFloat::opOverflow) != 0 ||
1984 (Status & APFloat::opUnderflow) != 0)) {
1985 return false;
1986 }
1987
1988 return true;
1989 }
1990
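// Check whether Val fits into Size bits as either a signed or an unsigned
// value, i.e. whether truncating it to Size bits loses no information.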
1991 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1992 return isUIntN(Size, Val) || isIntN(Size, Val);
1993 }
1994
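// Check whether a 16-bit value is an inlinable literal for the given scalar
// element type (i16, f16 or bf16). i16 uses the 32-bit inline-constant check,
// matching how floating-point inline constants are encoded for i16 operands.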
1995 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1996 if (VT.getScalarType() == MVT::i16)
1997 return isInlinableLiteral32(Val, HasInv2Pi);
1998
1999 if (VT.getScalarType() == MVT::f16)
2000 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2001
2002 assert(VT.getScalarType() == MVT::bf16);
2003
2004 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2005 }
2006
2007 bool AMDGPUOperand::isInlinableImm(MVT type) const {
2008
2009 // This is a hack to enable named inline values like
2010 // shared_base with both 32-bit and 64-bit operands.
2011 // Note that these values are defined as
2012 // 32-bit operands only.
2013 if (isInlineValue()) {
2014 return true;
2015 }
2016
2017 if (!isImmTy(ImmTyNone)) {
2018 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2019 return false;
2020 }
2021 // TODO: We should avoid using host float here. It would be better to
2022 // check the float bit values which is what a few other places do.
2023 // We've had bot failures before due to weird NaN support on mips hosts.
2024
2025 APInt Literal(64, Imm.Val);
2026
2027 if (Imm.IsFPImm) { // We got fp literal token
2028 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2029 return AMDGPU::isInlinableLiteral64(Imm.Val,
2030 AsmParser->hasInv2PiInlineImm());
2031 }
2032
2033 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2034 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2035 return false;
2036
2037 if (type.getScalarSizeInBits() == 16) {
2038 bool Lost = false;
2039 switch (type.getScalarType().SimpleTy) {
2040 default:
2041 llvm_unreachable("unknown 16-bit type");
2042 case MVT::bf16:
2043 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2044 &Lost);
2045 break;
2046 case MVT::f16:
2047 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2048 &Lost);
2049 break;
2050 case MVT::i16:
2051 FPLiteral.convert(APFloatBase::IEEEsingle(),
2052 APFloat::rmNearestTiesToEven, &Lost);
2053 break;
2054 }
2055       // We need to use the 32-bit representation here because when a
2056       // floating-point inline constant is used as an i16 operand, its 32-bit
2057       // representation is used. We need the 32-bit value to check whether it
2058       // is an FP inline constant.
2059 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2060 return isInlineableLiteralOp16(ImmVal, type,
2061 AsmParser->hasInv2PiInlineImm());
2062 }
2063
2064 // Check if single precision literal is inlinable
2065 return AMDGPU::isInlinableLiteral32(
2066 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2067 AsmParser->hasInv2PiInlineImm());
2068 }
2069
2070 // We got int literal token.
2071 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2072 return AMDGPU::isInlinableLiteral64(Imm.Val,
2073 AsmParser->hasInv2PiInlineImm());
2074 }
2075
2076 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2077 return false;
2078 }
2079
2080 if (type.getScalarSizeInBits() == 16) {
2081 return isInlineableLiteralOp16(
2082 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2083 type, AsmParser->hasInv2PiInlineImm());
2084 }
2085
2086 return AMDGPU::isInlinableLiteral32(
2087 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2088 AsmParser->hasInv2PiInlineImm());
2089 }
2090
2091 bool AMDGPUOperand::isLiteralImm(MVT type) const {
2092 // Check that this immediate can be added as literal
2093 if (!isImmTy(ImmTyNone)) {
2094 return false;
2095 }
2096
2097 if (!Imm.IsFPImm) {
2098 // We got int literal token.
2099
2100 if (type == MVT::f64 && hasFPModifiers()) {
2101       // FP modifiers cannot be applied to integer literals while preserving the
2102       // same semantics for VOP1/2/C and VOP3 because of integer truncation. To
2103       // avoid ambiguity, reject these cases.
2104 return false;
2105 }
2106
2107 unsigned Size = type.getSizeInBits();
2108 if (Size == 64)
2109 Size = 32;
2110
2111 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2112 // types.
2113 return isSafeTruncation(Imm.Val, Size);
2114 }
2115
2116 // We got fp literal token
2117 if (type == MVT::f64) { // Expected 64-bit fp operand
2118     // We would set the low 64 bits of the literal to zeroes, but we accept these literals.
2119 return true;
2120 }
2121
2122 if (type == MVT::i64) { // Expected 64-bit int operand
2123 // We don't allow fp literals in 64-bit integer instructions. It is
2124 // unclear how we should encode them.
2125 return false;
2126 }
2127
2128 // We allow fp literals with f16x2 operands assuming that the specified
2129 // literal goes into the lower half and the upper half is zero. We also
2130 // require that the literal may be losslessly converted to f16.
2131 //
2132 // For i16x2 operands, we assume that the specified literal is encoded as a
2133 // single-precision float. This is pretty odd, but it matches SP3 and what
2134 // happens in hardware.
2135 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2136 : (type == MVT::v2i16) ? MVT::f32
2137 : (type == MVT::v2f32) ? MVT::f32
2138 : type;
2139
2140 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2141 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2142 }
2143
2144 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2145 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2146 }
2147
2148 bool AMDGPUOperand::isVRegWithInputMods() const {
2149 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2150 // GFX90A allows DPP on 64-bit operands.
2151 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2152 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2153 }
2154
2155 template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2156 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2157 : AMDGPU::VGPR_16_Lo128RegClassID);
2158 }
2159
2160 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2161 if (AsmParser->isVI())
2162 return isVReg32();
2163 if (AsmParser->isGFX9Plus())
2164 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2165 return false;
2166 }
2167
2168 bool AMDGPUOperand::isSDWAFP16Operand() const {
2169 return isSDWAOperand(MVT::f16);
2170 }
2171
2172 bool AMDGPUOperand::isSDWAFP32Operand() const {
2173 return isSDWAOperand(MVT::f32);
2174 }
2175
2176 bool AMDGPUOperand::isSDWAInt16Operand() const {
2177 return isSDWAOperand(MVT::i16);
2178 }
2179
2180 bool AMDGPUOperand::isSDWAInt32Operand() const {
2181 return isSDWAOperand(MVT::i32);
2182 }
2183
2184 bool AMDGPUOperand::isBoolReg() const {
2185 auto FB = AsmParser->getFeatureBits();
2186 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2187 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2188 }
2189
2190 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2191 {
2192 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2193 assert(Size == 2 || Size == 4 || Size == 8);
2194
2195 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2196
2197 if (Imm.Mods.Abs) {
2198 Val &= ~FpSignMask;
2199 }
2200 if (Imm.Mods.Neg) {
2201 Val ^= FpSignMask;
2202 }
2203
2204 return Val;
2205 }
2206
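// Append this operand to Inst as an immediate. Expressions are added as-is;
// literal source operands are routed through addLiteralImmOperand so that
// inline-constant and literal encodings are handled correctly.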
2207 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2208 if (isExpr()) {
2209 Inst.addOperand(MCOperand::createExpr(Expr));
2210 return;
2211 }
2212
2213 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2214 Inst.getNumOperands())) {
2215 addLiteralImmOperand(Inst, Imm.Val,
2216                          ApplyModifiers &&
2217 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2218 } else {
2219 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2220 Inst.addOperand(MCOperand::createImm(Imm.Val));
2221 setImmKindNone();
2222 }
2223 }
2224
2225 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2226 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2227 auto OpNum = Inst.getNumOperands();
2228 // Check that this operand accepts literals
2229 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2230
2231 if (ApplyModifiers) {
2232 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2233 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2234 Val = applyInputFPModifiers(Val, Size);
2235 }
2236
2237 APInt Literal(64, Val);
2238 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2239
2240 if (Imm.IsFPImm) { // We got fp literal token
2241 switch (OpTy) {
2242 case AMDGPU::OPERAND_REG_IMM_INT64:
2243 case AMDGPU::OPERAND_REG_IMM_FP64:
2244 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2245 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2246 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2247 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2248 AsmParser->hasInv2PiInlineImm())) {
2249 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2250 setImmKindConst();
2251 return;
2252 }
2253
2254 // Non-inlineable
2255 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2256 // For fp operands we check if low 32 bits are zeros
2257 if (Literal.getLoBits(32) != 0) {
2258 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2259 "Can't encode literal as exact 64-bit floating-point operand. "
2260 "Low 32-bits will be set to zero");
2261 Val &= 0xffffffff00000000u;
2262 }
2263
2264 Inst.addOperand(MCOperand::createImm(Val));
2265 setImmKindLiteral();
2266 return;
2267 }
2268
2269 // We don't allow fp literals in 64-bit integer instructions. It is
2270 // unclear how we should encode them. This case should be checked earlier
2271 // in predicate methods (isLiteralImm())
2272 llvm_unreachable("fp literal in 64-bit integer instruction.");
2273
2274 case AMDGPU::OPERAND_REG_IMM_BF16:
2275 case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
2276 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2277 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2278 case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
2279 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
2280 case AMDGPU::OPERAND_REG_IMM_V2BF16:
2281 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2282       // This is 1/(2*pi), which is going to be truncated to bf16 with a
2283       // loss of precision. The constant represents the idiomatic fp32 value of
2284       // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16
2285       // bits cleared. Prevent the rounding below.
2286 Inst.addOperand(MCOperand::createImm(0x3e22));
2287 setImmKindLiteral();
2288 return;
2289 }
2290 [[fallthrough]];
2291
2292 case AMDGPU::OPERAND_REG_IMM_INT32:
2293 case AMDGPU::OPERAND_REG_IMM_FP32:
2294 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2295 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2296 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2297 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2298 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2299 case AMDGPU::OPERAND_REG_IMM_INT16:
2300 case AMDGPU::OPERAND_REG_IMM_FP16:
2301 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2302 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2303 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2304 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2305 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2306 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2307 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2308 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2309 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2310 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2311 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2312 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2313 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2314 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2315 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2316 case AMDGPU::OPERAND_KIMM32:
2317 case AMDGPU::OPERAND_KIMM16:
2318 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
2319 bool lost;
2320 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2321     // Convert the literal to the operand's floating-point semantics.
2322 FPLiteral.convert(*getOpFltSemantics(OpTy),
2323 APFloat::rmNearestTiesToEven, &lost);
2324     // We allow precision loss but not overflow or underflow. This should have
2325     // been checked earlier in isLiteralImm().
2326
2327 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2328 Inst.addOperand(MCOperand::createImm(ImmVal));
2329 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2330 setImmKindMandatoryLiteral();
2331 } else {
2332 setImmKindLiteral();
2333 }
2334 return;
2335 }
2336 default:
2337 llvm_unreachable("invalid operand size");
2338 }
2339
2340 return;
2341 }
2342
2343 // We got int literal token.
2344 // Only sign extend inline immediates.
2345 switch (OpTy) {
2346 case AMDGPU::OPERAND_REG_IMM_INT32:
2347 case AMDGPU::OPERAND_REG_IMM_FP32:
2348 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2349 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2350 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2351 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2352 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2353 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2354 case AMDGPU::OPERAND_REG_IMM_V2BF16:
2355 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2356 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2357 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2358 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2359 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2360 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
2361 if (isSafeTruncation(Val, 32) &&
2362 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2363 AsmParser->hasInv2PiInlineImm())) {
2364 Inst.addOperand(MCOperand::createImm(Val));
2365 setImmKindConst();
2366 return;
2367 }
2368
2369 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2370 setImmKindLiteral();
2371 return;
2372
2373 case AMDGPU::OPERAND_REG_IMM_INT64:
2374 case AMDGPU::OPERAND_REG_IMM_FP64:
2375 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2376 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2377 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2378 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2379 Inst.addOperand(MCOperand::createImm(Val));
2380 setImmKindConst();
2381 return;
2382 }
2383
2384 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2385 : Lo_32(Val);
2386
2387 Inst.addOperand(MCOperand::createImm(Val));
2388 setImmKindLiteral();
2389 return;
2390
2391 case AMDGPU::OPERAND_REG_IMM_INT16:
2392 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2393 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2394 if (isSafeTruncation(Val, 16) &&
2395 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2396 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2397 setImmKindConst();
2398 return;
2399 }
2400
2401 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2402 setImmKindLiteral();
2403 return;
2404
2405 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2406 case AMDGPU::OPERAND_REG_IMM_FP16:
2407 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2408 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2409 if (isSafeTruncation(Val, 16) &&
2410 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2411 AsmParser->hasInv2PiInlineImm())) {
2412 Inst.addOperand(MCOperand::createImm(Val));
2413 setImmKindConst();
2414 return;
2415 }
2416
2417 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2418 setImmKindLiteral();
2419 return;
2420
2421 case AMDGPU::OPERAND_REG_IMM_BF16:
2422 case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
2423 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2424 case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
2425 if (isSafeTruncation(Val, 16) &&
2426 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2427 AsmParser->hasInv2PiInlineImm())) {
2428 Inst.addOperand(MCOperand::createImm(Val));
2429 setImmKindConst();
2430 return;
2431 }
2432
2433 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2434 setImmKindLiteral();
2435 return;
2436
2437 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2438 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
2439 assert(isSafeTruncation(Val, 16));
2440 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2441 Inst.addOperand(MCOperand::createImm(Val));
2442 return;
2443 }
2444 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2445 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2446 assert(isSafeTruncation(Val, 16));
2447 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2448 AsmParser->hasInv2PiInlineImm()));
2449
2450 Inst.addOperand(MCOperand::createImm(Val));
2451 return;
2452 }
2453
2454 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2455 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: {
2456 assert(isSafeTruncation(Val, 16));
2457 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2458 AsmParser->hasInv2PiInlineImm()));
2459
2460 Inst.addOperand(MCOperand::createImm(Val));
2461 return;
2462 }
2463
2464 case AMDGPU::OPERAND_KIMM32:
2465 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2466 setImmKindMandatoryLiteral();
2467 return;
2468 case AMDGPU::OPERAND_KIMM16:
2469 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2470 setImmKindMandatoryLiteral();
2471 return;
2472 default:
2473 llvm_unreachable("invalid operand size");
2474 }
2475 }
2476
2477 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2478 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2479 }
2480
2481 bool AMDGPUOperand::isInlineValue() const {
2482 return isRegKind() && ::isInlineValue(getReg());
2483 }
2484
2485 //===----------------------------------------------------------------------===//
2486 // AsmParser
2487 //===----------------------------------------------------------------------===//
2488
2489 void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2490   // TODO: make these pre-defined variables read-only.
2491   // Currently there is no suitable machinery in core llvm-mc for this.
2492   // MCSymbol::isRedefinable is intended for another purpose, and
2493   // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2494 MCContext &Ctx = getContext();
2495 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2496 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2497 }
2498
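// Map a register kind and width (in bits) to the corresponding MC register
// class ID, or -1 if no such class exists.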
2499 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2500 if (Is == IS_VGPR) {
2501 switch (RegWidth) {
2502 default: return -1;
2503 case 32:
2504 return AMDGPU::VGPR_32RegClassID;
2505 case 64:
2506 return AMDGPU::VReg_64RegClassID;
2507 case 96:
2508 return AMDGPU::VReg_96RegClassID;
2509 case 128:
2510 return AMDGPU::VReg_128RegClassID;
2511 case 160:
2512 return AMDGPU::VReg_160RegClassID;
2513 case 192:
2514 return AMDGPU::VReg_192RegClassID;
2515 case 224:
2516 return AMDGPU::VReg_224RegClassID;
2517 case 256:
2518 return AMDGPU::VReg_256RegClassID;
2519 case 288:
2520 return AMDGPU::VReg_288RegClassID;
2521 case 320:
2522 return AMDGPU::VReg_320RegClassID;
2523 case 352:
2524 return AMDGPU::VReg_352RegClassID;
2525 case 384:
2526 return AMDGPU::VReg_384RegClassID;
2527 case 512:
2528 return AMDGPU::VReg_512RegClassID;
2529 case 1024:
2530 return AMDGPU::VReg_1024RegClassID;
2531 }
2532 } else if (Is == IS_TTMP) {
2533 switch (RegWidth) {
2534 default: return -1;
2535 case 32:
2536 return AMDGPU::TTMP_32RegClassID;
2537 case 64:
2538 return AMDGPU::TTMP_64RegClassID;
2539 case 128:
2540 return AMDGPU::TTMP_128RegClassID;
2541 case 256:
2542 return AMDGPU::TTMP_256RegClassID;
2543 case 512:
2544 return AMDGPU::TTMP_512RegClassID;
2545 }
2546 } else if (Is == IS_SGPR) {
2547 switch (RegWidth) {
2548 default: return -1;
2549 case 32:
2550 return AMDGPU::SGPR_32RegClassID;
2551 case 64:
2552 return AMDGPU::SGPR_64RegClassID;
2553 case 96:
2554 return AMDGPU::SGPR_96RegClassID;
2555 case 128:
2556 return AMDGPU::SGPR_128RegClassID;
2557 case 160:
2558 return AMDGPU::SGPR_160RegClassID;
2559 case 192:
2560 return AMDGPU::SGPR_192RegClassID;
2561 case 224:
2562 return AMDGPU::SGPR_224RegClassID;
2563 case 256:
2564 return AMDGPU::SGPR_256RegClassID;
2565 case 288:
2566 return AMDGPU::SGPR_288RegClassID;
2567 case 320:
2568 return AMDGPU::SGPR_320RegClassID;
2569 case 352:
2570 return AMDGPU::SGPR_352RegClassID;
2571 case 384:
2572 return AMDGPU::SGPR_384RegClassID;
2573 case 512:
2574 return AMDGPU::SGPR_512RegClassID;
2575 }
2576 } else if (Is == IS_AGPR) {
2577 switch (RegWidth) {
2578 default: return -1;
2579 case 32:
2580 return AMDGPU::AGPR_32RegClassID;
2581 case 64:
2582 return AMDGPU::AReg_64RegClassID;
2583 case 96:
2584 return AMDGPU::AReg_96RegClassID;
2585 case 128:
2586 return AMDGPU::AReg_128RegClassID;
2587 case 160:
2588 return AMDGPU::AReg_160RegClassID;
2589 case 192:
2590 return AMDGPU::AReg_192RegClassID;
2591 case 224:
2592 return AMDGPU::AReg_224RegClassID;
2593 case 256:
2594 return AMDGPU::AReg_256RegClassID;
2595 case 288:
2596 return AMDGPU::AReg_288RegClassID;
2597 case 320:
2598 return AMDGPU::AReg_320RegClassID;
2599 case 352:
2600 return AMDGPU::AReg_352RegClassID;
2601 case 384:
2602 return AMDGPU::AReg_384RegClassID;
2603 case 512:
2604 return AMDGPU::AReg_512RegClassID;
2605 case 1024:
2606 return AMDGPU::AReg_1024RegClassID;
2607 }
2608 }
2609 return -1;
2610 }
2611
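// Map the textual name of a special register (exec, vcc, m0, ...) to its MC
// register number, or AMDGPU::NoRegister if the name is not recognized.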
2612 static unsigned getSpecialRegForName(StringRef RegName) {
2613 return StringSwitch<unsigned>(RegName)
2614 .Case("exec", AMDGPU::EXEC)
2615 .Case("vcc", AMDGPU::VCC)
2616 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2617 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2618 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2619 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2620 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2621 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2622 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2623 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2624 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2625 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2626 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2627 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2628 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2629 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2630 .Case("m0", AMDGPU::M0)
2631 .Case("vccz", AMDGPU::SRC_VCCZ)
2632 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2633 .Case("execz", AMDGPU::SRC_EXECZ)
2634 .Case("src_execz", AMDGPU::SRC_EXECZ)
2635 .Case("scc", AMDGPU::SRC_SCC)
2636 .Case("src_scc", AMDGPU::SRC_SCC)
2637 .Case("tba", AMDGPU::TBA)
2638 .Case("tma", AMDGPU::TMA)
2639 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2640 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2641 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2642 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2643 .Case("vcc_lo", AMDGPU::VCC_LO)
2644 .Case("vcc_hi", AMDGPU::VCC_HI)
2645 .Case("exec_lo", AMDGPU::EXEC_LO)
2646 .Case("exec_hi", AMDGPU::EXEC_HI)
2647 .Case("tma_lo", AMDGPU::TMA_LO)
2648 .Case("tma_hi", AMDGPU::TMA_HI)
2649 .Case("tba_lo", AMDGPU::TBA_LO)
2650 .Case("tba_hi", AMDGPU::TBA_HI)
2651 .Case("pc", AMDGPU::PC_REG)
2652 .Case("null", AMDGPU::SGPR_NULL)
2653 .Default(AMDGPU::NoRegister);
2654 }
2655
2656 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2657 SMLoc &EndLoc, bool RestoreOnFailure) {
2658 auto R = parseRegister();
2659 if (!R) return true;
2660 assert(R->isReg());
2661 RegNo = R->getReg();
2662 StartLoc = R->getStartLoc();
2663 EndLoc = R->getEndLoc();
2664 return false;
2665 }
2666
2667 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2668 SMLoc &EndLoc) {
2669 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2670 }
2671
2672 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2673 SMLoc &EndLoc) {
2674 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2675 bool PendingErrors = getParser().hasPendingError();
2676 getParser().clearPendingErrors();
2677 if (PendingErrors)
2678 return ParseStatus::Failure;
2679 if (Result)
2680 return ParseStatus::NoMatch;
2681 return ParseStatus::Success;
2682 }
2683
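// Fold the next register parsed from a register list into the accumulated
// (Reg, RegWidth) pair: matching lo/hi halves of special registers are joined
// into their 64-bit register, and regular register ranges are extended by 32
// bits after checking that the indices are consecutive.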
2684 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2685 RegisterKind RegKind, unsigned Reg1,
2686 SMLoc Loc) {
2687 switch (RegKind) {
2688 case IS_SPECIAL:
2689 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2690 Reg = AMDGPU::EXEC;
2691 RegWidth = 64;
2692 return true;
2693 }
2694 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2695 Reg = AMDGPU::FLAT_SCR;
2696 RegWidth = 64;
2697 return true;
2698 }
2699 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2700 Reg = AMDGPU::XNACK_MASK;
2701 RegWidth = 64;
2702 return true;
2703 }
2704 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2705 Reg = AMDGPU::VCC;
2706 RegWidth = 64;
2707 return true;
2708 }
2709 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2710 Reg = AMDGPU::TBA;
2711 RegWidth = 64;
2712 return true;
2713 }
2714 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2715 Reg = AMDGPU::TMA;
2716 RegWidth = 64;
2717 return true;
2718 }
2719 Error(Loc, "register does not fit in the list");
2720 return false;
2721 case IS_VGPR:
2722 case IS_SGPR:
2723 case IS_AGPR:
2724 case IS_TTMP:
2725 if (Reg1 != Reg + RegWidth / 32) {
2726 Error(Loc, "registers in a list must have consecutive indices");
2727 return false;
2728 }
2729 RegWidth += 32;
2730 return true;
2731 default:
2732 llvm_unreachable("unexpected register kind");
2733 }
2734 }
2735
2736 struct RegInfo {
2737 StringLiteral Name;
2738 RegisterKind Kind;
2739 };
2740
2741 static constexpr RegInfo RegularRegisters[] = {
2742 {{"v"}, IS_VGPR},
2743 {{"s"}, IS_SGPR},
2744 {{"ttmp"}, IS_TTMP},
2745 {{"acc"}, IS_AGPR},
2746 {{"a"}, IS_AGPR},
2747 };
2748
2749 static bool isRegularReg(RegisterKind Kind) {
2750 return Kind == IS_VGPR ||
2751 Kind == IS_SGPR ||
2752 Kind == IS_TTMP ||
2753 Kind == IS_AGPR;
2754 }
2755
2756 static const RegInfo* getRegularRegInfo(StringRef Str) {
2757 for (const RegInfo &Reg : RegularRegisters)
2758 if (Str.starts_with(Reg.Name))
2759 return &Reg;
2760 return nullptr;
2761 }
2762
2763 static bool getRegNum(StringRef Str, unsigned& Num) {
2764 return !Str.getAsInteger(10, Num);
2765 }
2766
2767 bool
2768 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2769 const AsmToken &NextToken) const {
2770
2771 // A list of consecutive registers: [s0,s1,s2,s3]
2772 if (Token.is(AsmToken::LBrac))
2773 return true;
2774
2775 if (!Token.is(AsmToken::Identifier))
2776 return false;
2777
2778 // A single register like s0 or a range of registers like s[0:1]
2779
2780 StringRef Str = Token.getString();
2781 const RegInfo *Reg = getRegularRegInfo(Str);
2782 if (Reg) {
2783 StringRef RegName = Reg->Name;
2784 StringRef RegSuffix = Str.substr(RegName.size());
2785 if (!RegSuffix.empty()) {
2786 RegSuffix.consume_back(".l");
2787 RegSuffix.consume_back(".h");
2788 unsigned Num;
2789 // A single register with an index: rXX
2790 if (getRegNum(RegSuffix, Num))
2791 return true;
2792 } else {
2793 // A range of registers: r[XX:YY].
2794 if (NextToken.is(AsmToken::LBrac))
2795 return true;
2796 }
2797 }
2798
2799 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2800 }
2801
2802 bool
2803 AMDGPUAsmParser::isRegister()
2804 {
2805 return isRegister(getToken(), peekToken());
2806 }
2807
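// Compute the MC register for a regular (v/s/a/ttmp) register given its kind,
// starting index, optional 16-bit subregister and width in bits, diagnosing
// alignment, size and index-range errors.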
2808 unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2809 unsigned SubReg, unsigned RegWidth,
2810 SMLoc Loc) {
2811 assert(isRegularReg(RegKind));
2812
2813 unsigned AlignSize = 1;
2814 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2815 // SGPR and TTMP registers must be aligned.
2816 // Max required alignment is 4 dwords.
2817 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2818 }
2819
2820 if (RegNum % AlignSize != 0) {
2821 Error(Loc, "invalid register alignment");
2822 return AMDGPU::NoRegister;
2823 }
2824
2825 unsigned RegIdx = RegNum / AlignSize;
2826 int RCID = getRegClass(RegKind, RegWidth);
2827 if (RCID == -1) {
2828 Error(Loc, "invalid or unsupported register size");
2829 return AMDGPU::NoRegister;
2830 }
2831
2832 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2833 const MCRegisterClass RC = TRI->getRegClass(RCID);
2834 if (RegIdx >= RC.getNumRegs()) {
2835 Error(Loc, "register index is out of range");
2836 return AMDGPU::NoRegister;
2837 }
2838
2839 unsigned Reg = RC.getRegister(RegIdx);
2840
2841 if (SubReg) {
2842 Reg = TRI->getSubReg(Reg, SubReg);
2843
2844 // Currently all regular registers have their .l and .h subregisters, so
2845 // we should never need to generate an error here.
2846 assert(Reg && "Invalid subregister!");
2847 }
2848
2849 return Reg;
2850 }
2851
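// Parse a register index range of the form "[lo]" or "[lo:hi]", returning the
// starting index in Num and the total width in bits (32 per register) in
// RegWidth.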
2852 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2853 int64_t RegLo, RegHi;
2854 if (!skipToken(AsmToken::LBrac, "missing register index"))
2855 return false;
2856
2857 SMLoc FirstIdxLoc = getLoc();
2858 SMLoc SecondIdxLoc;
2859
2860 if (!parseExpr(RegLo))
2861 return false;
2862
2863 if (trySkipToken(AsmToken::Colon)) {
2864 SecondIdxLoc = getLoc();
2865 if (!parseExpr(RegHi))
2866 return false;
2867 } else {
2868 RegHi = RegLo;
2869 }
2870
2871 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2872 return false;
2873
2874 if (!isUInt<32>(RegLo)) {
2875 Error(FirstIdxLoc, "invalid register index");
2876 return false;
2877 }
2878
2879 if (!isUInt<32>(RegHi)) {
2880 Error(SecondIdxLoc, "invalid register index");
2881 return false;
2882 }
2883
2884 if (RegLo > RegHi) {
2885 Error(FirstIdxLoc, "first register index should not exceed second index");
2886 return false;
2887 }
2888
2889 Num = static_cast<unsigned>(RegLo);
2890 RegWidth = 32 * ((RegHi - RegLo) + 1);
2891 return true;
2892 }
2893
2894 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2895 unsigned &RegNum, unsigned &RegWidth,
2896 SmallVectorImpl<AsmToken> &Tokens) {
2897 assert(isToken(AsmToken::Identifier));
2898 unsigned Reg = getSpecialRegForName(getTokenStr());
2899 if (Reg) {
2900 RegNum = 0;
2901 RegWidth = 32;
2902 RegKind = IS_SPECIAL;
2903 Tokens.push_back(getToken());
2904 lex(); // skip register name
2905 }
2906 return Reg;
2907 }
2908
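// Parse a regular register reference: a single register such as v0 or v255.l,
// or a range such as s[0:3].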
2909 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2910 unsigned &RegNum, unsigned &RegWidth,
2911 SmallVectorImpl<AsmToken> &Tokens) {
2912 assert(isToken(AsmToken::Identifier));
2913 StringRef RegName = getTokenStr();
2914 auto Loc = getLoc();
2915
2916 const RegInfo *RI = getRegularRegInfo(RegName);
2917 if (!RI) {
2918 Error(Loc, "invalid register name");
2919 return AMDGPU::NoRegister;
2920 }
2921
2922 Tokens.push_back(getToken());
2923 lex(); // skip register name
2924
2925 RegKind = RI->Kind;
2926 StringRef RegSuffix = RegName.substr(RI->Name.size());
2927 unsigned SubReg = NoSubRegister;
2928 if (!RegSuffix.empty()) {
2929     // We don't know the opcode until we are done parsing, so we don't know
2930     // whether registers should be 16- or 32-bit. It is therefore mandatory to
2931     // use .l or .h to correctly specify 16-bit registers. We also can't decide
2932     // between the VGPR_16_Lo128 and VGPR_16 classes, so always parse them as VGPR_16.
2933 if (RegSuffix.consume_back(".l"))
2934 SubReg = AMDGPU::lo16;
2935 else if (RegSuffix.consume_back(".h"))
2936 SubReg = AMDGPU::hi16;
2937
2938 // Single 32-bit register: vXX.
2939 if (!getRegNum(RegSuffix, RegNum)) {
2940 Error(Loc, "invalid register index");
2941 return AMDGPU::NoRegister;
2942 }
2943 RegWidth = 32;
2944 } else {
2945 // Range of registers: v[XX:YY]. ":YY" is optional.
2946 if (!ParseRegRange(RegNum, RegWidth))
2947 return AMDGPU::NoRegister;
2948 }
2949
2950 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2951 }
2952
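// Parse a bracketed list of consecutive 32-bit registers of the same kind,
// e.g. [s0,s1,s2,s3], and combine them into a single wide register.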
2953 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2954 unsigned &RegWidth,
2955 SmallVectorImpl<AsmToken> &Tokens) {
2956 unsigned Reg = AMDGPU::NoRegister;
2957 auto ListLoc = getLoc();
2958
2959 if (!skipToken(AsmToken::LBrac,
2960 "expected a register or a list of registers")) {
2961 return AMDGPU::NoRegister;
2962 }
2963
2964 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2965
2966 auto Loc = getLoc();
2967 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2968 return AMDGPU::NoRegister;
2969 if (RegWidth != 32) {
2970 Error(Loc, "expected a single 32-bit register");
2971 return AMDGPU::NoRegister;
2972 }
2973
2974 for (; trySkipToken(AsmToken::Comma); ) {
2975 RegisterKind NextRegKind;
2976 unsigned NextReg, NextRegNum, NextRegWidth;
2977 Loc = getLoc();
2978
2979 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2980 NextRegNum, NextRegWidth,
2981 Tokens)) {
2982 return AMDGPU::NoRegister;
2983 }
2984 if (NextRegWidth != 32) {
2985 Error(Loc, "expected a single 32-bit register");
2986 return AMDGPU::NoRegister;
2987 }
2988 if (NextRegKind != RegKind) {
2989 Error(Loc, "registers in a list must be of the same kind");
2990 return AMDGPU::NoRegister;
2991 }
2992 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2993 return AMDGPU::NoRegister;
2994 }
2995
2996 if (!skipToken(AsmToken::RBrac,
2997 "expected a comma or a closing square bracket")) {
2998 return AMDGPU::NoRegister;
2999 }
3000
3001 if (isRegularReg(RegKind))
3002 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3003
3004 return Reg;
3005 }
3006
3007 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3008 unsigned &RegNum, unsigned &RegWidth,
3009 SmallVectorImpl<AsmToken> &Tokens) {
3010 auto Loc = getLoc();
3011 Reg = AMDGPU::NoRegister;
3012
3013 if (isToken(AsmToken::Identifier)) {
3014 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3015 if (Reg == AMDGPU::NoRegister)
3016 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3017 } else {
3018 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3019 }
3020
3021 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3022 if (Reg == AMDGPU::NoRegister) {
3023 assert(Parser.hasPendingError());
3024 return false;
3025 }
3026
3027 if (!subtargetHasRegister(*TRI, Reg)) {
3028 if (Reg == AMDGPU::SGPR_NULL) {
3029 Error(Loc, "'null' operand is not supported on this GPU");
3030 } else {
3031 Error(Loc, "register not available on this GPU");
3032 }
3033 return false;
3034 }
3035
3036 return true;
3037 }
3038
3039 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3040 unsigned &RegNum, unsigned &RegWidth,
3041 bool RestoreOnFailure /*=false*/) {
3042 Reg = AMDGPU::NoRegister;
3043
3044 SmallVector<AsmToken, 1> Tokens;
3045 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3046 if (RestoreOnFailure) {
3047 while (!Tokens.empty()) {
3048 getLexer().UnLex(Tokens.pop_back_val());
3049 }
3050 }
3051 return true;
3052 }
3053 return false;
3054 }
3055
3056 std::optional<StringRef>
3057 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3058 switch (RegKind) {
3059 case IS_VGPR:
3060 return StringRef(".amdgcn.next_free_vgpr");
3061 case IS_SGPR:
3062 return StringRef(".amdgcn.next_free_sgpr");
3063 default:
3064 return std::nullopt;
3065 }
3066 }
3067
3068 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3069 auto SymbolName = getGprCountSymbolName(RegKind);
3070 assert(SymbolName && "initializing invalid register kind");
3071 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3072 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3073 }
3074
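// Bump the .amdgcn.next_free_{v,s}gpr symbol if the register just parsed
// extends past the current maximum. Returns false on error.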
3075 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3076 unsigned DwordRegIndex,
3077 unsigned RegWidth) {
3078 // Symbols are only defined for GCN targets
3079 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3080 return true;
3081
3082 auto SymbolName = getGprCountSymbolName(RegKind);
3083 if (!SymbolName)
3084 return true;
3085 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3086
3087 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3088 int64_t OldCount;
3089
3090 if (!Sym->isVariable())
3091 return !Error(getLoc(),
3092 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3093 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3094 return !Error(
3095 getLoc(),
3096 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3097
3098 if (OldCount <= NewMax)
3099 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3100
3101 return true;
3102 }
3103
3104 std::unique_ptr<AMDGPUOperand>
3105 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3106 const auto &Tok = getToken();
3107 SMLoc StartLoc = Tok.getLoc();
3108 SMLoc EndLoc = Tok.getEndLoc();
3109 RegisterKind RegKind;
3110 unsigned Reg, RegNum, RegWidth;
3111
3112 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3113 return nullptr;
3114 }
3115 if (isHsaAbi(getSTI())) {
3116 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3117 return nullptr;
3118 } else
3119 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3120 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3121 }
3122
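// Parse an immediate operand: an optional lit() wrapper, a floating-point
// literal with an optional sign, or an integer/symbolic MC expression.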
3123 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3124 bool HasSP3AbsModifier, bool HasLit) {
3125 // TODO: add syntactic sugar for 1/(2*PI)
3126
3127 if (isRegister())
3128 return ParseStatus::NoMatch;
3129 assert(!isModifier());
3130
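// An explicit 'lit' modifier forces the constant to be encoded as a literal
// rather than an inline constant, e.g. (illustrative):
//   v_add_f32 v0, lit(1.0), v1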
3131 if (!HasLit) {
3132 HasLit = trySkipId("lit");
3133 if (HasLit) {
3134 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3135 return ParseStatus::Failure;
3136 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3137 if (S.isSuccess() &&
3138 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3139 return ParseStatus::Failure;
3140 return S;
3141 }
3142 }
3143
3144 const auto& Tok = getToken();
3145 const auto& NextTok = peekToken();
3146 bool IsReal = Tok.is(AsmToken::Real);
3147 SMLoc S = getLoc();
3148 bool Negate = false;
3149
3150 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3151 lex();
3152 IsReal = true;
3153 Negate = true;
3154 }
3155
3156 AMDGPUOperand::Modifiers Mods;
3157 Mods.Lit = HasLit;
3158
3159 if (IsReal) {
3160 // Floating-point expressions are not supported.
3161 // Only floating-point literals with an
3162 // optional sign are allowed.
3163
3164 StringRef Num = getTokenStr();
3165 lex();
3166
3167 APFloat RealVal(APFloat::IEEEdouble());
3168 auto roundMode = APFloat::rmNearestTiesToEven;
3169 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3170 return ParseStatus::Failure;
3171 if (Negate)
3172 RealVal.changeSign();
3173
3174 Operands.push_back(
3175 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3176 AMDGPUOperand::ImmTyNone, true));
3177 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3178 Op.setModifiers(Mods);
3179
3180 return ParseStatus::Success;
3181
3182 } else {
3183 int64_t IntVal;
3184 const MCExpr *Expr;
3185 SMLoc S = getLoc();
3186
3187 if (HasSP3AbsModifier) {
3188 // This is a workaround for handling expressions
3189 // as arguments of SP3 'abs' modifier, for example:
3190 // |1.0|
3191 // |-1|
3192 // |1+x|
3193 // This syntax is not compatible with the syntax of standard
3194 // MC expressions (due to the trailing '|').
3195 SMLoc EndLoc;
3196 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3197 return ParseStatus::Failure;
3198 } else {
3199 if (Parser.parseExpression(Expr))
3200 return ParseStatus::Failure;
3201 }
3202
3203 if (Expr->evaluateAsAbsolute(IntVal)) {
3204 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3205 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3206 Op.setModifiers(Mods);
3207 } else {
3208 if (HasLit)
3209 return ParseStatus::NoMatch;
3210 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3211 }
3212
3213 return ParseStatus::Success;
3214 }
3215
3216 return ParseStatus::NoMatch;
3217 }
3218
3219 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3220 if (!isRegister())
3221 return ParseStatus::NoMatch;
3222
3223 if (auto R = parseRegister()) {
3224 assert(R->isReg());
3225 Operands.push_back(std::move(R));
3226 return ParseStatus::Success;
3227 }
3228 return ParseStatus::Failure;
3229 }
3230
3231 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3232 bool HasSP3AbsMod, bool HasLit) {
3233 ParseStatus Res = parseReg(Operands);
3234 if (!Res.isNoMatch())
3235 return Res;
3236 if (isModifier())
3237 return ParseStatus::NoMatch;
3238 return parseImm(Operands, HasSP3AbsMod, HasLit);
3239 }
3240
3241 bool
3242 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3243 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3244 const auto &str = Token.getString();
3245 return str == "abs" || str == "neg" || str == "sext";
3246 }
3247 return false;
3248 }
3249
3250 bool
3251 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3252 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3253 }
3254
3255 bool
3256 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3257 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3258 }
3259
3260 bool
3261 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3262 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3263 }
3264
3265 // Check if this is an operand modifier or an opcode modifier
3266 // which may look like an expression but is not. We should
3267 // avoid parsing these modifiers as expressions. Currently
3268 // recognized sequences are:
3269 // |...|
3270 // abs(...)
3271 // neg(...)
3272 // sext(...)
3273 // -reg
3274 // -|...|
3275 // -abs(...)
3276 // name:...
3277 //
3278 bool
3279 AMDGPUAsmParser::isModifier() {
3280
3281 AsmToken Tok = getToken();
3282 AsmToken NextToken[2];
3283 peekTokens(NextToken);
3284
3285 return isOperandModifier(Tok, NextToken[0]) ||
3286 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3287 isOpcodeModifierWithVal(Tok, NextToken[0]);
3288 }
3289
3290 // Check if the current token is an SP3 'neg' modifier.
3291 // Currently this modifier is allowed in the following context:
3292 //
3293 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3294 // 2. Before an 'abs' modifier: -abs(...)
3295 // 3. Before an SP3 'abs' modifier: -|...|
3296 //
3297 // In all other cases "-" is handled as a part
3298 // of an expression that follows the sign.
3299 //
3300 // Note: When "-" is followed by an integer literal,
3301 // this is interpreted as integer negation rather
3302 // than a floating-point NEG modifier applied to N.
3303 // Besides being counter-intuitive, such use of the floating-point
3304 // NEG modifier would have resulted in different meanings
3305 // of integer literals used with VOP1/2/C and VOP3,
3306 // for example:
3307 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3308 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3309 // Negative fp literals with preceding "-" are
3310 // handled likewise for uniformity
3311 //
3312 bool
3313 AMDGPUAsmParser::parseSP3NegModifier() {
3314
3315 AsmToken NextToken[2];
3316 peekTokens(NextToken);
3317
3318 if (isToken(AsmToken::Minus) &&
3319 (isRegister(NextToken[0], NextToken[1]) ||
3320 NextToken[0].is(AsmToken::Pipe) ||
3321 isId(NextToken[0], "abs"))) {
3322 lex();
3323 return true;
3324 }
3325
3326 return false;
3327 }
3328
3329 ParseStatus
3330 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3331 bool AllowImm) {
3332 bool Neg, SP3Neg;
3333 bool Abs, SP3Abs;
3334 bool Lit;
3335 SMLoc Loc;
3336
3337 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3338 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3339 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3340
3341 SP3Neg = parseSP3NegModifier();
3342
3343 Loc = getLoc();
3344 Neg = trySkipId("neg");
3345 if (Neg && SP3Neg)
3346 return Error(Loc, "expected register or immediate");
3347 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3348 return ParseStatus::Failure;
3349
3350 Abs = trySkipId("abs");
3351 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3352 return ParseStatus::Failure;
3353
3354 Lit = trySkipId("lit");
3355 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3356 return ParseStatus::Failure;
3357
3358 Loc = getLoc();
3359 SP3Abs = trySkipToken(AsmToken::Pipe);
3360 if (Abs && SP3Abs)
3361 return Error(Loc, "expected register or immediate");
3362
3363 ParseStatus Res;
3364 if (AllowImm) {
3365 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3366 } else {
3367 Res = parseReg(Operands);
3368 }
3369 if (!Res.isSuccess())
3370 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3371
3372 if (Lit && !Operands.back()->isImm())
3373 Error(Loc, "expected immediate with lit modifier");
3374
3375 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3376 return ParseStatus::Failure;
3377 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3378 return ParseStatus::Failure;
3379 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3380 return ParseStatus::Failure;
3381 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3382 return ParseStatus::Failure;
3383
3384 AMDGPUOperand::Modifiers Mods;
3385 Mods.Abs = Abs || SP3Abs;
3386 Mods.Neg = Neg || SP3Neg;
3387 Mods.Lit = Lit;
3388
3389 if (Mods.hasFPModifiers() || Lit) {
3390 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3391 if (Op.isExpr())
3392 return Error(Op.getStartLoc(), "expected an absolute expression");
3393 Op.setModifiers(Mods);
3394 }
3395 return ParseStatus::Success;
3396 }
3397
3398 ParseStatus
3399 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3400 bool AllowImm) {
3401 bool Sext = trySkipId("sext");
3402 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3403 return ParseStatus::Failure;
3404
3405 ParseStatus Res;
3406 if (AllowImm) {
3407 Res = parseRegOrImm(Operands);
3408 } else {
3409 Res = parseReg(Operands);
3410 }
3411 if (!Res.isSuccess())
3412 return Sext ? ParseStatus::Failure : Res;
3413
3414 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3415 return ParseStatus::Failure;
3416
3417 AMDGPUOperand::Modifiers Mods;
3418 Mods.Sext = Sext;
3419
3420 if (Mods.hasIntModifiers()) {
3421 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3422 if (Op.isExpr())
3423 return Error(Op.getStartLoc(), "expected an absolute expression");
3424 Op.setModifiers(Mods);
3425 }
3426
3427 return ParseStatus::Success;
3428 }
3429
3430 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3431 return parseRegOrImmWithFPInputMods(Operands, false);
3432 }
3433
3434 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3435 return parseRegOrImmWithIntInputMods(Operands, false);
3436 }
3437
3438 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3439 auto Loc = getLoc();
3440 if (trySkipId("off")) {
3441 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3442 AMDGPUOperand::ImmTyOff, false));
3443 return ParseStatus::Success;
3444 }
3445
3446 if (!isRegister())
3447 return ParseStatus::NoMatch;
3448
3449 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3450 if (Reg) {
3451 Operands.push_back(std::move(Reg));
3452 return ParseStatus::Success;
3453 }
3454
3455 return ParseStatus::Failure;
3456 }
3457
3458 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3459 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3460
3461 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3462 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3463 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3464 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3465 return Match_InvalidOperand;
3466
3467 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3468 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3469 // v_mac_f32/16 allow only dst_sel == DWORD;
3470 auto OpNum =
3471 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3472 const auto &Op = Inst.getOperand(OpNum);
3473 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3474 return Match_InvalidOperand;
3475 }
3476 }
3477
3478 return Match_Success;
3479 }
3480
3481 static ArrayRef<unsigned> getAllVariants() {
3482 static const unsigned Variants[] = {
3483 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3484 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3485 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3486 };
3487
3488 return ArrayRef(Variants);
3489 }
3490
3491 // What asm variants we should check
3492 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3493 if (isForcedDPP() && isForcedVOP3()) {
3494 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3495 return ArrayRef(Variants);
3496 }
3497 if (getForcedEncodingSize() == 32) {
3498 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3499 return ArrayRef(Variants);
3500 }
3501
3502 if (isForcedVOP3()) {
3503 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3504 return ArrayRef(Variants);
3505 }
3506
3507 if (isForcedSDWA()) {
3508 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3509 AMDGPUAsmVariants::SDWA9};
3510 return ArrayRef(Variants);
3511 }
3512
3513 if (isForcedDPP()) {
3514 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3515 return ArrayRef(Variants);
3516 }
3517
3518 return getAllVariants();
3519 }
3520
3521 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3522 if (isForcedDPP() && isForcedVOP3())
3523 return "e64_dpp";
3524
3525 if (getForcedEncodingSize() == 32)
3526 return "e32";
3527
3528 if (isForcedVOP3())
3529 return "e64";
3530
3531 if (isForcedSDWA())
3532 return "sdwa";
3533
3534 if (isForcedDPP())
3535 return "dpp";
3536
3537 return "";
3538 }
3539
3540 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3541 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3542 for (MCPhysReg Reg : Desc.implicit_uses()) {
3543 switch (Reg) {
3544 case AMDGPU::FLAT_SCR:
3545 case AMDGPU::VCC:
3546 case AMDGPU::VCC_LO:
3547 case AMDGPU::VCC_HI:
3548 case AMDGPU::M0:
3549 return Reg;
3550 default:
3551 break;
3552 }
3553 }
3554 return AMDGPU::NoRegister;
3555 }
3556
3557 // NB: This code is correct only when used to check constant
3558 // bus limitations because GFX7 supports no f16 inline constants.
3559 // Note that there are no cases when a GFX7 opcode violates
3560 // constant bus limitations due to the use of an f16 constant.
3561 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3562 unsigned OpIdx) const {
3563 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3564
3565 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3566 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3567 return false;
3568 }
3569
3570 const MCOperand &MO = Inst.getOperand(OpIdx);
3571
3572 int64_t Val = MO.getImm();
3573 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3574
3575 switch (OpSize) { // expected operand size
3576 case 8:
3577 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3578 case 4:
3579 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3580 case 2: {
3581 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3582 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3583 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3584 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3585 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3586
3587 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3588 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3589 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3590 return AMDGPU::isInlinableLiteralV2I16(Val);
3591
3592 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3593 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3594 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3595 return AMDGPU::isInlinableLiteralV2F16(Val);
3596
3597 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
3598 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2BF16 ||
3599 OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
3600 return AMDGPU::isInlinableLiteralV2BF16(Val);
3601
3602 if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3603 OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
3604 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
3605 OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
3606 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3607
3608 if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
3609 OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 ||
3610 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 ||
3611 OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED)
3612 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3613
3614 llvm_unreachable("invalid operand type");
3615 }
3616 default:
3617 llvm_unreachable("invalid operand size");
3618 }
3619 }
3620
3621 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3622 if (!isGFX10Plus())
3623 return 1;
3624
3625 switch (Opcode) {
3626 // 64-bit shift instructions can use only one scalar value input
3627 case AMDGPU::V_LSHLREV_B64_e64:
3628 case AMDGPU::V_LSHLREV_B64_gfx10:
3629 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3630 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3631 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3632 case AMDGPU::V_LSHRREV_B64_e64:
3633 case AMDGPU::V_LSHRREV_B64_gfx10:
3634 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3635 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3636 case AMDGPU::V_ASHRREV_I64_e64:
3637 case AMDGPU::V_ASHRREV_I64_gfx10:
3638 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3639 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3640 case AMDGPU::V_LSHL_B64_e64:
3641 case AMDGPU::V_LSHR_B64_e64:
3642 case AMDGPU::V_ASHR_I64_e64:
3643 return 1;
3644 default:
3645 return 2;
3646 }
3647 }
3648
3649 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3650 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3651
3652 // Get regular operand indices in the same order as specified
3653 // in the instruction (but append mandatory literals to the end).
3654 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3655 bool AddMandatoryLiterals = false) {
3656
3657 int16_t ImmIdx =
3658 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3659
3660 if (isVOPD(Opcode)) {
3661 int16_t ImmDeferredIdx =
3662 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3663 : -1;
3664
3665 return {getNamedOperandIdx(Opcode, OpName::src0X),
3666 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3667 getNamedOperandIdx(Opcode, OpName::src0Y),
3668 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3669 ImmDeferredIdx,
3670 ImmIdx};
3671 }
3672
3673 return {getNamedOperandIdx(Opcode, OpName::src0),
3674 getNamedOperandIdx(Opcode, OpName::src1),
3675 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3676 }
3677
3678 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3679 const MCOperand &MO = Inst.getOperand(OpIdx);
3680 if (MO.isImm())
3681 return !isInlineConstant(Inst, OpIdx);
3682 if (MO.isReg()) {
3683 auto Reg = MO.getReg();
3684 if (!Reg)
3685 return false;
3686 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3687 auto PReg = mc2PseudoReg(Reg);
3688 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3689 }
3690 return true;
3691 }
3692
3693 // Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3694 // Writelane is special in that it can use SGPR and M0 (which would normally
3695 // count as using the constant bus twice - but in this case it is allowed since
3696 // the lane selector doesn't count as a use of the constant bus). However, it is
3697 // still required to abide by the 1 SGPR rule.
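// For example (illustrative):
//   v_writelane_b32 v1, s2, m0
// reads both s2 and m0, but only the s2 data operand is counted against the
// constant bus in the validation below.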
3698 static bool checkWriteLane(const MCInst &Inst) {
3699 const unsigned Opcode = Inst.getOpcode();
3700 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3701 return false;
3702 const MCOperand &LaneSelOp = Inst.getOperand(2);
3703 if (!LaneSelOp.isReg())
3704 return false;
3705 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3706 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3707 }
3708
3709 bool AMDGPUAsmParser::validateConstantBusLimitations(
3710 const MCInst &Inst, const OperandVector &Operands) {
3711 const unsigned Opcode = Inst.getOpcode();
3712 const MCInstrDesc &Desc = MII.get(Opcode);
3713 unsigned LastSGPR = AMDGPU::NoRegister;
3714 unsigned ConstantBusUseCount = 0;
3715 unsigned NumLiterals = 0;
3716 unsigned LiteralSize;
3717
3718 if (!(Desc.TSFlags &
3719 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3720 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3721 !isVOPD(Opcode))
3722 return true;
3723
3724 if (checkWriteLane(Inst))
3725 return true;
3726
3727 // Check special imm operands (used by madmk, etc)
3728 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3729 ++NumLiterals;
3730 LiteralSize = 4;
3731 }
3732
3733 SmallDenseSet<unsigned> SGPRsUsed;
3734 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3735 if (SGPRUsed != AMDGPU::NoRegister) {
3736 SGPRsUsed.insert(SGPRUsed);
3737 ++ConstantBusUseCount;
3738 }
3739
3740 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3741
3742 for (int OpIdx : OpIndices) {
3743 if (OpIdx == -1)
3744 continue;
3745
3746 const MCOperand &MO = Inst.getOperand(OpIdx);
3747 if (usesConstantBus(Inst, OpIdx)) {
3748 if (MO.isReg()) {
3749 LastSGPR = mc2PseudoReg(MO.getReg());
3750 // Pairs of registers with partial intersections like these
3751 // s0, s[0:1]
3752 // flat_scratch_lo, flat_scratch
3753 // flat_scratch_lo, flat_scratch_hi
3754 // are theoretically valid but they are disabled anyway.
3755 // Note that this code mimics SIInstrInfo::verifyInstruction
3756 if (SGPRsUsed.insert(LastSGPR).second) {
3757 ++ConstantBusUseCount;
3758 }
3759 } else { // Expression or a literal
3760
3761 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3762 continue; // special operand like VINTERP attr_chan
3763
3764 // An instruction may use only one literal.
3765 // This has been validated on the previous step.
3766 // See validateVOPLiteral.
3767 // This literal may be used as more than one operand.
3768 // If all these operands are of the same size,
3769 // this literal counts as one scalar value.
3770 // Otherwise it counts as 2 scalar values.
3771 // See "GFX10 Shader Programming", section 3.6.2.3.
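// For example (illustrative, GFX10+ where a VOP3 literal is allowed), a
// literal reused for two same-sized operands such as
//   v_fma_f32 v0, 0x12345678, 0x12345678, v1
// counts as a single scalar value here, while reuse across operands of
// different sizes would count as two.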
3772
3773 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3774 if (Size < 4)
3775 Size = 4;
3776
3777 if (NumLiterals == 0) {
3778 NumLiterals = 1;
3779 LiteralSize = Size;
3780 } else if (LiteralSize != Size) {
3781 NumLiterals = 2;
3782 }
3783 }
3784 }
3785 }
3786 ConstantBusUseCount += NumLiterals;
3787
3788 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3789 return true;
3790
3791 SMLoc LitLoc = getLitLoc(Operands);
3792 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3793 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3794 Error(Loc, "invalid operand (violates constant bus restrictions)");
3795 return false;
3796 }
3797
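// Illustrative example (assumed GFX11+ VOPD syntax) of the register-bank
// rule checked below:
//   v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
// is accepted because the dst registers have different parity and the
// corresponding src operands come from different VGPR banks, while
//   v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v2, v5
// is rejected with "one dst register must be even and the other odd".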
3798 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3799 const MCInst &Inst, const OperandVector &Operands) {
3800
3801 const unsigned Opcode = Inst.getOpcode();
3802 if (!isVOPD(Opcode))
3803 return true;
3804
3805 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3806
3807 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3808 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3809 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3810 ? Opr.getReg()
3811 : MCRegister::NoRegister;
3812 };
3813
3814 // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source cache.
3815 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3816
3817 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3818 auto InvalidCompOprIdx =
3819 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3820 if (!InvalidCompOprIdx)
3821 return true;
3822
3823 auto CompOprIdx = *InvalidCompOprIdx;
3824 auto ParsedIdx =
3825 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3826 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3827 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3828
3829 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3830 if (CompOprIdx == VOPD::Component::DST) {
3831 Error(Loc, "one dst register must be even and the other odd");
3832 } else {
3833 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3834 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3835 " operands must use different VGPR banks");
3836 }
3837
3838 return false;
3839 }
3840
3841 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3842
3843 const unsigned Opc = Inst.getOpcode();
3844 const MCInstrDesc &Desc = MII.get(Opc);
3845
3846 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3847 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3848 assert(ClampIdx != -1);
3849 return Inst.getOperand(ClampIdx).getImm() == 0;
3850 }
3851
3852 return true;
3853 }
3854
3855 constexpr uint64_t MIMGFlags =
3856 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3857
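// Illustrative example of the vdata size check below (assumed GFX10 syntax):
//   image_load v[0:3], v4, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
// needs four data registers because all four dmask bits are set, plus one
// more register if tfe is enabled.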
3858 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3859 const SMLoc &IDLoc) {
3860
3861 const unsigned Opc = Inst.getOpcode();
3862 const MCInstrDesc &Desc = MII.get(Opc);
3863
3864 if ((Desc.TSFlags & MIMGFlags) == 0)
3865 return true;
3866
3867 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3868 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3869 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3870
3871 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
3872 return true;
3873
3874 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3875 return true;
3876
3877 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3878 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3879 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3880 if (DMask == 0)
3881 DMask = 1;
3882
3883 bool IsPackedD16 = false;
3884 unsigned DataSize =
3885 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3886 if (hasPackedD16()) {
3887 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3888 IsPackedD16 = D16Idx >= 0;
3889 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3890 DataSize = (DataSize + 1) / 2;
3891 }
3892
3893 if ((VDataSize / 4) == DataSize + TFESize)
3894 return true;
3895
3896 StringRef Modifiers;
3897 if (isGFX90A())
3898 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3899 else
3900 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3901
3902 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3903 return false;
3904 }
3905
3906 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3907 const SMLoc &IDLoc) {
3908 const unsigned Opc = Inst.getOpcode();
3909 const MCInstrDesc &Desc = MII.get(Opc);
3910
3911 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3912 return true;
3913
3914 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3915
3916 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3917 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3918 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3919 int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
3920 : AMDGPU::OpName::rsrc;
3921 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3922 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3923 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3924
3925 assert(VAddr0Idx != -1);
3926 assert(SrsrcIdx != -1);
3927 assert(SrsrcIdx > VAddr0Idx);
3928
3929 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3930 if (BaseOpcode->BVH) {
3931 if (IsA16 == BaseOpcode->A16)
3932 return true;
3933 Error(IDLoc, "image address size does not match a16");
3934 return false;
3935 }
3936
3937 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3938 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3939 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3940 unsigned ActualAddrSize =
3941 IsNSA ? SrsrcIdx - VAddr0Idx
3942 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3943
3944 unsigned ExpectedAddrSize =
3945 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3946
3947 if (IsNSA) {
3948 if (hasPartialNSAEncoding() &&
3949 ExpectedAddrSize >
3950 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3951 int VAddrLastIdx = SrsrcIdx - 1;
3952 unsigned VAddrLastSize =
3953 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3954
3955 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3956 }
3957 } else {
3958 if (ExpectedAddrSize > 12)
3959 ExpectedAddrSize = 16;
3960
3961 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3962 // This provides backward compatibility for assembly created
3963 // before 160b/192b/224b types were directly supported.
3964 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3965 return true;
3966 }
3967
3968 if (ActualAddrSize == ExpectedAddrSize)
3969 return true;
3970
3971 Error(IDLoc, "image address size does not match dim and a16");
3972 return false;
3973 }
3974
3975 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3976
3977 const unsigned Opc = Inst.getOpcode();
3978 const MCInstrDesc &Desc = MII.get(Opc);
3979
3980 if ((Desc.TSFlags & MIMGFlags) == 0)
3981 return true;
3982 if (!Desc.mayLoad() || !Desc.mayStore())
3983 return true; // Not atomic
3984
3985 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3986 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3987
3988 // This is an incomplete check because image_atomic_cmpswap
3989 // may only use 0x3 and 0xf while other atomic operations
3990 // may use 0x1 and 0x3. However, these limitations are
3991 // verified when we check that dmask matches dst size.
3992 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3993 }
3994
3995 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3996
3997 const unsigned Opc = Inst.getOpcode();
3998 const MCInstrDesc &Desc = MII.get(Opc);
3999
4000 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4001 return true;
4002
4003 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4004 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4005
4006 // GATHER4 instructions use dmask in a different fashion compared to
4007 // other MIMG instructions. The only useful DMASK values are
4008 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4009 // (red,red,red,red) etc.) The ISA document doesn't mention
4010 // this.
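// For example (illustrative), dmask:0x2 gathers the green component of the
// four sampled texels into the four result registers.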
4011 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4012 }
4013
4014 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4015 const unsigned Opc = Inst.getOpcode();
4016 const MCInstrDesc &Desc = MII.get(Opc);
4017
4018 if ((Desc.TSFlags & MIMGFlags) == 0)
4019 return true;
4020
4021 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4022 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4023 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4024
4025 if (!BaseOpcode->MSAA)
4026 return true;
4027
4028 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4029 assert(DimIdx != -1);
4030
4031 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4032 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4033
4034 return DimInfo->MSAA;
4035 }
4036
4037 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4038 {
4039 switch (Opcode) {
4040 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4041 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4042 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4043 return true;
4044 default:
4045 return false;
4046 }
4047 }
4048
4049 // movrels* opcodes should only allow VGPRs as src0.
4050 // This is specified in the .td description for vop1/vop3,
4051 // but sdwa is handled differently. See isSDWAOperand.
4052 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4053 const OperandVector &Operands) {
4054
4055 const unsigned Opc = Inst.getOpcode();
4056 const MCInstrDesc &Desc = MII.get(Opc);
4057
4058 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4059 return true;
4060
4061 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4062 assert(Src0Idx != -1);
4063
4064 SMLoc ErrLoc;
4065 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4066 if (Src0.isReg()) {
4067 auto Reg = mc2PseudoReg(Src0.getReg());
4068 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4069 if (!isSGPR(Reg, TRI))
4070 return true;
4071 ErrLoc = getRegLoc(Reg, Operands);
4072 } else {
4073 ErrLoc = getConstLoc(Operands);
4074 }
4075
4076 Error(ErrLoc, "source operand must be a VGPR");
4077 return false;
4078 }
4079
4080 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4081 const OperandVector &Operands) {
4082
4083 const unsigned Opc = Inst.getOpcode();
4084
4085 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4086 return true;
4087
4088 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4089 assert(Src0Idx != -1);
4090
4091 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4092 if (!Src0.isReg())
4093 return true;
4094
4095 auto Reg = mc2PseudoReg(Src0.getReg());
4096 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4097 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4098 Error(getRegLoc(Reg, Operands),
4099 "source operand must be either a VGPR or an inline constant");
4100 return false;
4101 }
4102
4103 return true;
4104 }
4105
4106 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4107 const OperandVector &Operands) {
4108 unsigned Opcode = Inst.getOpcode();
4109 const MCInstrDesc &Desc = MII.get(Opcode);
4110
4111 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4112 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4113 return true;
4114
4115 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4116 if (Src2Idx == -1)
4117 return true;
4118
4119 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4120 Error(getConstLoc(Operands),
4121 "inline constants are not allowed for this operand");
4122 return false;
4123 }
4124
4125 return true;
4126 }
4127
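// For MFMA results wider than 128 bits, src2 must either exactly match the
// dst register tuple or not overlap it at all. Illustrative (assumed
// mnemonic and register choice):
//   v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[0:15]   // accepted
//   v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[8:23]   // rejected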
4128 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4129 const OperandVector &Operands) {
4130 const unsigned Opc = Inst.getOpcode();
4131 const MCInstrDesc &Desc = MII.get(Opc);
4132
4133 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4134 return true;
4135
4136 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4137 if (Src2Idx == -1)
4138 return true;
4139
4140 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4141 if (!Src2.isReg())
4142 return true;
4143
4144 MCRegister Src2Reg = Src2.getReg();
4145 MCRegister DstReg = Inst.getOperand(0).getReg();
4146 if (Src2Reg == DstReg)
4147 return true;
4148
4149 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4150 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4151 return true;
4152
4153 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4154 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4155 "source 2 operand must not partially overlap with dst");
4156 return false;
4157 }
4158
4159 return true;
4160 }
4161
4162 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4163 switch (Inst.getOpcode()) {
4164 default:
4165 return true;
4166 case V_DIV_SCALE_F32_gfx6_gfx7:
4167 case V_DIV_SCALE_F32_vi:
4168 case V_DIV_SCALE_F32_gfx10:
4169 case V_DIV_SCALE_F64_gfx6_gfx7:
4170 case V_DIV_SCALE_F64_vi:
4171 case V_DIV_SCALE_F64_gfx10:
4172 break;
4173 }
4174
4175 // TODO: Check that src0 = src1 or src2.
4176
4177 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4178 AMDGPU::OpName::src1_modifiers,
4179 AMDGPU::OpName::src2_modifiers}) {
4180 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4181 .getImm() &
4182 SISrcMods::ABS) {
4183 return false;
4184 }
4185 }
4186
4187 return true;
4188 }
4189
4190 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4191
4192 const unsigned Opc = Inst.getOpcode();
4193 const MCInstrDesc &Desc = MII.get(Opc);
4194
4195 if ((Desc.TSFlags & MIMGFlags) == 0)
4196 return true;
4197
4198 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4199 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4200 if (isCI() || isSI())
4201 return false;
4202 }
4203
4204 return true;
4205 }
4206
4207 static bool IsRevOpcode(const unsigned Opcode)
4208 {
4209 switch (Opcode) {
4210 case AMDGPU::V_SUBREV_F32_e32:
4211 case AMDGPU::V_SUBREV_F32_e64:
4212 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4213 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4214 case AMDGPU::V_SUBREV_F32_e32_vi:
4215 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4216 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4217 case AMDGPU::V_SUBREV_F32_e64_vi:
4218
4219 case AMDGPU::V_SUBREV_CO_U32_e32:
4220 case AMDGPU::V_SUBREV_CO_U32_e64:
4221 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4222 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4223
4224 case AMDGPU::V_SUBBREV_U32_e32:
4225 case AMDGPU::V_SUBBREV_U32_e64:
4226 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4227 case AMDGPU::V_SUBBREV_U32_e32_vi:
4228 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4229 case AMDGPU::V_SUBBREV_U32_e64_vi:
4230
4231 case AMDGPU::V_SUBREV_U32_e32:
4232 case AMDGPU::V_SUBREV_U32_e64:
4233 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4234 case AMDGPU::V_SUBREV_U32_e32_vi:
4235 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4236 case AMDGPU::V_SUBREV_U32_e64_vi:
4237
4238 case AMDGPU::V_SUBREV_F16_e32:
4239 case AMDGPU::V_SUBREV_F16_e64:
4240 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4241 case AMDGPU::V_SUBREV_F16_e32_vi:
4242 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4243 case AMDGPU::V_SUBREV_F16_e64_vi:
4244
4245 case AMDGPU::V_SUBREV_U16_e32:
4246 case AMDGPU::V_SUBREV_U16_e64:
4247 case AMDGPU::V_SUBREV_U16_e32_vi:
4248 case AMDGPU::V_SUBREV_U16_e64_vi:
4249
4250 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4251 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4252 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4253
4254 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4255 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4256
4257 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4258 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4259
4260 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4261 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4262
4263 case AMDGPU::V_LSHRREV_B32_e32:
4264 case AMDGPU::V_LSHRREV_B32_e64:
4265 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4266 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4267 case AMDGPU::V_LSHRREV_B32_e32_vi:
4268 case AMDGPU::V_LSHRREV_B32_e64_vi:
4269 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4270 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4271
4272 case AMDGPU::V_ASHRREV_I32_e32:
4273 case AMDGPU::V_ASHRREV_I32_e64:
4274 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4275 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4276 case AMDGPU::V_ASHRREV_I32_e32_vi:
4277 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4278 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4279 case AMDGPU::V_ASHRREV_I32_e64_vi:
4280
4281 case AMDGPU::V_LSHLREV_B32_e32:
4282 case AMDGPU::V_LSHLREV_B32_e64:
4283 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4284 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4285 case AMDGPU::V_LSHLREV_B32_e32_vi:
4286 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4287 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4288 case AMDGPU::V_LSHLREV_B32_e64_vi:
4289
4290 case AMDGPU::V_LSHLREV_B16_e32:
4291 case AMDGPU::V_LSHLREV_B16_e64:
4292 case AMDGPU::V_LSHLREV_B16_e32_vi:
4293 case AMDGPU::V_LSHLREV_B16_e64_vi:
4294 case AMDGPU::V_LSHLREV_B16_gfx10:
4295
4296 case AMDGPU::V_LSHRREV_B16_e32:
4297 case AMDGPU::V_LSHRREV_B16_e64:
4298 case AMDGPU::V_LSHRREV_B16_e32_vi:
4299 case AMDGPU::V_LSHRREV_B16_e64_vi:
4300 case AMDGPU::V_LSHRREV_B16_gfx10:
4301
4302 case AMDGPU::V_ASHRREV_I16_e32:
4303 case AMDGPU::V_ASHRREV_I16_e64:
4304 case AMDGPU::V_ASHRREV_I16_e32_vi:
4305 case AMDGPU::V_ASHRREV_I16_e64_vi:
4306 case AMDGPU::V_ASHRREV_I16_gfx10:
4307
4308 case AMDGPU::V_LSHLREV_B64_e64:
4309 case AMDGPU::V_LSHLREV_B64_gfx10:
4310 case AMDGPU::V_LSHLREV_B64_vi:
4311
4312 case AMDGPU::V_LSHRREV_B64_e64:
4313 case AMDGPU::V_LSHRREV_B64_gfx10:
4314 case AMDGPU::V_LSHRREV_B64_vi:
4315
4316 case AMDGPU::V_ASHRREV_I64_e64:
4317 case AMDGPU::V_ASHRREV_I64_gfx10:
4318 case AMDGPU::V_ASHRREV_I64_vi:
4319
4320 case AMDGPU::V_PK_LSHLREV_B16:
4321 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4322 case AMDGPU::V_PK_LSHLREV_B16_vi:
4323
4324 case AMDGPU::V_PK_LSHRREV_B16:
4325 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4326 case AMDGPU::V_PK_LSHRREV_B16_vi:
4327 case AMDGPU::V_PK_ASHRREV_I16:
4328 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4329 case AMDGPU::V_PK_ASHRREV_I16_vi:
4330 return true;
4331 default:
4332 return false;
4333 }
4334 }
4335
4336 std::optional<StringRef>
4337 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4338
4339 using namespace SIInstrFlags;
4340 const unsigned Opcode = Inst.getOpcode();
4341 const MCInstrDesc &Desc = MII.get(Opcode);
4342
4343 // The lds_direct register is defined so that it can be used
4344 // with 9-bit operands only. Ignore encodings which do not accept these.
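// For example (illustrative), "v_mov_b32 v0, lds_direct" is accepted on
// targets that support it, while using lds_direct as src1/src2 or with a
// *rev opcode is rejected below.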
4345 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4346 if ((Desc.TSFlags & Enc) == 0)
4347 return std::nullopt;
4348
4349 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4350 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4351 if (SrcIdx == -1)
4352 break;
4353 const auto &Src = Inst.getOperand(SrcIdx);
4354 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4355
4356 if (isGFX90A() || isGFX11Plus())
4357 return StringRef("lds_direct is not supported on this GPU");
4358
4359 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4360 return StringRef("lds_direct cannot be used with this instruction");
4361
4362 if (SrcName != OpName::src0)
4363 return StringRef("lds_direct may be used as src0 only");
4364 }
4365 }
4366
4367 return std::nullopt;
4368 }
4369
4370 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4371 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4372 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4373 if (Op.isFlatOffset())
4374 return Op.getStartLoc();
4375 }
4376 return getLoc();
4377 }
4378
4379 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4380 const OperandVector &Operands) {
4381 auto Opcode = Inst.getOpcode();
4382 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4383 if (OpNum == -1)
4384 return true;
4385
4386 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4387 if ((TSFlags & SIInstrFlags::FLAT))
4388 return validateFlatOffset(Inst, Operands);
4389
4390 if ((TSFlags & SIInstrFlags::SMRD))
4391 return validateSMEMOffset(Inst, Operands);
4392
4393 const auto &Op = Inst.getOperand(OpNum);
4394 if (isGFX12Plus() &&
4395 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4396 const unsigned OffsetSize = 24;
4397 if (!isIntN(OffsetSize, Op.getImm())) {
4398 Error(getFlatOffsetLoc(Operands),
4399 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4400 return false;
4401 }
4402 } else {
4403 const unsigned OffsetSize = 16;
4404 if (!isUIntN(OffsetSize, Op.getImm())) {
4405 Error(getFlatOffsetLoc(Operands),
4406 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4407 return false;
4408 }
4409 }
4410 return true;
4411 }
4412
4413 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4414 const OperandVector &Operands) {
4415 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4416 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4417 return true;
4418
4419 auto Opcode = Inst.getOpcode();
4420 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4421 assert(OpNum != -1);
4422
4423 const auto &Op = Inst.getOperand(OpNum);
4424 if (!hasFlatOffsets() && Op.getImm() != 0) {
4425 Error(getFlatOffsetLoc(Operands),
4426 "flat offset modifier is not supported on this GPU");
4427 return false;
4428 }
4429
4430 // For pre-GFX12 FLAT instructions the offset must be positive;
4431 // MSB is ignored and forced to zero.
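// For example (illustrative), "flat_load_dword v0, v[2:3] offset:-8" is
// rejected on such targets, while the same negative offset is accepted for
// global/scratch instructions, which allow signed offsets.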
4432 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4433 bool AllowNegative =
4434 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4435 isGFX12Plus();
4436 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4437 Error(getFlatOffsetLoc(Operands),
4438 Twine("expected a ") +
4439 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4440 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4441 return false;
4442 }
4443
4444 return true;
4445 }
4446
4447 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4448 // Start with second operand because SMEM Offset cannot be dst or src0.
4449 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4450 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4451 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4452 return Op.getStartLoc();
4453 }
4454 return getLoc();
4455 }
4456
4457 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4458 const OperandVector &Operands) {
4459 if (isCI() || isSI())
4460 return true;
4461
4462 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4463 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4464 return true;
4465
4466 auto Opcode = Inst.getOpcode();
4467 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4468 if (OpNum == -1)
4469 return true;
4470
4471 const auto &Op = Inst.getOperand(OpNum);
4472 if (!Op.isImm())
4473 return true;
4474
4475 uint64_t Offset = Op.getImm();
4476 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4477 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4478 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4479 return true;
4480
4481 Error(getSMEMOffsetLoc(Operands),
4482 isGFX12Plus() ? "expected a 24-bit signed offset"
4483 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4484 : "expected a 21-bit signed offset");
4485
4486 return false;
4487 }
4488
4489 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4490 unsigned Opcode = Inst.getOpcode();
4491 const MCInstrDesc &Desc = MII.get(Opcode);
4492 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4493 return true;
4494
4495 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4496 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4497
4498 const int OpIndices[] = { Src0Idx, Src1Idx };
4499
4500 unsigned NumExprs = 0;
4501 unsigned NumLiterals = 0;
4502 uint32_t LiteralValue;
4503
4504 for (int OpIdx : OpIndices) {
4505 if (OpIdx == -1) break;
4506
4507 const MCOperand &MO = Inst.getOperand(OpIdx);
4508 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4509 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4510 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4511 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4512 if (NumLiterals == 0 || LiteralValue != Value) {
4513 LiteralValue = Value;
4514 ++NumLiterals;
4515 }
4516 } else if (MO.isExpr()) {
4517 ++NumExprs;
4518 }
4519 }
4520 }
4521
4522 return NumLiterals + NumExprs <= 1;
4523 }
4524
4525 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4526 const unsigned Opc = Inst.getOpcode();
4527 if (isPermlane16(Opc)) {
4528 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4529 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4530
4531 if (OpSel & ~3)
4532 return false;
4533 }
4534
4535 uint64_t TSFlags = MII.get(Opc).TSFlags;
4536
4537 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4538 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4539 if (OpSelIdx != -1) {
4540 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4541 return false;
4542 }
4543 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4544 if (OpSelHiIdx != -1) {
4545 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4546 return false;
4547 }
4548 }
4549
4550 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4551 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4552 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4553 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4554 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4555 if (OpSel & 3)
4556 return false;
4557 }
4558
4559 return true;
4560 }
4561
4562 bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4563 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4564
4565 const unsigned Opc = Inst.getOpcode();
4566 uint64_t TSFlags = MII.get(Opc).TSFlags;
4567
4568 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4569 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4570 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4571 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4572 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4573 !(TSFlags & SIInstrFlags::IsSWMMAC))
4574 return true;
4575
4576 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4577 if (NegIdx == -1)
4578 return true;
4579
4580 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4581
4582 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier
4583 // is allowed on some src operands and not on others.
4584 // Conveniently, such instructions don't have a src_modifiers operand
4585 // for the src operands that don't allow neg, because they also don't allow opsel.
4586
4587 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4588 AMDGPU::OpName::src1_modifiers,
4589 AMDGPU::OpName::src2_modifiers};
4590
4591 for (unsigned i = 0; i < 3; ++i) {
4592 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4593 if (Neg & (1 << i))
4594 return false;
4595 }
4596 }
4597
4598 return true;
4599 }
4600
4601 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4602 const OperandVector &Operands) {
4603 const unsigned Opc = Inst.getOpcode();
4604 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4605 if (DppCtrlIdx >= 0) {
4606 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4607
4608 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4609 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4610 // DP ALU DPP is supported for row_newbcast only on GFX9*
4611 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4612 Error(S, "DP ALU dpp only supports row_newbcast");
4613 return false;
4614 }
4615 }
4616
4617 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4618 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4619
4620 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4621 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4622 if (Src1Idx >= 0) {
4623 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4624 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4625 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4626 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4627 SMLoc S = getRegLoc(Reg, Operands);
4628 Error(S, "invalid operand for instruction");
4629 return false;
4630 }
4631 if (Src1.isImm()) {
4632 Error(getInstLoc(Operands),
4633 "src1 immediate operand invalid for instruction");
4634 return false;
4635 }
4636 }
4637 }
4638
4639 return true;
4640 }
4641
4642 // Check if VCC register matches wavefront size
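// For example (illustrative), in wave32 mode "v_cndmask_b32 v0, v1, v2, vcc_lo"
// is the expected form, while plain vcc matches only in wave64 mode.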
4643 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4644 auto FB = getFeatureBits();
4645 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4646 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4647 }
4648
4649 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
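// For example (illustrative): "v_add_f32_e64 v0, 0x12345678, v1" requires
// GFX10+ (FeatureVOP3Literal), and an instruction that would need two
// different literals, e.g. "v_fma_f32 v0, 0x11111111, 0x22222222, v2",
// is rejected on all targets.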
4650 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4651 const OperandVector &Operands) {
4652 unsigned Opcode = Inst.getOpcode();
4653 const MCInstrDesc &Desc = MII.get(Opcode);
4654 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4655 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4656 !HasMandatoryLiteral && !isVOPD(Opcode))
4657 return true;
4658
4659 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4660
4661 unsigned NumExprs = 0;
4662 unsigned NumLiterals = 0;
4663 uint32_t LiteralValue;
4664
4665 for (int OpIdx : OpIndices) {
4666 if (OpIdx == -1)
4667 continue;
4668
4669 const MCOperand &MO = Inst.getOperand(OpIdx);
4670 if (!MO.isImm() && !MO.isExpr())
4671 continue;
4672 if (!isSISrcOperand(Desc, OpIdx))
4673 continue;
4674
4675 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4676 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4677 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4678 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4679 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4680
4681 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4682 Error(getLitLoc(Operands), "invalid operand for instruction");
4683 return false;
4684 }
4685
4686 if (IsFP64 && IsValid32Op)
4687 Value = Hi_32(Value);
4688
4689 if (NumLiterals == 0 || LiteralValue != Value) {
4690 LiteralValue = Value;
4691 ++NumLiterals;
4692 }
4693 } else if (MO.isExpr()) {
4694 ++NumExprs;
4695 }
4696 }
4697 NumLiterals += NumExprs;
4698
4699 if (!NumLiterals)
4700 return true;
4701
4702 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4703 Error(getLitLoc(Operands), "literal operands are not supported");
4704 return false;
4705 }
4706
4707 if (NumLiterals > 1) {
4708 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4709 return false;
4710 }
4711
4712 return true;
4713 }
4714
4715 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4716 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4717 const MCRegisterInfo *MRI) {
4718 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4719 if (OpIdx < 0)
4720 return -1;
4721
4722 const MCOperand &Op = Inst.getOperand(OpIdx);
4723 if (!Op.isReg())
4724 return -1;
4725
4726 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4727 auto Reg = Sub ? Sub : Op.getReg();
4728 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4729 return AGPR32.contains(Reg) ? 1 : 0;
4730 }
4731
4732 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4733 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4734 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4735 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4736 SIInstrFlags::DS)) == 0)
4737 return true;
4738
4739 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4740 : AMDGPU::OpName::vdata;
4741
4742 const MCRegisterInfo *MRI = getMRI();
4743 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4744 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4745
4746 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4747 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4748 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4749 return false;
4750 }
4751
4752 auto FB = getFeatureBits();
4753 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4754 if (DataAreg < 0 || DstAreg < 0)
4755 return true;
4756 return DstAreg == DataAreg;
4757 }
4758
4759 return DstAreg < 1 && DataAreg < 1;
4760 }
4761
4762 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4763 auto FB = getFeatureBits();
4764 if (!FB[AMDGPU::FeatureGFX90AInsts])
4765 return true;
4766
4767 const MCRegisterInfo *MRI = getMRI();
4768 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4769 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4770 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4771 const MCOperand &Op = Inst.getOperand(I);
4772 if (!Op.isReg())
4773 continue;
4774
4775 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4776 if (!Sub)
4777 continue;
4778
4779 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4780 return false;
4781 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4782 return false;
4783 }
4784
4785 return true;
4786 }
4787
4788 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4789 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4790 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4791 if (Op.isBLGP())
4792 return Op.getStartLoc();
4793 }
4794 return SMLoc();
4795 }
4796
4797 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4798 const OperandVector &Operands) {
4799 unsigned Opc = Inst.getOpcode();
4800 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4801 if (BlgpIdx == -1)
4802 return true;
4803 SMLoc BLGPLoc = getBLGPLoc(Operands);
4804 if (!BLGPLoc.isValid())
4805 return true;
4806 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4807 auto FB = getFeatureBits();
4808 bool UsesNeg = false;
4809 if (FB[AMDGPU::FeatureGFX940Insts]) {
4810 switch (Opc) {
4811 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4812 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4813 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4814 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4815 UsesNeg = true;
4816 }
4817 }
4818
4819 if (IsNeg == UsesNeg)
4820 return true;
4821
4822 Error(BLGPLoc,
4823 UsesNeg ? "invalid modifier: blgp is not supported"
4824 : "invalid modifier: neg is not supported");
4825
4826 return false;
4827 }
4828
4829 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4830 const OperandVector &Operands) {
4831 if (!isGFX11Plus())
4832 return true;
4833
4834 unsigned Opc = Inst.getOpcode();
4835 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4836 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4837 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4838 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4839 return true;
4840
4841 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4842 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4843 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4844 if (Reg == AMDGPU::SGPR_NULL)
4845 return true;
4846
4847 SMLoc RegLoc = getRegLoc(Reg, Operands);
4848 Error(RegLoc, "src0 must be null");
4849 return false;
4850 }
4851
4852 bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4853 const OperandVector &Operands) {
4854 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4855 if ((TSFlags & SIInstrFlags::DS) == 0)
4856 return true;
4857 if (TSFlags & SIInstrFlags::GWS)
4858 return validateGWS(Inst, Operands);
4859 // Only validate GDS for non-GWS instructions.
4860 if (hasGDS())
4861 return true;
4862 int GDSIdx =
4863 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4864 if (GDSIdx < 0)
4865 return true;
4866 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4867 if (GDS) {
4868 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4869 Error(S, "gds modifier is not supported on this GPU");
4870 return false;
4871 }
4872 return true;
4873 }
4874
4875 // gfx90a has an undocumented limitation:
4876 // DS_GWS opcodes must use even aligned registers.
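// Illustrative example (not from the original source): a DS_GWS instruction
// whose data0 operand is in v2 (even) is accepted, while one using v3 (odd)
// is rejected with "vgpr must be even aligned".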
4877 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4878 const OperandVector &Operands) {
4879 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4880 return true;
4881
4882 int Opc = Inst.getOpcode();
4883 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4884 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4885 return true;
4886
4887 const MCRegisterInfo *MRI = getMRI();
4888 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4889 int Data0Pos =
4890 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4891 assert(Data0Pos != -1);
4892 auto Reg = Inst.getOperand(Data0Pos).getReg();
4893 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4894 if (RegIdx & 1) {
4895 SMLoc RegLoc = getRegLoc(Reg, Operands);
4896 Error(RegLoc, "vgpr must be even aligned");
4897 return false;
4898 }
4899
4900 return true;
4901 }
4902
4903 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4904 const OperandVector &Operands,
4905 const SMLoc &IDLoc) {
4906 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4907 AMDGPU::OpName::cpol);
4908 if (CPolPos == -1)
4909 return true;
4910
4911 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4912
4913 if (isGFX12Plus())
4914 return validateTHAndScopeBits(Inst, Operands, CPol);
4915
4916 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4917 if (TSFlags & SIInstrFlags::SMRD) {
4918 if (CPol && (isSI() || isCI())) {
4919 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4920 Error(S, "cache policy is not supported for SMRD instructions");
4921 return false;
4922 }
4923 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4924 Error(IDLoc, "invalid cache policy for SMEM instruction");
4925 return false;
4926 }
4927 }
4928
4929 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4930 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4931 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4932 SIInstrFlags::FLAT;
4933 if (!(TSFlags & AllowSCCModifier)) {
4934 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4935 StringRef CStr(S.getPointer());
4936 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4937 Error(S,
4938 "scc modifier is not supported for this instruction on this GPU");
4939 return false;
4940 }
4941 }
4942
4943 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4944 return true;
4945
4946 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4947 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4948 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4949 : "instruction must use glc");
4950 return false;
4951 }
4952 } else {
4953 if (CPol & CPol::GLC) {
4954 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4955 StringRef CStr(S.getPointer());
4956 S = SMLoc::getFromPointer(
4957 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4958 Error(S, isGFX940() ? "instruction must not use sc0"
4959 : "instruction must not use glc");
4960 return false;
4961 }
4962 }
4963
4964 return true;
4965 }
4966
4967 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4968 const OperandVector &Operands,
4969 const unsigned CPol) {
4970 const unsigned TH = CPol & AMDGPU::CPol::TH;
4971 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4972
4973 const unsigned Opcode = Inst.getOpcode();
4974 const MCInstrDesc &TID = MII.get(Opcode);
4975
4976 auto PrintError = [&](StringRef Msg) {
4977 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4978 Error(S, Msg);
4979 return false;
4980 };
4981
4982 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4983 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
4984 (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
4985 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4986
4987 if (TH == 0)
4988 return true;
4989
4990 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4991 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4992 (TH == AMDGPU::CPol::TH_NT_HT)))
4993 return PrintError("invalid th value for SMEM instruction");
4994
4995 if (TH == AMDGPU::CPol::TH_BYPASS) {
4996 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
4997 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
4998 (Scope == AMDGPU::CPol::SCOPE_SYS &&
4999 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5000 return PrintError("scope and th combination is not valid");
5001 }
5002
5003 bool IsStore = TID.mayStore();
5004 bool IsAtomic =
5005 TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
5006
5007 if (IsAtomic) {
5008 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5009 return PrintError("invalid th value for atomic instructions");
5010 } else if (IsStore) {
5011 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5012 return PrintError("invalid th value for store instructions");
5013 } else {
5014 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5015 return PrintError("invalid th value for load instructions");
5016 }
5017
5018 return true;
5019 }
5020
5021 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5022 if (!isGFX11Plus())
5023 return true;
5024 for (auto &Operand : Operands) {
5025 if (!Operand->isReg())
5026 continue;
5027 unsigned Reg = Operand->getReg();
5028 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5029 Error(getRegLoc(Reg, Operands),
5030 "execz and vccz are not supported on this GPU");
5031 return false;
5032 }
5033 }
5034 return true;
5035 }
5036
5037 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5038 const OperandVector &Operands) {
5039 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5040 if (Desc.mayStore() &&
5041 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5042 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5043 if (Loc != getInstLoc(Operands)) {
5044 Error(Loc, "TFE modifier has no meaning for store instructions");
5045 return false;
5046 }
5047 }
5048
5049 return true;
5050 }
5051
5052 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5053 const SMLoc &IDLoc,
5054 const OperandVector &Operands) {
5055 if (auto ErrMsg = validateLdsDirect(Inst)) {
5056 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5057 return false;
5058 }
5059 if (!validateSOPLiteral(Inst)) {
5060 Error(getLitLoc(Operands),
5061 "only one unique literal operand is allowed");
5062 return false;
5063 }
5064 if (!validateVOPLiteral(Inst, Operands)) {
5065 return false;
5066 }
5067 if (!validateConstantBusLimitations(Inst, Operands)) {
5068 return false;
5069 }
5070 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5071 return false;
5072 }
5073 if (!validateIntClampSupported(Inst)) {
5074 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5075 "integer clamping is not supported on this GPU");
5076 return false;
5077 }
5078 if (!validateOpSel(Inst)) {
5079 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5080 "invalid op_sel operand");
5081 return false;
5082 }
5083 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5084 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5085 "invalid neg_lo operand");
5086 return false;
5087 }
5088 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5089 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5090 "invalid neg_hi operand");
5091 return false;
5092 }
5093 if (!validateDPP(Inst, Operands)) {
5094 return false;
5095 }
5096 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5097 if (!validateMIMGD16(Inst)) {
5098 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5099 "d16 modifier is not supported on this GPU");
5100 return false;
5101 }
5102 if (!validateMIMGMSAA(Inst)) {
5103 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5104 "invalid dim; must be MSAA type");
5105 return false;
5106 }
5107 if (!validateMIMGDataSize(Inst, IDLoc)) {
5108 return false;
5109 }
5110 if (!validateMIMGAddrSize(Inst, IDLoc))
5111 return false;
5112 if (!validateMIMGAtomicDMask(Inst)) {
5113 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5114 "invalid atomic image dmask");
5115 return false;
5116 }
5117 if (!validateMIMGGatherDMask(Inst)) {
5118 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5119 "invalid image_gather dmask: only one bit must be set");
5120 return false;
5121 }
5122 if (!validateMovrels(Inst, Operands)) {
5123 return false;
5124 }
5125 if (!validateOffset(Inst, Operands)) {
5126 return false;
5127 }
5128 if (!validateMAIAccWrite(Inst, Operands)) {
5129 return false;
5130 }
5131 if (!validateMAISrc2(Inst, Operands)) {
5132 return false;
5133 }
5134 if (!validateMFMA(Inst, Operands)) {
5135 return false;
5136 }
5137 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5138 return false;
5139 }
5140
5141 if (!validateAGPRLdSt(Inst)) {
5142 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5143 ? "invalid register class: data and dst should be all VGPR or AGPR"
5144 : "invalid register class: agpr loads and stores not supported on this GPU"
5145 );
5146 return false;
5147 }
5148 if (!validateVGPRAlign(Inst)) {
5149 Error(IDLoc,
5150 "invalid register class: vgpr tuples must be 64 bit aligned");
5151 return false;
5152 }
5153 if (!validateDS(Inst, Operands)) {
5154 return false;
5155 }
5156
5157 if (!validateBLGP(Inst, Operands)) {
5158 return false;
5159 }
5160
5161 if (!validateDivScale(Inst)) {
5162 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5163 return false;
5164 }
5165 if (!validateWaitCnt(Inst, Operands)) {
5166 return false;
5167 }
5168 if (!validateExeczVcczOperands(Operands)) {
5169 return false;
5170 }
5171 if (!validateTFE(Inst, Operands)) {
5172 return false;
5173 }
5174
5175 return true;
5176 }
5177
5178 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5179 const FeatureBitset &FBS,
5180 unsigned VariantID = 0);
5181
5182 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5183 const FeatureBitset &AvailableFeatures,
5184 unsigned VariantID);
5185
5186 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5187 const FeatureBitset &FBS) {
5188 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5189 }
5190
5191 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5192 const FeatureBitset &FBS,
5193 ArrayRef<unsigned> Variants) {
5194 for (auto Variant : Variants) {
5195 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5196 return true;
5197 }
5198
5199 return false;
5200 }
5201
5202 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5203 const SMLoc &IDLoc) {
5204 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5205
5206 // Check if requested instruction variant is supported.
5207 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5208 return false;
5209
5210 // This instruction is not supported.
5211 // Clear any other pending errors because they are no longer relevant.
5212 getParser().clearPendingErrors();
5213
5214 // Requested instruction variant is not supported.
5215 // Check if any other variants are supported.
5216 StringRef VariantName = getMatchedVariantName();
5217 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5218 return Error(IDLoc,
5219 Twine(VariantName,
5220 " variant of this instruction is not supported"));
5221 }
5222
5223 // Check if this instruction may be used with a different wavesize.
5224 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5225 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5226
5227 FeatureBitset FeaturesWS32 = getFeatureBits();
5228 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5229 .flip(AMDGPU::FeatureWavefrontSize32);
5230 FeatureBitset AvailableFeaturesWS32 =
5231 ComputeAvailableFeatures(FeaturesWS32);
5232
5233 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5234 return Error(IDLoc, "instruction requires wavesize=32");
5235 }
5236
5237 // Finally check if this instruction is supported on any other GPU.
5238 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5239 return Error(IDLoc, "instruction not supported on this GPU");
5240 }
5241
5242 // Instruction not supported on any GPU. Probably a typo.
5243 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5244 return Error(IDLoc, "invalid instruction" + Suggestion);
5245 }
5246
5247 static bool isInvalidVOPDY(const OperandVector &Operands,
5248 uint64_t InvalidOprIdx) {
5249 assert(InvalidOprIdx < Operands.size());
5250 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5251 if (Op.isToken() && InvalidOprIdx > 1) {
5252 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5253 return PrevOp.isToken() && PrevOp.getToken() == "::";
5254 }
5255 return false;
5256 }
5257
5258 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5259 OperandVector &Operands,
5260 MCStreamer &Out,
5261 uint64_t &ErrorInfo,
5262 bool MatchingInlineAsm) {
5263 MCInst Inst;
5264 unsigned Result = Match_Success;
5265 for (auto Variant : getMatchedVariants()) {
5266 uint64_t EI;
5267 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5268 Variant);
5269 // We order match statuses from least to most specific and use the most
5270 // specific status as the result:
5271 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5272 if (R == Match_Success || R == Match_MissingFeature ||
5273 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5274 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5275 Result != Match_MissingFeature)) {
5276 Result = R;
5277 ErrorInfo = EI;
5278 }
5279 if (R == Match_Success)
5280 break;
5281 }
5282
5283 if (Result == Match_Success) {
5284 if (!validateInstruction(Inst, IDLoc, Operands)) {
5285 return true;
5286 }
5287 Inst.setLoc(IDLoc);
5288 Out.emitInstruction(Inst, getSTI());
5289 return false;
5290 }
5291
5292 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5293 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5294 return true;
5295 }
5296
5297 switch (Result) {
5298 default: break;
5299 case Match_MissingFeature:
5300 // It has been verified that the specified instruction
5301 // mnemonic is valid. A match was found but it requires
5302 // features which are not supported on this GPU.
5303 return Error(IDLoc, "operands are not valid for this GPU or mode");
5304
5305 case Match_InvalidOperand: {
5306 SMLoc ErrorLoc = IDLoc;
5307 if (ErrorInfo != ~0ULL) {
5308 if (ErrorInfo >= Operands.size()) {
5309 return Error(IDLoc, "too few operands for instruction");
5310 }
5311 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5312 if (ErrorLoc == SMLoc())
5313 ErrorLoc = IDLoc;
5314
5315 if (isInvalidVOPDY(Operands, ErrorInfo))
5316 return Error(ErrorLoc, "invalid VOPDY instruction");
5317 }
5318 return Error(ErrorLoc, "invalid operand for instruction");
5319 }
5320
5321 case Match_MnemonicFail:
5322 llvm_unreachable("Invalid instructions should have been handled already");
5323 }
5324 llvm_unreachable("Implement any new match types added!");
5325 }
5326
5327 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5328 int64_t Tmp = -1;
5329 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5330 return true;
5331 }
5332 if (getParser().parseAbsoluteExpression(Tmp)) {
5333 return true;
5334 }
5335 Ret = static_cast<uint32_t>(Tmp);
5336 return false;
5337 }
5338
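// Example usage of the directive parsed below (the target id string is
// illustrative):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"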
5339 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5340 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5341 return TokError("directive only supported for amdgcn architecture");
5342
5343 std::string TargetIDDirective;
5344 SMLoc TargetStart = getTok().getLoc();
5345 if (getParser().parseEscapedString(TargetIDDirective))
5346 return true;
5347
5348 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5349 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5350 return getParser().Error(TargetRange.Start,
5351 (Twine(".amdgcn_target directive's target id ") +
5352 Twine(TargetIDDirective) +
5353 Twine(" does not match the specified target id ") +
5354 Twine(getTargetStreamer().getTargetID()->toString())).str());
5355
5356 return false;
5357 }
5358
5359 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5360 return Error(Range.Start, "value out of range", Range);
5361 }
5362
5363 bool AMDGPUAsmParser::calculateGPRBlocks(
5364 const FeatureBitset &Features, const MCExpr *VCCUsed,
5365 const MCExpr *FlatScrUsed, bool XNACKUsed,
5366 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5367 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5368 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5369 // TODO(scott.linder): These calculations are duplicated from
5370 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5371 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5372 MCContext &Ctx = getContext();
5373
5374 const MCExpr *NumSGPRs = NextFreeSGPR;
5375 int64_t EvaluatedSGPRs;
5376
5377 if (Version.Major >= 10)
5378 NumSGPRs = MCConstantExpr::create(0, Ctx);
5379 else {
5380 unsigned MaxAddressableNumSGPRs =
5381 IsaInfo::getAddressableNumSGPRs(&getSTI());
5382
5383 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5384 !Features.test(FeatureSGPRInitBug) &&
5385 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5386 return OutOfRangeError(SGPRRange);
5387
5388 const MCExpr *ExtraSGPRs =
5389 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5390 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5391
5392 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5393 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5394 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5395 return OutOfRangeError(SGPRRange);
5396
5397 if (Features.test(FeatureSGPRInitBug))
5398 NumSGPRs =
5399 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5400 }
5401
5402 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5403 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
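// Worked example (illustrative): with NumGPR = 10 and a granule of 4,
// alignTo(max(1, 10), 4) = 12 and 12 / 4 - 1 = 2 encoded blocks.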
5404 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5405 unsigned Granule) -> const MCExpr * {
5406 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5407 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5408 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5409 const MCExpr *AlignToGPR =
5410 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5411 const MCExpr *DivGPR =
5412 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5413 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5414 return SubGPR;
5415 };
5416
5417 VGPRBlocks = GetNumGPRBlocks(
5418 NextFreeVGPR,
5419 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5420 SGPRBlocks =
5421 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5422
5423 return false;
5424 }
5425
5426 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5427 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5428 return TokError("directive only supported for amdgcn architecture");
5429
5430 if (!isHsaAbi(getSTI()))
5431 return TokError("directive only supported for amdhsa OS");
5432
5433 StringRef KernelName;
5434 if (getParser().parseIdentifier(KernelName))
5435 return true;
5436
5437 AMDGPU::MCKernelDescriptor KD =
5438 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5439 &getSTI(), getContext());
5440
5441 StringSet<> Seen;
5442
5443 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5444
5445 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5446 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5447
5448 SMRange VGPRRange;
5449 const MCExpr *NextFreeVGPR = ZeroExpr;
5450 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5451 uint64_t SharedVGPRCount = 0;
5452 uint64_t PreloadLength = 0;
5453 uint64_t PreloadOffset = 0;
5454 SMRange SGPRRange;
5455 const MCExpr *NextFreeSGPR = ZeroExpr;
5456
5457 // Count the number of user SGPRs implied from the enabled feature bits.
5458 unsigned ImpliedUserSGPRCount = 0;
5459
5460 // Track if the asm explicitly contains the directive for the user SGPR
5461 // count.
5462 std::optional<unsigned> ExplicitUserSGPRCount;
5463 const MCExpr *ReserveVCC = OneExpr;
5464 const MCExpr *ReserveFlatScr = OneExpr;
5465 std::optional<bool> EnableWavefrontSize32;
5466
5467 while (true) {
5468 while (trySkipToken(AsmToken::EndOfStatement));
5469
5470 StringRef ID;
5471 SMRange IDRange = getTok().getLocRange();
5472 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5473 return true;
5474
5475 if (ID == ".end_amdhsa_kernel")
5476 break;
5477
5478 if (!Seen.insert(ID).second)
5479 return TokError(".amdhsa_ directives cannot be repeated");
5480
5481 SMLoc ValStart = getLoc();
5482 const MCExpr *ExprVal;
5483 if (getParser().parseExpression(ExprVal))
5484 return true;
5485 SMLoc ValEnd = getLoc();
5486 SMRange ValRange = SMRange(ValStart, ValEnd);
5487
5488 int64_t IVal = 0;
5489 uint64_t Val = IVal;
5490 bool EvaluatableExpr;
5491 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5492 if (IVal < 0)
5493 return OutOfRangeError(ValRange);
5494 Val = IVal;
5495 }
5496
5497 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5498 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5499 return OutOfRangeError(RANGE); \
5500 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5501 getContext());
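// PARSE_BITS_ENTRY range-checks the evaluated value against the field's
// width and then writes VALUE into the ENTRY bit-field of FIELD.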
5502
5503 // Some fields use the parsed value immediately, which requires the
5504 // expression to be resolvable.
5505 #define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5506 if (!(RESOLVED)) \
5507 return Error(IDRange.Start, "directive should have resolvable expression", \
5508 IDRange);
5509
5510 if (ID == ".amdhsa_group_segment_fixed_size") {
5511 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5512 CHAR_BIT>(Val))
5513 return OutOfRangeError(ValRange);
5514 KD.group_segment_fixed_size = ExprVal;
5515 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5516 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5517 CHAR_BIT>(Val))
5518 return OutOfRangeError(ValRange);
5519 KD.private_segment_fixed_size = ExprVal;
5520 } else if (ID == ".amdhsa_kernarg_size") {
5521 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5522 return OutOfRangeError(ValRange);
5523 KD.kernarg_size = ExprVal;
5524 } else if (ID == ".amdhsa_user_sgpr_count") {
5525 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5526 ExplicitUserSGPRCount = Val;
5527 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5528 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5529 if (hasArchitectedFlatScratch())
5530 return Error(IDRange.Start,
5531 "directive is not supported with architected flat scratch",
5532 IDRange);
5533 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5534 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5535 ExprVal, ValRange);
5536 if (Val)
5537 ImpliedUserSGPRCount += 4;
5538 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5539 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5540 if (!hasKernargPreload())
5541 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5542
5543 if (Val > getMaxNumUserSGPRs())
5544 return OutOfRangeError(ValRange);
5545 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5546 ValRange);
5547 if (Val) {
5548 ImpliedUserSGPRCount += Val;
5549 PreloadLength = Val;
5550 }
5551 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5552 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5553 if (!hasKernargPreload())
5554 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5555
5556 if (Val >= 1024)
5557 return OutOfRangeError(ValRange);
5558 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5559 ValRange);
5560 if (Val)
5561 PreloadOffset = Val;
5562 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5563 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5564 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5565 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5566 ValRange);
5567 if (Val)
5568 ImpliedUserSGPRCount += 2;
5569 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5570 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5571 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5572 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5573 ValRange);
5574 if (Val)
5575 ImpliedUserSGPRCount += 2;
5576 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5577 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5578 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5579 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5580 ExprVal, ValRange);
5581 if (Val)
5582 ImpliedUserSGPRCount += 2;
5583 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5584 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5585 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5586 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5587 ValRange);
5588 if (Val)
5589 ImpliedUserSGPRCount += 2;
5590 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5591 if (hasArchitectedFlatScratch())
5592 return Error(IDRange.Start,
5593 "directive is not supported with architected flat scratch",
5594 IDRange);
5595 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5596 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5597 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5598 ExprVal, ValRange);
5599 if (Val)
5600 ImpliedUserSGPRCount += 2;
5601 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5602 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5603 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5604 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5605 ExprVal, ValRange);
5606 if (Val)
5607 ImpliedUserSGPRCount += 1;
5608 } else if (ID == ".amdhsa_wavefront_size32") {
5609 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5610 if (IVersion.Major < 10)
5611 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5612 EnableWavefrontSize32 = Val;
5613 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5614 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5615 ValRange);
5616 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5617 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5618 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5619 ValRange);
5620 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5621 if (hasArchitectedFlatScratch())
5622 return Error(IDRange.Start,
5623 "directive is not supported with architected flat scratch",
5624 IDRange);
5625 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5626 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5627 ValRange);
5628 } else if (ID == ".amdhsa_enable_private_segment") {
5629 if (!hasArchitectedFlatScratch())
5630 return Error(
5631 IDRange.Start,
5632 "directive is not supported without architected flat scratch",
5633 IDRange);
5634 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5635 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5636 ValRange);
5637 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5638 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5639 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5640 ValRange);
5641 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5642 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5643 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5644 ValRange);
5645 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5646 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5647 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5648 ValRange);
5649 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5650 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5651 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5652 ValRange);
5653 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5654 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5655 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5656 ValRange);
5657 } else if (ID == ".amdhsa_next_free_vgpr") {
5658 VGPRRange = ValRange;
5659 NextFreeVGPR = ExprVal;
5660 } else if (ID == ".amdhsa_next_free_sgpr") {
5661 SGPRRange = ValRange;
5662 NextFreeSGPR = ExprVal;
5663 } else if (ID == ".amdhsa_accum_offset") {
5664 if (!isGFX90A())
5665 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5666 AccumOffset = ExprVal;
5667 } else if (ID == ".amdhsa_reserve_vcc") {
5668 if (EvaluatableExpr && !isUInt<1>(Val))
5669 return OutOfRangeError(ValRange);
5670 ReserveVCC = ExprVal;
5671 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5672 if (IVersion.Major < 7)
5673 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5674 if (hasArchitectedFlatScratch())
5675 return Error(IDRange.Start,
5676 "directive is not supported with architected flat scratch",
5677 IDRange);
5678 if (EvaluatableExpr && !isUInt<1>(Val))
5679 return OutOfRangeError(ValRange);
5680 ReserveFlatScr = ExprVal;
5681 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5682 if (IVersion.Major < 8)
5683 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5684 if (!isUInt<1>(Val))
5685 return OutOfRangeError(ValRange);
5686 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5687 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5688 IDRange);
5689 } else if (ID == ".amdhsa_float_round_mode_32") {
5690 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5691 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5692 ValRange);
5693 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5694 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5695 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5696 ValRange);
5697 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5698 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5699 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5700 ValRange);
5701 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5702 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5703 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5704 ValRange);
5705 } else if (ID == ".amdhsa_dx10_clamp") {
5706 if (IVersion.Major >= 12)
5707 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5708 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5709 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5710 ValRange);
5711 } else if (ID == ".amdhsa_ieee_mode") {
5712 if (IVersion.Major >= 12)
5713 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5714 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5715 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5716 ValRange);
5717 } else if (ID == ".amdhsa_fp16_overflow") {
5718 if (IVersion.Major < 9)
5719 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5720 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5721 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5722 ValRange);
5723 } else if (ID == ".amdhsa_tg_split") {
5724 if (!isGFX90A())
5725 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5726 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5727 ExprVal, ValRange);
5728 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5729 if (IVersion.Major < 10)
5730 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5731 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5732 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5733 ValRange);
5734 } else if (ID == ".amdhsa_memory_ordered") {
5735 if (IVersion.Major < 10)
5736 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5737 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5738 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5739 ValRange);
5740 } else if (ID == ".amdhsa_forward_progress") {
5741 if (IVersion.Major < 10)
5742 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5743 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5744 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5745 ValRange);
5746 } else if (ID == ".amdhsa_shared_vgpr_count") {
5747 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5748 if (IVersion.Major < 10 || IVersion.Major >= 12)
5749 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5750 IDRange);
5751 SharedVGPRCount = Val;
5752 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5753 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5754 ValRange);
5755 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5756 PARSE_BITS_ENTRY(
5757 KD.compute_pgm_rsrc2,
5758 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5759 ExprVal, ValRange);
5760 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5761 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5762 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5763 ExprVal, ValRange);
5764 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5765 PARSE_BITS_ENTRY(
5766 KD.compute_pgm_rsrc2,
5767 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5768 ExprVal, ValRange);
5769 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5771 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5772 ExprVal, ValRange);
5773 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5775 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5776 ExprVal, ValRange);
5777 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5778 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5779 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5780 ExprVal, ValRange);
5781 } else if (ID == ".amdhsa_exception_int_div_zero") {
5782 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5783 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5784 ExprVal, ValRange);
5785 } else if (ID == ".amdhsa_round_robin_scheduling") {
5786 if (IVersion.Major < 12)
5787 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5788 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5789 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5790 ValRange);
5791 } else {
5792 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5793 }
5794
5795 #undef PARSE_BITS_ENTRY
5796 }
5797
5798 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5799 return TokError(".amdhsa_next_free_vgpr directive is required");
5800
5801 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5802 return TokError(".amdhsa_next_free_sgpr directive is required");
5803
5804 const MCExpr *VGPRBlocks;
5805 const MCExpr *SGPRBlocks;
5806 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5807 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5808 EnableWavefrontSize32, NextFreeVGPR,
5809 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5810 SGPRBlocks))
5811 return true;
5812
5813 int64_t EvaluatedVGPRBlocks;
5814 bool VGPRBlocksEvaluatable =
5815 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
5816 if (VGPRBlocksEvaluatable &&
5817 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5818 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
5819 return OutOfRangeError(VGPRRange);
5820 }
5821 AMDGPU::MCKernelDescriptor::bits_set(
5822 KD.compute_pgm_rsrc1, VGPRBlocks,
5823 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5824 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5825
5826 int64_t EvaluatedSGPRBlocks;
5827 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
5828 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5829 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
5830 return OutOfRangeError(SGPRRange);
5831 AMDGPU::MCKernelDescriptor::bits_set(
5832 KD.compute_pgm_rsrc1, SGPRBlocks,
5833 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5834 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5835
5836 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5837 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5838 "enabled user SGPRs");
5839
5840 unsigned UserSGPRCount =
5841 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5842
5843 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5844 return TokError("too many user SGPRs enabled");
5845 AMDGPU::MCKernelDescriptor::bits_set(
5846 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5847 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5848 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5849
5850 int64_t IVal = 0;
5851 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5852 return TokError("Kernarg size should be resolvable");
5853 uint64_t kernarg_size = IVal;
5854 if (PreloadLength && kernarg_size &&
5855 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5856 return TokError("Kernarg preload length + offset is larger than the "
5857 "kernarg segment size");
5858
5859 if (isGFX90A()) {
5860 if (!Seen.contains(".amdhsa_accum_offset"))
5861 return TokError(".amdhsa_accum_offset directive is required");
5862 int64_t EvaluatedAccum;
5863 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
5864 uint64_t UEvaluatedAccum = EvaluatedAccum;
5865 if (AccumEvaluatable &&
5866 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
5867 return TokError("accum_offset should be in range [4..256] in "
5868 "increments of 4");
5869
5870 int64_t EvaluatedNumVGPR;
5871 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
5872 AccumEvaluatable &&
5873 UEvaluatedAccum >
5874 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
5875 return TokError("accum_offset exceeds total VGPR allocation");
5876 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
5877 MCBinaryExpr::createDiv(
5878 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
5879 MCConstantExpr::create(1, getContext()), getContext());
5880 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
5881 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5882 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5883 getContext());
5884 }
5885
5886 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5887 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5888 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5889 return TokError("shared_vgpr_count directive not valid on "
5890 "wavefront size 32");
5891 }
5892
5893 if (VGPRBlocksEvaluatable &&
5894 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
5895 63)) {
5896 return TokError("shared_vgpr_count*2 + "
5897 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5898 "exceed 63\n");
5899 }
5900 }
5901
5902 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5903 NextFreeVGPR, NextFreeSGPR,
5904 ReserveVCC, ReserveFlatScr);
5905 return false;
5906 }
5907
5908 bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5909 uint32_t Version;
5910 if (ParseAsAbsoluteExpression(Version))
5911 return true;
5912
5913 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5914 return false;
5915 }
5916
5917 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5918 AMDGPUMCKernelCodeT &C) {
5919 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5920 // assembly for backwards compatibility.
5921 if (ID == "max_scratch_backing_memory_byte_size") {
5922 Parser.eatToEndOfStatement();
5923 return false;
5924 }
5925
5926 SmallString<40> ErrStr;
5927 raw_svector_ostream Err(ErrStr);
5928 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
5929 return TokError(Err.str());
5930 }
5931 Lex();
5932
5933 if (ID == "enable_wavefront_size32") {
5934 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5935 if (!isGFX10Plus())
5936 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5937 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5938 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5939 } else {
5940 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5941 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5942 }
5943 }
5944
5945 if (ID == "wavefront_size") {
5946 if (C.wavefront_size == 5) {
5947 if (!isGFX10Plus())
5948 return TokError("wavefront_size=5 is only allowed on GFX10+");
5949 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5950 return TokError("wavefront_size=5 requires +WavefrontSize32");
5951 } else if (C.wavefront_size == 6) {
5952 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5953 return TokError("wavefront_size=6 requires +WavefrontSize64");
5954 }
5955 }
5956
5957 return false;
5958 }
5959
5960 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5961 AMDGPUMCKernelCodeT KernelCode;
5962 KernelCode.initDefault(&getSTI(), getContext());
5963
5964 while (true) {
5965 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5966 // will set the current token to EndOfStatement.
5967 while(trySkipToken(AsmToken::EndOfStatement));
5968
5969 StringRef ID;
5970 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5971 return true;
5972
5973 if (ID == ".end_amd_kernel_code_t")
5974 break;
5975
5976 if (ParseAMDKernelCodeTValue(ID, KernelCode))
5977 return true;
5978 }
5979
5980 KernelCode.validate(&getSTI(), getContext());
5981 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
5982
5983 return false;
5984 }
5985
5986 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5987 StringRef KernelName;
5988 if (!parseId(KernelName, "expected symbol name"))
5989 return true;
5990
5991 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5992 ELF::STT_AMDGPU_HSA_KERNEL);
5993
5994 KernelScope.initialize(getContext());
5995 return false;
5996 }
5997
5998 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5999 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
6000 return Error(getLoc(),
6001 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6002 "architectures");
6003 }
6004
6005 auto TargetIDDirective = getLexer().getTok().getStringContents();
6006 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6007 return Error(getParser().getTok().getLoc(), "target id must match options");
6008
6009 getTargetStreamer().EmitISAVersion();
6010 Lex();
6011
6012 return false;
6013 }
6014
6015 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6016 assert(isHsaAbi(getSTI()));
6017
6018 std::string HSAMetadataString;
6019 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6020 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6021 return true;
6022
6023 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6024 return Error(getLoc(), "invalid HSA metadata");
6025
6026 return false;
6027 }
6028
6029 /// Common code to parse out a block of text (typically YAML) between start and
6030 /// end directives.
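/// Illustrative example (directive names assumed, not taken from this
/// source): the HSA metadata parser collects everything between
///   .amdgpu_metadata
///     <YAML text>
///   .end_amdgpu_metadata
/// into a single string.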
6031 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6032 const char *AssemblerDirectiveEnd,
6033 std::string &CollectString) {
6034
6035 raw_string_ostream CollectStream(CollectString);
6036
6037 getLexer().setSkipSpace(false);
6038
6039 bool FoundEnd = false;
6040 while (!isToken(AsmToken::Eof)) {
6041 while (isToken(AsmToken::Space)) {
6042 CollectStream << getTokenStr();
6043 Lex();
6044 }
6045
6046 if (trySkipId(AssemblerDirectiveEnd)) {
6047 FoundEnd = true;
6048 break;
6049 }
6050
6051 CollectStream << Parser.parseStringToEndOfStatement()
6052 << getContext().getAsmInfo()->getSeparatorString();
6053
6054 Parser.eatToEndOfStatement();
6055 }
6056
6057 getLexer().setSkipSpace(true);
6058
6059 if (isToken(AsmToken::Eof) && !FoundEnd) {
6060 return TokError(Twine("expected directive ") +
6061 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6062 }
6063
6064 CollectStream.flush();
6065 return false;
6066 }
6067
6068 /// Parse the assembler directive for new MsgPack-format PAL metadata.
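/// Illustrative usage (directive names assumed from AMDGPU::PALMD):
///   .amdgpu_pal_metadata
///     <MsgPack-formatted metadata as YAML text>
///   .end_amdgpu_pal_metadata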
6069 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6070 std::string String;
6071 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6072 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6073 return true;
6074
6075 auto PALMetadata = getTargetStreamer().getPALMetadata();
6076 if (!PALMetadata->setFromString(String))
6077 return Error(getLoc(), "invalid PAL metadata");
6078 return false;
6079 }
6080
6081 /// Parse the assembler directive for old linear-format PAL metadata.
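/// Illustrative usage (directive name and values are made up); the directive
/// takes an even number of comma-separated register/value pairs:
///   .amd_amdgpu_pal_metadata 0x10000000, 0x12345678, 0x10000001, 0x9abcdef0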
6082 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6083 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6084 return Error(getLoc(),
6085 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6086 "not available on non-amdpal OSes")).str());
6087 }
6088
6089 auto PALMetadata = getTargetStreamer().getPALMetadata();
6090 PALMetadata->setLegacy();
6091 for (;;) {
6092 uint32_t Key, Value;
6093 if (ParseAsAbsoluteExpression(Key)) {
6094 return TokError(Twine("invalid value in ") +
6095 Twine(PALMD::AssemblerDirective));
6096 }
6097 if (!trySkipToken(AsmToken::Comma)) {
6098 return TokError(Twine("expected an even number of values in ") +
6099 Twine(PALMD::AssemblerDirective));
6100 }
6101 if (ParseAsAbsoluteExpression(Value)) {
6102 return TokError(Twine("invalid value in ") +
6103 Twine(PALMD::AssemblerDirective));
6104 }
6105 PALMetadata->setRegister(Key, Value);
6106 if (!trySkipToken(AsmToken::Comma))
6107 break;
6108 }
6109 return false;
6110 }
6111
6112 /// ParseDirectiveAMDGPULDS
6113 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
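/// e.g. (illustrative): .amdgpu_lds my_shared_buf, 4096, 16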
6114 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6115 if (getParser().checkForValidSection())
6116 return true;
6117
6118 StringRef Name;
6119 SMLoc NameLoc = getLoc();
6120 if (getParser().parseIdentifier(Name))
6121 return TokError("expected identifier in directive");
6122
6123 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6124 if (getParser().parseComma())
6125 return true;
6126
6127 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6128
6129 int64_t Size;
6130 SMLoc SizeLoc = getLoc();
6131 if (getParser().parseAbsoluteExpression(Size))
6132 return true;
6133 if (Size < 0)
6134 return Error(SizeLoc, "size must be non-negative");
6135 if (Size > LocalMemorySize)
6136 return Error(SizeLoc, "size is too large");
6137
6138 int64_t Alignment = 4;
6139 if (trySkipToken(AsmToken::Comma)) {
6140 SMLoc AlignLoc = getLoc();
6141 if (getParser().parseAbsoluteExpression(Alignment))
6142 return true;
6143 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6144 return Error(AlignLoc, "alignment must be a power of two");
6145
6146 // Alignment larger than the size of LDS is possible in theory, as long
6147 // as the linker manages to place the symbol at address 0, but we do want
6148 // to make sure the alignment fits nicely into a 32-bit integer.
6149 if (Alignment >= 1u << 31)
6150 return Error(AlignLoc, "alignment is too large");
6151 }
6152
6153 if (parseEOL())
6154 return true;
6155
6156 Symbol->redefineIfPossible();
6157 if (!Symbol->isUndefined())
6158 return Error(NameLoc, "invalid symbol redefinition");
6159
6160 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6161 return false;
6162 }
6163
6164 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6165 StringRef IDVal = DirectiveID.getString();
6166
6167 if (isHsaAbi(getSTI())) {
6168 if (IDVal == ".amdhsa_kernel")
6169 return ParseDirectiveAMDHSAKernel();
6170
6171 if (IDVal == ".amdhsa_code_object_version")
6172 return ParseDirectiveAMDHSACodeObjectVersion();
6173
6174 // TODO: Restructure/combine with PAL metadata directive.
6175 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
6176 return ParseDirectiveHSAMetadata();
6177 } else {
6178 if (IDVal == ".amd_kernel_code_t")
6179 return ParseDirectiveAMDKernelCodeT();
6180
6181 if (IDVal == ".amdgpu_hsa_kernel")
6182 return ParseDirectiveAMDGPUHsaKernel();
6183
6184 if (IDVal == ".amd_amdgpu_isa")
6185 return ParseDirectiveISAVersion();
6186
6187 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
6188 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6189 Twine(" directive is "
6190 "not available on non-amdhsa OSes"))
6191 .str());
6192 }
6193 }
6194
6195 if (IDVal == ".amdgcn_target")
6196 return ParseDirectiveAMDGCNTarget();
6197
6198 if (IDVal == ".amdgpu_lds")
6199 return ParseDirectiveAMDGPULDS();
6200
6201 if (IDVal == PALMD::AssemblerDirectiveBegin)
6202 return ParseDirectivePALMetadataBegin();
6203
6204 if (IDVal == PALMD::AssemblerDirective)
6205 return ParseDirectivePALMetadata();
6206
6207 return true;
6208 }
6209
6210 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6211 unsigned RegNo) {
6212
6213 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6214 return isGFX9Plus();
6215
6216 // GFX10+ has 2 more SGPRs 104 and 105.
6217 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6218 return hasSGPR104_SGPR105();
6219
6220 switch (RegNo) {
6221 case AMDGPU::SRC_SHARED_BASE_LO:
6222 case AMDGPU::SRC_SHARED_BASE:
6223 case AMDGPU::SRC_SHARED_LIMIT_LO:
6224 case AMDGPU::SRC_SHARED_LIMIT:
6225 case AMDGPU::SRC_PRIVATE_BASE_LO:
6226 case AMDGPU::SRC_PRIVATE_BASE:
6227 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6228 case AMDGPU::SRC_PRIVATE_LIMIT:
6229 return isGFX9Plus();
6230 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6231 return isGFX9Plus() && !isGFX11Plus();
6232 case AMDGPU::TBA:
6233 case AMDGPU::TBA_LO:
6234 case AMDGPU::TBA_HI:
6235 case AMDGPU::TMA:
6236 case AMDGPU::TMA_LO:
6237 case AMDGPU::TMA_HI:
6238 return !isGFX9Plus();
6239 case AMDGPU::XNACK_MASK:
6240 case AMDGPU::XNACK_MASK_LO:
6241 case AMDGPU::XNACK_MASK_HI:
6242 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6243 case AMDGPU::SGPR_NULL:
6244 return isGFX10Plus();
6245 default:
6246 break;
6247 }
6248
6249 if (isCI())
6250 return true;
6251
6252 if (isSI() || isGFX10Plus()) {
6253 // No flat_scr on SI.
6254 // On GFX10Plus flat scratch is not a valid register operand and can only be
6255 // accessed with s_setreg/s_getreg.
6256 switch (RegNo) {
6257 case AMDGPU::FLAT_SCR:
6258 case AMDGPU::FLAT_SCR_LO:
6259 case AMDGPU::FLAT_SCR_HI:
6260 return false;
6261 default:
6262 return true;
6263 }
6264 }
6265
6266 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6267 // SI/CI have.
6268 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6269 return hasSGPR102_SGPR103();
6270
6271 return true;
6272 }
6273
6274 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6275 StringRef Mnemonic,
6276 OperandMode Mode) {
6277 ParseStatus Res = parseVOPD(Operands);
6278 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6279 return Res;
6280
6281 // Try to parse with a custom parser
6282 Res = MatchOperandParserImpl(Operands, Mnemonic);
6283
// If we successfully parsed the operand or if there was an error parsing,
6285 // we are done.
6286 //
6287 // If we are parsing after we reach EndOfStatement then this means we
6288 // are appending default values to the Operands list. This is only done
6289 // by custom parser, so we shouldn't continue on to the generic parsing.
6290 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6291 return Res;
6292
6293 SMLoc RBraceLoc;
6294 SMLoc LBraceLoc = getLoc();
6295 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6296 unsigned Prefix = Operands.size();
6297
6298 for (;;) {
6299 auto Loc = getLoc();
6300 Res = parseReg(Operands);
6301 if (Res.isNoMatch())
6302 Error(Loc, "expected a register");
6303 if (!Res.isSuccess())
6304 return ParseStatus::Failure;
6305
6306 RBraceLoc = getLoc();
6307 if (trySkipToken(AsmToken::RBrac))
6308 break;
6309
6310 if (!skipToken(AsmToken::Comma,
6311 "expected a comma or a closing square bracket"))
6312 return ParseStatus::Failure;
6313 }
6314
6315 if (Operands.size() - Prefix > 1) {
6316 Operands.insert(Operands.begin() + Prefix,
6317 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6318 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6319 }
6320
6321 return ParseStatus::Success;
6322 }
6323
6324 return parseRegOrImm(Operands);
6325 }
6326
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6328 // Clear any forced encodings from the previous instruction.
6329 setForcedEncodingSize(0);
6330 setForcedDPP(false);
6331 setForcedSDWA(false);
6332
6333 if (Name.ends_with("_e64_dpp")) {
6334 setForcedDPP(true);
6335 setForcedEncodingSize(64);
6336 return Name.substr(0, Name.size() - 8);
6337 }
6338 if (Name.ends_with("_e64")) {
6339 setForcedEncodingSize(64);
6340 return Name.substr(0, Name.size() - 4);
6341 }
6342 if (Name.ends_with("_e32")) {
6343 setForcedEncodingSize(32);
6344 return Name.substr(0, Name.size() - 4);
6345 }
6346 if (Name.ends_with("_dpp")) {
6347 setForcedDPP(true);
6348 return Name.substr(0, Name.size() - 4);
6349 }
6350 if (Name.ends_with("_sdwa")) {
6351 setForcedSDWA(true);
6352 return Name.substr(0, Name.size() - 5);
6353 }
6354 return Name;
6355 }
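
// For illustration (the mnemonics themselves come from the instruction
// definitions, not from this function): a name such as "v_add_f32_e64" would
// force a 64-bit encoding and be shortened to "v_add_f32", while
// "v_mov_b32_sdwa" would set the forced-SDWA flag and be shortened to
// "v_mov_b32". Names without a recognized suffix are returned unchanged.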
6356
6357 static void applyMnemonicAliases(StringRef &Mnemonic,
6358 const FeatureBitset &Features,
6359 unsigned VariantID);
6360
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6362 StringRef Name,
6363 SMLoc NameLoc, OperandVector &Operands) {
6364 // Add the instruction mnemonic
6365 Name = parseMnemonicSuffix(Name);
6366
6367 // If the target architecture uses MnemonicAlias, call it here to parse
6368 // operands correctly.
6369 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6370
6371 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6372
6373 bool IsMIMG = Name.starts_with("image_");
6374
6375 while (!trySkipToken(AsmToken::EndOfStatement)) {
6376 OperandMode Mode = OperandMode_Default;
6377 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6378 Mode = OperandMode_NSA;
6379 ParseStatus Res = parseOperand(Operands, Name, Mode);
6380
6381 if (!Res.isSuccess()) {
6382 checkUnsupportedInstruction(Name, NameLoc);
6383 if (!Parser.hasPendingError()) {
6384 // FIXME: use real operand location rather than the current location.
6385 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6386 : "not a valid operand.";
6387 Error(getLoc(), Msg);
6388 }
6389 while (!trySkipToken(AsmToken::EndOfStatement)) {
6390 lex();
6391 }
6392 return true;
6393 }
6394
6395 // Eat the comma or space if there is one.
6396 trySkipToken(AsmToken::Comma);
6397 }
6398
6399 return false;
6400 }
6401
6402 //===----------------------------------------------------------------------===//
6403 // Utility functions
6404 //===----------------------------------------------------------------------===//
6405
ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6407 OperandVector &Operands) {
6408 SMLoc S = getLoc();
6409 if (!trySkipId(Name))
6410 return ParseStatus::NoMatch;
6411
6412 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6413 return ParseStatus::Success;
6414 }
6415
ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6417 int64_t &IntVal) {
6418
6419 if (!trySkipId(Prefix, AsmToken::Colon))
6420 return ParseStatus::NoMatch;
6421
6422 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6423 }
6424
ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6426 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6427 std::function<bool(int64_t &)> ConvertResult) {
6428 SMLoc S = getLoc();
6429 int64_t Value = 0;
6430
6431 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6432 if (!Res.isSuccess())
6433 return Res;
6434
6435 if (ConvertResult && !ConvertResult(Value)) {
6436 Error(S, "invalid " + StringRef(Prefix) + " value.");
6437 }
6438
6439 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6440 return ParseStatus::Success;
6441 }
6442
ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6444 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6445 bool (*ConvertResult)(int64_t &)) {
6446 SMLoc S = getLoc();
6447 if (!trySkipId(Prefix, AsmToken::Colon))
6448 return ParseStatus::NoMatch;
6449
6450 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6451 return ParseStatus::Failure;
6452
6453 unsigned Val = 0;
6454 const unsigned MaxSize = 4;
6455
6456 // FIXME: How to verify the number of elements matches the number of src
6457 // operands?
6458 for (int I = 0; ; ++I) {
6459 int64_t Op;
6460 SMLoc Loc = getLoc();
6461 if (!parseExpr(Op))
6462 return ParseStatus::Failure;
6463
6464 if (Op != 0 && Op != 1)
6465 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6466
6467 Val |= (Op << I);
6468
6469 if (trySkipToken(AsmToken::RBrac))
6470 break;
6471
6472 if (I + 1 == MaxSize)
6473 return Error(getLoc(), "expected a closing square bracket");
6474
6475 if (!skipToken(AsmToken::Comma, "expected a comma"))
6476 return ParseStatus::Failure;
6477 }
6478
6479 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6480 return ParseStatus::Success;
6481 }
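
// Illustrative example of the encoding above: an operand written as
// "neg:[0,1,1]" parses element I into bit I, so Val becomes 0b110 (6).
// At most MaxSize (4) elements are accepted, and each must be 0 or 1.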
6482
ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6484 OperandVector &Operands,
6485 AMDGPUOperand::ImmTy ImmTy) {
6486 int64_t Bit;
6487 SMLoc S = getLoc();
6488
6489 if (trySkipId(Name)) {
6490 Bit = 1;
6491 } else if (trySkipId("no", Name)) {
6492 Bit = 0;
6493 } else {
6494 return ParseStatus::NoMatch;
6495 }
6496
6497 if (Name == "r128" && !hasMIMG_R128())
6498 return Error(S, "r128 modifier is not supported on this GPU");
6499 if (Name == "a16" && !hasA16())
6500 return Error(S, "a16 modifier is not supported on this GPU");
6501
6502 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6503 ImmTy = AMDGPUOperand::ImmTyR128A16;
6504
6505 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6506 return ParseStatus::Success;
6507 }
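
// For illustration: for a bit named "tfe" (assuming it is routed through this
// parser), writing "tfe" yields an immediate of 1 and writing "notfe" yields
// an immediate of 0; any other token is reported as NoMatch so other parsers
// can try.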
6508
unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6510 bool &Disabling) const {
6511 Disabling = Id.consume_front("no");
6512
6513 if (isGFX940() && !Mnemo.starts_with("s_")) {
6514 return StringSwitch<unsigned>(Id)
6515 .Case("nt", AMDGPU::CPol::NT)
6516 .Case("sc0", AMDGPU::CPol::SC0)
6517 .Case("sc1", AMDGPU::CPol::SC1)
6518 .Default(0);
6519 }
6520
6521 return StringSwitch<unsigned>(Id)
6522 .Case("dlc", AMDGPU::CPol::DLC)
6523 .Case("glc", AMDGPU::CPol::GLC)
6524 .Case("scc", AMDGPU::CPol::SCC)
6525 .Case("slc", AMDGPU::CPol::SLC)
6526 .Default(0);
6527 }
6528
ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6530 if (isGFX12Plus()) {
6531 SMLoc StringLoc = getLoc();
6532
6533 int64_t CPolVal = 0;
6534 ParseStatus ResTH = ParseStatus::NoMatch;
6535 ParseStatus ResScope = ParseStatus::NoMatch;
6536
6537 for (;;) {
6538 if (ResTH.isNoMatch()) {
6539 int64_t TH;
6540 ResTH = parseTH(Operands, TH);
6541 if (ResTH.isFailure())
6542 return ResTH;
6543 if (ResTH.isSuccess()) {
6544 CPolVal |= TH;
6545 continue;
6546 }
6547 }
6548
6549 if (ResScope.isNoMatch()) {
6550 int64_t Scope;
6551 ResScope = parseScope(Operands, Scope);
6552 if (ResScope.isFailure())
6553 return ResScope;
6554 if (ResScope.isSuccess()) {
6555 CPolVal |= Scope;
6556 continue;
6557 }
6558 }
6559
6560 break;
6561 }
6562
6563 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6564 return ParseStatus::NoMatch;
6565
6566 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6567 AMDGPUOperand::ImmTyCPol));
6568 return ParseStatus::Success;
6569 }
6570
6571 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6572 SMLoc OpLoc = getLoc();
6573 unsigned Enabled = 0, Seen = 0;
6574 for (;;) {
6575 SMLoc S = getLoc();
6576 bool Disabling;
6577 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6578 if (!CPol)
6579 break;
6580
6581 lex();
6582
6583 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6584 return Error(S, "dlc modifier is not supported on this GPU");
6585
6586 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6587 return Error(S, "scc modifier is not supported on this GPU");
6588
6589 if (Seen & CPol)
6590 return Error(S, "duplicate cache policy modifier");
6591
6592 if (!Disabling)
6593 Enabled |= CPol;
6594
6595 Seen |= CPol;
6596 }
6597
6598 if (!Seen)
6599 return ParseStatus::NoMatch;
6600
6601 Operands.push_back(
6602 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6603 return ParseStatus::Success;
6604 }
6605
ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6607 int64_t &Scope) {
6608 Scope = AMDGPU::CPol::SCOPE_CU; // default;
6609
6610 StringRef Value;
6611 SMLoc StringLoc;
6612 ParseStatus Res;
6613
6614 Res = parseStringWithPrefix("scope", Value, StringLoc);
6615 if (!Res.isSuccess())
6616 return Res;
6617
6618 Scope = StringSwitch<int64_t>(Value)
6619 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6620 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6621 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6622 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6623 .Default(0xffffffff);
6624
6625 if (Scope == 0xffffffff)
6626 return Error(StringLoc, "invalid scope value");
6627
6628 return ParseStatus::Success;
6629 }
6630
ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6632 TH = AMDGPU::CPol::TH_RT; // default
6633
6634 StringRef Value;
6635 SMLoc StringLoc;
6636 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6637 if (!Res.isSuccess())
6638 return Res;
6639
6640 if (Value == "TH_DEFAULT")
6641 TH = AMDGPU::CPol::TH_RT;
6642 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6643 Value == "TH_LOAD_NT_WB") {
6644 return Error(StringLoc, "invalid th value");
6645 } else if (Value.consume_front("TH_ATOMIC_")) {
6646 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6647 } else if (Value.consume_front("TH_LOAD_")) {
6648 TH = AMDGPU::CPol::TH_TYPE_LOAD;
6649 } else if (Value.consume_front("TH_STORE_")) {
6650 TH = AMDGPU::CPol::TH_TYPE_STORE;
6651 } else {
6652 return Error(StringLoc, "invalid th value");
6653 }
6654
6655 if (Value == "BYPASS")
6656 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6657
6658 if (TH != 0) {
6659 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
6660 TH |= StringSwitch<int64_t>(Value)
6661 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6662 .Case("RT", AMDGPU::CPol::TH_RT)
6663 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6664 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
6665 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6666 AMDGPU::CPol::TH_ATOMIC_RETURN)
6667 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
6668 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6669 AMDGPU::CPol::TH_ATOMIC_NT)
6670 .Default(0xffffffff);
6671 else
6672 TH |= StringSwitch<int64_t>(Value)
6673 .Case("RT", AMDGPU::CPol::TH_RT)
6674 .Case("NT", AMDGPU::CPol::TH_NT)
6675 .Case("HT", AMDGPU::CPol::TH_HT)
6676 .Case("LU", AMDGPU::CPol::TH_LU)
6677 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6678 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6679 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6680 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6681 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6682 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6683 .Default(0xffffffff);
6684 }
6685
6686 if (TH == 0xffffffff)
6687 return Error(StringLoc, "invalid th value");
6688
6689 return ParseStatus::Success;
6690 }
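
// Illustrative examples of the mapping above (names taken from the switches
// in this function): "th:TH_LOAD_NT" yields TH_TYPE_LOAD | TH_NT, and
// "th:TH_ATOMIC_RT_RETURN" yields TH_TYPE_ATOMIC | TH_ATOMIC_RETURN.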
6691
static void addOptionalImmOperand(
6693 MCInst& Inst, const OperandVector& Operands,
6694 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6695 AMDGPUOperand::ImmTy ImmT,
6696 int64_t Default = 0) {
6697 auto i = OptionalIdx.find(ImmT);
6698 if (i != OptionalIdx.end()) {
6699 unsigned Idx = i->second;
6700 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6701 } else {
6702 Inst.addOperand(MCOperand::createImm(Default));
6703 }
6704 }
6705
ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6707 StringRef &Value,
6708 SMLoc &StringLoc) {
6709 if (!trySkipId(Prefix, AsmToken::Colon))
6710 return ParseStatus::NoMatch;
6711
6712 StringLoc = getLoc();
6713 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6714 : ParseStatus::Failure;
6715 }
6716
6717 //===----------------------------------------------------------------------===//
6718 // MTBUF format
6719 //===----------------------------------------------------------------------===//
6720
bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6722 int64_t MaxVal,
6723 int64_t &Fmt) {
6724 int64_t Val;
6725 SMLoc Loc = getLoc();
6726
6727 auto Res = parseIntWithPrefix(Pref, Val);
6728 if (Res.isFailure())
6729 return false;
6730 if (Res.isNoMatch())
6731 return true;
6732
6733 if (Val < 0 || Val > MaxVal) {
6734 Error(Loc, Twine("out of range ", StringRef(Pref)));
6735 return false;
6736 }
6737
6738 Fmt = Val;
6739 return true;
6740 }
6741
ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6743 AMDGPUOperand::ImmTy ImmTy) {
6744 const char *Pref = "index_key";
6745 int64_t ImmVal = 0;
6746 SMLoc Loc = getLoc();
6747 auto Res = parseIntWithPrefix(Pref, ImmVal);
6748 if (!Res.isSuccess())
6749 return Res;
6750
6751 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6752 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6753
6754 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6755 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6756
6757 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6758 return ParseStatus::Success;
6759 }
6760
ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6762 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6763 }
6764
ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6766 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6767 }
6768
6769 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6770 // values to live in a joint format operand in the MCInst encoding.
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6772 using namespace llvm::AMDGPU::MTBUFFormat;
6773
6774 int64_t Dfmt = DFMT_UNDEF;
6775 int64_t Nfmt = NFMT_UNDEF;
6776
6777 // dfmt and nfmt can appear in either order, and each is optional.
6778 for (int I = 0; I < 2; ++I) {
6779 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6780 return ParseStatus::Failure;
6781
6782 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6783 return ParseStatus::Failure;
6784
6785 // Skip optional comma between dfmt/nfmt
6786 // but guard against 2 commas following each other.
6787 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6788 !peekToken().is(AsmToken::Comma)) {
6789 trySkipToken(AsmToken::Comma);
6790 }
6791 }
6792
6793 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6794 return ParseStatus::NoMatch;
6795
6796 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6797 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6798
6799 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6800 return ParseStatus::Success;
6801 }
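
// Illustrative example: both "dfmt:1, nfmt:2" and "nfmt:2, dfmt:1" are
// accepted; if one of the two is omitted it falls back to DFMT_DEFAULT or
// NFMT_DEFAULT before the pair is packed by encodeDfmtNfmt().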
6802
ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6804 using namespace llvm::AMDGPU::MTBUFFormat;
6805
6806 int64_t Fmt = UFMT_UNDEF;
6807
6808 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6809 return ParseStatus::Failure;
6810
6811 if (Fmt == UFMT_UNDEF)
6812 return ParseStatus::NoMatch;
6813
6814 Format = Fmt;
6815 return ParseStatus::Success;
6816 }
6817
bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6819 int64_t &Nfmt,
6820 StringRef FormatStr,
6821 SMLoc Loc) {
6822 using namespace llvm::AMDGPU::MTBUFFormat;
6823 int64_t Format;
6824
6825 Format = getDfmt(FormatStr);
6826 if (Format != DFMT_UNDEF) {
6827 Dfmt = Format;
6828 return true;
6829 }
6830
6831 Format = getNfmt(FormatStr, getSTI());
6832 if (Format != NFMT_UNDEF) {
6833 Nfmt = Format;
6834 return true;
6835 }
6836
6837 Error(Loc, "unsupported format");
6838 return false;
6839 }
6840
ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6842 SMLoc FormatLoc,
6843 int64_t &Format) {
6844 using namespace llvm::AMDGPU::MTBUFFormat;
6845
6846 int64_t Dfmt = DFMT_UNDEF;
6847 int64_t Nfmt = NFMT_UNDEF;
6848 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6849 return ParseStatus::Failure;
6850
6851 if (trySkipToken(AsmToken::Comma)) {
6852 StringRef Str;
6853 SMLoc Loc = getLoc();
6854 if (!parseId(Str, "expected a format string") ||
6855 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6856 return ParseStatus::Failure;
6857 if (Dfmt == DFMT_UNDEF)
6858 return Error(Loc, "duplicate numeric format");
6859 if (Nfmt == NFMT_UNDEF)
6860 return Error(Loc, "duplicate data format");
6861 }
6862
6863 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6864 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6865
6866 if (isGFX10Plus()) {
6867 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6868 if (Ufmt == UFMT_UNDEF)
6869 return Error(FormatLoc, "unsupported format");
6870 Format = Ufmt;
6871 } else {
6872 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6873 }
6874
6875 return ParseStatus::Success;
6876 }
6877
ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6879 SMLoc Loc,
6880 int64_t &Format) {
6881 using namespace llvm::AMDGPU::MTBUFFormat;
6882
6883 auto Id = getUnifiedFormat(FormatStr, getSTI());
6884 if (Id == UFMT_UNDEF)
6885 return ParseStatus::NoMatch;
6886
6887 if (!isGFX10Plus())
6888 return Error(Loc, "unified format is not supported on this GPU");
6889
6890 Format = Id;
6891 return ParseStatus::Success;
6892 }
6893
ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6895 using namespace llvm::AMDGPU::MTBUFFormat;
6896 SMLoc Loc = getLoc();
6897
6898 if (!parseExpr(Format))
6899 return ParseStatus::Failure;
6900 if (!isValidFormatEncoding(Format, getSTI()))
6901 return Error(Loc, "out of range format");
6902
6903 return ParseStatus::Success;
6904 }
6905
ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6907 using namespace llvm::AMDGPU::MTBUFFormat;
6908
6909 if (!trySkipId("format", AsmToken::Colon))
6910 return ParseStatus::NoMatch;
6911
6912 if (trySkipToken(AsmToken::LBrac)) {
6913 StringRef FormatStr;
6914 SMLoc Loc = getLoc();
6915 if (!parseId(FormatStr, "expected a format string"))
6916 return ParseStatus::Failure;
6917
6918 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6919 if (Res.isNoMatch())
6920 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6921 if (!Res.isSuccess())
6922 return Res;
6923
6924 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6925 return ParseStatus::Failure;
6926
6927 return ParseStatus::Success;
6928 }
6929
6930 return parseNumericFormat(Format);
6931 }
6932
ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6934 using namespace llvm::AMDGPU::MTBUFFormat;
6935
6936 int64_t Format = getDefaultFormatEncoding(getSTI());
6937 ParseStatus Res;
6938 SMLoc Loc = getLoc();
6939
6940 // Parse legacy format syntax.
6941 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6942 if (Res.isFailure())
6943 return Res;
6944
6945 bool FormatFound = Res.isSuccess();
6946
6947 Operands.push_back(
6948 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6949
6950 if (FormatFound)
6951 trySkipToken(AsmToken::Comma);
6952
6953 if (isToken(AsmToken::EndOfStatement)) {
// We are expecting an soffset operand,
// but let the matcher handle the error.
6956 return ParseStatus::Success;
6957 }
6958
6959 // Parse soffset.
6960 Res = parseRegOrImm(Operands);
6961 if (!Res.isSuccess())
6962 return Res;
6963
6964 trySkipToken(AsmToken::Comma);
6965
6966 if (!FormatFound) {
6967 Res = parseSymbolicOrNumericFormat(Format);
6968 if (Res.isFailure())
6969 return Res;
6970 if (Res.isSuccess()) {
6971 auto Size = Operands.size();
6972 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6973 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6974 Op.setImm(Format);
6975 }
6976 return ParseStatus::Success;
6977 }
6978
6979 if (isId("format") && peekToken().is(AsmToken::Colon))
6980 return Error(getLoc(), "duplicate format");
6981 return ParseStatus::Success;
6982 }
6983
ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6985 ParseStatus Res =
6986 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6987 if (Res.isNoMatch()) {
6988 Res = parseIntWithPrefix("inst_offset", Operands,
6989 AMDGPUOperand::ImmTyInstOffset);
6990 }
6991 return Res;
6992 }
6993
ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6995 ParseStatus Res =
6996 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6997 if (Res.isNoMatch())
6998 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6999 return Res;
7000 }
7001
ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7003 ParseStatus Res =
7004 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7005 if (Res.isNoMatch()) {
7006 Res =
7007 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7008 }
7009 return Res;
7010 }
7011
7012 //===----------------------------------------------------------------------===//
7013 // Exp
7014 //===----------------------------------------------------------------------===//
7015
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7017 OptionalImmIndexMap OptionalIdx;
7018
7019 unsigned OperandIdx[4];
7020 unsigned EnMask = 0;
7021 int SrcIdx = 0;
7022
7023 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7024 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7025
7026 // Add the register arguments
7027 if (Op.isReg()) {
7028 assert(SrcIdx < 4);
7029 OperandIdx[SrcIdx] = Inst.size();
7030 Op.addRegOperands(Inst, 1);
7031 ++SrcIdx;
7032 continue;
7033 }
7034
7035 if (Op.isOff()) {
7036 assert(SrcIdx < 4);
7037 OperandIdx[SrcIdx] = Inst.size();
7038 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
7039 ++SrcIdx;
7040 continue;
7041 }
7042
7043 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7044 Op.addImmOperands(Inst, 1);
7045 continue;
7046 }
7047
7048 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7049 continue;
7050
7051 // Handle optional arguments
7052 OptionalIdx[Op.getImmTy()] = i;
7053 }
7054
7055 assert(SrcIdx == 4);
7056
7057 bool Compr = false;
7058 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7059 Compr = true;
7060 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7061 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
7062 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
7063 }
7064
7065 for (auto i = 0; i < SrcIdx; ++i) {
7066 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
7067 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7068 }
7069 }
7070
7071 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7072 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7073
7074 Inst.addOperand(MCOperand::createImm(EnMask));
7075 }
7076
7077 //===----------------------------------------------------------------------===//
7078 // s_waitcnt
7079 //===----------------------------------------------------------------------===//
7080
7081 static bool
encodeCnt(
7083 const AMDGPU::IsaVersion ISA,
7084 int64_t &IntVal,
7085 int64_t CntVal,
7086 bool Saturate,
7087 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7088 unsigned (*decode)(const IsaVersion &Version, unsigned))
7089 {
7090 bool Failed = false;
7091
7092 IntVal = encode(ISA, IntVal, CntVal);
7093 if (CntVal != decode(ISA, IntVal)) {
7094 if (Saturate) {
7095 IntVal = encode(ISA, IntVal, -1);
7096 } else {
7097 Failed = true;
7098 }
7099 }
7100 return Failed;
7101 }
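
// Sketch of the behavior above: the requested count is packed into its
// bit-field and then decoded back; if the round trip does not reproduce
// CntVal, the value did not fit in the field. With the "_sat" counter forms
// the field is instead re-encoded with -1, i.e. saturated to the field's
// maximum, rather than reporting an error.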
7102
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7104
7105 SMLoc CntLoc = getLoc();
7106 StringRef CntName = getTokenStr();
7107
7108 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7109 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7110 return false;
7111
7112 int64_t CntVal;
7113 SMLoc ValLoc = getLoc();
7114 if (!parseExpr(CntVal))
7115 return false;
7116
7117 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7118
7119 bool Failed = true;
7120 bool Sat = CntName.ends_with("_sat");
7121
7122 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7123 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7124 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7125 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7126 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7127 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7128 } else {
7129 Error(CntLoc, "invalid counter name " + CntName);
7130 return false;
7131 }
7132
7133 if (Failed) {
7134 Error(ValLoc, "too large value for " + CntName);
7135 return false;
7136 }
7137
7138 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7139 return false;
7140
7141 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7142 if (isToken(AsmToken::EndOfStatement)) {
7143 Error(getLoc(), "expected a counter name");
7144 return false;
7145 }
7146 }
7147
7148 return true;
7149 }
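
// Illustrative accepted forms (counter names as handled above):
//   s_waitcnt vmcnt(0) lgkmcnt(0)
//   s_waitcnt vmcnt(1) & expcnt(2), lgkmcnt(3)
// Counters may be separated by spaces, '&' or ',', and the "_sat" variants
// clamp over-large values instead of raising an error.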
7150
ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7152 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7153 int64_t Waitcnt = getWaitcntBitMask(ISA);
7154 SMLoc S = getLoc();
7155
7156 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7157 while (!isToken(AsmToken::EndOfStatement)) {
7158 if (!parseCnt(Waitcnt))
7159 return ParseStatus::Failure;
7160 }
7161 } else {
7162 if (!parseExpr(Waitcnt))
7163 return ParseStatus::Failure;
7164 }
7165
7166 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7167 return ParseStatus::Success;
7168 }
7169
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7171 SMLoc FieldLoc = getLoc();
7172 StringRef FieldName = getTokenStr();
7173 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7174 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7175 return false;
7176
7177 SMLoc ValueLoc = getLoc();
7178 StringRef ValueName = getTokenStr();
7179 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7180 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7181 return false;
7182
7183 unsigned Shift;
7184 if (FieldName == "instid0") {
7185 Shift = 0;
7186 } else if (FieldName == "instskip") {
7187 Shift = 4;
7188 } else if (FieldName == "instid1") {
7189 Shift = 7;
7190 } else {
7191 Error(FieldLoc, "invalid field name " + FieldName);
7192 return false;
7193 }
7194
7195 int Value;
7196 if (Shift == 4) {
7197 // Parse values for instskip.
7198 Value = StringSwitch<int>(ValueName)
7199 .Case("SAME", 0)
7200 .Case("NEXT", 1)
7201 .Case("SKIP_1", 2)
7202 .Case("SKIP_2", 3)
7203 .Case("SKIP_3", 4)
7204 .Case("SKIP_4", 5)
7205 .Default(-1);
7206 } else {
7207 // Parse values for instid0 and instid1.
7208 Value = StringSwitch<int>(ValueName)
7209 .Case("NO_DEP", 0)
7210 .Case("VALU_DEP_1", 1)
7211 .Case("VALU_DEP_2", 2)
7212 .Case("VALU_DEP_3", 3)
7213 .Case("VALU_DEP_4", 4)
7214 .Case("TRANS32_DEP_1", 5)
7215 .Case("TRANS32_DEP_2", 6)
7216 .Case("TRANS32_DEP_3", 7)
7217 .Case("FMA_ACCUM_CYCLE_1", 8)
7218 .Case("SALU_CYCLE_1", 9)
7219 .Case("SALU_CYCLE_2", 10)
7220 .Case("SALU_CYCLE_3", 11)
7221 .Default(-1);
7222 }
7223 if (Value < 0) {
7224 Error(ValueLoc, "invalid value name " + ValueName);
7225 return false;
7226 }
7227
7228 Delay |= Value << Shift;
7229 return true;
7230 }
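
// Worked example for the field layout above (instid0 at bit 0, instskip at
// bit 4, instid1 at bit 7):
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
// encodes (1 << 0) | (1 << 4) | (9 << 7) = 1169.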
7231
ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7233 int64_t Delay = 0;
7234 SMLoc S = getLoc();
7235
7236 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7237 do {
7238 if (!parseDelay(Delay))
7239 return ParseStatus::Failure;
7240 } while (trySkipToken(AsmToken::Pipe));
7241 } else {
7242 if (!parseExpr(Delay))
7243 return ParseStatus::Failure;
7244 }
7245
7246 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7247 return ParseStatus::Success;
7248 }
7249
7250 bool
AMDGPUOperand::isSWaitCnt() const {
7252 return isImm();
7253 }
7254
bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7256
7257 //===----------------------------------------------------------------------===//
7258 // DepCtr
7259 //===----------------------------------------------------------------------===//
7260
void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7262 StringRef DepCtrName) {
7263 switch (ErrorId) {
7264 case OPR_ID_UNKNOWN:
7265 Error(Loc, Twine("invalid counter name ", DepCtrName));
7266 return;
7267 case OPR_ID_UNSUPPORTED:
7268 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7269 return;
7270 case OPR_ID_DUPLICATE:
7271 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7272 return;
7273 case OPR_VAL_INVALID:
7274 Error(Loc, Twine("invalid value for ", DepCtrName));
7275 return;
7276 default:
7277 assert(false);
7278 }
7279 }
7280
bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7282
7283 using namespace llvm::AMDGPU::DepCtr;
7284
7285 SMLoc DepCtrLoc = getLoc();
7286 StringRef DepCtrName = getTokenStr();
7287
7288 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7289 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7290 return false;
7291
7292 int64_t ExprVal;
7293 if (!parseExpr(ExprVal))
7294 return false;
7295
7296 unsigned PrevOprMask = UsedOprMask;
7297 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7298
7299 if (CntVal < 0) {
7300 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7301 return false;
7302 }
7303
7304 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7305 return false;
7306
7307 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7308 if (isToken(AsmToken::EndOfStatement)) {
7309 Error(getLoc(), "expected a counter name");
7310 return false;
7311 }
7312 }
7313
7314 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7315 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7316 return true;
7317 }
7318
ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7320 using namespace llvm::AMDGPU::DepCtr;
7321
7322 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7323 SMLoc Loc = getLoc();
7324
7325 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7326 unsigned UsedOprMask = 0;
7327 while (!isToken(AsmToken::EndOfStatement)) {
7328 if (!parseDepCtr(DepCtr, UsedOprMask))
7329 return ParseStatus::Failure;
7330 }
7331 } else {
7332 if (!parseExpr(DepCtr))
7333 return ParseStatus::Failure;
7334 }
7335
7336 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7337 return ParseStatus::Success;
7338 }
7339
bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7341
7342 //===----------------------------------------------------------------------===//
7343 // hwreg
7344 //===----------------------------------------------------------------------===//
7345
ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7347 OperandInfoTy &Offset,
7348 OperandInfoTy &Width) {
7349 using namespace llvm::AMDGPU::Hwreg;
7350
7351 if (!trySkipId("hwreg", AsmToken::LParen))
7352 return ParseStatus::NoMatch;
7353
7354 // The register may be specified by name or using a numeric code
7355 HwReg.Loc = getLoc();
7356 if (isToken(AsmToken::Identifier) &&
7357 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7358 HwReg.IsSymbolic = true;
7359 lex(); // skip register name
7360 } else if (!parseExpr(HwReg.Val, "a register name")) {
7361 return ParseStatus::Failure;
7362 }
7363
7364 if (trySkipToken(AsmToken::RParen))
7365 return ParseStatus::Success;
7366
7367 // parse optional params
7368 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7369 return ParseStatus::Failure;
7370
7371 Offset.Loc = getLoc();
7372 if (!parseExpr(Offset.Val))
7373 return ParseStatus::Failure;
7374
7375 if (!skipToken(AsmToken::Comma, "expected a comma"))
7376 return ParseStatus::Failure;
7377
7378 Width.Loc = getLoc();
7379 if (!parseExpr(Width.Val) ||
7380 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7381 return ParseStatus::Failure;
7382
7383 return ParseStatus::Success;
7384 }
7385
ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7387 using namespace llvm::AMDGPU::Hwreg;
7388
7389 int64_t ImmVal = 0;
7390 SMLoc Loc = getLoc();
7391
7392 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7393 HwregId::Default);
7394 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7395 HwregOffset::Default);
7396 struct : StructuredOpField {
7397 using StructuredOpField::StructuredOpField;
7398 bool validate(AMDGPUAsmParser &Parser) const override {
7399 if (!isUIntN(Width, Val - 1))
7400 return Error(Parser, "only values from 1 to 32 are legal");
7401 return true;
7402 }
7403 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7404 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7405
7406 if (Res.isNoMatch())
7407 Res = parseHwregFunc(HwReg, Offset, Width);
7408
7409 if (Res.isSuccess()) {
7410 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7411 return ParseStatus::Failure;
7412 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7413 }
7414
7415 if (Res.isNoMatch() &&
7416 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7417 Res = ParseStatus::Success;
7418
7419 if (!Res.isSuccess())
7420 return ParseStatus::Failure;
7421
7422 if (!isUInt<16>(ImmVal))
7423 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7424 Operands.push_back(
7425 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7426 return ParseStatus::Success;
7427 }
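
// Illustrative accepted forms: the functional syntax
// "hwreg(<name or id>[, <offset>, <size>])", the structured syntax
// "{id: 6, offset: 0, size: 32}" (field names as declared above), or a plain
// 16-bit immediate. The size field is validated to lie in [1, 32].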
7428
bool AMDGPUOperand::isHwreg() const {
7430 return isImmTy(ImmTyHwreg);
7431 }
7432
7433 //===----------------------------------------------------------------------===//
7434 // sendmsg
7435 //===----------------------------------------------------------------------===//
7436
7437 bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7439 OperandInfoTy &Op,
7440 OperandInfoTy &Stream) {
7441 using namespace llvm::AMDGPU::SendMsg;
7442
7443 Msg.Loc = getLoc();
7444 if (isToken(AsmToken::Identifier) &&
7445 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7446 Msg.IsSymbolic = true;
7447 lex(); // skip message name
7448 } else if (!parseExpr(Msg.Val, "a message name")) {
7449 return false;
7450 }
7451
7452 if (trySkipToken(AsmToken::Comma)) {
7453 Op.IsDefined = true;
7454 Op.Loc = getLoc();
7455 if (isToken(AsmToken::Identifier) &&
7456 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7457 OPR_ID_UNKNOWN) {
7458 lex(); // skip operation name
7459 } else if (!parseExpr(Op.Val, "an operation name")) {
7460 return false;
7461 }
7462
7463 if (trySkipToken(AsmToken::Comma)) {
7464 Stream.IsDefined = true;
7465 Stream.Loc = getLoc();
7466 if (!parseExpr(Stream.Val))
7467 return false;
7468 }
7469 }
7470
7471 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7472 }
7473
7474 bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7476 const OperandInfoTy &Op,
7477 const OperandInfoTy &Stream) {
7478 using namespace llvm::AMDGPU::SendMsg;
7479
// Validation strictness depends on whether the message is specified
// in a symbolic or in a numeric form. In the latter case
// we only check that the value can be encoded.
7483 bool Strict = Msg.IsSymbolic;
7484
7485 if (Strict) {
7486 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7487 Error(Msg.Loc, "specified message id is not supported on this GPU");
7488 return false;
7489 }
7490 } else {
7491 if (!isValidMsgId(Msg.Val, getSTI())) {
7492 Error(Msg.Loc, "invalid message id");
7493 return false;
7494 }
7495 }
7496 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7497 if (Op.IsDefined) {
7498 Error(Op.Loc, "message does not support operations");
7499 } else {
7500 Error(Msg.Loc, "missing message operation");
7501 }
7502 return false;
7503 }
7504 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7505 if (Op.Val == OPR_ID_UNSUPPORTED)
7506 Error(Op.Loc, "specified operation id is not supported on this GPU");
7507 else
7508 Error(Op.Loc, "invalid operation id");
7509 return false;
7510 }
7511 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7512 Stream.IsDefined) {
7513 Error(Stream.Loc, "message operation does not support streams");
7514 return false;
7515 }
7516 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7517 Error(Stream.Loc, "invalid message stream id");
7518 return false;
7519 }
7520 return true;
7521 }
7522
ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7524 using namespace llvm::AMDGPU::SendMsg;
7525
7526 int64_t ImmVal = 0;
7527 SMLoc Loc = getLoc();
7528
7529 if (trySkipId("sendmsg", AsmToken::LParen)) {
7530 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7531 OperandInfoTy Op(OP_NONE_);
7532 OperandInfoTy Stream(STREAM_ID_NONE_);
7533 if (parseSendMsgBody(Msg, Op, Stream) &&
7534 validateSendMsg(Msg, Op, Stream)) {
7535 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7536 } else {
7537 return ParseStatus::Failure;
7538 }
7539 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7540 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7541 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7542 } else {
7543 return ParseStatus::Failure;
7544 }
7545
7546 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7547 return ParseStatus::Success;
7548 }
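
// Illustrative accepted forms: "sendmsg(<msg>[, <op>[, <stream>]])", where
// each component may be a symbolic name or an integer expression, or a plain
// immediate, which must fit in 16 bits. Symbolic messages are validated more
// strictly than numeric ones (see validateSendMsg above).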
7549
bool AMDGPUOperand::isSendMsg() const {
7551 return isImmTy(ImmTySendMsg);
7552 }
7553
7554 //===----------------------------------------------------------------------===//
7555 // v_interp
7556 //===----------------------------------------------------------------------===//
7557
ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7559 StringRef Str;
7560 SMLoc S = getLoc();
7561
7562 if (!parseId(Str))
7563 return ParseStatus::NoMatch;
7564
7565 int Slot = StringSwitch<int>(Str)
7566 .Case("p10", 0)
7567 .Case("p20", 1)
7568 .Case("p0", 2)
7569 .Default(-1);
7570
7571 if (Slot == -1)
7572 return Error(S, "invalid interpolation slot");
7573
7574 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7575 AMDGPUOperand::ImmTyInterpSlot));
7576 return ParseStatus::Success;
7577 }
7578
ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7580 StringRef Str;
7581 SMLoc S = getLoc();
7582
7583 if (!parseId(Str))
7584 return ParseStatus::NoMatch;
7585
7586 if (!Str.starts_with("attr"))
7587 return Error(S, "invalid interpolation attribute");
7588
7589 StringRef Chan = Str.take_back(2);
7590 int AttrChan = StringSwitch<int>(Chan)
7591 .Case(".x", 0)
7592 .Case(".y", 1)
7593 .Case(".z", 2)
7594 .Case(".w", 3)
7595 .Default(-1);
7596 if (AttrChan == -1)
7597 return Error(S, "invalid or missing interpolation attribute channel");
7598
7599 Str = Str.drop_back(2).drop_front(4);
7600
7601 uint8_t Attr;
7602 if (Str.getAsInteger(10, Attr))
7603 return Error(S, "invalid or missing interpolation attribute number");
7604
7605 if (Attr > 32)
7606 return Error(S, "out of bounds interpolation attribute number");
7607
7608 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7609
7610 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7611 AMDGPUOperand::ImmTyInterpAttr));
7612 Operands.push_back(AMDGPUOperand::CreateImm(
7613 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7614 return ParseStatus::Success;
7615 }
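
// Illustrative example of the parsing above: "attr4.x" yields attribute 4 and
// channel 0, while "attr31.w" yields attribute 31 and channel 3; the numeric
// part must not exceed 32.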
7616
7617 //===----------------------------------------------------------------------===//
7618 // exp
7619 //===----------------------------------------------------------------------===//
7620
ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7622 using namespace llvm::AMDGPU::Exp;
7623
7624 StringRef Str;
7625 SMLoc S = getLoc();
7626
7627 if (!parseId(Str))
7628 return ParseStatus::NoMatch;
7629
7630 unsigned Id = getTgtId(Str);
7631 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7632 return Error(S, (Id == ET_INVALID)
7633 ? "invalid exp target"
7634 : "exp target is not supported on this GPU");
7635
7636 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7637 AMDGPUOperand::ImmTyExpTgt));
7638 return ParseStatus::Success;
7639 }
7640
7641 //===----------------------------------------------------------------------===//
7642 // parser helpers
7643 //===----------------------------------------------------------------------===//
7644
7645 bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7647 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7648 }
7649
7650 bool
AMDGPUAsmParser::isId(const StringRef Id) const {
7652 return isId(getToken(), Id);
7653 }
7654
7655 bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7657 return getTokenKind() == Kind;
7658 }
7659
StringRef AMDGPUAsmParser::getId() const {
7661 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7662 }
7663
7664 bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
7666 if (isId(Id)) {
7667 lex();
7668 return true;
7669 }
7670 return false;
7671 }
7672
7673 bool
AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7675 if (isToken(AsmToken::Identifier)) {
7676 StringRef Tok = getTokenStr();
7677 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7678 lex();
7679 return true;
7680 }
7681 }
7682 return false;
7683 }
7684
7685 bool
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7687 if (isId(Id) && peekToken().is(Kind)) {
7688 lex();
7689 lex();
7690 return true;
7691 }
7692 return false;
7693 }
7694
7695 bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7697 if (isToken(Kind)) {
7698 lex();
7699 return true;
7700 }
7701 return false;
7702 }
7703
7704 bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7706 const StringRef ErrMsg) {
7707 if (!trySkipToken(Kind)) {
7708 Error(getLoc(), ErrMsg);
7709 return false;
7710 }
7711 return true;
7712 }
7713
7714 bool
AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7716 SMLoc S = getLoc();
7717
7718 const MCExpr *Expr;
7719 if (Parser.parseExpression(Expr))
7720 return false;
7721
7722 if (Expr->evaluateAsAbsolute(Imm))
7723 return true;
7724
7725 if (Expected.empty()) {
7726 Error(S, "expected absolute expression");
7727 } else {
7728 Error(S, Twine("expected ", Expected) +
7729 Twine(" or an absolute expression"));
7730 }
7731 return false;
7732 }
7733
7734 bool
AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7736 SMLoc S = getLoc();
7737
7738 const MCExpr *Expr;
7739 if (Parser.parseExpression(Expr))
7740 return false;
7741
7742 int64_t IntVal;
7743 if (Expr->evaluateAsAbsolute(IntVal)) {
7744 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7745 } else {
7746 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7747 }
7748 return true;
7749 }
7750
7751 bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7753 if (isToken(AsmToken::String)) {
7754 Val = getToken().getStringContents();
7755 lex();
7756 return true;
7757 }
7758 Error(getLoc(), ErrMsg);
7759 return false;
7760 }
7761
7762 bool
AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7764 if (isToken(AsmToken::Identifier)) {
7765 Val = getTokenStr();
7766 lex();
7767 return true;
7768 }
7769 if (!ErrMsg.empty())
7770 Error(getLoc(), ErrMsg);
7771 return false;
7772 }
7773
7774 AsmToken
AMDGPUAsmParser::getToken() const {
7776 return Parser.getTok();
7777 }
7778
AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7780 return isToken(AsmToken::EndOfStatement)
7781 ? getToken()
7782 : getLexer().peekTok(ShouldSkipSpace);
7783 }
7784
7785 void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7787 auto TokCount = getLexer().peekTokens(Tokens);
7788
7789 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7790 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7791 }
7792
7793 AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
7795 return getLexer().getKind();
7796 }
7797
7798 SMLoc
AMDGPUAsmParser::getLoc() const {
7800 return getToken().getLoc();
7801 }
7802
7803 StringRef
AMDGPUAsmParser::getTokenStr() const {
7805 return getToken().getString();
7806 }
7807
7808 void
AMDGPUAsmParser::lex() {
7810 Parser.Lex();
7811 }
7812
SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7814 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7815 }
7816
7817 SMLoc
AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7819 const OperandVector &Operands) const {
7820 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7821 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7822 if (Test(Op))
7823 return Op.getStartLoc();
7824 }
7825 return getInstLoc(Operands);
7826 }
7827
7828 SMLoc
AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7830 const OperandVector &Operands) const {
7831 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7832 return getOperandLoc(Test, Operands);
7833 }
7834
7835 SMLoc
AMDGPUAsmParser::getRegLoc(unsigned Reg,
7837 const OperandVector &Operands) const {
7838 auto Test = [=](const AMDGPUOperand& Op) {
7839 return Op.isRegKind() && Op.getReg() == Reg;
7840 };
7841 return getOperandLoc(Test, Operands);
7842 }
7843
SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7845 bool SearchMandatoryLiterals) const {
7846 auto Test = [](const AMDGPUOperand& Op) {
7847 return Op.IsImmKindLiteral() || Op.isExpr();
7848 };
7849 SMLoc Loc = getOperandLoc(Test, Operands);
7850 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7851 Loc = getMandatoryLitLoc(Operands);
7852 return Loc;
7853 }
7854
SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7856 auto Test = [](const AMDGPUOperand &Op) {
7857 return Op.IsImmKindMandatoryLiteral();
7858 };
7859 return getOperandLoc(Test, Operands);
7860 }
7861
7862 SMLoc
AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7864 auto Test = [](const AMDGPUOperand& Op) {
7865 return Op.isImmKindConst();
7866 };
7867 return getOperandLoc(Test, Operands);
7868 }
7869
7870 ParseStatus
AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7872 if (!trySkipToken(AsmToken::LCurly))
7873 return ParseStatus::NoMatch;
7874
7875 bool First = true;
7876 while (!trySkipToken(AsmToken::RCurly)) {
7877 if (!First &&
7878 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7879 return ParseStatus::Failure;
7880
7881 StringRef Id = getTokenStr();
7882 SMLoc IdLoc = getLoc();
7883 if (!skipToken(AsmToken::Identifier, "field name expected") ||
7884 !skipToken(AsmToken::Colon, "colon expected"))
7885 return ParseStatus::Failure;
7886
7887 auto I =
7888 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
7889 if (I == Fields.end())
7890 return Error(IdLoc, "unknown field");
7891 if ((*I)->IsDefined)
7892 return Error(IdLoc, "duplicate field");
7893
7894 // TODO: Support symbolic values.
7895 (*I)->Loc = getLoc();
7896 if (!parseExpr((*I)->Val))
7897 return ParseStatus::Failure;
7898 (*I)->IsDefined = true;
7899
7900 First = false;
7901 }
7902 return ParseStatus::Success;
7903 }
7904
bool AMDGPUAsmParser::validateStructuredOpFields(
7906 ArrayRef<const StructuredOpField *> Fields) {
7907 return all_of(Fields, [this](const StructuredOpField *F) {
7908 return F->validate(*this);
7909 });
7910 }
7911
7912 //===----------------------------------------------------------------------===//
7913 // swizzle
7914 //===----------------------------------------------------------------------===//
7915
7916 LLVM_READNONE
7917 static unsigned
encodeBitmaskPerm(const unsigned AndMask,
7919 const unsigned OrMask,
7920 const unsigned XorMask) {
7921 using namespace llvm::AMDGPU::Swizzle;
7922
7923 return BITMASK_PERM_ENC |
7924 (AndMask << BITMASK_AND_SHIFT) |
7925 (OrMask << BITMASK_OR_SHIFT) |
7926 (XorMask << BITMASK_XOR_SHIFT);
7927 }
7928
7929 bool
AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7931 const unsigned MinVal,
7932 const unsigned MaxVal,
7933 const StringRef ErrMsg,
7934 SMLoc &Loc) {
7935 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7936 return false;
7937 }
7938 Loc = getLoc();
7939 if (!parseExpr(Op)) {
7940 return false;
7941 }
7942 if (Op < MinVal || Op > MaxVal) {
7943 Error(Loc, ErrMsg);
7944 return false;
7945 }
7946
7947 return true;
7948 }
7949
7950 bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7952 const unsigned MinVal,
7953 const unsigned MaxVal,
7954 const StringRef ErrMsg) {
7955 SMLoc Loc;
7956 for (unsigned i = 0; i < OpNum; ++i) {
7957 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7958 return false;
7959 }
7960
7961 return true;
7962 }
7963
7964 bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7966 using namespace llvm::AMDGPU::Swizzle;
7967
7968 int64_t Lane[LANE_NUM];
7969 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7970 "expected a 2-bit lane id")) {
7971 Imm = QUAD_PERM_ENC;
7972 for (unsigned I = 0; I < LANE_NUM; ++I) {
7973 Imm |= Lane[I] << (LANE_SHIFT * I);
7974 }
7975 return true;
7976 }
7977 return false;
7978 }
7979
7980 bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7982 using namespace llvm::AMDGPU::Swizzle;
7983
7984 SMLoc Loc;
7985 int64_t GroupSize;
7986 int64_t LaneIdx;
7987
7988 if (!parseSwizzleOperand(GroupSize,
7989 2, 32,
7990 "group size must be in the interval [2,32]",
7991 Loc)) {
7992 return false;
7993 }
7994 if (!isPowerOf2_64(GroupSize)) {
7995 Error(Loc, "group size must be a power of two");
7996 return false;
7997 }
7998 if (parseSwizzleOperand(LaneIdx,
7999 0, GroupSize - 1,
8000 "lane id must be in the interval [0,group size - 1]",
8001 Loc)) {
8002 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8003 return true;
8004 }
8005 return false;
8006 }
8007
8008 bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8010 using namespace llvm::AMDGPU::Swizzle;
8011
8012 SMLoc Loc;
8013 int64_t GroupSize;
8014
8015 if (!parseSwizzleOperand(GroupSize,
8016 2, 32,
8017 "group size must be in the interval [2,32]",
8018 Loc)) {
8019 return false;
8020 }
8021 if (!isPowerOf2_64(GroupSize)) {
8022 Error(Loc, "group size must be a power of two");
8023 return false;
8024 }
8025
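  // XOR-ing the lane id with GroupSize - 1 reverses the lane order within
  // each group of GroupSize lanes.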
8026 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8027 return true;
8028 }
8029
8030 bool
8031 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8032 using namespace llvm::AMDGPU::Swizzle;
8033
8034 SMLoc Loc;
8035 int64_t GroupSize;
8036
8037 if (!parseSwizzleOperand(GroupSize,
8038 1, 16,
8039 "group size must be in the interval [1,16]",
8040 Loc)) {
8041 return false;
8042 }
8043 if (!isPowerOf2_64(GroupSize)) {
8044 Error(Loc, "group size must be a power of two");
8045 return false;
8046 }
8047
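  // XOR-ing the lane id with GroupSize swaps each group of GroupSize lanes
  // with its neighboring group.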
8048 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8049 return true;
8050 }
8051
8052 bool
8053 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8054 using namespace llvm::AMDGPU::Swizzle;
8055
8056 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8057 return false;
8058 }
8059
8060 StringRef Ctl;
8061 SMLoc StrLoc = getLoc();
8062 if (!parseString(Ctl)) {
8063 return false;
8064 }
8065 if (Ctl.size() != BITMASK_WIDTH) {
8066 Error(StrLoc, "expected a 5-character mask");
8067 return false;
8068 }
8069
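  // Each of the five mask characters controls one bit of the lane id,
  // most significant bit first:
  //   '0' - force the bit to 0
  //   '1' - force the bit to 1
  //   'p' - preserve the lane's own bit
  //   'i' - invert the lane's own bit
  // For example, the control string "00p1i" gives AndMask = 0b00101,
  // OrMask = 0b00010 and XorMask = 0b00001.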
8070 unsigned AndMask = 0;
8071 unsigned OrMask = 0;
8072 unsigned XorMask = 0;
8073
8074 for (size_t i = 0; i < Ctl.size(); ++i) {
8075 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8076 switch(Ctl[i]) {
8077 default:
8078 Error(StrLoc, "invalid mask");
8079 return false;
8080 case '0':
8081 break;
8082 case '1':
8083 OrMask |= Mask;
8084 break;
8085 case 'p':
8086 AndMask |= Mask;
8087 break;
8088 case 'i':
8089 AndMask |= Mask;
8090 XorMask |= Mask;
8091 break;
8092 }
8093 }
8094
8095 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8096 return true;
8097 }
8098
8099 bool
8100 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8101
8102 SMLoc OffsetLoc = getLoc();
8103
8104 if (!parseExpr(Imm, "a swizzle macro")) {
8105 return false;
8106 }
8107 if (!isUInt<16>(Imm)) {
8108 Error(OffsetLoc, "expected a 16-bit offset");
8109 return false;
8110 }
8111 return true;
8112 }
8113
8114 bool
8115 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8116 using namespace llvm::AMDGPU::Swizzle;
8117
8118   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8119
8120 SMLoc ModeLoc = getLoc();
8121 bool Ok = false;
8122
8123 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8124 Ok = parseSwizzleQuadPerm(Imm);
8125 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8126 Ok = parseSwizzleBitmaskPerm(Imm);
8127 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8128 Ok = parseSwizzleBroadcast(Imm);
8129 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8130 Ok = parseSwizzleSwap(Imm);
8131 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8132 Ok = parseSwizzleReverse(Imm);
8133 } else {
8134 Error(ModeLoc, "expected a swizzle mode");
8135 }
8136
8137     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8138 }
8139
8140 return false;
8141 }
8142
8143 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8144 SMLoc S = getLoc();
8145 int64_t Imm = 0;
8146
8147 if (trySkipId("offset")) {
8148
8149 bool Ok = false;
8150 if (skipToken(AsmToken::Colon, "expected a colon")) {
8151 if (trySkipId("swizzle")) {
8152 Ok = parseSwizzleMacro(Imm);
8153 } else {
8154 Ok = parseSwizzleOffset(Imm);
8155 }
8156 }
8157
8158 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8159
8160 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8161 }
8162 return ParseStatus::NoMatch;
8163 }
8164
8165 bool
8166 AMDGPUOperand::isSwizzle() const {
8167 return isImmTy(ImmTySwizzle);
8168 }
8169
8170 //===----------------------------------------------------------------------===//
8171 // VGPR Index Mode
8172 //===----------------------------------------------------------------------===//
8173
8174 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8175
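  // Parses the body of a "gpr_idx(...)" operand: a comma-separated list of
  // VGPR index mode names, each of which sets one bit of the returned mask.
  // An empty list yields OFF; a parse error yields UNDEF.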
8176 using namespace llvm::AMDGPU::VGPRIndexMode;
8177
8178 if (trySkipToken(AsmToken::RParen)) {
8179 return OFF;
8180 }
8181
8182 int64_t Imm = 0;
8183
8184 while (true) {
8185 unsigned Mode = 0;
8186 SMLoc S = getLoc();
8187
8188 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8189 if (trySkipId(IdSymbolic[ModeId])) {
8190 Mode = 1 << ModeId;
8191 break;
8192 }
8193 }
8194
8195 if (Mode == 0) {
8196 Error(S, (Imm == 0)?
8197 "expected a VGPR index mode or a closing parenthesis" :
8198 "expected a VGPR index mode");
8199 return UNDEF;
8200 }
8201
8202 if (Imm & Mode) {
8203 Error(S, "duplicate VGPR index mode");
8204 return UNDEF;
8205 }
8206 Imm |= Mode;
8207
8208 if (trySkipToken(AsmToken::RParen))
8209 break;
8210 if (!skipToken(AsmToken::Comma,
8211 "expected a comma or a closing parenthesis"))
8212 return UNDEF;
8213 }
8214
8215 return Imm;
8216 }
8217
8218 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8219
8220 using namespace llvm::AMDGPU::VGPRIndexMode;
8221
8222 int64_t Imm = 0;
8223 SMLoc S = getLoc();
8224
8225 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8226 Imm = parseGPRIdxMacro();
8227 if (Imm == UNDEF)
8228 return ParseStatus::Failure;
8229 } else {
8230 if (getParser().parseAbsoluteExpression(Imm))
8231 return ParseStatus::Failure;
8232 if (Imm < 0 || !isUInt<4>(Imm))
8233 return Error(S, "invalid immediate: only 4-bit values are legal");
8234 }
8235
8236 Operands.push_back(
8237 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8238 return ParseStatus::Success;
8239 }
8240
8241 bool AMDGPUOperand::isGPRIdxMode() const {
8242 return isImmTy(ImmTyGprIdxMode);
8243 }
8244
8245 //===----------------------------------------------------------------------===//
8246 // sopp branch targets
8247 //===----------------------------------------------------------------------===//
8248
8249 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8250
8251 // Make sure we are not parsing something
8252 // that looks like a label or an expression but is not.
8253 // This will improve error messages.
8254 if (isRegister() || isModifier())
8255 return ParseStatus::NoMatch;
8256
8257 if (!parseExpr(Operands))
8258 return ParseStatus::Failure;
8259
8260 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8261 assert(Opr.isImm() || Opr.isExpr());
8262 SMLoc Loc = Opr.getStartLoc();
8263
8264 // Currently we do not support arbitrary expressions as branch targets.
8265 // Only labels and absolute expressions are accepted.
8266 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8267 Error(Loc, "expected an absolute expression or a label");
8268 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8269 Error(Loc, "expected a 16-bit signed jump offset");
8270 }
8271
8272 return ParseStatus::Success;
8273 }
8274
8275 //===----------------------------------------------------------------------===//
8276 // Boolean holding registers
8277 //===----------------------------------------------------------------------===//
8278
8279 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8280 return parseReg(Operands);
8281 }
8282
8283 //===----------------------------------------------------------------------===//
8284 // mubuf
8285 //===----------------------------------------------------------------------===//
8286
8287 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8288 const OperandVector &Operands,
8289 bool IsAtomic) {
8290 OptionalImmIndexMap OptionalIdx;
8291 unsigned FirstOperandIdx = 1;
8292 bool IsAtomicReturn = false;
8293
8294 if (IsAtomic) {
8295 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8296 SIInstrFlags::IsAtomicRet;
8297 }
8298
8299 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8300 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8301
8302 // Add the register arguments
8303 if (Op.isReg()) {
8304 Op.addRegOperands(Inst, 1);
8305 // Insert a tied src for atomic return dst.
8306 // This cannot be postponed as subsequent calls to
8307 // addImmOperands rely on correct number of MC operands.
8308 if (IsAtomicReturn && i == FirstOperandIdx)
8309 Op.addRegOperands(Inst, 1);
8310 continue;
8311 }
8312
8313 // Handle the case where soffset is an immediate
8314 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8315 Op.addImmOperands(Inst, 1);
8316 continue;
8317 }
8318
8319 // Handle tokens like 'offen' which are sometimes hard-coded into the
8320 // asm string. There are no MCInst operands for these.
8321 if (Op.isToken()) {
8322 continue;
8323 }
8324 assert(Op.isImm());
8325
8326 // Handle optional arguments
8327 OptionalIdx[Op.getImmTy()] = i;
8328 }
8329
8330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8331 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8332 }
8333
8334 //===----------------------------------------------------------------------===//
8335 // smrd
8336 //===----------------------------------------------------------------------===//
8337
8338 bool AMDGPUOperand::isSMRDOffset8() const {
8339 return isImmLiteral() && isUInt<8>(getImm());
8340 }
8341
8342 bool AMDGPUOperand::isSMEMOffset() const {
8343 // Offset range is checked later by validator.
8344 return isImmLiteral();
8345 }
8346
8347 bool AMDGPUOperand::isSMRDLiteralOffset() const {
8348   // 32-bit literals are only supported on CI, and we only want to use them
8349   // when the offset does not fit in 8 bits.
8350 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8351 }
8352
8353 //===----------------------------------------------------------------------===//
8354 // vop3
8355 //===----------------------------------------------------------------------===//
8356
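// The output-modifier (omod) field is encoded as: 0 = none, 1 = *2, 2 = *4,
// 3 = /2. The helpers below map the asm forms mul:1|2|4 and div:2 onto that
// encoding.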
8357 static bool ConvertOmodMul(int64_t &Mul) {
8358 if (Mul != 1 && Mul != 2 && Mul != 4)
8359 return false;
8360
8361 Mul >>= 1;
8362 return true;
8363 }
8364
8365 static bool ConvertOmodDiv(int64_t &Div) {
8366 if (Div == 1) {
8367 Div = 0;
8368 return true;
8369 }
8370
8371 if (Div == 2) {
8372 Div = 3;
8373 return true;
8374 }
8375
8376 return false;
8377 }
8378
8379 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8380 // This is intentional and ensures compatibility with sp3.
8381 // See bug 35397 for details.
8382 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8383 if (BoundCtrl == 0 || BoundCtrl == 1) {
8384 if (!isGFX11Plus())
8385 BoundCtrl = 1;
8386 return true;
8387 }
8388 return false;
8389 }
8390
8391 void AMDGPUAsmParser::onBeginOfFile() {
8392 if (!getParser().getStreamer().getTargetStreamer() ||
8393 getSTI().getTargetTriple().getArch() == Triple::r600)
8394 return;
8395
8396 if (!getTargetStreamer().getTargetID())
8397 getTargetStreamer().initializeTargetID(getSTI(),
8398 getSTI().getFeatureString());
8399
8400 if (isHsaAbi(getSTI()))
8401 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8402 }
8403
8404 /// Parse AMDGPU specific expressions.
8405 ///
8406 /// expr ::= or(expr, ...) |
8407 /// max(expr, ...)
8408 ///
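8408 /// For example (illustrative only; the symbol names are made up):
8408 ///   .set total_vgprs, max(kernel_a.num_vgpr, kernel_b.num_vgpr)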
8409 bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8410 using AGVK = AMDGPUMCExpr::VariantKind;
8411
8412 if (isToken(AsmToken::Identifier)) {
8413 StringRef TokenId = getTokenStr();
8414 AGVK VK = StringSwitch<AGVK>(TokenId)
8415 .Case("max", AGVK::AGVK_Max)
8416 .Case("or", AGVK::AGVK_Or)
8417 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
8418 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
8419 .Case("alignto", AGVK::AGVK_AlignTo)
8420 .Case("occupancy", AGVK::AGVK_Occupancy)
8421 .Default(AGVK::AGVK_None);
8422
8423 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8424 SmallVector<const MCExpr *, 4> Exprs;
8425 uint64_t CommaCount = 0;
8426 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
8427 lex(); // Eat '('
8428 while (true) {
8429 if (trySkipToken(AsmToken::RParen)) {
8430 if (Exprs.empty()) {
8431 Error(getToken().getLoc(),
8432 "empty " + Twine(TokenId) + " expression");
8433 return true;
8434 }
8435 if (CommaCount + 1 != Exprs.size()) {
8436 Error(getToken().getLoc(),
8437 "mismatch of commas in " + Twine(TokenId) + " expression");
8438 return true;
8439 }
8440 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
8441 return false;
8442 }
8443 const MCExpr *Expr;
8444 if (getParser().parseExpression(Expr, EndLoc))
8445 return true;
8446 Exprs.push_back(Expr);
8447 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8448 if (LastTokenWasComma)
8449 CommaCount++;
8450 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8451 Error(getToken().getLoc(),
8452 "unexpected token in " + Twine(TokenId) + " expression");
8453 return true;
8454 }
8455 }
8456 }
8457 }
8458 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8459 }
8460
8461 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8462 StringRef Name = getTokenStr();
8463 if (Name == "mul") {
8464 return parseIntWithPrefix("mul", Operands,
8465 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8466 }
8467
8468 if (Name == "div") {
8469 return parseIntWithPrefix("div", Operands,
8470 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8471 }
8472
8473 return ParseStatus::NoMatch;
8474 }
8475
8476 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8477 // the number of src operands present, then copies that bit into src0_modifiers.
8478 static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8479 int Opc = Inst.getOpcode();
8480 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8481 if (OpSelIdx == -1)
8482 return;
8483
8484 int SrcNum;
8485 const int Ops[] = { AMDGPU::OpName::src0,
8486 AMDGPU::OpName::src1,
8487 AMDGPU::OpName::src2 };
8488 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8489 ++SrcNum)
8490 ;
8491 assert(SrcNum > 0);
8492
8493 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8494
8495 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8496 if (DstIdx == -1)
8497 return;
8498
8499 const MCOperand &DstOp = Inst.getOperand(DstIdx);
8500 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8501 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8502 if (DstOp.isReg() &&
8503 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8504 if (AMDGPU::isHi(DstOp.getReg(), MRI))
8505 ModVal |= SISrcMods::DST_OP_SEL;
8506 } else {
8507 if ((OpSel & (1 << SrcNum)) != 0)
8508 ModVal |= SISrcMods::DST_OP_SEL;
8509 }
8510 Inst.getOperand(ModIdx).setImm(ModVal);
8511 }
8512
8513 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8514 const OperandVector &Operands) {
8515 cvtVOP3P(Inst, Operands);
8516 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8517 }
8518
8519 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8520 OptionalImmIndexMap &OptionalIdx) {
8521 cvtVOP3P(Inst, Operands, OptionalIdx);
8522 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8523 }
8524
8525 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8526 return
8527 // 1. This operand is input modifiers
8528 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8529 // 2. This is not last operand
8530 && Desc.NumOperands > (OpNum + 1)
8531 // 3. Next operand is register class
8532 && Desc.operands()[OpNum + 1].RegClass != -1
8533 // 4. Next register is not tied to any other operand
8534 && Desc.getOperandConstraint(OpNum + 1,
8535 MCOI::OperandConstraint::TIED_TO) == -1;
8536 }
8537
8538 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8539 {
8540 OptionalImmIndexMap OptionalIdx;
8541 unsigned Opc = Inst.getOpcode();
8542
8543 unsigned I = 1;
8544 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8545 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8546 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8547 }
8548
8549 for (unsigned E = Operands.size(); I != E; ++I) {
8550 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8551 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8552 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8553 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8554 Op.isInterpAttrChan()) {
8555 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8556 } else if (Op.isImmModifier()) {
8557 OptionalIdx[Op.getImmTy()] = I;
8558 } else {
8559 llvm_unreachable("unhandled operand type");
8560 }
8561 }
8562
8563 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8564 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8565 AMDGPUOperand::ImmTyHigh);
8566
8567 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8568 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8569 AMDGPUOperand::ImmTyClamp);
8570
8571 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8572 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8573 AMDGPUOperand::ImmTyOModSI);
8574 }
8575
8576 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8577 {
8578 OptionalImmIndexMap OptionalIdx;
8579 unsigned Opc = Inst.getOpcode();
8580
8581 unsigned I = 1;
8582 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8583 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8584 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8585 }
8586
8587 for (unsigned E = Operands.size(); I != E; ++I) {
8588 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8589 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8590 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8591 } else if (Op.isImmModifier()) {
8592 OptionalIdx[Op.getImmTy()] = I;
8593 } else {
8594 llvm_unreachable("unhandled operand type");
8595 }
8596 }
8597
8598 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
8599
8600 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8601 if (OpSelIdx != -1)
8602 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8603
8604 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8605
8606 if (OpSelIdx == -1)
8607 return;
8608
8609 const int Ops[] = { AMDGPU::OpName::src0,
8610 AMDGPU::OpName::src1,
8611 AMDGPU::OpName::src2 };
8612 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8613 AMDGPU::OpName::src1_modifiers,
8614 AMDGPU::OpName::src2_modifiers };
8615
8616 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8617
8618 for (int J = 0; J < 3; ++J) {
8619 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8620 if (OpIdx == -1)
8621 break;
8622
8623 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8624 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8625
8626 if ((OpSel & (1 << J)) != 0)
8627 ModVal |= SISrcMods::OP_SEL_0;
8628 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8629 (OpSel & (1 << 3)) != 0)
8630 ModVal |= SISrcMods::DST_OP_SEL;
8631
8632 Inst.getOperand(ModIdx).setImm(ModVal);
8633 }
8634 }
8635
8636 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8637 OptionalImmIndexMap &OptionalIdx) {
8638 unsigned Opc = Inst.getOpcode();
8639
8640 unsigned I = 1;
8641 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8642 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8643 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8644 }
8645
8646 for (unsigned E = Operands.size(); I != E; ++I) {
8647 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8648 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8649 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8650 } else if (Op.isImmModifier()) {
8651 OptionalIdx[Op.getImmTy()] = I;
8652 } else if (Op.isRegOrImm()) {
8653 Op.addRegOrImmOperands(Inst, 1);
8654 } else {
8655 llvm_unreachable("unhandled operand type");
8656 }
8657 }
8658
8659 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
8660 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
8661 Inst.addOperand(Inst.getOperand(0));
8662 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8663 AMDGPUOperand::ImmTyByteSel);
8664 }
8665
8666 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8667 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8668 AMDGPUOperand::ImmTyClamp);
8669
8670 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8671 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8672 AMDGPUOperand::ImmTyOModSI);
8673
8674   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8675   // they have a src2 register operand that is tied to the dst operand.
8676   // We don't allow modifiers for this operand in the assembler, so
8677   // src2_modifiers should be 0.
8678 if (isMAC(Opc)) {
8679 auto it = Inst.begin();
8680 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8681 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8682 ++it;
8683 // Copy the operand to ensure it's not invalidated when Inst grows.
8684 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8685 }
8686 }
8687
8688 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8689 OptionalImmIndexMap OptionalIdx;
8690 cvtVOP3(Inst, Operands, OptionalIdx);
8691 }
8692
8693 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8694 OptionalImmIndexMap &OptIdx) {
8695 const int Opc = Inst.getOpcode();
8696 const MCInstrDesc &Desc = MII.get(Opc);
8697
8698 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8699
8700 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8701 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8702 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8703 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8704 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8705 Inst.addOperand(Inst.getOperand(0));
8706 }
8707
8708 // Adding vdst_in operand is already covered for these DPP instructions in
8709 // cvtVOP3DPP.
8710 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8711 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8712 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8713 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8714 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8715 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8716 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8717 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8718 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8719 assert(!IsPacked);
8720 Inst.addOperand(Inst.getOperand(0));
8721 }
8722
8723 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8724 // instruction, and then figure out where to actually put the modifiers
8725
8726 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8727 if (OpSelIdx != -1) {
8728 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8729 }
8730
8731 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8732 if (OpSelHiIdx != -1) {
8733 int DefaultVal = IsPacked ? -1 : 0;
8734 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8735 DefaultVal);
8736 }
8737
8738 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8739 if (NegLoIdx != -1)
8740 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8741
8742 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8743 if (NegHiIdx != -1)
8744 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8745
8746 const int Ops[] = { AMDGPU::OpName::src0,
8747 AMDGPU::OpName::src1,
8748 AMDGPU::OpName::src2 };
8749 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8750 AMDGPU::OpName::src1_modifiers,
8751 AMDGPU::OpName::src2_modifiers };
8752
8753 unsigned OpSel = 0;
8754 unsigned OpSelHi = 0;
8755 unsigned NegLo = 0;
8756 unsigned NegHi = 0;
8757
8758 if (OpSelIdx != -1)
8759 OpSel = Inst.getOperand(OpSelIdx).getImm();
8760
8761 if (OpSelHiIdx != -1)
8762 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8763
8764 if (NegLoIdx != -1)
8765 NegLo = Inst.getOperand(NegLoIdx).getImm();
8766
8767 if (NegHiIdx != -1)
8768 NegHi = Inst.getOperand(NegHiIdx).getImm();
8769
8770 for (int J = 0; J < 3; ++J) {
8771 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8772 if (OpIdx == -1)
8773 break;
8774
8775 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8776
8777 if (ModIdx == -1)
8778 continue;
8779
8780 uint32_t ModVal = 0;
8781
8782 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8783 if (SrcOp.isReg() && getMRI()
8784 ->getRegClass(AMDGPU::VGPR_16RegClassID)
8785 .contains(SrcOp.getReg())) {
8786 bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
8787 if (VGPRSuffixIsHi)
8788 ModVal |= SISrcMods::OP_SEL_0;
8789 } else {
8790 if ((OpSel & (1 << J)) != 0)
8791 ModVal |= SISrcMods::OP_SEL_0;
8792 }
8793
8794 if ((OpSelHi & (1 << J)) != 0)
8795 ModVal |= SISrcMods::OP_SEL_1;
8796
8797 if ((NegLo & (1 << J)) != 0)
8798 ModVal |= SISrcMods::NEG;
8799
8800 if ((NegHi & (1 << J)) != 0)
8801 ModVal |= SISrcMods::NEG_HI;
8802
8803 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8804 }
8805 }
8806
8807 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8808 OptionalImmIndexMap OptIdx;
8809 cvtVOP3(Inst, Operands, OptIdx);
8810 cvtVOP3P(Inst, Operands, OptIdx);
8811 }
8812
8813 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8814 unsigned i, unsigned Opc, unsigned OpName) {
8815 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8816 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8817 else
8818 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8819 }
8820
8821 void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8822 unsigned Opc = Inst.getOpcode();
8823
8824 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8825 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8826 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8827 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8828 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8829
8830 OptionalImmIndexMap OptIdx;
8831 for (unsigned i = 5; i < Operands.size(); ++i) {
8832 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8833 OptIdx[Op.getImmTy()] = i;
8834 }
8835
8836 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8837 addOptionalImmOperand(Inst, Operands, OptIdx,
8838 AMDGPUOperand::ImmTyIndexKey8bit);
8839
8840 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8841 addOptionalImmOperand(Inst, Operands, OptIdx,
8842 AMDGPUOperand::ImmTyIndexKey16bit);
8843
8844 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8845 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
8846
8847 cvtVOP3P(Inst, Operands, OptIdx);
8848 }
8849
8850 //===----------------------------------------------------------------------===//
8851 // VOPD
8852 //===----------------------------------------------------------------------===//
8853
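// Parse the "::" separator and the OpY mnemonic of a dual-issue VOPD
// instruction, e.g. (illustrative):
//   v_dual_mov_b32 v0, v2 :: v_dual_add_f32 v1, v3, v4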
8854 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8855 if (!hasVOPD(getSTI()))
8856 return ParseStatus::NoMatch;
8857
8858 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8859 SMLoc S = getLoc();
8860 lex();
8861 lex();
8862 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8863 SMLoc OpYLoc = getLoc();
8864 StringRef OpYName;
8865 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8866 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8867 return ParseStatus::Success;
8868 }
8869 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8870 }
8871 return ParseStatus::NoMatch;
8872 }
8873
8874 // Create VOPD MCInst operands using parsed assembler operands.
8875 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8876 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8877 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8878 if (Op.isReg()) {
8879 Op.addRegOperands(Inst, 1);
8880 return;
8881 }
8882 if (Op.isImm()) {
8883 Op.addImmOperands(Inst, 1);
8884 return;
8885 }
8886 llvm_unreachable("Unhandled operand type in cvtVOPD");
8887 };
8888
8889 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8890
8891 // MCInst operands are ordered as follows:
8892 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8893
8894 for (auto CompIdx : VOPD::COMPONENTS) {
8895 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8896 }
8897
8898 for (auto CompIdx : VOPD::COMPONENTS) {
8899 const auto &CInfo = InstInfo[CompIdx];
8900 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8901 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8902 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8903 if (CInfo.hasSrc2Acc())
8904 addOp(CInfo.getIndexOfDstInParsedOperands());
8905 }
8906 }
8907
8908 //===----------------------------------------------------------------------===//
8909 // dpp
8910 //===----------------------------------------------------------------------===//
8911
8912 bool AMDGPUOperand::isDPP8() const {
8913 return isImmTy(ImmTyDPP8);
8914 }
8915
8916 bool AMDGPUOperand::isDPPCtrl() const {
8917 using namespace AMDGPU::DPP;
8918
8919 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8920 if (result) {
8921 int64_t Imm = getImm();
8922 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8923 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8924 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8925 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8926 (Imm == DppCtrl::WAVE_SHL1) ||
8927 (Imm == DppCtrl::WAVE_ROL1) ||
8928 (Imm == DppCtrl::WAVE_SHR1) ||
8929 (Imm == DppCtrl::WAVE_ROR1) ||
8930 (Imm == DppCtrl::ROW_MIRROR) ||
8931 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8932 (Imm == DppCtrl::BCAST15) ||
8933 (Imm == DppCtrl::BCAST31) ||
8934 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8935 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8936 }
8937 return false;
8938 }
8939
8940 //===----------------------------------------------------------------------===//
8941 // mAI
8942 //===----------------------------------------------------------------------===//
8943
8944 bool AMDGPUOperand::isBLGP() const {
8945 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8946 }
8947
8948 bool AMDGPUOperand::isS16Imm() const {
8949 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8950 }
8951
8952 bool AMDGPUOperand::isU16Imm() const {
8953 return isImmLiteral() && isUInt<16>(getImm());
8954 }
8955
8956 //===----------------------------------------------------------------------===//
8957 // dim
8958 //===----------------------------------------------------------------------===//
8959
8960 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8961 // We want to allow "dim:1D" etc.,
8962 // but the initial 1 is tokenized as an integer.
8963 std::string Token;
8964 if (isToken(AsmToken::Integer)) {
8965 SMLoc Loc = getToken().getEndLoc();
8966 Token = std::string(getTokenStr());
8967 lex();
8968 if (getLoc() != Loc)
8969 return false;
8970 }
8971
8972 StringRef Suffix;
8973 if (!parseId(Suffix))
8974 return false;
8975 Token += Suffix;
8976
8977 StringRef DimId = Token;
8978 if (DimId.starts_with("SQ_RSRC_IMG_"))
8979 DimId = DimId.drop_front(12);
8980
8981 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8982 if (!DimInfo)
8983 return false;
8984
8985 Encoding = DimInfo->Encoding;
8986 return true;
8987 }
8988
8989 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8990 if (!isGFX10Plus())
8991 return ParseStatus::NoMatch;
8992
8993 SMLoc S = getLoc();
8994
8995 if (!trySkipId("dim", AsmToken::Colon))
8996 return ParseStatus::NoMatch;
8997
8998 unsigned Encoding;
8999 SMLoc Loc = getLoc();
9000 if (!parseDimId(Encoding))
9001 return Error(Loc, "invalid dim value");
9002
9003 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9004 AMDGPUOperand::ImmTyDim));
9005 return ParseStatus::Success;
9006 }
9007
9008 //===----------------------------------------------------------------------===//
9009 // dpp
9010 //===----------------------------------------------------------------------===//
9011
9012 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9013 SMLoc S = getLoc();
9014
9015 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9016 return ParseStatus::NoMatch;
9017
9018 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9019
9020 int64_t Sels[8];
9021
9022 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9023 return ParseStatus::Failure;
9024
9025 for (size_t i = 0; i < 8; ++i) {
9026 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9027 return ParseStatus::Failure;
9028
9029 SMLoc Loc = getLoc();
9030 if (getParser().parseAbsoluteExpression(Sels[i]))
9031 return ParseStatus::Failure;
9032     if (Sels[i] < 0 || Sels[i] > 7)
9033 return Error(Loc, "expected a 3-bit value");
9034 }
9035
9036 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9037 return ParseStatus::Failure;
9038
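  // Pack the eight 3-bit lane selectors; selector i occupies bits
  // [3*i+2 : 3*i] of the dpp8 immediate.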
9039 unsigned DPP8 = 0;
9040 for (size_t i = 0; i < 8; ++i)
9041 DPP8 |= (Sels[i] << (i * 3));
9042
9043 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9044 return ParseStatus::Success;
9045 }
9046
9047 bool
9048 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9049 const OperandVector &Operands) {
9050 if (Ctrl == "row_newbcast")
9051 return isGFX90A();
9052
9053 if (Ctrl == "row_share" ||
9054 Ctrl == "row_xmask")
9055 return isGFX10Plus();
9056
9057 if (Ctrl == "wave_shl" ||
9058 Ctrl == "wave_shr" ||
9059 Ctrl == "wave_rol" ||
9060 Ctrl == "wave_ror" ||
9061 Ctrl == "row_bcast")
9062 return isVI() || isGFX9();
9063
9064 return Ctrl == "row_mirror" ||
9065 Ctrl == "row_half_mirror" ||
9066 Ctrl == "quad_perm" ||
9067 Ctrl == "row_shl" ||
9068 Ctrl == "row_shr" ||
9069 Ctrl == "row_ror";
9070 }
9071
9072 int64_t
9073 AMDGPUAsmParser::parseDPPCtrlPerm() {
9074 // quad_perm:[%d,%d,%d,%d]
9075
9076 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9077 return -1;
9078
9079 int64_t Val = 0;
9080 for (int i = 0; i < 4; ++i) {
9081 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9082 return -1;
9083
9084 int64_t Temp;
9085 SMLoc Loc = getLoc();
9086 if (getParser().parseAbsoluteExpression(Temp))
9087 return -1;
9088 if (Temp < 0 || Temp > 3) {
9089 Error(Loc, "expected a 2-bit value");
9090 return -1;
9091 }
9092
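    // Each 2-bit selector i lands in bits [2*i+1 : 2*i] of the control value.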
9093 Val += (Temp << i * 2);
9094 }
9095
9096 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9097 return -1;
9098
9099 return Val;
9100 }
9101
9102 int64_t
9103 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9104 using namespace AMDGPU::DPP;
9105
9106 // sel:%d
9107
9108 int64_t Val;
9109 SMLoc Loc = getLoc();
9110
9111 if (getParser().parseAbsoluteExpression(Val))
9112 return -1;
9113
9114 struct DppCtrlCheck {
9115 int64_t Ctrl;
9116 int Lo;
9117 int Hi;
9118 };
9119
9120 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9121 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9122 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9123 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9124 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9125 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9126 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9127 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9128 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9129 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9130 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9131 .Default({-1, 0, 0});
9132
9133 bool Valid;
9134 if (Check.Ctrl == -1) {
9135 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9136 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9137 } else {
9138 Valid = Check.Lo <= Val && Val <= Check.Hi;
9139 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9140 }
9141
9142 if (!Valid) {
9143 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9144 return -1;
9145 }
9146
9147 return Val;
9148 }
9149
9150 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9151 using namespace AMDGPU::DPP;
9152
9153 if (!isToken(AsmToken::Identifier) ||
9154 !isSupportedDPPCtrl(getTokenStr(), Operands))
9155 return ParseStatus::NoMatch;
9156
9157 SMLoc S = getLoc();
9158 int64_t Val = -1;
9159 StringRef Ctrl;
9160
9161 parseId(Ctrl);
9162
9163 if (Ctrl == "row_mirror") {
9164 Val = DppCtrl::ROW_MIRROR;
9165 } else if (Ctrl == "row_half_mirror") {
9166 Val = DppCtrl::ROW_HALF_MIRROR;
9167 } else {
9168 if (skipToken(AsmToken::Colon, "expected a colon")) {
9169 if (Ctrl == "quad_perm") {
9170 Val = parseDPPCtrlPerm();
9171 } else {
9172 Val = parseDPPCtrlSel(Ctrl);
9173 }
9174 }
9175 }
9176
9177 if (Val == -1)
9178 return ParseStatus::Failure;
9179
9180 Operands.push_back(
9181 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9182 return ParseStatus::Success;
9183 }
9184
9185 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9186 bool IsDPP8) {
9187 OptionalImmIndexMap OptionalIdx;
9188 unsigned Opc = Inst.getOpcode();
9189 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9190
9191   // MAC instructions are special because they have an 'old'
9192   // operand which is not tied to dst (but is assumed to be).
9193   // They also have a dummy, unused src2_modifiers operand.
9194 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9195 int Src2ModIdx =
9196 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9197 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9198 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9199
9200 unsigned I = 1;
9201 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9202 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9203 }
9204
9205 int Fi = 0;
9206 for (unsigned E = Operands.size(); I != E; ++I) {
9207
9208 if (IsMAC) {
9209 int NumOperands = Inst.getNumOperands();
9210 if (OldIdx == NumOperands) {
9211 // Handle old operand
9212 constexpr int DST_IDX = 0;
9213 Inst.addOperand(Inst.getOperand(DST_IDX));
9214 } else if (Src2ModIdx == NumOperands) {
9215 // Add unused dummy src2_modifiers
9216 Inst.addOperand(MCOperand::createImm(0));
9217 }
9218 }
9219
9220 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9221 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9222 Inst.addOperand(Inst.getOperand(0));
9223 }
9224
9225 bool IsVOP3CvtSrDpp =
9226 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9227 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9228 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9229 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9230 if (IsVOP3CvtSrDpp) {
9231 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9232 Inst.addOperand(MCOperand::createImm(0));
9233 Inst.addOperand(MCOperand::createReg(0));
9234 }
9235 }
9236
9237 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9238 MCOI::TIED_TO);
9239 if (TiedTo != -1) {
9240 assert((unsigned)TiedTo < Inst.getNumOperands());
9241 // handle tied old or src2 for MAC instructions
9242 Inst.addOperand(Inst.getOperand(TiedTo));
9243 }
9244 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9245 // Add the register arguments
9246 if (IsDPP8 && Op.isDppFI()) {
9247 Fi = Op.getImm();
9248 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9249 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9250 } else if (Op.isReg()) {
9251 Op.addRegOperands(Inst, 1);
9252 } else if (Op.isImm() &&
9253 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9254 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9255 Op.addImmOperands(Inst, 1);
9256 } else if (Op.isImm()) {
9257 OptionalIdx[Op.getImmTy()] = I;
9258 } else {
9259 llvm_unreachable("unhandled operand type");
9260 }
9261 }
9262
9263 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
9264 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9265 AMDGPUOperand::ImmTyByteSel);
9266
9267 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9268 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9269 AMDGPUOperand::ImmTyClamp);
9270
9271 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9272 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9273
9274 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9275 cvtVOP3P(Inst, Operands, OptionalIdx);
9276 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9277 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9278 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9279 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9280 }
9281
9282 if (IsDPP8) {
9283 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9284 using namespace llvm::AMDGPU::DPP;
9285 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9286 } else {
9287 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9288 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9289 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9290 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9291
9292 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9293 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9294 AMDGPUOperand::ImmTyDppFI);
9295 }
9296 }
9297
9298 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9299 OptionalImmIndexMap OptionalIdx;
9300
9301 unsigned I = 1;
9302 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9303 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9304 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9305 }
9306
9307 int Fi = 0;
9308 for (unsigned E = Operands.size(); I != E; ++I) {
9309 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9310 MCOI::TIED_TO);
9311 if (TiedTo != -1) {
9312 assert((unsigned)TiedTo < Inst.getNumOperands());
9313 // handle tied old or src2 for MAC instructions
9314 Inst.addOperand(Inst.getOperand(TiedTo));
9315 }
9316 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9317 // Add the register arguments
9318 if (Op.isReg() && validateVccOperand(Op.getReg())) {
9319 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
9320 // Skip it.
9321 continue;
9322 }
9323
9324 if (IsDPP8) {
9325 if (Op.isDPP8()) {
9326 Op.addImmOperands(Inst, 1);
9327 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9328 Op.addRegWithFPInputModsOperands(Inst, 2);
9329 } else if (Op.isDppFI()) {
9330 Fi = Op.getImm();
9331 } else if (Op.isReg()) {
9332 Op.addRegOperands(Inst, 1);
9333 } else {
9334 llvm_unreachable("Invalid operand type");
9335 }
9336 } else {
9337 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9338 Op.addRegWithFPInputModsOperands(Inst, 2);
9339 } else if (Op.isReg()) {
9340 Op.addRegOperands(Inst, 1);
9341 } else if (Op.isDPPCtrl()) {
9342 Op.addImmOperands(Inst, 1);
9343 } else if (Op.isImm()) {
9344 // Handle optional arguments
9345 OptionalIdx[Op.getImmTy()] = I;
9346 } else {
9347 llvm_unreachable("Invalid operand type");
9348 }
9349 }
9350 }
9351
9352 if (IsDPP8) {
9353 using namespace llvm::AMDGPU::DPP;
9354 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9355 } else {
9356 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9357 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9358 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9359 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9360 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9361 AMDGPUOperand::ImmTyDppFI);
9362 }
9363 }
9364 }
9365
9366 //===----------------------------------------------------------------------===//
9367 // sdwa
9368 //===----------------------------------------------------------------------===//
9369
9370 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9371 StringRef Prefix,
9372 AMDGPUOperand::ImmTy Type) {
9373 using namespace llvm::AMDGPU::SDWA;
9374
9375 SMLoc S = getLoc();
9376 StringRef Value;
9377
9378 SMLoc StringLoc;
9379 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9380 if (!Res.isSuccess())
9381 return Res;
9382
9383 int64_t Int;
9384 Int = StringSwitch<int64_t>(Value)
9385 .Case("BYTE_0", SdwaSel::BYTE_0)
9386 .Case("BYTE_1", SdwaSel::BYTE_1)
9387 .Case("BYTE_2", SdwaSel::BYTE_2)
9388 .Case("BYTE_3", SdwaSel::BYTE_3)
9389 .Case("WORD_0", SdwaSel::WORD_0)
9390 .Case("WORD_1", SdwaSel::WORD_1)
9391 .Case("DWORD", SdwaSel::DWORD)
9392 .Default(0xffffffff);
9393
9394 if (Int == 0xffffffff)
9395 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9396
9397 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9398 return ParseStatus::Success;
9399 }
9400
9401 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9402 using namespace llvm::AMDGPU::SDWA;
9403
9404 SMLoc S = getLoc();
9405 StringRef Value;
9406
9407 SMLoc StringLoc;
9408 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9409 if (!Res.isSuccess())
9410 return Res;
9411
9412 int64_t Int;
9413 Int = StringSwitch<int64_t>(Value)
9414 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9415 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9416 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9417 .Default(0xffffffff);
9418
9419 if (Int == 0xffffffff)
9420 return Error(StringLoc, "invalid dst_unused value");
9421
9422 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9423 return ParseStatus::Success;
9424 }
9425
9426 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9427 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9428 }
9429
9430 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9431 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9432 }
9433
9434 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9435 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9436 }
9437
9438 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9439 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9440 }
9441
9442 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9443 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9444 }
9445
9446 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9447 uint64_t BasicInstType,
9448 bool SkipDstVcc,
9449 bool SkipSrcVcc) {
9450 using namespace llvm::AMDGPU::SDWA;
9451
9452 OptionalImmIndexMap OptionalIdx;
9453 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9454 bool SkippedVcc = false;
9455
9456 unsigned I = 1;
9457 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9458 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9459 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9460 }
9461
9462 for (unsigned E = Operands.size(); I != E; ++I) {
9463 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9464 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9465 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9466 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
9467 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9468 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9469 // Skip VCC only if we didn't skip it on previous iteration.
9470 // Note that src0 and src1 occupy 2 slots each because of modifiers.
9471 if (BasicInstType == SIInstrFlags::VOP2 &&
9472 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9473 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9474 SkippedVcc = true;
9475 continue;
9476 }
9477 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
9478 SkippedVcc = true;
9479 continue;
9480 }
9481 }
9482 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9483 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9484 } else if (Op.isImm()) {
9485 // Handle optional arguments
9486 OptionalIdx[Op.getImmTy()] = I;
9487 } else {
9488 llvm_unreachable("Invalid operand type");
9489 }
9490 SkippedVcc = false;
9491 }
9492
9493 const unsigned Opc = Inst.getOpcode();
9494 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9495 Opc != AMDGPU::V_NOP_sdwa_vi) {
9496     // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
9497 switch (BasicInstType) {
9498 case SIInstrFlags::VOP1:
9499 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9500 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9501 AMDGPUOperand::ImmTyClamp, 0);
9502
9503 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9504 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9505 AMDGPUOperand::ImmTyOModSI, 0);
9506
9507 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9508 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9509 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9510
9511 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9512 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9513 AMDGPUOperand::ImmTySDWADstUnused,
9514 DstUnused::UNUSED_PRESERVE);
9515
9516 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9517 break;
9518
9519 case SIInstrFlags::VOP2:
9520 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9521 AMDGPUOperand::ImmTyClamp, 0);
9522
9523 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9524 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9525
9526 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9527 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9528 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9529 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9530 break;
9531
9532 case SIInstrFlags::VOPC:
9533 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9534 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9535 AMDGPUOperand::ImmTyClamp, 0);
9536 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9537 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9538 break;
9539
9540 default:
9541 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9542 }
9543 }
9544
9545 // special case v_mac_{f16, f32}:
9546 // it has src2 register operand that is tied to dst operand
9547 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9548 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9549 auto it = Inst.begin();
9550 std::advance(
9551 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9552 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9553 }
9554 }
9555
9556 /// Force static initialization.
9557 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9558 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9559 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9560 }
9561
9562 #define GET_REGISTER_MATCHER
9563 #define GET_MATCHER_IMPLEMENTATION
9564 #define GET_MNEMONIC_SPELL_CHECKER
9565 #define GET_MNEMONIC_CHECKER
9566 #include "AMDGPUGenAsmMatcher.inc"
9567
9568 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9569 unsigned MCK) {
9570 switch (MCK) {
9571 case MCK_addr64:
9572 return parseTokenOp("addr64", Operands);
9573 case MCK_done:
9574 return parseTokenOp("done", Operands);
9575 case MCK_idxen:
9576 return parseTokenOp("idxen", Operands);
9577 case MCK_lds:
9578 return parseTokenOp("lds", Operands);
9579 case MCK_offen:
9580 return parseTokenOp("offen", Operands);
9581 case MCK_off:
9582 return parseTokenOp("off", Operands);
9583 case MCK_row_95_en:
9584 return parseTokenOp("row_en", Operands);
9585 case MCK_gds:
9586 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9587 case MCK_tfe:
9588 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9589 }
9590 return tryCustomParseOperand(Operands, MCK);
9591 }
9592
9593 // This function should be defined after auto-generated include so that we have
9594 // MatchClassKind enum defined
9595 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9596 unsigned Kind) {
9597 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9598 // But MatchInstructionImpl() expects to meet token and fails to validate
9599 // operand. This method checks if we are given immediate operand but expect to
9600 // get corresponding token.
9601 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9602 switch (Kind) {
9603 case MCK_addr64:
9604 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9605 case MCK_gds:
9606 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9607 case MCK_lds:
9608 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9609 case MCK_idxen:
9610 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9611 case MCK_offen:
9612 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9613 case MCK_tfe:
9614 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9615 case MCK_SSrc_b32:
9616 // When operands have expression values, they will return true for isToken,
9617 // because it is not possible to distinguish between a token and an
9618 // expression at parse time. MatchInstructionImpl() will always try to
9619 // match an operand as a token, when isToken returns true, and when the
9620 // name of the expression is not a valid token, the match will fail,
9621 // so we need to handle it here.
9622 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9623 case MCK_SSrc_f32:
9624 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9625 case MCK_SOPPBrTarget:
9626 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9627 case MCK_VReg32OrOff:
9628 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9629 case MCK_InterpSlot:
9630 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9631 case MCK_InterpAttr:
9632 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9633 case MCK_InterpAttrChan:
9634 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9635 case MCK_SReg_64:
9636 case MCK_SReg_64_XEXEC:
9637 // Null is defined as a 32-bit register but
9638 // it should also be enabled with 64-bit operands.
9639 // The following code enables it for SReg_64 operands
9640 // used as source and destination. Remaining source
9641 // operands are handled in isInlinableImm.
9642 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9643 default:
9644 return Match_InvalidOperand;
9645 }
9646 }
9647
9648 //===----------------------------------------------------------------------===//
9649 // endpgm
9650 //===----------------------------------------------------------------------===//
9651
9652 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9653 SMLoc S = getLoc();
9654 int64_t Imm = 0;
9655
9656 if (!parseExpr(Imm)) {
9657 // The operand is optional, if not present default to 0
9658 Imm = 0;
9659 }
9660
9661 if (!isUInt<16>(Imm))
9662 return Error(S, "expected a 16-bit value");
9663
9664 Operands.push_back(
9665 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9666 return ParseStatus::Success;
9667 }
9668
9669 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9670
9671 //===----------------------------------------------------------------------===//
9672 // Split Barrier
9673 //===----------------------------------------------------------------------===//
9674
9675 bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
9676