//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCAsmInfo.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;
    bool Lit = false;
    bool Lit64 = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
      return 0;
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
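
  // Illustrative example of the encoding above: a VOP3 source written as
  // "-|v1|" parses with Neg and Abs set, so getModifiersOperand() returns
  // (SISrcMods::NEG | SISrcMods::ABS), the packed value that is later emitted
  // as the src*_modifiers operand alongside the register itself.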

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClamp,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyIndexKey32bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
    ImmTyBitOp3,
    ImmTyMatrixAReuse,
    ImmTyMatrixBReuse,
    ImmTyByteSel,
  };

  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report the location of mandatory literals only for
  // VOPD, when both OpX and OpY have a KImm and there are no other literals.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyMandatoryLiteral,
    ImmKindTyConst,
  };
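
  // Illustrative example of the distinction: in "v_fmamk_f32 v0, v1, 0x42, v2"
  // the 0x42 is a mandatory literal (KImm) baked into the instruction format
  // and is tagged ImmKindTyMandatoryLiteral, while the literal in
  // "v_add_f32 v0, 0x42, v1" is a regular literal (ImmKindTyLiteral).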

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    MCRegister RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }
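
  // In the *T16* predicates below, the IsFake16 template argument selects
  // between the true16 form, where a 16-bit operand lives in a 16-bit register
  // (VS_16), and the "fake16" form, where it occupies the low half of a 32-bit
  // register (VS_32). (Summary for orientation; the register class definitions
  // are authoritative.)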

  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrInlineImmWithFP64InputMods() const {
    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }

  bool isVRegWithFP32InputMods() const {
    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
  }

  bool isVRegWithFP64InputMods() const {
    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isPackedVGPRFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
  bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
  bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
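
  // Several of the V2 predicates below (isSSrcV2B16, isSSrcV2F16, ...) are
  // declared only to satisfy the auto-generated asm matcher; they are not
  // expected to be queried for a parsed operand (presumably these operand
  // classes are reached through other predicates), hence "cannot happen".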

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
           (((const MCTargetAsmParser *)AsmParser)
                ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
            isExpr());
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrc_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_512_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isKImmFP64() const { return isLiteralImm(MVT::f64); }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [=](){ return P(*this); };
  }

  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }
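
  // The *WithInputModsOperands helpers below emit two MCInst operands for a
  // single parsed operand: first the packed modifiers immediate, then the
  // register or immediate itself. E.g. (illustrative) a source written as
  // "-|v1|" becomes [SISrcMods::NEG | SISrcMods::ABS, v1], matching the
  // src*_modifiers/src* operand order of VOP3-style instructions.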

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyIndexKey32bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    case ImmTyMatrixAReuse: OS << "MatrixAReuse"; break;
    case ImmTyMatrixBReuse: OS << "MatrixBReuse"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;
    }
    // clang-format on
  }

  void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
         << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr ";
      MAI.printExpr(OS, *Expr);
      OS << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      MCRegister Reg, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = Reg;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};
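
// Illustrative example of the bookkeeping above: after parsing
// "s_mov_b64 s[2:3], 0" in a kernel scope, usesRegister(IS_SGPR,
// /*DwordRegIndex=*/2, /*RegWidth=*/64) calls usesSgprAt(2 + 64/32 - 1), i.e.
// usesSgprAt(3), so the .kernel.sgpr_count symbol becomes 4 (s0..s3 counted).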

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          const MCExpr *NextFreeVGPR, SMRange VGPRRange,
                          const MCExpr *NextFreeSGPR, SMRange SGPRRange,
                          const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);
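
  // Illustrative use: for the HSA metadata directives, everything between
  // ".amdgpu_metadata" and ".end_amdgpu_metadata" is collected verbatim into
  // CollectString and then handed to the YAML parser.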

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                          unsigned &RegWidth,
                          SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
                           unsigned SubReg, unsigned RegWidth, SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

public:
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    FeatureBitset FB = getFeatureBits();
    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {
      // If there is no default wave size, this must be gfx10 or later:
      // generations before gfx10 have FeatureWavefrontSize64 in their
      // definition already. Set wave32 as the default for gfx10+.
      copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());

    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }
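
  // Illustrative: for an HSA gfx900 target the constructor above defines
  // .amdgcn.gfx_generation_number = 9, .amdgcn.gfx_generation_minor = 0 and
  // .amdgcn.gfx_generation_stepping = 0, so assembly source can test the ISA
  // version with ordinary absolute expressions.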

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool has64BitLiterals() const {
    return getFeatureBits()[AMDGPU::Feature64BitLiterals];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasTrue16Insts() const {
    return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not
    // const in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
                           OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);

  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);

  ParseStatus
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     std::function<bool(int64_t &)> ConvertResult = nullptr);

  ParseStatus parseOperandArrayWithPrefix(
      const char *Prefix, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);

  ParseStatus
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
  ParseStatus parseCPol(OperandVector &Operands);
  ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
  ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
  ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
                                    SMLoc &StringLoc);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         int64_t &IntVal);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         AMDGPUOperand::ImmTy Type);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
                       bool HasLit = false, bool HasLit64 = false);
  ParseStatus parseReg(OperandVector &Operands);
  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
                            bool HasLit = false, bool HasLit64 = false);
  ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                           bool AllowImm = true);
  ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                            bool AllowImm = true);
  ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
  ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
  ParseStatus parseVReg32OrOff(OperandVector &Operands);
  ParseStatus tryParseIndexKey(OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy);
  ParseStatus parseIndexKey8bit(OperandVector &Operands);
  ParseStatus parseIndexKey16bit(OperandVector &Operands);
  ParseStatus parseIndexKey32bit(OperandVector &Operands);

  ParseStatus parseDfmtNfmt(int64_t &Format);
  ParseStatus parseUfmt(int64_t &Format);
  ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
                                       int64_t &Format);
  ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
                                         int64_t &Format);
  ParseStatus parseFORMAT(OperandVector &Operands);
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  ParseStatus parseNumericFormat(int64_t &Format);
  ParseStatus parseFlatOffset(OperandVector &Operands);
  ParseStatus parseR128A16(OperandVector &Operands);
  ParseStatus parseBLGP(OperandVector &Operands);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  ParseStatus parseSWaitCnt(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  ParseStatus parseDepCtr(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  ParseStatus parseSDelayALU(OperandVector &Operands);

  ParseStatus parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Val;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Val) : Val(Val) {}
  };

  struct StructuredOpField : OperandInfoTy {
    StringLiteral Id;
    StringLiteral Desc;
    unsigned Width;
    bool IsDefined = false;

    StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
                      int64_t Default)
        : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
    virtual ~StructuredOpField() = default;

    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);
      return false;
    }

    virtual bool validate(AMDGPUAsmParser &Parser) const {
      if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
        return Error(Parser, "not supported on this GPU");
      if (!isUIntN(Width, Val))
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
      return true;
    }
  };
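
  // Illustrative: a StructuredOpField declared with Width = 3 accepts the
  // values 0..7; validate() rejects anything wider via isUIntN(Width, Val),
  // and rejects symbolic values that resolved to OPR_ID_UNSUPPORTED on the
  // current GPU.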

  ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
  bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
                             OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(MCRegister Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands,
                  bool SearchMandatoryLiterals = false) const;
  SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;
  SMLoc getInstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
                                                      bool AsVOPD3);
  bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
  bool tryVOPD(const MCInst &Inst);
  bool tryVOPD3(const MCInst &Inst);
  bool tryAnotherVOPDEncoding(const MCInst &Inst);

  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
  bool validateTensorR128(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateTrue16OpSel(const MCInst &Inst);
  bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(MCRegister Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateDS(const MCInst &Inst, const OperandVector &Operands);
&Inst, const OperandVector &Operands); 1844 bool validateDivScale(const MCInst &Inst); 1845 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands); 1846 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1847 const SMLoc &IDLoc); 1848 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands, 1849 const unsigned CPol); 1850 bool validateTFE(const MCInst &Inst, const OperandVector &Operands); 1851 std::optional<StringRef> validateLdsDirect(const MCInst &Inst); 1852 unsigned getConstantBusLimit(unsigned Opcode) const; 1853 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1854 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1855 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1856 1857 bool isSupportedMnemo(StringRef Mnemo, 1858 const FeatureBitset &FBS); 1859 bool isSupportedMnemo(StringRef Mnemo, 1860 const FeatureBitset &FBS, 1861 ArrayRef<unsigned> Variants); 1862 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1863 1864 bool isId(const StringRef Id) const; 1865 bool isId(const AsmToken &Token, const StringRef Id) const; 1866 bool isToken(const AsmToken::TokenKind Kind) const; 1867 StringRef getId() const; 1868 bool trySkipId(const StringRef Id); 1869 bool trySkipId(const StringRef Pref, const StringRef Id); 1870 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1871 bool trySkipToken(const AsmToken::TokenKind Kind); 1872 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1873 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1874 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1875 1876 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1877 AsmToken::TokenKind getTokenKind() const; 1878 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1879 bool parseExpr(OperandVector &Operands); 1880 StringRef getTokenStr() const; 1881 AsmToken peekToken(bool ShouldSkipSpace = true); 1882 AsmToken getToken() const; 1883 SMLoc getLoc() const; 1884 void lex(); 1885 1886 public: 1887 void onBeginOfFile() override; 1888 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override; 1889 1890 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK); 1891 1892 ParseStatus parseExpTgt(OperandVector &Operands); 1893 ParseStatus parseSendMsg(OperandVector &Operands); 1894 ParseStatus parseInterpSlot(OperandVector &Operands); 1895 ParseStatus parseInterpAttr(OperandVector &Operands); 1896 ParseStatus parseSOPPBrTarget(OperandVector &Operands); 1897 ParseStatus parseBoolReg(OperandVector &Operands); 1898 1899 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal, 1900 const unsigned MaxVal, const Twine &ErrMsg, 1901 SMLoc &Loc); 1902 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1903 const unsigned MinVal, 1904 const unsigned MaxVal, 1905 const StringRef ErrMsg); 1906 ParseStatus parseSwizzle(OperandVector &Operands); 1907 bool parseSwizzleOffset(int64_t &Imm); 1908 bool parseSwizzleMacro(int64_t &Imm); 1909 bool parseSwizzleQuadPerm(int64_t &Imm); 1910 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1911 bool parseSwizzleBroadcast(int64_t &Imm); 1912 bool parseSwizzleSwap(int64_t &Imm); 1913 bool parseSwizzleReverse(int64_t &Imm); 1914 bool parseSwizzleFFT(int64_t &Imm); 1915 bool parseSwizzleRotate(int64_t &Imm); 1916 1917 ParseStatus parseGPRIdxMode(OperandVector &Operands); 1918 int64_t parseGPRIdxMacro(); 1919 1920 void cvtMubuf(MCInst &Inst, const OperandVector 
&Operands) { cvtMubufImpl(Inst, Operands, false); } 1921 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1922 1923 ParseStatus parseOModSI(OperandVector &Operands); 1924 1925 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1926 OptionalImmIndexMap &OptionalIdx); 1927 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands); 1928 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1929 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1930 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1931 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands); 1932 1933 void cvtVOPD(MCInst &Inst, const OperandVector &Operands); 1934 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 1935 OptionalImmIndexMap &OptionalIdx); 1936 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1937 OptionalImmIndexMap &OptionalIdx); 1938 1939 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1940 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1941 1942 bool parseDimId(unsigned &Encoding); 1943 ParseStatus parseDim(OperandVector &Operands); 1944 bool convertDppBoundCtrl(int64_t &BoundCtrl); 1945 ParseStatus parseDPP8(OperandVector &Operands); 1946 ParseStatus parseDPPCtrl(OperandVector &Operands); 1947 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1948 int64_t parseDPPCtrlSel(StringRef Ctrl); 1949 int64_t parseDPPCtrlPerm(); 1950 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1951 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1952 cvtDPP(Inst, Operands, true); 1953 } 1954 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1955 bool IsDPP8 = false); 1956 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1957 cvtVOP3DPP(Inst, Operands, true); 1958 } 1959 1960 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix, 1961 AMDGPUOperand::ImmTy Type); 1962 ParseStatus parseSDWADstUnused(OperandVector &Operands); 1963 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1964 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1965 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1966 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1967 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1968 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1969 uint64_t BasicInstType, 1970 bool SkipDstVcc = false, 1971 bool SkipSrcVcc = false); 1972 1973 ParseStatus parseEndpgm(OperandVector &Operands); 1974 1975 ParseStatus parseVOPD(OperandVector &Operands); 1976 }; 1977 1978 } // end anonymous namespace 1979 1980 // May be called with an integer type of equivalent bitwidth. 1981 static const fltSemantics *getFltSemantics(unsigned Size) { 1982 switch (Size) { 1983 case 4: 1984 return &APFloat::IEEEsingle(); 1985 case 8: 1986 return &APFloat::IEEEdouble(); 1987 case 2: 1988 return &APFloat::IEEEhalf(); 1989 default: 1990 llvm_unreachable("unsupported fp type"); 1991 } 1992 } 1993 1994 static const fltSemantics *getFltSemantics(MVT VT) { 1995 return getFltSemantics(VT.getSizeInBits() / 8); 1996 } 1997 1998 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1999 switch (OperandType) { 2000 // When a floating-point immediate is used as an operand of type i16, the 2001 // 32-bit representation of the constant, truncated to the 16 LSBs, should be used.
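// E.g. the fp literal 1.0 has the fp32 bit pattern 0x3f800000; used as an
// i16 operand, only its 16 LSBs (here 0x0000) survive the truncation.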
2002 case AMDGPU::OPERAND_REG_IMM_INT16: 2003 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2004 case AMDGPU::OPERAND_REG_IMM_INT32: 2005 case AMDGPU::OPERAND_REG_IMM_FP32: 2006 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2007 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2008 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2009 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2010 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2011 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2012 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2013 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2014 case AMDGPU::OPERAND_KIMM32: 2015 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 2016 return &APFloat::IEEEsingle(); 2017 case AMDGPU::OPERAND_REG_IMM_INT64: 2018 case AMDGPU::OPERAND_REG_IMM_FP64: 2019 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2020 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2021 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2022 case AMDGPU::OPERAND_KIMM64: 2023 return &APFloat::IEEEdouble(); 2024 case AMDGPU::OPERAND_REG_IMM_FP16: 2025 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2026 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2027 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2028 case AMDGPU::OPERAND_KIMM16: 2029 return &APFloat::IEEEhalf(); 2030 case AMDGPU::OPERAND_REG_IMM_BF16: 2031 case AMDGPU::OPERAND_REG_INLINE_C_BF16: 2032 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: 2033 case AMDGPU::OPERAND_REG_IMM_V2BF16: 2034 return &APFloat::BFloat(); 2035 default: 2036 llvm_unreachable("unsupported fp type"); 2037 } 2038 } 2039 2040 //===----------------------------------------------------------------------===// 2041 // Operand 2042 //===----------------------------------------------------------------------===// 2043 2044 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 2045 bool Lost; 2046 2047 // Convert the literal to the operand type's fp semantics 2048 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 2049 APFloat::rmNearestTiesToEven, 2050 &Lost); 2051 // We allow precision loss but not overflow or underflow 2052 if (Status != APFloat::opOK && 2053 Lost && 2054 ((Status & APFloat::opOverflow) != 0 || 2055 (Status & APFloat::opUnderflow) != 0)) { 2056 return false; 2057 } 2058 2059 return true; 2060 } 2061 2062 static bool isSafeTruncation(int64_t Val, unsigned Size) { 2063 return isUIntN(Size, Val) || isIntN(Size, Val); 2064 } 2065 2066 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 2067 if (VT.getScalarType() == MVT::i16) 2068 return isInlinableLiteral32(Val, HasInv2Pi); 2069 2070 if (VT.getScalarType() == MVT::f16) 2071 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi); 2072 2073 assert(VT.getScalarType() == MVT::bf16); 2074 2075 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi); 2076 } 2077 2078 bool AMDGPUOperand::isInlinableImm(MVT type) const { 2079 2080 // This is a hack to enable named inline values like 2081 // shared_base with both 32-bit and 64-bit operands. 2082 // Note that these values are defined as 2083 // 32-bit operands only. 2084 if (isInlineValue()) { 2085 return true; 2086 } 2087 2088 if (!isImmTy(ImmTyNone)) { 2089 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 2090 return false; 2091 } 2092 // TODO: We should avoid using host float here. It would be better to 2093 // check the float bit values, which is what a few other places do. 2094 // We've had bot failures before due to weird NaN support on MIPS hosts.
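// Note: for fp literal tokens, Imm.Val holds the bit pattern of an IEEE
// double produced by the parser (see parseImm()); for int tokens it holds
// the value itself.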
2095 2096 APInt Literal(64, Imm.Val); 2097 2098 if (Imm.IsFPImm) { // We got fp literal token 2099 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 2100 return AMDGPU::isInlinableLiteral64(Imm.Val, 2101 AsmParser->hasInv2PiInlineImm()); 2102 } 2103 2104 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2105 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 2106 return false; 2107 2108 if (type.getScalarSizeInBits() == 16) { 2109 bool Lost = false; 2110 switch (type.getScalarType().SimpleTy) { 2111 default: 2112 llvm_unreachable("unknown 16-bit type"); 2113 case MVT::bf16: 2114 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven, 2115 &Lost); 2116 break; 2117 case MVT::f16: 2118 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven, 2119 &Lost); 2120 break; 2121 case MVT::i16: 2122 FPLiteral.convert(APFloatBase::IEEEsingle(), 2123 APFloat::rmNearestTiesToEven, &Lost); 2124 break; 2125 } 2126 // We need to use the 32-bit representation here because when a floating-point 2127 // inline constant is used as an i16 operand, its 32-bit representation 2128 // will be used. We will need the 32-bit value to check if 2129 // it is an FP inline constant. 2130 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2131 return isInlineableLiteralOp16(ImmVal, type, 2132 AsmParser->hasInv2PiInlineImm()); 2133 } 2134 2135 // Check if the single-precision literal is inlinable 2136 return AMDGPU::isInlinableLiteral32( 2137 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 2138 AsmParser->hasInv2PiInlineImm()); 2139 } 2140 2141 // We got int literal token. 2142 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 2143 return AMDGPU::isInlinableLiteral64(Imm.Val, 2144 AsmParser->hasInv2PiInlineImm()); 2145 } 2146 2147 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 2148 return false; 2149 } 2150 2151 if (type.getScalarSizeInBits() == 16) { 2152 return isInlineableLiteralOp16( 2153 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 2154 type, AsmParser->hasInv2PiInlineImm()); 2155 } 2156 2157 return AMDGPU::isInlinableLiteral32( 2158 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 2159 AsmParser->hasInv2PiInlineImm()); 2160 } 2161 2162 bool AMDGPUOperand::isLiteralImm(MVT type) const { 2163 // Check that this immediate can be added as a literal 2164 if (!isImmTy(ImmTyNone)) { 2165 return false; 2166 } 2167 2168 bool Allow64Bit = 2169 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals(); 2170 2171 if (!Imm.IsFPImm) { 2172 // We got int literal token. 2173 2174 if (type == MVT::f64 && hasFPModifiers()) { 2175 // Fp modifiers cannot be applied to int literals while preserving the same 2176 // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid 2177 // ambiguity, these cases are disabled. 2178 return false; 2179 } 2180 2181 unsigned Size = type.getSizeInBits(); 2182 if (Size == 64) { 2183 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false)) 2184 return true; 2185 Size = 32; 2186 } 2187 2188 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 2189 // types.
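// isSafeTruncation() accepts values that fit in Size bits under either the
// signed or the unsigned interpretation; e.g. for Size == 16 both -1 and
// 0xffff are accepted.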
2190 return isSafeTruncation(Imm.Val, Size); 2191 } 2192 2193 // We got fp literal token 2194 if (type == MVT::f64) { // Expected 64-bit fp operand 2195 // The low 32 bits of the literal would be set to zeroes, but we accept such literals 2196 return true; 2197 } 2198 2199 if (type == MVT::i64) { // Expected 64-bit int operand 2200 // We don't allow fp literals in 64-bit integer instructions. It is 2201 // unclear how we should encode them. 2202 return false; 2203 } 2204 2205 // We allow fp literals with f16x2 operands assuming that the specified 2206 // literal goes into the lower half and the upper half is zero. We also 2207 // require that the literal may be losslessly converted to f16. 2208 // 2209 // For i16x2 operands, we assume that the specified literal is encoded as a 2210 // single-precision float. This is pretty odd, but it matches SP3 and what 2211 // happens in hardware. 2212 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 2213 : (type == MVT::v2i16) ? MVT::f32 2214 : (type == MVT::v2f32) ? MVT::f32 2215 : type; 2216 2217 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2218 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2219 } 2220 2221 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2222 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2223 } 2224 2225 bool AMDGPUOperand::isVRegWithInputMods() const { 2226 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2227 // GFX90A allows DPP on 64-bit operands. 2228 (isRegClass(AMDGPU::VReg_64RegClassID) && 2229 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]); 2230 } 2231 2232 template <bool IsFake16> 2233 bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const { 2234 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID 2235 : AMDGPU::VGPR_16_Lo128RegClassID); 2236 } 2237 2238 template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const { 2239 return isRegClass(IsFake16 ?
AMDGPU::VGPR_32RegClassID 2240 : AMDGPU::VGPR_16RegClassID); 2241 } 2242 2243 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2244 if (AsmParser->isVI()) 2245 return isVReg32(); 2246 if (AsmParser->isGFX9Plus()) 2247 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2248 return false; 2249 } 2250 2251 bool AMDGPUOperand::isSDWAFP16Operand() const { 2252 return isSDWAOperand(MVT::f16); 2253 } 2254 2255 bool AMDGPUOperand::isSDWAFP32Operand() const { 2256 return isSDWAOperand(MVT::f32); 2257 } 2258 2259 bool AMDGPUOperand::isSDWAInt16Operand() const { 2260 return isSDWAOperand(MVT::i16); 2261 } 2262 2263 bool AMDGPUOperand::isSDWAInt32Operand() const { 2264 return isSDWAOperand(MVT::i32); 2265 } 2266 2267 bool AMDGPUOperand::isBoolReg() const { 2268 auto FB = AsmParser->getFeatureBits(); 2269 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) || 2270 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32())); 2271 } 2272 2273 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2274 { 2275 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2276 assert(Size == 2 || Size == 4 || Size == 8); 2277 2278 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2279 2280 if (Imm.Mods.Abs) { 2281 Val &= ~FpSignMask; 2282 } 2283 if (Imm.Mods.Neg) { 2284 Val ^= FpSignMask; 2285 } 2286 2287 return Val; 2288 } 2289 2290 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2291 if (isExpr()) { 2292 Inst.addOperand(MCOperand::createExpr(Expr)); 2293 return; 2294 } 2295 2296 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2297 Inst.getNumOperands())) { 2298 addLiteralImmOperand(Inst, Imm.Val, 2299 ApplyModifiers & 2300 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2301 } else { 2302 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2303 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2304 setImmKindNone(); 2305 } 2306 } 2307 2308 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2309 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2310 auto OpNum = Inst.getNumOperands(); 2311 // Check that this operand accepts literals 2312 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2313 2314 if (ApplyModifiers) { 2315 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2316 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2317 Val = applyInputFPModifiers(Val, Size); 2318 } 2319 2320 APInt Literal(64, Val); 2321 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType; 2322 2323 if (Imm.IsFPImm) { // We got fp literal token 2324 switch (OpTy) { 2325 case AMDGPU::OPERAND_REG_IMM_INT64: 2326 case AMDGPU::OPERAND_REG_IMM_FP64: 2327 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2328 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2329 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2330 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2331 AsmParser->hasInv2PiInlineImm())) { 2332 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2333 setImmKindConst(); 2334 return; 2335 } 2336 2337 // Non-inlineable 2338 if (AMDGPU::isSISrcFPOperand(InstDesc, 2339 OpNum)) { // Expected 64-bit fp operand 2340 bool HasMandatoryLiteral = 2341 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm); 2342 // For fp operands we check whether the low 32 bits are zero 2343 if (Literal.getLoBits(32) != 0 && 2344 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) && 2345 !HasMandatoryLiteral) { 2346 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning( 2347 Inst.getLoc(), 2348 "Can't encode literal as exact 64-bit floating-point operand. " 2349 "Low 32-bits will be set to zero"); 2350 Val &= 0xffffffff00000000u; 2351 } 2352 2353 Inst.addOperand(MCOperand::createImm(Val)); 2354 setImmKindLiteral(); 2355 return; 2356 } 2357 2358 // We don't allow fp literals in 64-bit integer instructions. It is 2359 // unclear how we should encode them. This case should be checked earlier 2360 // in predicate methods (isLiteralImm()) 2361 llvm_unreachable("fp literal in 64-bit integer instruction."); 2362 2363 case AMDGPU::OPERAND_KIMM64: 2364 Inst.addOperand(MCOperand::createImm(Val)); 2365 setImmKindMandatoryLiteral(); 2366 return; 2367 2368 case AMDGPU::OPERAND_REG_IMM_BF16: 2369 case AMDGPU::OPERAND_REG_INLINE_C_BF16: 2370 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: 2371 case AMDGPU::OPERAND_REG_IMM_V2BF16: 2372 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) { 2373 // This is 1/(2*pi), which is going to be truncated to bf16 with a 2374 // loss of precision. The constant represents the idiomatic fp32 value of 2375 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 2376 // bits cleared. Prevent the rounding below.
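// (0x3e22 is the fp32 bit pattern 0x3e22f983 of 0.15915494 with its low 16
// bits dropped, i.e. plain truncation rather than the round-to-nearest that
// a convert() would perform.)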
2377 Inst.addOperand(MCOperand::createImm(0x3e22)); 2378 setImmKindLiteral(); 2379 return; 2380 } 2381 [[fallthrough]]; 2382 2383 case AMDGPU::OPERAND_REG_IMM_INT32: 2384 case AMDGPU::OPERAND_REG_IMM_FP32: 2385 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2386 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2387 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2388 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2389 case AMDGPU::OPERAND_REG_IMM_INT16: 2390 case AMDGPU::OPERAND_REG_IMM_FP16: 2391 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2392 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2393 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2394 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2395 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2396 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2397 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2398 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2399 case AMDGPU::OPERAND_KIMM32: 2400 case AMDGPU::OPERAND_KIMM16: 2401 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { 2402 bool lost; 2403 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2404 // Convert the literal to the operand's fp semantics 2405 FPLiteral.convert(*getOpFltSemantics(OpTy), 2406 APFloat::rmNearestTiesToEven, &lost); 2407 // We allow precision loss but not overflow or underflow. This should have 2408 // been checked earlier in isLiteralImm() 2409 2410 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2411 Inst.addOperand(MCOperand::createImm(ImmVal)); 2412 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) { 2413 setImmKindMandatoryLiteral(); 2414 } else { 2415 setImmKindLiteral(); 2416 } 2417 return; 2418 } 2419 default: 2420 llvm_unreachable("invalid operand size"); 2421 } 2422 2423 return; 2424 } 2425 2426 // We got int literal token. 2427 // Only sign extend inline immediates. 2428 switch (OpTy) { 2429 case AMDGPU::OPERAND_REG_IMM_INT32: 2430 case AMDGPU::OPERAND_REG_IMM_FP32: 2431 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2432 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2433 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2434 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2435 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2436 case AMDGPU::OPERAND_REG_IMM_V2BF16: 2437 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2438 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2439 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2440 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 2441 if (isSafeTruncation(Val, 32) && 2442 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2443 AsmParser->hasInv2PiInlineImm())) { 2444 Inst.addOperand(MCOperand::createImm(Val)); 2445 setImmKindConst(); 2446 return; 2447 } 2448 2449 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2450 setImmKindLiteral(); 2451 return; 2452 2453 case AMDGPU::OPERAND_REG_IMM_INT64: 2454 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2455 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2456 Inst.addOperand(MCOperand::createImm(Val)); 2457 setImmKindConst(); 2458 return; 2459 } 2460 2461 // When the 32 MSBs are not zero (which effectively means the value can't 2462 // be safely truncated to uint32_t), we need to truncate it to the 32 LSBs 2463 // if the target doesn't support 64-bit literals or the lit modifier is 2464 // explicitly used.
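// E.g. on a target without 64-bit literals, the non-inlinable value
// 0x100000001 is encoded as the 32-bit literal 0x00000001 below.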
if (!AsmParser->has64BitLiterals() || getModifiers().Lit) 2466 Val = Lo_32(Val); 2467 2468 Inst.addOperand(MCOperand::createImm(Val)); 2469 setImmKindLiteral(); 2470 return; 2471 2472 case AMDGPU::OPERAND_REG_IMM_FP64: 2473 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2474 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2475 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2476 Inst.addOperand(MCOperand::createImm(Val)); 2477 setImmKindConst(); 2478 return; 2479 } 2480 2481 // If the target doesn't support 64-bit literals, we need to use the 2482 // constant as the 32 MSBs of a double-precision floating-point value. 2483 if (!AsmParser->has64BitLiterals()) { 2484 Val = static_cast<uint64_t>(Val) << 32; 2485 } else { 2486 // Now that the target does support 64-bit literals, there are two cases 2487 // where we still want to use the 32-bit src_literal encoding: 2488 // 1) it is explicitly forced by the lit modifier; 2489 // 2) the value has a valid 32-bit representation (signed or unsigned) 2490 // and is not forced to 64 bits by the lit64 modifier. 2491 if (getModifiers().Lit || 2492 (!getModifiers().Lit64 && (isInt<32>(Val) || isUInt<32>(Val)))) 2493 Val = static_cast<uint64_t>(Val) << 32; 2494 } 2495 2496 Inst.addOperand(MCOperand::createImm(Val)); 2497 setImmKindLiteral(); 2498 return; 2499 2500 case AMDGPU::OPERAND_REG_IMM_INT16: 2501 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2502 if (isSafeTruncation(Val, 16) && 2503 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) { 2504 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2505 setImmKindConst(); 2506 return; 2507 } 2508 2509 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2510 setImmKindLiteral(); 2511 return; 2512 2513 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2514 case AMDGPU::OPERAND_REG_IMM_FP16: 2515 if (isSafeTruncation(Val, 16) && 2516 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val), 2517 AsmParser->hasInv2PiInlineImm())) { 2518 Inst.addOperand(MCOperand::createImm(Val)); 2519 setImmKindConst(); 2520 return; 2521 } 2522 2523 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2524 setImmKindLiteral(); 2525 return; 2526 2527 case AMDGPU::OPERAND_REG_IMM_BF16: 2528 case AMDGPU::OPERAND_REG_INLINE_C_BF16: 2529 if (isSafeTruncation(Val, 16) && 2530 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val), 2531 AsmParser->hasInv2PiInlineImm())) { 2532 Inst.addOperand(MCOperand::createImm(Val)); 2533 setImmKindConst(); 2534 return; 2535 } 2536 2537 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2538 setImmKindLiteral(); 2539 return; 2540 2541 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: { 2542 assert(isSafeTruncation(Val, 16)); 2543 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))); 2544 Inst.addOperand(MCOperand::createImm(Val)); 2545 return; 2546 } 2547 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 2548 assert(isSafeTruncation(Val, 16)); 2549 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val), 2550 AsmParser->hasInv2PiInlineImm())); 2551 2552 Inst.addOperand(MCOperand::createImm(Val)); 2553 return; 2554 } 2555 2556 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: { 2557 assert(isSafeTruncation(Val, 16)); 2558 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val), 2559 AsmParser->hasInv2PiInlineImm())); 2560 2561 Inst.addOperand(MCOperand::createImm(Val)); 2562 return; 2563 } 2564 2565 case AMDGPU::OPERAND_KIMM32: 2566 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2567 setImmKindMandatoryLiteral(); 2568 return; 2569 case AMDGPU::OPERAND_KIMM16: 2570
Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2571 setImmKindMandatoryLiteral(); 2572 return; 2573 case AMDGPU::OPERAND_KIMM64: 2574 if ((isInt<32>(Val) || isUInt<32>(Val)) && !getModifiers().Lit64) 2575 Val <<= 32; 2576 2577 Inst.addOperand(MCOperand::createImm(Val)); 2578 setImmKindMandatoryLiteral(); 2579 return; 2580 default: 2581 llvm_unreachable("invalid operand size"); 2582 } 2583 } 2584 2585 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2586 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2587 } 2588 2589 bool AMDGPUOperand::isInlineValue() const { 2590 return isRegKind() && ::isInlineValue(getReg()); 2591 } 2592 2593 //===----------------------------------------------------------------------===// 2594 // AsmParser 2595 //===----------------------------------------------------------------------===// 2596 2597 void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) { 2598 // TODO: make these pre-defined variables read-only. 2599 // Currently there is no suitable machinery in the core llvm-mc for this. 2600 // MCSymbol::isRedefinable is intended for another purpose, and 2601 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target. 2602 MCContext &Ctx = getContext(); 2603 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id); 2604 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx)); 2605 } 2606 2607 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2608 if (Is == IS_VGPR) { 2609 switch (RegWidth) { 2610 default: return -1; 2611 case 32: 2612 return AMDGPU::VGPR_32RegClassID; 2613 case 64: 2614 return AMDGPU::VReg_64RegClassID; 2615 case 96: 2616 return AMDGPU::VReg_96RegClassID; 2617 case 128: 2618 return AMDGPU::VReg_128RegClassID; 2619 case 160: 2620 return AMDGPU::VReg_160RegClassID; 2621 case 192: 2622 return AMDGPU::VReg_192RegClassID; 2623 case 224: 2624 return AMDGPU::VReg_224RegClassID; 2625 case 256: 2626 return AMDGPU::VReg_256RegClassID; 2627 case 288: 2628 return AMDGPU::VReg_288RegClassID; 2629 case 320: 2630 return AMDGPU::VReg_320RegClassID; 2631 case 352: 2632 return AMDGPU::VReg_352RegClassID; 2633 case 384: 2634 return AMDGPU::VReg_384RegClassID; 2635 case 512: 2636 return AMDGPU::VReg_512RegClassID; 2637 case 1024: 2638 return AMDGPU::VReg_1024RegClassID; 2639 } 2640 } else if (Is == IS_TTMP) { 2641 switch (RegWidth) { 2642 default: return -1; 2643 case 32: 2644 return AMDGPU::TTMP_32RegClassID; 2645 case 64: 2646 return AMDGPU::TTMP_64RegClassID; 2647 case 128: 2648 return AMDGPU::TTMP_128RegClassID; 2649 case 256: 2650 return AMDGPU::TTMP_256RegClassID; 2651 case 512: 2652 return AMDGPU::TTMP_512RegClassID; 2653 } 2654 } else if (Is == IS_SGPR) { 2655 switch (RegWidth) { 2656 default: return -1; 2657 case 32: 2658 return AMDGPU::SGPR_32RegClassID; 2659 case 64: 2660 return AMDGPU::SGPR_64RegClassID; 2661 case 96: 2662 return AMDGPU::SGPR_96RegClassID; 2663 case 128: 2664 return AMDGPU::SGPR_128RegClassID; 2665 case 160: 2666 return AMDGPU::SGPR_160RegClassID; 2667 case 192: 2668 return AMDGPU::SGPR_192RegClassID; 2669 case 224: 2670 return AMDGPU::SGPR_224RegClassID; 2671 case 256: 2672 return AMDGPU::SGPR_256RegClassID; 2673 case 288: 2674 return AMDGPU::SGPR_288RegClassID; 2675 case 320: 2676 return AMDGPU::SGPR_320RegClassID; 2677 case 352: 2678 return AMDGPU::SGPR_352RegClassID; 2679 case 384: 2680 return AMDGPU::SGPR_384RegClassID; 2681 case 512: 2682 return AMDGPU::SGPR_512RegClassID; 2683 } 2684 } else if (Is == IS_AGPR) {
2685 switch (RegWidth) { 2686 default: return -1; 2687 case 32: 2688 return AMDGPU::AGPR_32RegClassID; 2689 case 64: 2690 return AMDGPU::AReg_64RegClassID; 2691 case 96: 2692 return AMDGPU::AReg_96RegClassID; 2693 case 128: 2694 return AMDGPU::AReg_128RegClassID; 2695 case 160: 2696 return AMDGPU::AReg_160RegClassID; 2697 case 192: 2698 return AMDGPU::AReg_192RegClassID; 2699 case 224: 2700 return AMDGPU::AReg_224RegClassID; 2701 case 256: 2702 return AMDGPU::AReg_256RegClassID; 2703 case 288: 2704 return AMDGPU::AReg_288RegClassID; 2705 case 320: 2706 return AMDGPU::AReg_320RegClassID; 2707 case 352: 2708 return AMDGPU::AReg_352RegClassID; 2709 case 384: 2710 return AMDGPU::AReg_384RegClassID; 2711 case 512: 2712 return AMDGPU::AReg_512RegClassID; 2713 case 1024: 2714 return AMDGPU::AReg_1024RegClassID; 2715 } 2716 } 2717 return -1; 2718 } 2719 2720 static MCRegister getSpecialRegForName(StringRef RegName) { 2721 return StringSwitch<unsigned>(RegName) 2722 .Case("exec", AMDGPU::EXEC) 2723 .Case("vcc", AMDGPU::VCC) 2724 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2725 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2726 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2727 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2728 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2729 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2730 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2731 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2732 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2733 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2734 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2735 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2736 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2737 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2738 .Case("m0", AMDGPU::M0) 2739 .Case("vccz", AMDGPU::SRC_VCCZ) 2740 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2741 .Case("execz", AMDGPU::SRC_EXECZ) 2742 .Case("src_execz", AMDGPU::SRC_EXECZ) 2743 .Case("scc", AMDGPU::SRC_SCC) 2744 .Case("src_scc", AMDGPU::SRC_SCC) 2745 .Case("tba", AMDGPU::TBA) 2746 .Case("tma", AMDGPU::TMA) 2747 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2748 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2749 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2750 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2751 .Case("vcc_lo", AMDGPU::VCC_LO) 2752 .Case("vcc_hi", AMDGPU::VCC_HI) 2753 .Case("exec_lo", AMDGPU::EXEC_LO) 2754 .Case("exec_hi", AMDGPU::EXEC_HI) 2755 .Case("tma_lo", AMDGPU::TMA_LO) 2756 .Case("tma_hi", AMDGPU::TMA_HI) 2757 .Case("tba_lo", AMDGPU::TBA_LO) 2758 .Case("tba_hi", AMDGPU::TBA_HI) 2759 .Case("pc", AMDGPU::PC_REG) 2760 .Case("null", AMDGPU::SGPR_NULL) 2761 .Default(AMDGPU::NoRegister); 2762 } 2763 2764 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, 2765 SMLoc &EndLoc, bool RestoreOnFailure) { 2766 auto R = parseRegister(); 2767 if (!R) return true; 2768 assert(R->isReg()); 2769 RegNo = R->getReg(); 2770 StartLoc = R->getStartLoc(); 2771 EndLoc = R->getEndLoc(); 2772 return false; 2773 } 2774 2775 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, 2776 SMLoc &EndLoc) { 2777 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2778 } 2779 2780 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, 2781 SMLoc &EndLoc) { 2782 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2783 bool PendingErrors = getParser().hasPendingError(); 2784 getParser().clearPendingErrors(); 2785 if 
(PendingErrors) 2786 return ParseStatus::Failure; 2787 if (Result) 2788 return ParseStatus::NoMatch; 2789 return ParseStatus::Success; 2790 } 2791 2792 bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth, 2793 RegisterKind RegKind, 2794 MCRegister Reg1, SMLoc Loc) { 2795 switch (RegKind) { 2796 case IS_SPECIAL: 2797 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2798 Reg = AMDGPU::EXEC; 2799 RegWidth = 64; 2800 return true; 2801 } 2802 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2803 Reg = AMDGPU::FLAT_SCR; 2804 RegWidth = 64; 2805 return true; 2806 } 2807 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2808 Reg = AMDGPU::XNACK_MASK; 2809 RegWidth = 64; 2810 return true; 2811 } 2812 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2813 Reg = AMDGPU::VCC; 2814 RegWidth = 64; 2815 return true; 2816 } 2817 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2818 Reg = AMDGPU::TBA; 2819 RegWidth = 64; 2820 return true; 2821 } 2822 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2823 Reg = AMDGPU::TMA; 2824 RegWidth = 64; 2825 return true; 2826 } 2827 Error(Loc, "register does not fit in the list"); 2828 return false; 2829 case IS_VGPR: 2830 case IS_SGPR: 2831 case IS_AGPR: 2832 case IS_TTMP: 2833 if (Reg1 != Reg + RegWidth / 32) { 2834 Error(Loc, "registers in a list must have consecutive indices"); 2835 return false; 2836 } 2837 RegWidth += 32; 2838 return true; 2839 default: 2840 llvm_unreachable("unexpected register kind"); 2841 } 2842 } 2843 2844 struct RegInfo { 2845 StringLiteral Name; 2846 RegisterKind Kind; 2847 }; 2848 2849 static constexpr RegInfo RegularRegisters[] = { 2850 {{"v"}, IS_VGPR}, 2851 {{"s"}, IS_SGPR}, 2852 {{"ttmp"}, IS_TTMP}, 2853 {{"acc"}, IS_AGPR}, 2854 {{"a"}, IS_AGPR}, 2855 }; 2856 2857 static bool isRegularReg(RegisterKind Kind) { 2858 return Kind == IS_VGPR || 2859 Kind == IS_SGPR || 2860 Kind == IS_TTMP || 2861 Kind == IS_AGPR; 2862 } 2863 2864 static const RegInfo* getRegularRegInfo(StringRef Str) { 2865 for (const RegInfo &Reg : RegularRegisters) 2866 if (Str.starts_with(Reg.Name)) 2867 return &Reg; 2868 return nullptr; 2869 } 2870 2871 static bool getRegNum(StringRef Str, unsigned& Num) { 2872 return !Str.getAsInteger(10, Num); 2873 } 2874 2875 bool 2876 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2877 const AsmToken &NextToken) const { 2878 2879 // A list of consecutive registers: [s0,s1,s2,s3] 2880 if (Token.is(AsmToken::LBrac)) 2881 return true; 2882 2883 if (!Token.is(AsmToken::Identifier)) 2884 return false; 2885 2886 // A single register like s0 or a range of registers like s[0:1] 2887 2888 StringRef Str = Token.getString(); 2889 const RegInfo *Reg = getRegularRegInfo(Str); 2890 if (Reg) { 2891 StringRef RegName = Reg->Name; 2892 StringRef RegSuffix = Str.substr(RegName.size()); 2893 if (!RegSuffix.empty()) { 2894 RegSuffix.consume_back(".l"); 2895 RegSuffix.consume_back(".h"); 2896 unsigned Num; 2897 // A single register with an index: rXX 2898 if (getRegNum(RegSuffix, Num)) 2899 return true; 2900 } else { 2901 // A range of registers: r[XX:YY]. 
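// e.g. "s[0:3]" or "v[8:15]"; only the opening '[' needs to be seen here.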
2902 if (NextToken.is(AsmToken::LBrac)) 2903 return true; 2904 } 2905 } 2906 2907 return getSpecialRegForName(Str).isValid(); 2908 } 2909 2910 bool 2911 AMDGPUAsmParser::isRegister() 2912 { 2913 return isRegister(getToken(), peekToken()); 2914 } 2915 2916 MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum, 2917 unsigned SubReg, unsigned RegWidth, 2918 SMLoc Loc) { 2919 assert(isRegularReg(RegKind)); 2920 2921 unsigned AlignSize = 1; 2922 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2923 // SGPR and TTMP registers must be aligned. 2924 // Max required alignment is 4 dwords. 2925 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u); 2926 } 2927 2928 if (RegNum % AlignSize != 0) { 2929 Error(Loc, "invalid register alignment"); 2930 return MCRegister(); 2931 } 2932 2933 unsigned RegIdx = RegNum / AlignSize; 2934 int RCID = getRegClass(RegKind, RegWidth); 2935 if (RCID == -1) { 2936 Error(Loc, "invalid or unsupported register size"); 2937 return MCRegister(); 2938 } 2939 2940 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2941 const MCRegisterClass RC = TRI->getRegClass(RCID); 2942 if (RegIdx >= RC.getNumRegs()) { 2943 Error(Loc, "register index is out of range"); 2944 return MCRegister(); 2945 } 2946 2947 MCRegister Reg = RC.getRegister(RegIdx); 2948 2949 if (SubReg) { 2950 Reg = TRI->getSubReg(Reg, SubReg); 2951 2952 // Currently all regular registers have their .l and .h subregisters, so 2953 // we should never need to generate an error here. 2954 assert(Reg && "Invalid subregister!"); 2955 } 2956 2957 return Reg; 2958 } 2959 2960 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth, 2961 unsigned &SubReg) { 2962 int64_t RegLo, RegHi; 2963 if (!skipToken(AsmToken::LBrac, "missing register index")) 2964 return false; 2965 2966 SMLoc FirstIdxLoc = getLoc(); 2967 SMLoc SecondIdxLoc; 2968 2969 if (!parseExpr(RegLo)) 2970 return false; 2971 2972 if (trySkipToken(AsmToken::Colon)) { 2973 SecondIdxLoc = getLoc(); 2974 if (!parseExpr(RegHi)) 2975 return false; 2976 } else { 2977 RegHi = RegLo; 2978 } 2979 2980 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2981 return false; 2982 2983 if (!isUInt<32>(RegLo)) { 2984 Error(FirstIdxLoc, "invalid register index"); 2985 return false; 2986 } 2987 2988 if (!isUInt<32>(RegHi)) { 2989 Error(SecondIdxLoc, "invalid register index"); 2990 return false; 2991 } 2992 2993 if (RegLo > RegHi) { 2994 Error(FirstIdxLoc, "first register index should not exceed second index"); 2995 return false; 2996 } 2997 2998 if (RegHi == RegLo) { 2999 StringRef RegSuffix = getTokenStr(); 3000 if (RegSuffix == ".l") { 3001 SubReg = AMDGPU::lo16; 3002 lex(); 3003 } else if (RegSuffix == ".h") { 3004 SubReg = AMDGPU::hi16; 3005 lex(); 3006 } 3007 } 3008 3009 Num = static_cast<unsigned>(RegLo); 3010 RegWidth = 32 * ((RegHi - RegLo) + 1); 3011 3012 return true; 3013 } 3014 3015 MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 3016 unsigned &RegNum, 3017 unsigned &RegWidth, 3018 SmallVectorImpl<AsmToken> &Tokens) { 3019 assert(isToken(AsmToken::Identifier)); 3020 MCRegister Reg = getSpecialRegForName(getTokenStr()); 3021 if (Reg) { 3022 RegNum = 0; 3023 RegWidth = 32; 3024 RegKind = IS_SPECIAL; 3025 Tokens.push_back(getToken()); 3026 lex(); // skip register name 3027 } 3028 return Reg; 3029 } 3030 3031 MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 3032 unsigned &RegNum, 3033 unsigned &RegWidth, 3034 SmallVectorImpl<AsmToken> &Tokens) { 3035 
assert(isToken(AsmToken::Identifier)); 3036 StringRef RegName = getTokenStr(); 3037 auto Loc = getLoc(); 3038 3039 const RegInfo *RI = getRegularRegInfo(RegName); 3040 if (!RI) { 3041 Error(Loc, "invalid register name"); 3042 return MCRegister(); 3043 } 3044 3045 Tokens.push_back(getToken()); 3046 lex(); // skip register name 3047 3048 RegKind = RI->Kind; 3049 StringRef RegSuffix = RegName.substr(RI->Name.size()); 3050 unsigned SubReg = NoSubRegister; 3051 if (!RegSuffix.empty()) { 3052 if (RegSuffix.consume_back(".l")) 3053 SubReg = AMDGPU::lo16; 3054 else if (RegSuffix.consume_back(".h")) 3055 SubReg = AMDGPU::hi16; 3056 3057 // Single 32-bit register: vXX. 3058 if (!getRegNum(RegSuffix, RegNum)) { 3059 Error(Loc, "invalid register index"); 3060 return MCRegister(); 3061 } 3062 RegWidth = 32; 3063 } else { 3064 // Range of registers: v[XX:YY]. ":YY" is optional. 3065 if (!ParseRegRange(RegNum, RegWidth, SubReg)) 3066 return MCRegister(); 3067 } 3068 3069 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc); 3070 } 3071 3072 MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, 3073 unsigned &RegNum, unsigned &RegWidth, 3074 SmallVectorImpl<AsmToken> &Tokens) { 3075 MCRegister Reg; 3076 auto ListLoc = getLoc(); 3077 3078 if (!skipToken(AsmToken::LBrac, 3079 "expected a register or a list of registers")) { 3080 return MCRegister(); 3081 } 3082 3083 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 3084 3085 auto Loc = getLoc(); 3086 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 3087 return MCRegister(); 3088 if (RegWidth != 32) { 3089 Error(Loc, "expected a single 32-bit register"); 3090 return MCRegister(); 3091 } 3092 3093 for (; trySkipToken(AsmToken::Comma); ) { 3094 RegisterKind NextRegKind; 3095 MCRegister NextReg; 3096 unsigned NextRegNum, NextRegWidth; 3097 Loc = getLoc(); 3098 3099 if (!ParseAMDGPURegister(NextRegKind, NextReg, 3100 NextRegNum, NextRegWidth, 3101 Tokens)) { 3102 return MCRegister(); 3103 } 3104 if (NextRegWidth != 32) { 3105 Error(Loc, "expected a single 32-bit register"); 3106 return MCRegister(); 3107 } 3108 if (NextRegKind != RegKind) { 3109 Error(Loc, "registers in a list must be of the same kind"); 3110 return MCRegister(); 3111 } 3112 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 3113 return MCRegister(); 3114 } 3115 3116 if (!skipToken(AsmToken::RBrac, 3117 "expected a comma or a closing square bracket")) { 3118 return MCRegister(); 3119 } 3120 3121 if (isRegularReg(RegKind)) 3122 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc); 3123 3124 return Reg; 3125 } 3126 3127 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, 3128 MCRegister &Reg, unsigned &RegNum, 3129 unsigned &RegWidth, 3130 SmallVectorImpl<AsmToken> &Tokens) { 3131 auto Loc = getLoc(); 3132 Reg = MCRegister(); 3133 3134 if (isToken(AsmToken::Identifier)) { 3135 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 3136 if (!Reg) 3137 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 3138 } else { 3139 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 3140 } 3141 3142 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3143 if (!Reg) { 3144 assert(Parser.hasPendingError()); 3145 return false; 3146 } 3147 3148 if (!subtargetHasRegister(*TRI, Reg)) { 3149 if (Reg == AMDGPU::SGPR_NULL) { 3150 Error(Loc, "'null' operand is not supported on this GPU"); 3151 } else { 3152 Error(Loc, Twine(AMDGPUInstPrinter::getRegisterName(Reg)) + 3153 " register not available on this GPU"); 3154 } 3155 
return false; 3156 } 3157 3158 return true; 3159 } 3160 3161 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, 3162 MCRegister &Reg, unsigned &RegNum, 3163 unsigned &RegWidth, 3164 bool RestoreOnFailure /*=false*/) { 3165 Reg = MCRegister(); 3166 3167 SmallVector<AsmToken, 1> Tokens; 3168 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 3169 if (RestoreOnFailure) { 3170 while (!Tokens.empty()) { 3171 getLexer().UnLex(Tokens.pop_back_val()); 3172 } 3173 } 3174 return true; 3175 } 3176 return false; 3177 } 3178 3179 std::optional<StringRef> 3180 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 3181 switch (RegKind) { 3182 case IS_VGPR: 3183 return StringRef(".amdgcn.next_free_vgpr"); 3184 case IS_SGPR: 3185 return StringRef(".amdgcn.next_free_sgpr"); 3186 default: 3187 return std::nullopt; 3188 } 3189 } 3190 3191 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 3192 auto SymbolName = getGprCountSymbolName(RegKind); 3193 assert(SymbolName && "initializing invalid register kind"); 3194 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 3195 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 3196 Sym->setRedefinable(true); 3197 } 3198 3199 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 3200 unsigned DwordRegIndex, 3201 unsigned RegWidth) { 3202 // Symbols are only defined for GCN targets 3203 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 3204 return true; 3205 3206 auto SymbolName = getGprCountSymbolName(RegKind); 3207 if (!SymbolName) 3208 return true; 3209 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 3210 3211 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 3212 int64_t OldCount; 3213 3214 if (!Sym->isVariable()) 3215 return !Error(getLoc(), 3216 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 3217 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount)) 3218 return !Error( 3219 getLoc(), 3220 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 3221 3222 if (OldCount <= NewMax) 3223 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 3224 3225 return true; 3226 } 3227 3228 std::unique_ptr<AMDGPUOperand> 3229 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 3230 const auto &Tok = getToken(); 3231 SMLoc StartLoc = Tok.getLoc(); 3232 SMLoc EndLoc = Tok.getEndLoc(); 3233 RegisterKind RegKind; 3234 MCRegister Reg; 3235 unsigned RegNum, RegWidth; 3236 3237 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 3238 return nullptr; 3239 } 3240 if (isHsaAbi(getSTI())) { 3241 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 3242 return nullptr; 3243 } else 3244 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 3245 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 3246 } 3247 3248 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands, 3249 bool HasSP3AbsModifier, bool HasLit, 3250 bool HasLit64) { 3251 // TODO: add syntactic sugar for 1/(2*PI) 3252 3253 if (isRegister() || isModifier()) 3254 return ParseStatus::NoMatch; 3255 3256 if (!HasLit && !HasLit64) { 3257 HasLit64 = trySkipId("lit64"); 3258 HasLit = !HasLit64 && trySkipId("lit"); 3259 if (HasLit || HasLit64) { 3260 if (!skipToken(AsmToken::LParen, "expected left paren after lit")) 3261 return ParseStatus::Failure; 3262 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit, HasLit64); 3263 if (S.isSuccess() && 3264 !skipToken(AsmToken::RParen, "expected closing parentheses")) 3265 return 
ParseStatus::Failure; 3266 return S; 3267 } 3268 } 3269 3270 const auto& Tok = getToken(); 3271 const auto& NextTok = peekToken(); 3272 bool IsReal = Tok.is(AsmToken::Real); 3273 SMLoc S = getLoc(); 3274 bool Negate = false; 3275 3276 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 3277 lex(); 3278 IsReal = true; 3279 Negate = true; 3280 } 3281 3282 AMDGPUOperand::Modifiers Mods; 3283 Mods.Lit = HasLit; 3284 Mods.Lit64 = HasLit64; 3285 3286 if (IsReal) { 3287 // Floating-point expressions are not supported. 3288 // We can only allow floating-point literals with an 3289 // optional sign. 3290 3291 StringRef Num = getTokenStr(); 3292 lex(); 3293 3294 APFloat RealVal(APFloat::IEEEdouble()); 3295 auto roundMode = APFloat::rmNearestTiesToEven; 3296 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) 3297 return ParseStatus::Failure; 3298 if (Negate) 3299 RealVal.changeSign(); 3300 3301 Operands.push_back( 3302 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 3303 AMDGPUOperand::ImmTyNone, true)); 3304 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3305 Op.setModifiers(Mods); 3306 3307 return ParseStatus::Success; 3308 3309 } else { 3310 int64_t IntVal; 3311 const MCExpr *Expr; 3312 SMLoc S = getLoc(); 3313 3314 if (HasSP3AbsModifier) { 3315 // This is a workaround for handling expressions 3316 // as arguments of SP3 'abs' modifier, for example: 3317 // |1.0| 3318 // |-1| 3319 // |1+x| 3320 // This syntax is not compatible with the syntax of standard 3321 // MC expressions (due to the trailing '|'). 3322 SMLoc EndLoc; 3323 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 3324 return ParseStatus::Failure; 3325 } else { 3326 if (Parser.parseExpression(Expr)) 3327 return ParseStatus::Failure; 3328 } 3329 3330 if (Expr->evaluateAsAbsolute(IntVal)) { 3331 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 3332 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3333 Op.setModifiers(Mods); 3334 } else { 3335 if (HasLit || HasLit64) 3336 return ParseStatus::NoMatch; 3337 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3338 } 3339 3340 return ParseStatus::Success; 3341 } 3342 3343 3344 } 3345 3346 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) { 3347 if (!isRegister()) 3348 return ParseStatus::NoMatch; 3349 3350 if (auto R = parseRegister()) { 3351 assert(R->isReg()); 3352 Operands.push_back(std::move(R)); 3353 return ParseStatus::Success; 3354 } 3355 return ParseStatus::Failure; 3356 } 3357 3358 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, 3359 bool HasSP3AbsMod, bool HasLit, 3360 bool HasLit64) { 3361 ParseStatus Res = parseReg(Operands); 3362 if (!Res.isNoMatch()) 3363 return Res; 3364 if (isModifier()) 3365 return ParseStatus::NoMatch; 3366 return parseImm(Operands, HasSP3AbsMod, HasLit, HasLit64); 3367 } 3368 3369 bool 3370 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3371 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3372 const auto &str = Token.getString(); 3373 return str == "abs" || str == "neg" || str == "sext"; 3374 } 3375 return false; 3376 } 3377 3378 bool 3379 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3380 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3381 } 3382 3383 bool 3384 AMDGPUAsmParser::isOperandModifier(const AsmToken
&Token, const AsmToken &NextToken) const { 3385 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3386 } 3387 3388 bool 3389 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3390 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3391 } 3392 3393 // Check if this is an operand modifier or an opcode modifier 3394 // which may look like an expression but is not. We should 3395 // avoid parsing these modifiers as expressions. Currently 3396 // recognized sequences are: 3397 // |...| 3398 // abs(...) 3399 // neg(...) 3400 // sext(...) 3401 // -reg 3402 // -|...| 3403 // -abs(...) 3404 // name:... 3405 // 3406 bool 3407 AMDGPUAsmParser::isModifier() { 3408 3409 AsmToken Tok = getToken(); 3410 AsmToken NextToken[2]; 3411 peekTokens(NextToken); 3412 3413 return isOperandModifier(Tok, NextToken[0]) || 3414 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3415 isOpcodeModifierWithVal(Tok, NextToken[0]); 3416 } 3417 3418 // Check if the current token is an SP3 'neg' modifier. 3419 // Currently this modifier is allowed in the following contexts: 3420 // 3421 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3422 // 2. Before an 'abs' modifier: -abs(...) 3423 // 3. Before an SP3 'abs' modifier: -|...| 3424 // 3425 // In all other cases "-" is handled as a part 3426 // of an expression that follows the sign. 3427 // 3428 // Note: When "-" is followed by an integer literal N, 3429 // this is interpreted as integer negation rather 3430 // than a floating-point NEG modifier applied to N. 3431 // Besides being counter-intuitive, such use of a floating-point 3432 // NEG modifier would have resulted in different meanings 3433 // of integer literals used with VOP1/2/C and VOP3, 3434 // for example: 3435 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3436 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3437 // Negative fp literals with a preceding "-" are 3438 // handled likewise, for uniformity. 3439 // 3440 bool 3441 AMDGPUAsmParser::parseSP3NegModifier() { 3442 3443 AsmToken NextToken[2]; 3444 peekTokens(NextToken); 3445 3446 if (isToken(AsmToken::Minus) && 3447 (isRegister(NextToken[0], NextToken[1]) || 3448 NextToken[0].is(AsmToken::Pipe) || 3449 isId(NextToken[0], "abs"))) { 3450 lex(); 3451 return true; 3452 } 3453 3454 return false; 3455 } 3456 3457 ParseStatus 3458 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3459 bool AllowImm) { 3460 bool Neg, SP3Neg; 3461 bool Abs, SP3Abs; 3462 bool Lit64, Lit; 3463 SMLoc Loc; 3464 3465 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
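// (Illustrative: 'v_ceil_f32 v0, --1' is rejected by the check below; write
// 'v_ceil_f32 v0, neg(-1)' instead.)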
3466 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) 3467 return Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3468 3469 SP3Neg = parseSP3NegModifier(); 3470 3471 Loc = getLoc(); 3472 Neg = trySkipId("neg"); 3473 if (Neg && SP3Neg) 3474 return Error(Loc, "expected register or immediate"); 3475 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3476 return ParseStatus::Failure; 3477 3478 Abs = trySkipId("abs"); 3479 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3480 return ParseStatus::Failure; 3481 3482 Lit64 = trySkipId("lit64"); 3483 if (Lit64) { 3484 if (!skipToken(AsmToken::LParen, "expected left paren after lit64")) 3485 return ParseStatus::Failure; 3486 if (!has64BitLiterals()) 3487 return Error(Loc, "lit64 is not supported on this GPU"); 3488 } 3489 3490 Lit = !Lit64 && trySkipId("lit"); 3491 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit")) 3492 return ParseStatus::Failure; 3493 3494 Loc = getLoc(); 3495 SP3Abs = trySkipToken(AsmToken::Pipe); 3496 if (Abs && SP3Abs) 3497 return Error(Loc, "expected register or immediate"); 3498 3499 ParseStatus Res; 3500 if (AllowImm) { 3501 Res = parseRegOrImm(Operands, SP3Abs, Lit, Lit64); 3502 } else { 3503 Res = parseReg(Operands); 3504 } 3505 if (!Res.isSuccess()) 3506 return (SP3Neg || Neg || SP3Abs || Abs || Lit || Lit64) 3507 ? ParseStatus::Failure 3508 : Res; 3509 3510 if ((Lit || Lit64) && !Operands.back()->isImm()) 3511 Error(Loc, "expected immediate with lit modifier"); 3512 3513 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3514 return ParseStatus::Failure; 3515 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3516 return ParseStatus::Failure; 3517 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3518 return ParseStatus::Failure; 3519 if ((Lit || Lit64) && 3520 !skipToken(AsmToken::RParen, "expected closing parentheses")) 3521 return ParseStatus::Failure; 3522 3523 AMDGPUOperand::Modifiers Mods; 3524 Mods.Abs = Abs || SP3Abs; 3525 Mods.Neg = Neg || SP3Neg; 3526 Mods.Lit = Lit; 3527 Mods.Lit64 = Lit64; 3528 3529 if (Mods.hasFPModifiers() || Lit || Lit64) { 3530 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3531 if (Op.isExpr()) 3532 return Error(Op.getStartLoc(), "expected an absolute expression"); 3533 Op.setModifiers(Mods); 3534 } 3535 return ParseStatus::Success; 3536 } 3537 3538 ParseStatus 3539 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3540 bool AllowImm) { 3541 bool Sext = trySkipId("sext"); 3542 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3543 return ParseStatus::Failure; 3544 3545 ParseStatus Res; 3546 if (AllowImm) { 3547 Res = parseRegOrImm(Operands); 3548 } else { 3549 Res = parseReg(Operands); 3550 } 3551 if (!Res.isSuccess()) 3552 return Sext ? 
ParseStatus::Failure : Res; 3553 3554 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3555 return ParseStatus::Failure; 3556 3557 AMDGPUOperand::Modifiers Mods; 3558 Mods.Sext = Sext; 3559 3560 if (Mods.hasIntModifiers()) { 3561 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3562 if (Op.isExpr()) 3563 return Error(Op.getStartLoc(), "expected an absolute expression"); 3564 Op.setModifiers(Mods); 3565 } 3566 3567 return ParseStatus::Success; 3568 } 3569 3570 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3571 return parseRegOrImmWithFPInputMods(Operands, false); 3572 } 3573 3574 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3575 return parseRegOrImmWithIntInputMods(Operands, false); 3576 } 3577 3578 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3579 auto Loc = getLoc(); 3580 if (trySkipId("off")) { 3581 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3582 AMDGPUOperand::ImmTyOff, false)); 3583 return ParseStatus::Success; 3584 } 3585 3586 if (!isRegister()) 3587 return ParseStatus::NoMatch; 3588 3589 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3590 if (Reg) { 3591 Operands.push_back(std::move(Reg)); 3592 return ParseStatus::Success; 3593 } 3594 3595 return ParseStatus::Failure; 3596 } 3597 3598 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3599 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3600 3601 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3602 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3603 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3604 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3605 return Match_InvalidOperand; 3606 3607 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3608 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3609 // v_mac_f32/16 allow only dst_sel == DWORD; 3610 auto OpNum = 3611 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3612 const auto &Op = Inst.getOperand(OpNum); 3613 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3614 return Match_InvalidOperand; 3615 } 3616 } 3617 3618 // Asm can first try to match VOPD or VOPD3. By failing early here with 3619 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD. 3620 // Checking later during validateInstruction does not give a chance to retry 3621 // parsing as a different encoding. 
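  // Illustrative example (hypothetical operands): a pair such as
  //   v_dual_add_f32 v0, v1, v2 :: v_dual_mov_b32 v3, v4
  // may be encodable as either VOPD or VOPD3; failing the match here lets
  // the matcher retry the instruction under the alternative encoding.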
  if (tryAnotherVOPDEncoding(Inst))
    return Match_InvalidOperand;

  return Match_Success;
}

static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
    AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
  };

  return ArrayRef(Variants);
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (isForcedDPP() && isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
    return ArrayRef(Variants);
  }
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return ArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return ArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return ArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return ArrayRef(Variants);
  }

  return getAllVariants();
}

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
    return "e64_dpp";

  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (MCPhysReg Reg : Desc.implicit_uses()) {
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases in which a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
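// For example (illustrative values): a 32-bit source of 1.0 is inlinable on
// all targets, while a 16-bit 1.0 (0x3C00) is inlinable only where f16
// inline constants exist; per the note above, this asymmetry cannot matter
// for GFX7 constant bus checks.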
3708 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3709 unsigned OpIdx) const { 3710 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3711 3712 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) || 3713 AMDGPU::isKImmOperand(Desc, OpIdx)) { 3714 return false; 3715 } 3716 3717 const MCOperand &MO = Inst.getOperand(OpIdx); 3718 3719 int64_t Val = MO.getImm(); 3720 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3721 3722 switch (OpSize) { // expected operand size 3723 case 8: 3724 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3725 case 4: 3726 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3727 case 2: { 3728 const unsigned OperandType = Desc.operands()[OpIdx].OperandType; 3729 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3730 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16) 3731 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm()); 3732 3733 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3734 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3735 return AMDGPU::isInlinableLiteralV2I16(Val); 3736 3737 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3738 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3739 return AMDGPU::isInlinableLiteralV2F16(Val); 3740 3741 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 || 3742 OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16) 3743 return AMDGPU::isInlinableLiteralV2BF16(Val); 3744 3745 if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 || 3746 OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16) 3747 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm()); 3748 3749 if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 || 3750 OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16) 3751 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm()); 3752 3753 llvm_unreachable("invalid operand type"); 3754 } 3755 default: 3756 llvm_unreachable("invalid operand size"); 3757 } 3758 } 3759 3760 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3761 if (!isGFX10Plus()) 3762 return 1; 3763 3764 switch (Opcode) { 3765 // 64-bit shift instructions can use only one scalar value input 3766 case AMDGPU::V_LSHLREV_B64_e64: 3767 case AMDGPU::V_LSHLREV_B64_gfx10: 3768 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3769 case AMDGPU::V_LSHLREV_B64_e32_gfx12: 3770 case AMDGPU::V_LSHLREV_B64_e64_gfx12: 3771 case AMDGPU::V_LSHRREV_B64_e64: 3772 case AMDGPU::V_LSHRREV_B64_gfx10: 3773 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3774 case AMDGPU::V_LSHRREV_B64_e64_gfx12: 3775 case AMDGPU::V_ASHRREV_I64_e64: 3776 case AMDGPU::V_ASHRREV_I64_gfx10: 3777 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3778 case AMDGPU::V_ASHRREV_I64_e64_gfx12: 3779 case AMDGPU::V_LSHL_B64_e64: 3780 case AMDGPU::V_LSHR_B64_e64: 3781 case AMDGPU::V_ASHR_I64_e64: 3782 return 1; 3783 default: 3784 return 2; 3785 } 3786 } 3787 3788 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6; 3789 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>; 3790 3791 // Get regular operand indices in the same order as specified 3792 // in the instruction (but append mandatory literals to the end). 3793 static OperandIndices getSrcOperandIndices(unsigned Opcode, 3794 bool AddMandatoryLiterals = false) { 3795 3796 int16_t ImmIdx = 3797 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1; 3798 3799 if (isVOPD(Opcode)) { 3800 int16_t ImmXIdx = 3801 AddMandatoryLiterals ? 
getNamedOperandIdx(Opcode, OpName::immX) : -1;

    return {getNamedOperandIdx(Opcode, OpName::src0X),
            getNamedOperandIdx(Opcode, OpName::vsrc1X),
            getNamedOperandIdx(Opcode, OpName::vsrc2X),
            getNamedOperandIdx(Opcode, OpName::src0Y),
            getNamedOperandIdx(Opcode, OpName::vsrc1Y),
            getNamedOperandIdx(Opcode, OpName::vsrc2Y),
            ImmXIdx,
            ImmIdx};
  }

  return {getNamedOperandIdx(Opcode, OpName::src0),
          getNamedOperandIdx(Opcode, OpName::src1),
          getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
}

bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm())
    return !isInlineConstant(Inst, OpIdx);
  if (MO.isReg()) {
    auto Reg = MO.getReg();
    if (!Reg)
      return false;
    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
    auto PReg = mc2PseudoReg(Reg);
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
  }
  return true;
}

// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
// Writelane is special in that it can use SGPR and M0 (which would normally
// count as using the constant bus twice - but in this case it is allowed since
// the lane selector doesn't count as a use of the constant bus). However, it is
// still required to abide by the 1 SGPR rule.
static bool checkWriteLane(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
    return false;
  const MCOperand &LaneSelOp = Inst.getOperand(2);
  if (!LaneSelOp.isReg())
    return false;
  auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
  return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
}

bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  MCRegister LastSGPR;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (!(Desc.TSFlags &
        (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
         SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
      !isVOPD(Opcode))
    return true;

  if (checkWriteLane(Inst))
    return true;

  // Check special imm operands (used by madmk, etc.)
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
    ++NumLiterals;
    LiteralSize = 4;
  }

  SmallDenseSet<unsigned> SGPRsUsed;
  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }

  OperandIndices OpIndices = getSrcOperandIndices(Opcode);

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (usesConstantBus(Inst, OpIdx)) {
      if (MO.isReg()) {
        LastSGPR = mc2PseudoReg(MO.getReg());
        // Pairs of registers with a partial intersection, like these:
        //   s0, s[0:1]
        //   flat_scratch_lo, flat_scratch
        //   flat_scratch_lo, flat_scratch_hi
        // are theoretically valid but they are disabled anyway.
        // Note that this code mimics SIInstrInfo::verifyInstruction
        if (SGPRsUsed.insert(LastSGPR).second) {
          ++ConstantBusUseCount;
        }
      } else { // Expression or a literal

        if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
          continue; // special operand like VINTERP attr_chan

        // An instruction may use only one literal.
        // This has been validated in a previous step.
        // See validateVOPLiteral.
        // This literal may be used as more than one operand.
        // If all these operands are of the same size,
        // this literal counts as one scalar value.
        // Otherwise it counts as 2 scalar values.
        // See "GFX10 Shader Programming", section 3.6.2.3.

        unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
        if (Size < 4)
          Size = 4;

        if (NumLiterals == 0) {
          NumLiterals = 1;
          LiteralSize = Size;
        } else if (LiteralSize != Size) {
          NumLiterals = 2;
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  return false;
}

std::optional<unsigned>
AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {

  const unsigned Opcode = Inst.getOpcode();
  if (!isVOPD(Opcode))
    return {};

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    const MCOperand &Opr = Inst.getOperand(OperandIdx);
    return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
               ? Opr.getReg()
               : MCRegister();
  };

  // On GFX12+ if both OpX and OpY are V_MOV_B32 then OpY uses SRC2
  // source-cache.
  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
  bool AllowSameVGPR = isGFX1250();

  if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
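    // Illustrative example (hypothetical operands): if matched as VOPD3,
    //   v_dual_fmac_f32 v0, 0x12345678, v2 :: v_dual_mov_b32 v3, v4
    // is rejected by the check below, because 0x12345678 is not an inline
    // constant.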
    for (auto OpName : {OpName::src0X, OpName::src0Y}) {
      int I = getNamedOperandIdx(Opcode, OpName);
      const MCOperand &Op = Inst.getOperand(I);
      if (!Op.isImm())
        continue;
      int64_t Imm = Op.getImm();
      if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
          !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
        return (unsigned)I;
    }

    for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
                        OpName::vsrc2Y, OpName::imm}) {
      int I = getNamedOperandIdx(Opcode, OpName);
      if (I == -1)
        continue;
      const MCOperand &Op = Inst.getOperand(I);
      if (Op.isImm())
        return (unsigned)I;
    }
  }

  const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
      getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);

  return InvalidCompOprIdx;
}

bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
                                   const OperandVector &Operands) {

  unsigned Opcode = Inst.getOpcode();
  bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;

  if (AsVOPD3) {
    for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
          (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
        Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
    }
  }

  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
  if (!InvalidCompOprIdx.has_value())
    return true;

  auto CompOprIdx = *InvalidCompOprIdx;
  const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
  auto ParsedIdx =
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  assert(ParsedIdx > 0 && ParsedIdx < Operands.size());

  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    if (AsVOPD3)
      Error(Loc, "dst registers must be distinct");
    else
      Error(Loc, "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    Error(Loc, Twine("src") + Twine(CompSrcIdx) +
                   " operands must use different VGPR banks");
  }

  return false;
}

// \returns true if \p Inst does not satisfy VOPD constraints, but can
// potentially be used as VOPD3 with the same operands.
bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
  // First check if it fits VOPD
  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
  if (!InvalidCompOprIdx.has_value())
    return false;

  // Then if it fits VOPD3
  InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
  if (InvalidCompOprIdx.has_value()) {
    // If the failing operand is the dst, it is better to report the error
    // against the VOPD3 instruction, as it has more capabilities and the
    // error message will be more informative. If the dst is not legal for
    // VOPD3, then it is not legal for VOPD either.
    if (*InvalidCompOprIdx == VOPD::Component::DST)
      return true;

    // Otherwise prefer VOPD, as we may find ourselves in an awkward situation
    // with a conflict in the tied implicit src2 of fmac and no asm operand
    // to point to.
    return false;
  }
  return true;
}

// \returns true if a VOPD3 instruction can also be represented as a shorter
// VOPD encoding.
bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const auto &II = getVOPDInstInfo(Opcode, &MII);
  unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
  if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
      !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
    return false;

  // This is an awkward exception: the VOPD3 variant of V_DUAL_CNDMASK_B32 has
  // an explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
  // be parsed as VOPD, which does not accept src2.
  if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
      II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
    return false;

  // If any modifiers are set this cannot be VOPD.
  for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
                      OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
                      OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    if (I == -1)
      continue;
    if (Inst.getOperand(I).getImm())
      return false;
  }

  return !tryVOPD3(Inst);
}

// VOPD3 has more relaxed register constraints than VOPD. We prefer the
// shorter VOPD form but switch to VOPD3 otherwise.
bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  if (!isGFX1250() || !isVOPD(Opcode))
    return false;

  if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
    return tryVOPD(Inst);
  return tryVOPD3(Inst);
}

bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
    int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
    assert(ClampIdx != -1);
    return Inst.getOperand(ClampIdx).getImm() == 0;
  }

  return true;
}

constexpr uint64_t MIMGFlags =
    SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;

bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
    return true;

  if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
    return true;

  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    DMask = 1;

  bool IsPackedD16 = false;
  unsigned DataSize =
      (Desc.TSFlags & SIInstrFlags::Gather4) ?
4 : llvm::popcount(DMask); 4146 if (hasPackedD16()) { 4147 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 4148 IsPackedD16 = D16Idx >= 0; 4149 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm()) 4150 DataSize = (DataSize + 1) / 2; 4151 } 4152 4153 if ((VDataSize / 4) == DataSize + TFESize) 4154 return true; 4155 4156 StringRef Modifiers; 4157 if (isGFX90A()) 4158 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask"; 4159 else 4160 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe"; 4161 4162 Error(IDLoc, Twine("image data size does not match ") + Modifiers); 4163 return false; 4164 } 4165 4166 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, 4167 const SMLoc &IDLoc) { 4168 const unsigned Opc = Inst.getOpcode(); 4169 const MCInstrDesc &Desc = MII.get(Opc); 4170 4171 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus()) 4172 return true; 4173 4174 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 4175 4176 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 4177 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 4178 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 4179 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) 4180 ? AMDGPU::OpName::srsrc 4181 : AMDGPU::OpName::rsrc; 4182 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName); 4183 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 4184 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 4185 4186 assert(VAddr0Idx != -1); 4187 assert(SrsrcIdx != -1); 4188 assert(SrsrcIdx > VAddr0Idx); 4189 4190 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 4191 if (BaseOpcode->BVH) { 4192 if (IsA16 == BaseOpcode->A16) 4193 return true; 4194 Error(IDLoc, "image address size does not match a16"); 4195 return false; 4196 } 4197 4198 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 4199 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 4200 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 4201 unsigned ActualAddrSize = 4202 IsNSA ? SrsrcIdx - VAddr0Idx 4203 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 4204 4205 unsigned ExpectedAddrSize = 4206 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 4207 4208 if (IsNSA) { 4209 if (hasPartialNSAEncoding() && 4210 ExpectedAddrSize > 4211 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) { 4212 int VAddrLastIdx = SrsrcIdx - 1; 4213 unsigned VAddrLastSize = 4214 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4; 4215 4216 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; 4217 } 4218 } else { 4219 if (ExpectedAddrSize > 12) 4220 ExpectedAddrSize = 16; 4221 4222 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 4223 // This provides backward compatibility for assembly created 4224 // before 160b/192b/224b types were directly supported. 
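    // For instance (illustrative): an image instruction needing a 6-dword
    // address may still be written with vaddr in v[0:7]; the oversized tuple
    // is accepted for compatibility.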
4225 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 4226 return true; 4227 } 4228 4229 if (ActualAddrSize == ExpectedAddrSize) 4230 return true; 4231 4232 Error(IDLoc, "image address size does not match dim and a16"); 4233 return false; 4234 } 4235 4236 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 4237 4238 const unsigned Opc = Inst.getOpcode(); 4239 const MCInstrDesc &Desc = MII.get(Opc); 4240 4241 if ((Desc.TSFlags & MIMGFlags) == 0) 4242 return true; 4243 if (!Desc.mayLoad() || !Desc.mayStore()) 4244 return true; // Not atomic 4245 4246 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 4247 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 4248 4249 // This is an incomplete check because image_atomic_cmpswap 4250 // may only use 0x3 and 0xf while other atomic operations 4251 // may use 0x1 and 0x3. However these limitations are 4252 // verified when we check that dmask matches dst size. 4253 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 4254 } 4255 4256 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 4257 4258 const unsigned Opc = Inst.getOpcode(); 4259 const MCInstrDesc &Desc = MII.get(Opc); 4260 4261 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 4262 return true; 4263 4264 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 4265 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 4266 4267 // GATHER4 instructions use dmask in a different fashion compared to 4268 // other MIMG instructions. The only useful DMASK values are 4269 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 4270 // (red,red,red,red) etc.) The ISA document doesn't mention 4271 // this. 4272 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 4273 } 4274 4275 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst, 4276 const OperandVector &Operands) { 4277 if (!isGFX10Plus()) 4278 return true; 4279 4280 const unsigned Opc = Inst.getOpcode(); 4281 const MCInstrDesc &Desc = MII.get(Opc); 4282 4283 if ((Desc.TSFlags & MIMGFlags) == 0) 4284 return true; 4285 4286 // image_bvh_intersect_ray instructions do not have dim 4287 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH) 4288 return true; 4289 4290 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4291 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4292 if (Op.isDim()) 4293 return true; 4294 } 4295 return false; 4296 } 4297 4298 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 4299 const unsigned Opc = Inst.getOpcode(); 4300 const MCInstrDesc &Desc = MII.get(Opc); 4301 4302 if ((Desc.TSFlags & MIMGFlags) == 0) 4303 return true; 4304 4305 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 4306 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 4307 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 4308 4309 if (!BaseOpcode->MSAA) 4310 return true; 4311 4312 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 4313 assert(DimIdx != -1); 4314 4315 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 4316 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 4317 4318 return DimInfo->MSAA; 4319 } 4320 4321 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 4322 { 4323 switch (Opcode) { 4324 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 4325 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 4326 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 4327 return true; 4328 default: 4329 return false; 4330 } 4331 } 4332 4333 // movrels* opcodes should only allow VGPRS as src0. 
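// For example (illustrative): 'v_movrels_b32_sdwa v0, s0' is rejected by the
// check below, while 'v_movrels_b32_sdwa v0, v1' is accepted.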
4334 // This is specified in .td description for vop1/vop3, 4335 // but sdwa is handled differently. See isSDWAOperand. 4336 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 4337 const OperandVector &Operands) { 4338 4339 const unsigned Opc = Inst.getOpcode(); 4340 const MCInstrDesc &Desc = MII.get(Opc); 4341 4342 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 4343 return true; 4344 4345 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4346 assert(Src0Idx != -1); 4347 4348 SMLoc ErrLoc; 4349 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 4350 if (Src0.isReg()) { 4351 auto Reg = mc2PseudoReg(Src0.getReg()); 4352 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4353 if (!isSGPR(Reg, TRI)) 4354 return true; 4355 ErrLoc = getRegLoc(Reg, Operands); 4356 } else { 4357 ErrLoc = getConstLoc(Operands); 4358 } 4359 4360 Error(ErrLoc, "source operand must be a VGPR"); 4361 return false; 4362 } 4363 4364 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 4365 const OperandVector &Operands) { 4366 4367 const unsigned Opc = Inst.getOpcode(); 4368 4369 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 4370 return true; 4371 4372 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4373 assert(Src0Idx != -1); 4374 4375 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 4376 if (!Src0.isReg()) 4377 return true; 4378 4379 auto Reg = mc2PseudoReg(Src0.getReg()); 4380 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4381 if (!isGFX90A() && isSGPR(Reg, TRI)) { 4382 Error(getRegLoc(Reg, Operands), 4383 "source operand must be either a VGPR or an inline constant"); 4384 return false; 4385 } 4386 4387 return true; 4388 } 4389 4390 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst, 4391 const OperandVector &Operands) { 4392 unsigned Opcode = Inst.getOpcode(); 4393 const MCInstrDesc &Desc = MII.get(Opcode); 4394 4395 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) || 4396 !getFeatureBits()[FeatureMFMAInlineLiteralBug]) 4397 return true; 4398 4399 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2); 4400 if (Src2Idx == -1) 4401 return true; 4402 4403 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) { 4404 Error(getConstLoc(Operands), 4405 "inline constants are not allowed for this operand"); 4406 return false; 4407 } 4408 4409 return true; 4410 } 4411 4412 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 4413 const OperandVector &Operands) { 4414 const unsigned Opc = Inst.getOpcode(); 4415 const MCInstrDesc &Desc = MII.get(Opc); 4416 4417 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 4418 return true; 4419 4420 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4421 if (BlgpIdx != -1) { 4422 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) { 4423 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz); 4424 4425 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm(); 4426 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm(); 4427 4428 // Validate the correct register size was used for the floating point 4429 // format operands 4430 4431 bool Success = true; 4432 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) { 4433 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4434 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()), 4435 Operands), 4436 "wrong register tuple size for cbsz value " + Twine(CBSZ)); 4437 Success = false; 4438 } 4439 4440 if (Info->NumRegsSrcB != 
mfmaScaleF8F6F4FormatToNumRegs(BLGP)) { 4441 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 4442 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()), 4443 Operands), 4444 "wrong register tuple size for blgp value " + Twine(BLGP)); 4445 Success = false; 4446 } 4447 4448 return Success; 4449 } 4450 } 4451 4452 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 4453 if (Src2Idx == -1) 4454 return true; 4455 4456 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 4457 if (!Src2.isReg()) 4458 return true; 4459 4460 MCRegister Src2Reg = Src2.getReg(); 4461 MCRegister DstReg = Inst.getOperand(0).getReg(); 4462 if (Src2Reg == DstReg) 4463 return true; 4464 4465 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4466 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128) 4467 return true; 4468 4469 if (TRI->regsOverlap(Src2Reg, DstReg)) { 4470 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 4471 "source 2 operand must not partially overlap with dst"); 4472 return false; 4473 } 4474 4475 return true; 4476 } 4477 4478 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 4479 switch (Inst.getOpcode()) { 4480 default: 4481 return true; 4482 case V_DIV_SCALE_F32_gfx6_gfx7: 4483 case V_DIV_SCALE_F32_vi: 4484 case V_DIV_SCALE_F32_gfx10: 4485 case V_DIV_SCALE_F64_gfx6_gfx7: 4486 case V_DIV_SCALE_F64_vi: 4487 case V_DIV_SCALE_F64_gfx10: 4488 break; 4489 } 4490 4491 // TODO: Check that src0 = src1 or src2. 4492 4493 for (auto Name : {AMDGPU::OpName::src0_modifiers, 4494 AMDGPU::OpName::src2_modifiers, 4495 AMDGPU::OpName::src2_modifiers}) { 4496 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 4497 .getImm() & 4498 SISrcMods::ABS) { 4499 return false; 4500 } 4501 } 4502 4503 return true; 4504 } 4505 4506 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 4507 4508 const unsigned Opc = Inst.getOpcode(); 4509 const MCInstrDesc &Desc = MII.get(Opc); 4510 4511 if ((Desc.TSFlags & MIMGFlags) == 0) 4512 return true; 4513 4514 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 4515 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 4516 if (isCI() || isSI()) 4517 return false; 4518 } 4519 4520 return true; 4521 } 4522 4523 bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) { 4524 const unsigned Opc = Inst.getOpcode(); 4525 const MCInstrDesc &Desc = MII.get(Opc); 4526 4527 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0) 4528 return true; 4529 4530 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128); 4531 4532 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm(); 4533 } 4534 4535 static bool IsRevOpcode(const unsigned Opcode) 4536 { 4537 switch (Opcode) { 4538 case AMDGPU::V_SUBREV_F32_e32: 4539 case AMDGPU::V_SUBREV_F32_e64: 4540 case AMDGPU::V_SUBREV_F32_e32_gfx10: 4541 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 4542 case AMDGPU::V_SUBREV_F32_e32_vi: 4543 case AMDGPU::V_SUBREV_F32_e64_gfx10: 4544 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 4545 case AMDGPU::V_SUBREV_F32_e64_vi: 4546 4547 case AMDGPU::V_SUBREV_CO_U32_e32: 4548 case AMDGPU::V_SUBREV_CO_U32_e64: 4549 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 4550 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 4551 4552 case AMDGPU::V_SUBBREV_U32_e32: 4553 case AMDGPU::V_SUBBREV_U32_e64: 4554 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 4555 case AMDGPU::V_SUBBREV_U32_e32_vi: 4556 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 4557 case AMDGPU::V_SUBBREV_U32_e64_vi: 4558 4559 case AMDGPU::V_SUBREV_U32_e32: 4560 
case AMDGPU::V_SUBREV_U32_e64: 4561 case AMDGPU::V_SUBREV_U32_e32_gfx9: 4562 case AMDGPU::V_SUBREV_U32_e32_vi: 4563 case AMDGPU::V_SUBREV_U32_e64_gfx9: 4564 case AMDGPU::V_SUBREV_U32_e64_vi: 4565 4566 case AMDGPU::V_SUBREV_F16_e32: 4567 case AMDGPU::V_SUBREV_F16_e64: 4568 case AMDGPU::V_SUBREV_F16_e32_gfx10: 4569 case AMDGPU::V_SUBREV_F16_e32_vi: 4570 case AMDGPU::V_SUBREV_F16_e64_gfx10: 4571 case AMDGPU::V_SUBREV_F16_e64_vi: 4572 4573 case AMDGPU::V_SUBREV_U16_e32: 4574 case AMDGPU::V_SUBREV_U16_e64: 4575 case AMDGPU::V_SUBREV_U16_e32_vi: 4576 case AMDGPU::V_SUBREV_U16_e64_vi: 4577 4578 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 4579 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 4580 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 4581 4582 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 4583 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 4584 4585 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 4586 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 4587 4588 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 4589 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 4590 4591 case AMDGPU::V_LSHRREV_B32_e32: 4592 case AMDGPU::V_LSHRREV_B32_e64: 4593 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 4594 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 4595 case AMDGPU::V_LSHRREV_B32_e32_vi: 4596 case AMDGPU::V_LSHRREV_B32_e64_vi: 4597 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 4598 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 4599 4600 case AMDGPU::V_ASHRREV_I32_e32: 4601 case AMDGPU::V_ASHRREV_I32_e64: 4602 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 4603 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 4604 case AMDGPU::V_ASHRREV_I32_e32_vi: 4605 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 4606 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 4607 case AMDGPU::V_ASHRREV_I32_e64_vi: 4608 4609 case AMDGPU::V_LSHLREV_B32_e32: 4610 case AMDGPU::V_LSHLREV_B32_e64: 4611 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 4612 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 4613 case AMDGPU::V_LSHLREV_B32_e32_vi: 4614 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 4615 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 4616 case AMDGPU::V_LSHLREV_B32_e64_vi: 4617 4618 case AMDGPU::V_LSHLREV_B16_e32: 4619 case AMDGPU::V_LSHLREV_B16_e64: 4620 case AMDGPU::V_LSHLREV_B16_e32_vi: 4621 case AMDGPU::V_LSHLREV_B16_e64_vi: 4622 case AMDGPU::V_LSHLREV_B16_gfx10: 4623 4624 case AMDGPU::V_LSHRREV_B16_e32: 4625 case AMDGPU::V_LSHRREV_B16_e64: 4626 case AMDGPU::V_LSHRREV_B16_e32_vi: 4627 case AMDGPU::V_LSHRREV_B16_e64_vi: 4628 case AMDGPU::V_LSHRREV_B16_gfx10: 4629 4630 case AMDGPU::V_ASHRREV_I16_e32: 4631 case AMDGPU::V_ASHRREV_I16_e64: 4632 case AMDGPU::V_ASHRREV_I16_e32_vi: 4633 case AMDGPU::V_ASHRREV_I16_e64_vi: 4634 case AMDGPU::V_ASHRREV_I16_gfx10: 4635 4636 case AMDGPU::V_LSHLREV_B64_e64: 4637 case AMDGPU::V_LSHLREV_B64_gfx10: 4638 case AMDGPU::V_LSHLREV_B64_vi: 4639 4640 case AMDGPU::V_LSHRREV_B64_e64: 4641 case AMDGPU::V_LSHRREV_B64_gfx10: 4642 case AMDGPU::V_LSHRREV_B64_vi: 4643 4644 case AMDGPU::V_ASHRREV_I64_e64: 4645 case AMDGPU::V_ASHRREV_I64_gfx10: 4646 case AMDGPU::V_ASHRREV_I64_vi: 4647 4648 case AMDGPU::V_PK_LSHLREV_B16: 4649 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 4650 case AMDGPU::V_PK_LSHLREV_B16_vi: 4651 4652 case AMDGPU::V_PK_LSHRREV_B16: 4653 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 4654 case AMDGPU::V_PK_LSHRREV_B16_vi: 4655 case AMDGPU::V_PK_ASHRREV_I16: 4656 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4657 case AMDGPU::V_PK_ASHRREV_I16_vi: 4658 return true; 4659 default: 4660 return false; 4661 } 4662 } 4663 4664 std::optional<StringRef> 4665 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4666 4667 using namespace SIInstrFlags; 4668 const 
unsigned Opcode = Inst.getOpcode(); 4669 const MCInstrDesc &Desc = MII.get(Opcode); 4670 4671 // lds_direct register is defined so that it can be used 4672 // with 9-bit operands only. Ignore encodings which do not accept these. 4673 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4674 if ((Desc.TSFlags & Enc) == 0) 4675 return std::nullopt; 4676 4677 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4678 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4679 if (SrcIdx == -1) 4680 break; 4681 const auto &Src = Inst.getOperand(SrcIdx); 4682 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4683 4684 if (isGFX90A() || isGFX11Plus()) 4685 return StringRef("lds_direct is not supported on this GPU"); 4686 4687 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4688 return StringRef("lds_direct cannot be used with this instruction"); 4689 4690 if (SrcName != OpName::src0) 4691 return StringRef("lds_direct may be used as src0 only"); 4692 } 4693 } 4694 4695 return std::nullopt; 4696 } 4697 4698 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4699 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4700 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4701 if (Op.isFlatOffset()) 4702 return Op.getStartLoc(); 4703 } 4704 return getLoc(); 4705 } 4706 4707 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst, 4708 const OperandVector &Operands) { 4709 auto Opcode = Inst.getOpcode(); 4710 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4711 if (OpNum == -1) 4712 return true; 4713 4714 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4715 if ((TSFlags & SIInstrFlags::FLAT)) 4716 return validateFlatOffset(Inst, Operands); 4717 4718 if ((TSFlags & SIInstrFlags::SMRD)) 4719 return validateSMEMOffset(Inst, Operands); 4720 4721 const auto &Op = Inst.getOperand(OpNum); 4722 if (isGFX12Plus() && 4723 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 4724 const unsigned OffsetSize = 24; 4725 if (!isIntN(OffsetSize, Op.getImm())) { 4726 Error(getFlatOffsetLoc(Operands), 4727 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4728 return false; 4729 } 4730 } else { 4731 const unsigned OffsetSize = 16; 4732 if (!isUIntN(OffsetSize, Op.getImm())) { 4733 Error(getFlatOffsetLoc(Operands), 4734 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4735 return false; 4736 } 4737 } 4738 return true; 4739 } 4740 4741 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4742 const OperandVector &Operands) { 4743 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4744 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4745 return true; 4746 4747 auto Opcode = Inst.getOpcode(); 4748 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4749 assert(OpNum != -1); 4750 4751 const auto &Op = Inst.getOperand(OpNum); 4752 if (!hasFlatOffsets() && Op.getImm() != 0) { 4753 Error(getFlatOffsetLoc(Operands), 4754 "flat offset modifier is not supported on this GPU"); 4755 return false; 4756 } 4757 4758 // For pre-GFX12 FLAT instructions the offset must be positive; 4759 // MSB is ignored and forced to zero. 
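  // Worked example, assuming a target where getNumFlatOffsetBits() returns 13
  // (illustrative):
  //   global_load_dword v0, v[0:1], off offset:-4096  // ok: 13-bit signed
  //   flat_load_dword   v0, v[0:1]  offset:-1         // rejected: 12-bit unsigned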
4760 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI()); 4761 bool AllowNegative = 4762 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) || 4763 isGFX12Plus(); 4764 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { 4765 Error(getFlatOffsetLoc(Operands), 4766 Twine("expected a ") + 4767 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset" 4768 : Twine(OffsetSize - 1) + "-bit unsigned offset")); 4769 return false; 4770 } 4771 4772 return true; 4773 } 4774 4775 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4776 // Start with second operand because SMEM Offset cannot be dst or src0. 4777 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4778 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4779 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod()) 4780 return Op.getStartLoc(); 4781 } 4782 return getLoc(); 4783 } 4784 4785 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4786 const OperandVector &Operands) { 4787 if (isCI() || isSI()) 4788 return true; 4789 4790 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4791 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4792 return true; 4793 4794 auto Opcode = Inst.getOpcode(); 4795 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4796 if (OpNum == -1) 4797 return true; 4798 4799 const auto &Op = Inst.getOperand(OpNum); 4800 if (!Op.isImm()) 4801 return true; 4802 4803 uint64_t Offset = Op.getImm(); 4804 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4805 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4806 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4807 return true; 4808 4809 Error(getSMEMOffsetLoc(Operands), 4810 isGFX12Plus() ? "expected a 24-bit signed offset" 4811 : (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" 4812 : "expected a 21-bit signed offset"); 4813 4814 return false; 4815 } 4816 4817 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4818 unsigned Opcode = Inst.getOpcode(); 4819 const MCInstrDesc &Desc = MII.get(Opcode); 4820 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4821 return true; 4822 4823 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4824 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4825 4826 const int OpIndices[] = { Src0Idx, Src1Idx }; 4827 4828 unsigned NumExprs = 0; 4829 unsigned NumLiterals = 0; 4830 uint64_t LiteralValue; 4831 4832 for (int OpIdx : OpIndices) { 4833 if (OpIdx == -1) break; 4834 4835 const MCOperand &MO = Inst.getOperand(OpIdx); 4836 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4837 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4838 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4839 uint64_t Value = static_cast<uint64_t>(MO.getImm()); 4840 if (NumLiterals == 0 || LiteralValue != Value) { 4841 LiteralValue = Value; 4842 ++NumLiterals; 4843 } 4844 } else if (MO.isExpr()) { 4845 ++NumExprs; 4846 } 4847 } 4848 } 4849 4850 return NumLiterals + NumExprs <= 1; 4851 } 4852 4853 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4854 const unsigned Opc = Inst.getOpcode(); 4855 if (isPermlane16(Opc)) { 4856 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4857 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4858 4859 if (OpSel & ~3) 4860 return false; 4861 } 4862 4863 uint64_t TSFlags = MII.get(Opc).TSFlags; 4864 4865 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { 4866 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4867 if (OpSelIdx != -1) { 4868 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4869 return false; 4870 } 4871 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4872 if (OpSelHiIdx != -1) { 4873 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4874 return false; 4875 } 4876 } 4877 4878 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). 4879 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && 4880 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { 4881 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4882 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4883 if (OpSel & 3) 4884 return false; 4885 } 4886 4887 return true; 4888 } 4889 4890 bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) { 4891 if (!hasTrue16Insts()) 4892 return true; 4893 const MCRegisterInfo *MRI = getMRI(); 4894 const unsigned Opc = Inst.getOpcode(); 4895 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4896 if (OpSelIdx == -1) 4897 return true; 4898 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm(); 4899 // If the value is 0 we could have a default OpSel Operand, so conservatively 4900 // allow it. 
  if (OpSelOpValue == 0)
    return true;
  unsigned OpCount = 0;
  for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
    if (OpIdx == -1)
      continue;
    const MCOperand &Op = Inst.getOperand(OpIdx);
    if (Op.isReg() &&
        MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
      bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
      bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
      if (OpSelOpIsHi != VGPRSuffixIsHi)
        return false;
    }
    ++OpCount;
  }

  return true;
}

bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
  assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);

  const unsigned Opc = Inst.getOpcode();
  uint64_t TSFlags = MII.get(Opc).TSFlags;

  // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
  // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
  // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
  // other wmma/swmmac instructions don't have a neg_lo/neg_hi operand.
  if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
      !(TSFlags & SIInstrFlags::IsSWMMAC))
    return true;

  int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
  if (NegIdx == -1)
    return true;

  unsigned Neg = Inst.getOperand(NegIdx).getImm();

  // Some instructions have a neg_lo or neg_hi operand, but the neg modifier
  // is allowed on some src operands and not on others.
  // Conveniently, such instructions don't have a src_modifiers operand for
  // the src operands that don't allow neg, because those operands also don't
  // allow opsel.
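  // Illustrative example (hypothetical operands): for a v_wmma iu8 variant,
  // neg_lo:[0,0,1] is rejected below because src2 has no src_modifiers
  // operand, so its neg_lo bit must be clear.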
4947 4948 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers, 4949 AMDGPU::OpName::src1_modifiers, 4950 AMDGPU::OpName::src2_modifiers}; 4951 4952 for (unsigned i = 0; i < 3; ++i) { 4953 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) { 4954 if (Neg & (1 << i)) 4955 return false; 4956 } 4957 } 4958 4959 return true; 4960 } 4961 4962 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4963 const OperandVector &Operands) { 4964 const unsigned Opc = Inst.getOpcode(); 4965 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4966 if (DppCtrlIdx >= 0) { 4967 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4968 4969 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) && 4970 AMDGPU::isDPALU_DPP(MII.get(Opc))) { 4971 // DP ALU DPP is supported for row_newbcast only on GFX9* 4972 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4973 Error(S, "DP ALU dpp only supports row_newbcast"); 4974 return false; 4975 } 4976 } 4977 4978 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8); 4979 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0; 4980 4981 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) { 4982 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 4983 if (Src1Idx >= 0) { 4984 const MCOperand &Src1 = Inst.getOperand(Src1Idx); 4985 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4986 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) { 4987 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()); 4988 SMLoc S = getRegLoc(Reg, Operands); 4989 Error(S, "invalid operand for instruction"); 4990 return false; 4991 } 4992 if (Src1.isImm()) { 4993 Error(getInstLoc(Operands), 4994 "src1 immediate operand invalid for instruction"); 4995 return false; 4996 } 4997 } 4998 } 4999 5000 return true; 5001 } 5002 5003 // Check if VCC register matches wavefront size 5004 bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const { 5005 auto FB = getFeatureBits(); 5006 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 5007 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 5008 } 5009 5010 // One unique literal can be used. 
VOP3 literal is only allowed in GFX10+ 5011 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 5012 const OperandVector &Operands) { 5013 unsigned Opcode = Inst.getOpcode(); 5014 const MCInstrDesc &Desc = MII.get(Opcode); 5015 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1; 5016 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 5017 !HasMandatoryLiteral && !isVOPD(Opcode)) 5018 return true; 5019 5020 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral); 5021 5022 unsigned NumExprs = 0; 5023 unsigned NumLiterals = 0; 5024 uint64_t LiteralValue; 5025 5026 for (int OpIdx : OpIndices) { 5027 if (OpIdx == -1) 5028 continue; 5029 5030 const MCOperand &MO = Inst.getOperand(OpIdx); 5031 if (!MO.isImm() && !MO.isExpr()) 5032 continue; 5033 if (!isSISrcOperand(Desc, OpIdx)) 5034 continue; 5035 5036 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 5037 uint64_t Value = static_cast<uint64_t>(MO.getImm()); 5038 bool IsForcedFP64 = 5039 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 || 5040 (Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_REG_IMM_FP64 && 5041 HasMandatoryLiteral); 5042 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) && 5043 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8; 5044 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64); 5045 5046 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) && 5047 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) { 5048 Error(getLitLoc(Operands), "invalid operand for instruction"); 5049 return false; 5050 } 5051 5052 if (IsFP64 && IsValid32Op && !IsForcedFP64) 5053 Value = Hi_32(Value); 5054 5055 if (NumLiterals == 0 || LiteralValue != Value) { 5056 LiteralValue = Value; 5057 ++NumLiterals; 5058 } 5059 } else if (MO.isExpr()) { 5060 ++NumExprs; 5061 } 5062 } 5063 NumLiterals += NumExprs; 5064 5065 if (!NumLiterals) 5066 return true; 5067 5068 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) { 5069 Error(getLitLoc(Operands), "literal operands are not supported"); 5070 return false; 5071 } 5072 5073 if (NumLiterals > 1) { 5074 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed"); 5075 return false; 5076 } 5077 5078 return true; 5079 } 5080 5081 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 5082 static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, 5083 const MCRegisterInfo *MRI) { 5084 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name); 5085 if (OpIdx < 0) 5086 return -1; 5087 5088 const MCOperand &Op = Inst.getOperand(OpIdx); 5089 if (!Op.isReg()) 5090 return -1; 5091 5092 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 5093 auto Reg = Sub ? Sub : Op.getReg(); 5094 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 5095 return AGPR32.contains(Reg) ? 1 : 0; 5096 } 5097 5098 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 5099 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 5100 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 5101 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 5102 SIInstrFlags::DS)) == 0) 5103 return true; 5104 5105 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS) 5106 ? 
AMDGPU::OpName::data0
                                : AMDGPU::OpName::vdata;

  const MCRegisterInfo *MRI = getMRI();
  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
  int DataAreg = IsAGPROperand(Inst, DataName, MRI);

  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
    int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
    if (Data2Areg >= 0 && Data2Areg != DataAreg)
      return false;
  }

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  return DstAreg < 1 && DataAreg < 1;
}

bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  unsigned Opc = Inst.getOpcode();
  // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows an
  // unaligned VGPR. All others allow only even-aligned VGPRs.
  if (!(FB[AMDGPU::FeatureGFX90AInsts]) || Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
    return true;

  const MCRegisterInfo *MRI = getMRI();
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
    const MCOperand &Op = Inst.getOperand(I);
    if (!Op.isReg())
      continue;

    MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
    if (!Sub)
      continue;

    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      return false;
    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      return false;
  }

  return true;
}

SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isBLGP())
      return Op.getStartLoc();
  }
  return SMLoc();
}

bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
                                   const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();
  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  if (BlgpIdx == -1)
    return true;
  SMLoc BLGPLoc = getBLGPLoc(Operands);
  if (!BLGPLoc.isValid())
    return true;
  bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
  auto FB = getFeatureBits();
  bool UsesNeg = false;
  if (FB[AMDGPU::FeatureGFX940Insts]) {
    switch (Opc) {
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
      UsesNeg = true;
    }
  }

  if (IsNeg == UsesNeg)
    return true;

  Error(BLGPLoc,
        UsesNeg ?
"invalid modifier: blgp is not supported" 5194 : "invalid modifier: neg is not supported"); 5195 5196 return false; 5197 } 5198 5199 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst, 5200 const OperandVector &Operands) { 5201 if (!isGFX11Plus()) 5202 return true; 5203 5204 unsigned Opc = Inst.getOpcode(); 5205 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 && 5206 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 && 5207 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 && 5208 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11) 5209 return true; 5210 5211 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst); 5212 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg()); 5213 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()); 5214 if (Reg == AMDGPU::SGPR_NULL) 5215 return true; 5216 5217 SMLoc RegLoc = getRegLoc(Reg, Operands); 5218 Error(RegLoc, "src0 must be null"); 5219 return false; 5220 } 5221 5222 bool AMDGPUAsmParser::validateDS(const MCInst &Inst, 5223 const OperandVector &Operands) { 5224 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 5225 if ((TSFlags & SIInstrFlags::DS) == 0) 5226 return true; 5227 if (TSFlags & SIInstrFlags::GWS) 5228 return validateGWS(Inst, Operands); 5229 // Only validate GDS for non-GWS instructions. 5230 if (hasGDS()) 5231 return true; 5232 int GDSIdx = 5233 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds); 5234 if (GDSIdx < 0) 5235 return true; 5236 unsigned GDS = Inst.getOperand(GDSIdx).getImm(); 5237 if (GDS) { 5238 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands); 5239 Error(S, "gds modifier is not supported on this GPU"); 5240 return false; 5241 } 5242 return true; 5243 } 5244 5245 // gfx90a has an undocumented limitation: 5246 // DS_GWS opcodes must use even aligned registers. 5247 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 5248 const OperandVector &Operands) { 5249 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 5250 return true; 5251 5252 int Opc = Inst.getOpcode(); 5253 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 5254 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 5255 return true; 5256 5257 const MCRegisterInfo *MRI = getMRI(); 5258 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 5259 int Data0Pos = 5260 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 5261 assert(Data0Pos != -1); 5262 auto Reg = Inst.getOperand(Data0Pos).getReg(); 5263 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 5264 if (RegIdx & 1) { 5265 SMLoc RegLoc = getRegLoc(Reg, Operands); 5266 Error(RegLoc, "vgpr must be even aligned"); 5267 return false; 5268 } 5269 5270 return true; 5271 } 5272 5273 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 5274 const OperandVector &Operands, 5275 const SMLoc &IDLoc) { 5276 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 5277 AMDGPU::OpName::cpol); 5278 if (CPolPos == -1) 5279 return true; 5280 5281 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 5282 5283 if (isGFX12Plus()) 5284 return validateTHAndScopeBits(Inst, Operands, CPol); 5285 5286 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 5287 if (TSFlags & SIInstrFlags::SMRD) { 5288 if (CPol && (isSI() || isCI())) { 5289 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 5290 Error(S, "cache policy is not supported for SMRD instructions"); 5291 return false; 5292 } 5293 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 5294 Error(IDLoc, "invalid cache policy for SMEM instruction"); 5295 return false; 5296 } 5297 } 5298 5299 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 5300 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF | 5301 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 5302 SIInstrFlags::FLAT; 5303 if (!(TSFlags & AllowSCCModifier)) { 5304 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 5305 StringRef CStr(S.getPointer()); 5306 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 5307 Error(S, 5308 "scc modifier is not supported for this instruction on this GPU"); 5309 return false; 5310 } 5311 } 5312 5313 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 5314 return true; 5315 5316 if (TSFlags & SIInstrFlags::IsAtomicRet) { 5317 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 5318 Error(IDLoc, isGFX940() ? "instruction must use sc0" 5319 : "instruction must use glc"); 5320 return false; 5321 } 5322 } else { 5323 if (CPol & CPol::GLC) { 5324 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 5325 StringRef CStr(S.getPointer()); 5326 S = SMLoc::getFromPointer( 5327 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 5328 Error(S, isGFX940() ? 
"instruction must not use sc0" 5329 : "instruction must not use glc"); 5330 return false; 5331 } 5332 } 5333 5334 return true; 5335 } 5336 5337 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst, 5338 const OperandVector &Operands, 5339 const unsigned CPol) { 5340 const unsigned TH = CPol & AMDGPU::CPol::TH; 5341 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE; 5342 5343 const unsigned Opcode = Inst.getOpcode(); 5344 const MCInstrDesc &TID = MII.get(Opcode); 5345 5346 auto PrintError = [&](StringRef Msg) { 5347 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 5348 Error(S, Msg); 5349 return false; 5350 }; 5351 5352 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) && 5353 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) && 5354 (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))) 5355 return PrintError("instruction must use th:TH_ATOMIC_RETURN"); 5356 5357 if (TH == 0) 5358 return true; 5359 5360 if ((TID.TSFlags & SIInstrFlags::SMRD) && 5361 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) || 5362 (TH == AMDGPU::CPol::TH_NT_HT))) 5363 return PrintError("invalid th value for SMEM instruction"); 5364 5365 if (TH == AMDGPU::CPol::TH_BYPASS) { 5366 if ((Scope != AMDGPU::CPol::SCOPE_SYS && 5367 CPol & AMDGPU::CPol::TH_REAL_BYPASS) || 5368 (Scope == AMDGPU::CPol::SCOPE_SYS && 5369 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS))) 5370 return PrintError("scope and th combination is not valid"); 5371 } 5372 5373 unsigned THType = AMDGPU::getTemporalHintType(TID); 5374 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) { 5375 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC)) 5376 return PrintError("invalid th value for atomic instructions"); 5377 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) { 5378 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE)) 5379 return PrintError("invalid th value for store instructions"); 5380 } else { 5381 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD)) 5382 return PrintError("invalid th value for load instructions"); 5383 } 5384 5385 return true; 5386 } 5387 5388 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, 5389 const OperandVector &Operands) { 5390 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5391 if (Desc.mayStore() && 5392 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 5393 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands); 5394 if (Loc != getInstLoc(Operands)) { 5395 Error(Loc, "TFE modifier has no meaning for store instructions"); 5396 return false; 5397 } 5398 } 5399 5400 return true; 5401 } 5402 5403 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 5404 const SMLoc &IDLoc, 5405 const OperandVector &Operands) { 5406 if (auto ErrMsg = validateLdsDirect(Inst)) { 5407 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 5408 return false; 5409 } 5410 if (!validateTrue16OpSel(Inst)) { 5411 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 5412 "op_sel operand conflicts with 16-bit operand suffix"); 5413 return false; 5414 } 5415 if (!validateSOPLiteral(Inst)) { 5416 Error(getLitLoc(Operands), 5417 "only one unique literal operand is allowed"); 5418 return false; 5419 } 5420 if (!validateVOPLiteral(Inst, Operands)) { 5421 return false; 5422 } 5423 if (!validateConstantBusLimitations(Inst, Operands)) { 5424 return false; 5425 } 5426 if (!validateVOPD(Inst, Operands)) { 5427 return false; 5428 } 5429 if (!validateIntClampSupported(Inst)) { 5430 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands), 5431 "integer clamping is not supported on this GPU"); 5432 return false; 5433 } 5434 if (!validateOpSel(Inst)) { 5435 
Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 5436 "invalid op_sel operand"); 5437 return false; 5438 } 5439 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) { 5440 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands), 5441 "invalid neg_lo operand"); 5442 return false; 5443 } 5444 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) { 5445 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands), 5446 "invalid neg_hi operand"); 5447 return false; 5448 } 5449 if (!validateDPP(Inst, Operands)) { 5450 return false; 5451 } 5452 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 5453 if (!validateMIMGD16(Inst)) { 5454 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 5455 "d16 modifier is not supported on this GPU"); 5456 return false; 5457 } 5458 if (!validateMIMGDim(Inst, Operands)) { 5459 Error(IDLoc, "missing dim operand"); 5460 return false; 5461 } 5462 if (!validateTensorR128(Inst)) { 5463 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 5464 "instruction must set modifier r128=0"); 5465 return false; 5466 } 5467 if (!validateMIMGMSAA(Inst)) { 5468 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 5469 "invalid dim; must be MSAA type"); 5470 return false; 5471 } 5472 if (!validateMIMGDataSize(Inst, IDLoc)) { 5473 return false; 5474 } 5475 if (!validateMIMGAddrSize(Inst, IDLoc)) 5476 return false; 5477 if (!validateMIMGAtomicDMask(Inst)) { 5478 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 5479 "invalid atomic image dmask"); 5480 return false; 5481 } 5482 if (!validateMIMGGatherDMask(Inst)) { 5483 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 5484 "invalid image_gather dmask: only one bit must be set"); 5485 return false; 5486 } 5487 if (!validateMovrels(Inst, Operands)) { 5488 return false; 5489 } 5490 if (!validateOffset(Inst, Operands)) { 5491 return false; 5492 } 5493 if (!validateMAIAccWrite(Inst, Operands)) { 5494 return false; 5495 } 5496 if (!validateMAISrc2(Inst, Operands)) { 5497 return false; 5498 } 5499 if (!validateMFMA(Inst, Operands)) { 5500 return false; 5501 } 5502 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 5503 return false; 5504 } 5505 5506 if (!validateAGPRLdSt(Inst)) { 5507 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 5508 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 5509 : "invalid register class: agpr loads and stores not supported on this GPU" 5510 ); 5511 return false; 5512 } 5513 if (!validateVGPRAlign(Inst)) { 5514 Error(IDLoc, 5515 "invalid register class: vgpr tuples must be 64 bit aligned"); 5516 return false; 5517 } 5518 if (!validateDS(Inst, Operands)) { 5519 return false; 5520 } 5521 5522 if (!validateBLGP(Inst, Operands)) { 5523 return false; 5524 } 5525 5526 if (!validateDivScale(Inst)) { 5527 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 5528 return false; 5529 } 5530 if (!validateWaitCnt(Inst, Operands)) { 5531 return false; 5532 } 5533 if (!validateTFE(Inst, Operands)) { 5534 return false; 5535 } 5536 5537 return true; 5538 } 5539 5540 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 5541 const FeatureBitset &FBS, 5542 unsigned VariantID = 0); 5543 5544 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 5545 const FeatureBitset &AvailableFeatures, 5546 unsigned VariantID); 5547 5548 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 5549 const FeatureBitset &FBS) { 5550 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 5551 } 5552 5553 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 5554 const FeatureBitset &FBS, 5555 ArrayRef<unsigned> Variants) { 5556 for (auto Variant : Variants) { 5557 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 5558 return true; 5559 } 5560 5561 return false; 5562 } 5563 5564 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 5565 const SMLoc &IDLoc) { 5566 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits()); 5567 5568 // Check if requested instruction variant is supported. 5569 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 5570 return false; 5571 5572 // This instruction is not supported. 5573 // Clear any other pending errors because they are no longer relevant. 5574 getParser().clearPendingErrors(); 5575 5576 // Requested instruction variant is not supported. 5577 // Check if any other variants are supported. 5578 StringRef VariantName = getMatchedVariantName(); 5579 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 5580 return Error(IDLoc, 5581 Twine(VariantName, 5582 " variant of this instruction is not supported")); 5583 } 5584 5585 // Check if this instruction may be used with a different wavesize. 5586 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && 5587 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { 5588 5589 FeatureBitset FeaturesWS32 = getFeatureBits(); 5590 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64) 5591 .flip(AMDGPU::FeatureWavefrontSize32); 5592 FeatureBitset AvailableFeaturesWS32 = 5593 ComputeAvailableFeatures(FeaturesWS32); 5594 5595 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants())) 5596 return Error(IDLoc, "instruction requires wavesize=32"); 5597 } 5598 5599 // Finally check if this instruction is supported on any other GPU. 5600 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 5601 return Error(IDLoc, "instruction not supported on this GPU"); 5602 } 5603 5604 // Instruction not supported on any GPU. Probably a typo. 
5605 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5606 return Error(IDLoc, "invalid instruction" + Suggestion);
5607 }
5608
5609 static bool isInvalidVOPDY(const OperandVector &Operands,
5610 uint64_t InvalidOprIdx) {
5611 assert(InvalidOprIdx < Operands.size());
5612 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5613 if (Op.isToken() && InvalidOprIdx > 1) {
5614 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5615 return PrevOp.isToken() && PrevOp.getToken() == "::";
5616 }
5617 return false;
5618 }
5619
5620 bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5621 OperandVector &Operands,
5622 MCStreamer &Out,
5623 uint64_t &ErrorInfo,
5624 bool MatchingInlineAsm) {
5625 MCInst Inst;
5626 unsigned Result = Match_Success;
5627 for (auto Variant : getMatchedVariants()) {
5628 uint64_t EI;
5629 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5630 Variant);
5631 // We order match statuses from least to most specific and use the most
5632 // specific status as the result:
5633 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5634 if (R == Match_Success || R == Match_MissingFeature ||
5635 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5636 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5637 Result != Match_MissingFeature)) {
5638 Result = R;
5639 ErrorInfo = EI;
5640 }
5641 if (R == Match_Success)
5642 break;
5643 }
5644
5645 if (Result == Match_Success) {
5646 if (!validateInstruction(Inst, IDLoc, Operands)) {
5647 return true;
5648 }
5649 Inst.setLoc(IDLoc);
5650 Out.emitInstruction(Inst, getSTI());
5651 return false;
5652 }
5653
5654 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5655 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5656 return true;
5657 }
5658
5659 switch (Result) {
5660 default: break;
5661 case Match_MissingFeature:
5662 // It has been verified that the specified instruction
5663 // mnemonic is valid. A match was found but it requires
5664 // features which are not supported on this GPU.
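// (Illustrative: assembling a gfx90a-only instruction such as
// v_mfma_f64_16x16x4f64 while targeting gfx900 would end up here.)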
5665 return Error(IDLoc, "operands are not valid for this GPU or mode"); 5666 5667 case Match_InvalidOperand: { 5668 SMLoc ErrorLoc = IDLoc; 5669 if (ErrorInfo != ~0ULL) { 5670 if (ErrorInfo >= Operands.size()) { 5671 return Error(IDLoc, "too few operands for instruction"); 5672 } 5673 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 5674 if (ErrorLoc == SMLoc()) 5675 ErrorLoc = IDLoc; 5676 5677 if (isInvalidVOPDY(Operands, ErrorInfo)) 5678 return Error(ErrorLoc, "invalid VOPDY instruction"); 5679 } 5680 return Error(ErrorLoc, "invalid operand for instruction"); 5681 } 5682 5683 case Match_MnemonicFail: 5684 llvm_unreachable("Invalid instructions should have been handled already"); 5685 } 5686 llvm_unreachable("Implement any new match types added!"); 5687 } 5688 5689 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 5690 int64_t Tmp = -1; 5691 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 5692 return true; 5693 } 5694 if (getParser().parseAbsoluteExpression(Tmp)) { 5695 return true; 5696 } 5697 Ret = static_cast<uint32_t>(Tmp); 5698 return false; 5699 } 5700 5701 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 5702 if (!getSTI().getTargetTriple().isAMDGCN()) 5703 return TokError("directive only supported for amdgcn architecture"); 5704 5705 std::string TargetIDDirective; 5706 SMLoc TargetStart = getTok().getLoc(); 5707 if (getParser().parseEscapedString(TargetIDDirective)) 5708 return true; 5709 5710 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 5711 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5712 return getParser().Error(TargetRange.Start, 5713 (Twine(".amdgcn_target directive's target id ") + 5714 Twine(TargetIDDirective) + 5715 Twine(" does not match the specified target id ") + 5716 Twine(getTargetStreamer().getTargetID()->toString())).str()); 5717 5718 return false; 5719 } 5720 5721 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 5722 return Error(Range.Start, "value out of range", Range); 5723 } 5724 5725 bool AMDGPUAsmParser::calculateGPRBlocks( 5726 const FeatureBitset &Features, const MCExpr *VCCUsed, 5727 const MCExpr *FlatScrUsed, bool XNACKUsed, 5728 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR, 5729 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange, 5730 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) { 5731 // TODO(scott.linder): These calculations are duplicated from 5732 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
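// A worked example of the block computation below (illustrative; the actual
// granule depends on the subtarget and wavefront size): with
// NextFreeVGPR = 41 and a VGPR encoding granule of 4,
// alignTo(max(1, 41), 4) = 44 and 44 / 4 - 1 = 10, so the granulated
// VGPR count field would be 10.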
5733 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 5734 MCContext &Ctx = getContext(); 5735 5736 const MCExpr *NumSGPRs = NextFreeSGPR; 5737 int64_t EvaluatedSGPRs; 5738 5739 if (Version.Major >= 10) 5740 NumSGPRs = MCConstantExpr::create(0, Ctx); 5741 else { 5742 unsigned MaxAddressableNumSGPRs = 5743 IsaInfo::getAddressableNumSGPRs(&getSTI()); 5744 5745 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 && 5746 !Features.test(FeatureSGPRInitBug) && 5747 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) 5748 return OutOfRangeError(SGPRRange); 5749 5750 const MCExpr *ExtraSGPRs = 5751 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx); 5752 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx); 5753 5754 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && 5755 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 5756 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) 5757 return OutOfRangeError(SGPRRange); 5758 5759 if (Features.test(FeatureSGPRInitBug)) 5760 NumSGPRs = 5761 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx); 5762 } 5763 5764 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks: 5765 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1 5766 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR, 5767 unsigned Granule) -> const MCExpr * { 5768 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx); 5769 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx); 5770 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx); 5771 const MCExpr *AlignToGPR = 5772 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx); 5773 const MCExpr *DivGPR = 5774 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx); 5775 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx); 5776 return SubGPR; 5777 }; 5778 5779 VGPRBlocks = GetNumGPRBlocks( 5780 NextFreeVGPR, 5781 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32)); 5782 SGPRBlocks = 5783 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI())); 5784 5785 return false; 5786 } 5787 5788 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 5789 if (!getSTI().getTargetTriple().isAMDGCN()) 5790 return TokError("directive only supported for amdgcn architecture"); 5791 5792 if (!isHsaAbi(getSTI())) 5793 return TokError("directive only supported for amdhsa OS"); 5794 5795 StringRef KernelName; 5796 if (getParser().parseIdentifier(KernelName)) 5797 return true; 5798 5799 AMDGPU::MCKernelDescriptor KD = 5800 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor( 5801 &getSTI(), getContext()); 5802 5803 StringSet<> Seen; 5804 5805 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 5806 5807 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext()); 5808 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext()); 5809 5810 SMRange VGPRRange; 5811 const MCExpr *NextFreeVGPR = ZeroExpr; 5812 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext()); 5813 uint64_t SharedVGPRCount = 0; 5814 uint64_t PreloadLength = 0; 5815 uint64_t PreloadOffset = 0; 5816 SMRange SGPRRange; 5817 const MCExpr *NextFreeSGPR = ZeroExpr; 5818 5819 // Count the number of user SGPRs implied from the enabled feature bits. 5820 unsigned ImpliedUserSGPRCount = 0; 5821 5822 // Track if the asm explicitly contains the directive for the user SGPR 5823 // count. 
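// (.amdhsa_user_sgpr_count); when present it overrides the implied count,
// and it is an error for it to be smaller than the implied count (checked
// after the directive loop below).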
5824 std::optional<unsigned> ExplicitUserSGPRCount; 5825 const MCExpr *ReserveVCC = OneExpr; 5826 const MCExpr *ReserveFlatScr = OneExpr; 5827 std::optional<bool> EnableWavefrontSize32; 5828 5829 while (true) { 5830 while (trySkipToken(AsmToken::EndOfStatement)); 5831 5832 StringRef ID; 5833 SMRange IDRange = getTok().getLocRange(); 5834 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 5835 return true; 5836 5837 if (ID == ".end_amdhsa_kernel") 5838 break; 5839 5840 if (!Seen.insert(ID).second) 5841 return TokError(".amdhsa_ directives cannot be repeated"); 5842 5843 SMLoc ValStart = getLoc(); 5844 const MCExpr *ExprVal; 5845 if (getParser().parseExpression(ExprVal)) 5846 return true; 5847 SMLoc ValEnd = getLoc(); 5848 SMRange ValRange = SMRange(ValStart, ValEnd); 5849 5850 int64_t IVal = 0; 5851 uint64_t Val = IVal; 5852 bool EvaluatableExpr; 5853 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) { 5854 if (IVal < 0) 5855 return OutOfRangeError(ValRange); 5856 Val = IVal; 5857 } 5858 5859 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 5860 if (!isUInt<ENTRY##_WIDTH>(Val)) \ 5861 return OutOfRangeError(RANGE); \ 5862 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \ 5863 getContext()); 5864 5865 // Some fields use the parsed value immediately which requires the expression to 5866 // be solvable. 5867 #define EXPR_RESOLVE_OR_ERROR(RESOLVED) \ 5868 if (!(RESOLVED)) \ 5869 return Error(IDRange.Start, "directive should have resolvable expression", \ 5870 IDRange); 5871 5872 if (ID == ".amdhsa_group_segment_fixed_size") { 5873 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) * 5874 CHAR_BIT>(Val)) 5875 return OutOfRangeError(ValRange); 5876 KD.group_segment_fixed_size = ExprVal; 5877 } else if (ID == ".amdhsa_private_segment_fixed_size") { 5878 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) * 5879 CHAR_BIT>(Val)) 5880 return OutOfRangeError(ValRange); 5881 KD.private_segment_fixed_size = ExprVal; 5882 } else if (ID == ".amdhsa_kernarg_size") { 5883 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val)) 5884 return OutOfRangeError(ValRange); 5885 KD.kernarg_size = ExprVal; 5886 } else if (ID == ".amdhsa_user_sgpr_count") { 5887 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5888 ExplicitUserSGPRCount = Val; 5889 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 5890 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5891 if (hasArchitectedFlatScratch()) 5892 return Error(IDRange.Start, 5893 "directive is not supported with architected flat scratch", 5894 IDRange); 5895 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5896 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 5897 ExprVal, ValRange); 5898 if (Val) 5899 ImpliedUserSGPRCount += 4; 5900 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") { 5901 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5902 if (!hasKernargPreload()) 5903 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5904 5905 if (Val > getMaxNumUserSGPRs()) 5906 return OutOfRangeError(ValRange); 5907 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal, 5908 ValRange); 5909 if (Val) { 5910 ImpliedUserSGPRCount += Val; 5911 PreloadLength = Val; 5912 } 5913 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") { 5914 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5915 if (!hasKernargPreload()) 5916 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5917 5918 if (Val >= 1024) 5919 return 
OutOfRangeError(ValRange); 5920 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal, 5921 ValRange); 5922 if (Val) 5923 PreloadOffset = Val; 5924 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 5925 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5926 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5927 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal, 5928 ValRange); 5929 if (Val) 5930 ImpliedUserSGPRCount += 2; 5931 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 5932 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5933 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5934 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal, 5935 ValRange); 5936 if (Val) 5937 ImpliedUserSGPRCount += 2; 5938 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 5939 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5940 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5941 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 5942 ExprVal, ValRange); 5943 if (Val) 5944 ImpliedUserSGPRCount += 2; 5945 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 5946 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5947 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5948 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal, 5949 ValRange); 5950 if (Val) 5951 ImpliedUserSGPRCount += 2; 5952 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 5953 if (hasArchitectedFlatScratch()) 5954 return Error(IDRange.Start, 5955 "directive is not supported with architected flat scratch", 5956 IDRange); 5957 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5958 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5959 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5960 ExprVal, ValRange); 5961 if (Val) 5962 ImpliedUserSGPRCount += 2; 5963 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 5964 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5965 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5966 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 5967 ExprVal, ValRange); 5968 if (Val) 5969 ImpliedUserSGPRCount += 1; 5970 } else if (ID == ".amdhsa_wavefront_size32") { 5971 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5972 if (IVersion.Major < 10) 5973 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5974 EnableWavefrontSize32 = Val; 5975 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5976 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal, 5977 ValRange); 5978 } else if (ID == ".amdhsa_uses_dynamic_stack") { 5979 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5980 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal, 5981 ValRange); 5982 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5983 if (hasArchitectedFlatScratch()) 5984 return Error(IDRange.Start, 5985 "directive is not supported with architected flat scratch", 5986 IDRange); 5987 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5988 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, 5989 ValRange); 5990 } else if (ID == ".amdhsa_enable_private_segment") { 5991 if (!hasArchitectedFlatScratch()) 5992 return Error( 5993 IDRange.Start, 5994 "directive is not supported without architected flat scratch", 5995 IDRange); 5996 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5997 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, 5998 ValRange); 5999 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 6000 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 6001 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal, 6002 ValRange); 6003 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 6004 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 6005 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, 
ExprVal, 6006 ValRange); 6007 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 6008 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 6009 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal, 6010 ValRange); 6011 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 6012 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 6013 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal, 6014 ValRange); 6015 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 6016 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 6017 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal, 6018 ValRange); 6019 } else if (ID == ".amdhsa_next_free_vgpr") { 6020 VGPRRange = ValRange; 6021 NextFreeVGPR = ExprVal; 6022 } else if (ID == ".amdhsa_next_free_sgpr") { 6023 SGPRRange = ValRange; 6024 NextFreeSGPR = ExprVal; 6025 } else if (ID == ".amdhsa_accum_offset") { 6026 if (!isGFX90A()) 6027 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 6028 AccumOffset = ExprVal; 6029 } else if (ID == ".amdhsa_reserve_vcc") { 6030 if (EvaluatableExpr && !isUInt<1>(Val)) 6031 return OutOfRangeError(ValRange); 6032 ReserveVCC = ExprVal; 6033 } else if (ID == ".amdhsa_reserve_flat_scratch") { 6034 if (IVersion.Major < 7) 6035 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 6036 if (hasArchitectedFlatScratch()) 6037 return Error(IDRange.Start, 6038 "directive is not supported with architected flat scratch", 6039 IDRange); 6040 if (EvaluatableExpr && !isUInt<1>(Val)) 6041 return OutOfRangeError(ValRange); 6042 ReserveFlatScr = ExprVal; 6043 } else if (ID == ".amdhsa_reserve_xnack_mask") { 6044 if (IVersion.Major < 8) 6045 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 6046 if (!isUInt<1>(Val)) 6047 return OutOfRangeError(ValRange); 6048 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 6049 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 6050 IDRange); 6051 } else if (ID == ".amdhsa_float_round_mode_32") { 6052 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 6053 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal, 6054 ValRange); 6055 } else if (ID == ".amdhsa_float_round_mode_16_64") { 6056 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 6057 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal, 6058 ValRange); 6059 } else if (ID == ".amdhsa_float_denorm_mode_32") { 6060 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 6061 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal, 6062 ValRange); 6063 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 6064 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 6065 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal, 6066 ValRange); 6067 } else if (ID == ".amdhsa_dx10_clamp") { 6068 if (IVersion.Major >= 12) 6069 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 6070 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 6071 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal, 6072 ValRange); 6073 } else if (ID == ".amdhsa_ieee_mode") { 6074 if (IVersion.Major >= 12) 6075 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 6076 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 6077 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal, 6078 ValRange); 6079 } else if (ID == ".amdhsa_fp16_overflow") { 6080 if (IVersion.Major < 9) 6081 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 6082 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 6083 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal, 6084 ValRange); 6085 } else if (ID == ".amdhsa_tg_split") { 6086 if (!isGFX90A()) 6087 return Error(IDRange.Start, "directive requires 
gfx90a+", IDRange); 6088 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, 6089 ExprVal, ValRange); 6090 } else if (ID == ".amdhsa_workgroup_processor_mode") { 6091 if (IVersion.Major < 10) 6092 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 6093 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 6094 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal, 6095 ValRange); 6096 } else if (ID == ".amdhsa_memory_ordered") { 6097 if (IVersion.Major < 10) 6098 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 6099 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 6100 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal, 6101 ValRange); 6102 } else if (ID == ".amdhsa_forward_progress") { 6103 if (IVersion.Major < 10) 6104 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 6105 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 6106 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal, 6107 ValRange); 6108 } else if (ID == ".amdhsa_shared_vgpr_count") { 6109 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 6110 if (IVersion.Major < 10 || IVersion.Major >= 12) 6111 return Error(IDRange.Start, "directive requires gfx10 or gfx11", 6112 IDRange); 6113 SharedVGPRCount = Val; 6114 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 6115 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal, 6116 ValRange); 6117 } else if (ID == ".amdhsa_inst_pref_size") { 6118 if (IVersion.Major < 11) 6119 return Error(IDRange.Start, "directive requires gfx11+", IDRange); 6120 if (IVersion.Major == 11) { 6121 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 6122 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal, 6123 ValRange); 6124 } else { 6125 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 6126 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal, 6127 ValRange); 6128 } 6129 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 6130 PARSE_BITS_ENTRY( 6131 KD.compute_pgm_rsrc2, 6132 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 6133 ExprVal, ValRange); 6134 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 6135 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 6136 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 6137 ExprVal, ValRange); 6138 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 6139 PARSE_BITS_ENTRY( 6140 KD.compute_pgm_rsrc2, 6141 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 6142 ExprVal, ValRange); 6143 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 6144 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 6145 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 6146 ExprVal, ValRange); 6147 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 6148 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 6149 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 6150 ExprVal, ValRange); 6151 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 6152 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 6153 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 6154 ExprVal, ValRange); 6155 } else if (ID == ".amdhsa_exception_int_div_zero") { 6156 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 6157 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 6158 ExprVal, ValRange); 6159 } else if (ID == ".amdhsa_round_robin_scheduling") { 6160 if (IVersion.Major < 12) 6161 return Error(IDRange.Start, "directive requires gfx12+", IDRange); 6162 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 6163 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal, 6164 ValRange); 6165 } else { 6166 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 6167 } 6168 6169 #undef PARSE_BITS_ENTRY 6170 
}
6171
6172 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6173 return TokError(".amdhsa_next_free_vgpr directive is required");
6174
6175 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6176 return TokError(".amdhsa_next_free_sgpr directive is required");
6177
6178 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6179
6180 // Consider the case where the total number of UserSGPRs with trailing
6181 // allocated preload SGPRs is greater than the number of explicitly
6182 // referenced SGPRs.
6183 if (PreloadLength) {
6184 MCContext &Ctx = getContext();
6185 NextFreeSGPR = AMDGPUMCExpr::createMax(
6186 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6187 }
6188
6189 const MCExpr *VGPRBlocks;
6190 const MCExpr *SGPRBlocks;
6191 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6192 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6193 EnableWavefrontSize32, NextFreeVGPR,
6194 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6195 SGPRBlocks))
6196 return true;
6197
6198 int64_t EvaluatedVGPRBlocks;
6199 bool VGPRBlocksEvaluatable =
6200 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6201 if (VGPRBlocksEvaluatable &&
6202 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6203 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6204 return OutOfRangeError(VGPRRange);
6205 }
6206 AMDGPU::MCKernelDescriptor::bits_set(
6207 KD.compute_pgm_rsrc1, VGPRBlocks,
6208 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6209 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6210
6211 int64_t EvaluatedSGPRBlocks;
6212 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6213 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6214 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6215 return OutOfRangeError(SGPRRange);
6216 AMDGPU::MCKernelDescriptor::bits_set(
6217 KD.compute_pgm_rsrc1, SGPRBlocks,
6218 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6219 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6220
6221 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6222 return TokError(".amdhsa_user_sgpr_count smaller than implied by "
6223 "enabled user SGPRs");
6224
6225 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
6226 return TokError("too many user SGPRs enabled");
6227 AMDGPU::MCKernelDescriptor::bits_set(
6228 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
6229 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
6230 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
6231
6232 int64_t IVal = 0;
6233 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6234 return TokError("kernarg size should be resolvable");
6235 uint64_t kernarg_size = IVal;
6236 if (PreloadLength && kernarg_size &&
6237 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6238 return TokError("kernarg preload length + offset is larger than the "
6239 "kernarg segment size");
6240
6241 if (isGFX90A()) {
6242 if (!Seen.contains(".amdhsa_accum_offset"))
6243 return TokError(".amdhsa_accum_offset directive is required");
6244 int64_t EvaluatedAccum;
6245 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6246 uint64_t UEvaluatedAccum = EvaluatedAccum;
6247 if (AccumEvaluatable &&
6248 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6249 return TokError("accum_offset should be in range [4..256] in "
6250 "increments of 4");
6251
6252 int64_t EvaluatedNumVGPR;
6253 if
(NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) && 6254 AccumEvaluatable && 6255 UEvaluatedAccum > 6256 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4)) 6257 return TokError("accum_offset exceeds total VGPR allocation"); 6258 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub( 6259 MCBinaryExpr::createDiv( 6260 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()), 6261 MCConstantExpr::create(1, getContext()), getContext()); 6262 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum, 6263 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, 6264 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 6265 getContext()); 6266 } 6267 6268 if (IVersion.Major >= 10 && IVersion.Major < 12) { 6269 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS 6270 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { 6271 return TokError("shared_vgpr_count directive not valid on " 6272 "wavefront size 32"); 6273 } 6274 6275 if (VGPRBlocksEvaluatable && 6276 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) > 6277 63)) { 6278 return TokError("shared_vgpr_count*2 + " 6279 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 6280 "exceed 63\n"); 6281 } 6282 } 6283 6284 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD, 6285 NextFreeVGPR, NextFreeSGPR, 6286 ReserveVCC, ReserveFlatScr); 6287 return false; 6288 } 6289 6290 bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() { 6291 uint32_t Version; 6292 if (ParseAsAbsoluteExpression(Version)) 6293 return true; 6294 6295 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version); 6296 return false; 6297 } 6298 6299 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 6300 AMDGPUMCKernelCodeT &C) { 6301 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 6302 // assembly for backwards compatibility. 6303 if (ID == "max_scratch_backing_memory_byte_size") { 6304 Parser.eatToEndOfStatement(); 6305 return false; 6306 } 6307 6308 SmallString<40> ErrStr; 6309 raw_svector_ostream Err(ErrStr); 6310 if (!C.ParseKernelCodeT(ID, getParser(), Err)) { 6311 return TokError(Err.str()); 6312 } 6313 Lex(); 6314 6315 if (ID == "enable_wavefront_size32") { 6316 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 6317 if (!isGFX10Plus()) 6318 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 6319 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 6320 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 6321 } else { 6322 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 6323 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 6324 } 6325 } 6326 6327 if (ID == "wavefront_size") { 6328 if (C.wavefront_size == 5) { 6329 if (!isGFX10Plus()) 6330 return TokError("wavefront_size=5 is only allowed on GFX10+"); 6331 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 6332 return TokError("wavefront_size=5 requires +WavefrontSize32"); 6333 } else if (C.wavefront_size == 6) { 6334 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 6335 return TokError("wavefront_size=6 requires +WavefrontSize64"); 6336 } 6337 } 6338 6339 return false; 6340 } 6341 6342 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 6343 AMDGPUMCKernelCodeT KernelCode; 6344 KernelCode.initDefault(&getSTI(), getContext()); 6345 6346 while (true) { 6347 // Lex EndOfStatement. This is in a while loop, because lexing a comment 6348 // will set the current token to EndOfStatement. 
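// (Several consecutive blank or comment-only lines therefore produce a run
// of EndOfStatement tokens, which the loop below skips in one go.)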
6349 while(trySkipToken(AsmToken::EndOfStatement)); 6350 6351 StringRef ID; 6352 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 6353 return true; 6354 6355 if (ID == ".end_amd_kernel_code_t") 6356 break; 6357 6358 if (ParseAMDKernelCodeTValue(ID, KernelCode)) 6359 return true; 6360 } 6361 6362 KernelCode.validate(&getSTI(), getContext()); 6363 getTargetStreamer().EmitAMDKernelCodeT(KernelCode); 6364 6365 return false; 6366 } 6367 6368 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 6369 StringRef KernelName; 6370 if (!parseId(KernelName, "expected symbol name")) 6371 return true; 6372 6373 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 6374 ELF::STT_AMDGPU_HSA_KERNEL); 6375 6376 KernelScope.initialize(getContext()); 6377 return false; 6378 } 6379 6380 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 6381 if (!getSTI().getTargetTriple().isAMDGCN()) { 6382 return Error(getLoc(), 6383 ".amd_amdgpu_isa directive is not available on non-amdgcn " 6384 "architectures"); 6385 } 6386 6387 auto TargetIDDirective = getLexer().getTok().getStringContents(); 6388 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 6389 return Error(getParser().getTok().getLoc(), "target id must match options"); 6390 6391 getTargetStreamer().EmitISAVersion(); 6392 Lex(); 6393 6394 return false; 6395 } 6396 6397 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 6398 assert(isHsaAbi(getSTI())); 6399 6400 std::string HSAMetadataString; 6401 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin, 6402 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString)) 6403 return true; 6404 6405 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 6406 return Error(getLoc(), "invalid HSA metadata"); 6407 6408 return false; 6409 } 6410 6411 /// Common code to parse out a block of text (typically YAML) between start and 6412 /// end directives. 6413 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 6414 const char *AssemblerDirectiveEnd, 6415 std::string &CollectString) { 6416 6417 raw_string_ostream CollectStream(CollectString); 6418 6419 getLexer().setSkipSpace(false); 6420 6421 bool FoundEnd = false; 6422 while (!isToken(AsmToken::Eof)) { 6423 while (isToken(AsmToken::Space)) { 6424 CollectStream << getTokenStr(); 6425 Lex(); 6426 } 6427 6428 if (trySkipId(AssemblerDirectiveEnd)) { 6429 FoundEnd = true; 6430 break; 6431 } 6432 6433 CollectStream << Parser.parseStringToEndOfStatement() 6434 << getContext().getAsmInfo()->getSeparatorString(); 6435 6436 Parser.eatToEndOfStatement(); 6437 } 6438 6439 getLexer().setSkipSpace(true); 6440 6441 if (isToken(AsmToken::Eof) && !FoundEnd) { 6442 return TokError(Twine("expected directive ") + 6443 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 6444 } 6445 6446 return false; 6447 } 6448 6449 /// Parse the assembler directive for new MsgPack-format PAL metadata. 6450 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 6451 std::string String; 6452 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 6453 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 6454 return true; 6455 6456 auto *PALMetadata = getTargetStreamer().getPALMetadata(); 6457 if (!PALMetadata->setFromString(String)) 6458 return Error(getLoc(), "invalid PAL metadata"); 6459 return false; 6460 } 6461 6462 /// Parse the assembler directive for old linear-format PAL metadata. 
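/// The payload is a flat, comma-separated list of key/value register pairs,
/// so an even number of values is expected, e.g. (illustrative):
///   key0, value0, key1, value1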
6463 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6464 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6465 return Error(getLoc(),
6466 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6467 "not available on non-amdpal OSes")).str());
6468 }
6469
6470 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6471 PALMetadata->setLegacy();
6472 for (;;) {
6473 uint32_t Key, Value;
6474 if (ParseAsAbsoluteExpression(Key)) {
6475 return TokError(Twine("invalid value in ") +
6476 Twine(PALMD::AssemblerDirective));
6477 }
6478 if (!trySkipToken(AsmToken::Comma)) {
6479 return TokError(Twine("expected an even number of values in ") +
6480 Twine(PALMD::AssemblerDirective));
6481 }
6482 if (ParseAsAbsoluteExpression(Value)) {
6483 return TokError(Twine("invalid value in ") +
6484 Twine(PALMD::AssemblerDirective));
6485 }
6486 PALMetadata->setRegister(Key, Value);
6487 if (!trySkipToken(AsmToken::Comma))
6488 break;
6489 }
6490 return false;
6491 }
6492
6493 /// ParseDirectiveAMDGPULDS
6494 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6495 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6496 if (getParser().checkForValidSection())
6497 return true;
6498
6499 StringRef Name;
6500 SMLoc NameLoc = getLoc();
6501 if (getParser().parseIdentifier(Name))
6502 return TokError("expected identifier in directive");
6503
6504 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6505 if (getParser().parseComma())
6506 return true;
6507
6508 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6509
6510 int64_t Size;
6511 SMLoc SizeLoc = getLoc();
6512 if (getParser().parseAbsoluteExpression(Size))
6513 return true;
6514 if (Size < 0)
6515 return Error(SizeLoc, "size must be non-negative");
6516 if (Size > LocalMemorySize)
6517 return Error(SizeLoc, "size is too large");
6518
6519 int64_t Alignment = 4;
6520 if (trySkipToken(AsmToken::Comma)) {
6521 SMLoc AlignLoc = getLoc();
6522 if (getParser().parseAbsoluteExpression(Alignment))
6523 return true;
6524 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6525 return Error(AlignLoc, "alignment must be a power of two");
6526
6527 // Alignment larger than the size of LDS is possible in theory, as long
6528 // as the linker manages to place the symbol at address 0, but we do want
6529 // to make sure the alignment fits nicely into a 32-bit integer.
6530 if (Alignment >= 1u << 31)
6531 return Error(AlignLoc, "alignment is too large");
6532 }
6533
6534 if (parseEOL())
6535 return true;
6536
6537 Symbol->redefineIfPossible();
6538 if (!Symbol->isUndefined())
6539 return Error(NameLoc, "invalid symbol redefinition");
6540
6541 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6542 return false;
6543 }
6544
6545 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6546 StringRef IDVal = DirectiveID.getString();
6547
6548 if (isHsaAbi(getSTI())) {
6549 if (IDVal == ".amdhsa_kernel")
6550 return ParseDirectiveAMDHSAKernel();
6551
6552 if (IDVal == ".amdhsa_code_object_version")
6553 return ParseDirectiveAMDHSACodeObjectVersion();
6554
6555 // TODO: Restructure/combine with PAL metadata directive.
6556 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
6557 return ParseDirectiveHSAMetadata();
6558 } else {
6559 if (IDVal == ".amd_kernel_code_t")
6560 return ParseDirectiveAMDKernelCodeT();
6561
6562 if (IDVal == ".amdgpu_hsa_kernel")
6563 return ParseDirectiveAMDGPUHsaKernel();
6564
6565 if (IDVal == ".amd_amdgpu_isa")
6566 return ParseDirectiveISAVersion();
6567
6568 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
6569 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6570 Twine(" directive is "
6571 "not available on non-amdhsa OSes"))
6572 .str());
6573 }
6574 }
6575
6576 if (IDVal == ".amdgcn_target")
6577 return ParseDirectiveAMDGCNTarget();
6578
6579 if (IDVal == ".amdgpu_lds")
6580 return ParseDirectiveAMDGPULDS();
6581
6582 if (IDVal == PALMD::AssemblerDirectiveBegin)
6583 return ParseDirectivePALMetadataBegin();
6584
6585 if (IDVal == PALMD::AssemblerDirective)
6586 return ParseDirectivePALMetadata();
6587
6588 return true;
6589 }
6590
6591 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6592 MCRegister Reg) {
6593 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6594 return isGFX9Plus();
6595
6596 // GFX10+ has 2 more SGPRs 104 and 105.
6597 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6598 return hasSGPR104_SGPR105();
6599
6600 switch (Reg.id()) {
6601 case SRC_SHARED_BASE_LO:
6602 case SRC_SHARED_BASE:
6603 case SRC_SHARED_LIMIT_LO:
6604 case SRC_SHARED_LIMIT:
6605 case SRC_PRIVATE_BASE_LO:
6606 case SRC_PRIVATE_BASE:
6607 case SRC_PRIVATE_LIMIT_LO:
6608 case SRC_PRIVATE_LIMIT:
6609 return isGFX9Plus();
6610 case SRC_POPS_EXITING_WAVE_ID:
6611 return isGFX9Plus() && !isGFX11Plus();
6612 case TBA:
6613 case TBA_LO:
6614 case TBA_HI:
6615 case TMA:
6616 case TMA_LO:
6617 case TMA_HI:
6618 return !isGFX9Plus();
6619 case XNACK_MASK:
6620 case XNACK_MASK_LO:
6621 case XNACK_MASK_HI:
6622 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6623 case SGPR_NULL:
6624 return isGFX10Plus();
6625 case SRC_EXECZ:
6626 case SRC_VCCZ:
6627 return !isGFX11Plus();
6628 default:
6629 break;
6630 }
6631
6632 if (isCI())
6633 return true;
6634
6635 if (isSI() || isGFX10Plus()) {
6636 // No flat_scr on SI.
6637 // On GFX10Plus flat scratch is not a valid register operand and can only be
6638 // accessed with s_setreg/s_getreg.
6639 switch (Reg.id()) {
6640 case FLAT_SCR:
6641 case FLAT_SCR_LO:
6642 case FLAT_SCR_HI:
6643 return false;
6644 default:
6645 return true;
6646 }
6647 }
6648
6649 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6650 // SI/CI have.
6651 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6652 return hasSGPR102_SGPR103();
6653
6654 return true;
6655 }
6656
6657 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6658 StringRef Mnemonic,
6659 OperandMode Mode) {
6660 ParseStatus Res = parseVOPD(Operands);
6661 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6662 return Res;
6663
6664 // Try to parse with a custom parser
6665 Res = MatchOperandParserImpl(Operands, Mnemonic);
6666
6667 // If we successfully parsed the operand or if there was an error parsing,
6668 // we are done.
6669 //
6670 // If we are parsing after we reach EndOfStatement then this means we
6671 // are appending default values to the Operands list. This is only done
6672 // by a custom parser, so we shouldn't continue on to the generic parsing.
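// Otherwise fall through to the bracketed NSA register-list handling and the
// generic register-or-immediate parsing below.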
6673 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement)) 6674 return Res; 6675 6676 SMLoc RBraceLoc; 6677 SMLoc LBraceLoc = getLoc(); 6678 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 6679 unsigned Prefix = Operands.size(); 6680 6681 for (;;) { 6682 auto Loc = getLoc(); 6683 Res = parseReg(Operands); 6684 if (Res.isNoMatch()) 6685 Error(Loc, "expected a register"); 6686 if (!Res.isSuccess()) 6687 return ParseStatus::Failure; 6688 6689 RBraceLoc = getLoc(); 6690 if (trySkipToken(AsmToken::RBrac)) 6691 break; 6692 6693 if (!skipToken(AsmToken::Comma, 6694 "expected a comma or a closing square bracket")) 6695 return ParseStatus::Failure; 6696 } 6697 6698 if (Operands.size() - Prefix > 1) { 6699 Operands.insert(Operands.begin() + Prefix, 6700 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 6701 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 6702 } 6703 6704 return ParseStatus::Success; 6705 } 6706 6707 return parseRegOrImm(Operands); 6708 } 6709 6710 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 6711 // Clear any forced encodings from the previous instruction. 6712 setForcedEncodingSize(0); 6713 setForcedDPP(false); 6714 setForcedSDWA(false); 6715 6716 if (Name.consume_back("_e64_dpp")) { 6717 setForcedDPP(true); 6718 setForcedEncodingSize(64); 6719 return Name; 6720 } 6721 if (Name.consume_back("_e64")) { 6722 setForcedEncodingSize(64); 6723 return Name; 6724 } 6725 if (Name.consume_back("_e32")) { 6726 setForcedEncodingSize(32); 6727 return Name; 6728 } 6729 if (Name.consume_back("_dpp")) { 6730 setForcedDPP(true); 6731 return Name; 6732 } 6733 if (Name.consume_back("_sdwa")) { 6734 setForcedSDWA(true); 6735 return Name; 6736 } 6737 return Name; 6738 } 6739 6740 static void applyMnemonicAliases(StringRef &Mnemonic, 6741 const FeatureBitset &Features, 6742 unsigned VariantID); 6743 6744 bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info, 6745 StringRef Name, SMLoc NameLoc, 6746 OperandVector &Operands) { 6747 // Add the instruction mnemonic 6748 Name = parseMnemonicSuffix(Name); 6749 6750 // If the target architecture uses MnemonicAlias, call it here to parse 6751 // operands correctly. 6752 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 6753 6754 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 6755 6756 bool IsMIMG = Name.starts_with("image_"); 6757 6758 while (!trySkipToken(AsmToken::EndOfStatement)) { 6759 OperandMode Mode = OperandMode_Default; 6760 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 6761 Mode = OperandMode_NSA; 6762 ParseStatus Res = parseOperand(Operands, Name, Mode); 6763 6764 if (!Res.isSuccess()) { 6765 checkUnsupportedInstruction(Name, NameLoc); 6766 if (!Parser.hasPendingError()) { 6767 // FIXME: use real operand location rather than the current location. 6768 StringRef Msg = Res.isFailure() ? "failed parsing operand." 6769 : "not a valid operand."; 6770 Error(getLoc(), Msg); 6771 } 6772 while (!trySkipToken(AsmToken::EndOfStatement)) { 6773 lex(); 6774 } 6775 return true; 6776 } 6777 6778 // Eat the comma or space if there is one. 
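// (The comma is optional; whitespace alone also separates operands.)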
6779 trySkipToken(AsmToken::Comma); 6780 } 6781 6782 return false; 6783 } 6784 6785 //===----------------------------------------------------------------------===// 6786 // Utility functions 6787 //===----------------------------------------------------------------------===// 6788 6789 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name, 6790 OperandVector &Operands) { 6791 SMLoc S = getLoc(); 6792 if (!trySkipId(Name)) 6793 return ParseStatus::NoMatch; 6794 6795 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S)); 6796 return ParseStatus::Success; 6797 } 6798 6799 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, 6800 int64_t &IntVal) { 6801 6802 if (!trySkipId(Prefix, AsmToken::Colon)) 6803 return ParseStatus::NoMatch; 6804 6805 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure; 6806 } 6807 6808 ParseStatus AMDGPUAsmParser::parseIntWithPrefix( 6809 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6810 std::function<bool(int64_t &)> ConvertResult) { 6811 SMLoc S = getLoc(); 6812 int64_t Value = 0; 6813 6814 ParseStatus Res = parseIntWithPrefix(Prefix, Value); 6815 if (!Res.isSuccess()) 6816 return Res; 6817 6818 if (ConvertResult && !ConvertResult(Value)) { 6819 Error(S, "invalid " + StringRef(Prefix) + " value."); 6820 } 6821 6822 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 6823 return ParseStatus::Success; 6824 } 6825 6826 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix( 6827 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6828 bool (*ConvertResult)(int64_t &)) { 6829 SMLoc S = getLoc(); 6830 if (!trySkipId(Prefix, AsmToken::Colon)) 6831 return ParseStatus::NoMatch; 6832 6833 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 6834 return ParseStatus::Failure; 6835 6836 unsigned Val = 0; 6837 const unsigned MaxSize = 4; 6838 6839 // FIXME: How to verify the number of elements matches the number of src 6840 // operands? 
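// Accepts up to MaxSize elements, each 0 or 1; element I sets bit I of Val.
// E.g. (illustrative) op_sel:[1,0,1] yields Val = 0b101.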
6841 for (int I = 0; ; ++I) { 6842 int64_t Op; 6843 SMLoc Loc = getLoc(); 6844 if (!parseExpr(Op)) 6845 return ParseStatus::Failure; 6846 6847 if (Op != 0 && Op != 1) 6848 return Error(Loc, "invalid " + StringRef(Prefix) + " value."); 6849 6850 Val |= (Op << I); 6851 6852 if (trySkipToken(AsmToken::RBrac)) 6853 break; 6854 6855 if (I + 1 == MaxSize) 6856 return Error(getLoc(), "expected a closing square bracket"); 6857 6858 if (!skipToken(AsmToken::Comma, "expected a comma")) 6859 return ParseStatus::Failure; 6860 } 6861 6862 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 6863 return ParseStatus::Success; 6864 } 6865 6866 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name, 6867 OperandVector &Operands, 6868 AMDGPUOperand::ImmTy ImmTy) { 6869 int64_t Bit; 6870 SMLoc S = getLoc(); 6871 6872 if (trySkipId(Name)) { 6873 Bit = 1; 6874 } else if (trySkipId("no", Name)) { 6875 Bit = 0; 6876 } else { 6877 return ParseStatus::NoMatch; 6878 } 6879 6880 if (Name == "r128" && !hasMIMG_R128()) 6881 return Error(S, "r128 modifier is not supported on this GPU"); 6882 if (Name == "a16" && !hasA16()) 6883 return Error(S, "a16 modifier is not supported on this GPU"); 6884 6885 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 6886 ImmTy = AMDGPUOperand::ImmTyR128A16; 6887 6888 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 6889 return ParseStatus::Success; 6890 } 6891 6892 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, 6893 bool &Disabling) const { 6894 Disabling = Id.consume_front("no"); 6895 6896 if (isGFX940() && !Mnemo.starts_with("s_")) { 6897 return StringSwitch<unsigned>(Id) 6898 .Case("nt", AMDGPU::CPol::NT) 6899 .Case("sc0", AMDGPU::CPol::SC0) 6900 .Case("sc1", AMDGPU::CPol::SC1) 6901 .Default(0); 6902 } 6903 6904 return StringSwitch<unsigned>(Id) 6905 .Case("dlc", AMDGPU::CPol::DLC) 6906 .Case("glc", AMDGPU::CPol::GLC) 6907 .Case("scc", AMDGPU::CPol::SCC) 6908 .Case("slc", AMDGPU::CPol::SLC) 6909 .Default(0); 6910 } 6911 6912 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 6913 if (isGFX12Plus()) { 6914 SMLoc StringLoc = getLoc(); 6915 6916 int64_t CPolVal = 0; 6917 ParseStatus ResTH = ParseStatus::NoMatch; 6918 ParseStatus ResScope = ParseStatus::NoMatch; 6919 6920 for (;;) { 6921 if (ResTH.isNoMatch()) { 6922 int64_t TH; 6923 ResTH = parseTH(Operands, TH); 6924 if (ResTH.isFailure()) 6925 return ResTH; 6926 if (ResTH.isSuccess()) { 6927 CPolVal |= TH; 6928 continue; 6929 } 6930 } 6931 6932 if (ResScope.isNoMatch()) { 6933 int64_t Scope; 6934 ResScope = parseScope(Operands, Scope); 6935 if (ResScope.isFailure()) 6936 return ResScope; 6937 if (ResScope.isSuccess()) { 6938 CPolVal |= Scope; 6939 continue; 6940 } 6941 } 6942 6943 break; 6944 } 6945 6946 if (ResTH.isNoMatch() && ResScope.isNoMatch()) 6947 return ParseStatus::NoMatch; 6948 6949 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc, 6950 AMDGPUOperand::ImmTyCPol)); 6951 return ParseStatus::Success; 6952 } 6953 6954 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 6955 SMLoc OpLoc = getLoc(); 6956 unsigned Enabled = 0, Seen = 0; 6957 for (;;) { 6958 SMLoc S = getLoc(); 6959 bool Disabling; 6960 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling); 6961 if (!CPol) 6962 break; 6963 6964 lex(); 6965 6966 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) 6967 return Error(S, "dlc modifier is not supported on this GPU"); 6968 6969 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) 6970 return Error(S, "scc modifier is not supported 
on this GPU"); 6971 6972 if (Seen & CPol) 6973 return Error(S, "duplicate cache policy modifier"); 6974 6975 if (!Disabling) 6976 Enabled |= CPol; 6977 6978 Seen |= CPol; 6979 } 6980 6981 if (!Seen) 6982 return ParseStatus::NoMatch; 6983 6984 Operands.push_back( 6985 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol)); 6986 return ParseStatus::Success; 6987 } 6988 6989 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands, 6990 int64_t &Scope) { 6991 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE, 6992 CPol::SCOPE_DEV, CPol::SCOPE_SYS}; 6993 6994 ParseStatus Res = parseStringOrIntWithPrefix( 6995 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"}, 6996 Scope); 6997 6998 if (Res.isSuccess()) 6999 Scope = Scopes[Scope]; 7000 7001 return Res; 7002 } 7003 7004 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { 7005 TH = AMDGPU::CPol::TH_RT; // default 7006 7007 StringRef Value; 7008 SMLoc StringLoc; 7009 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc); 7010 if (!Res.isSuccess()) 7011 return Res; 7012 7013 if (Value == "TH_DEFAULT") 7014 TH = AMDGPU::CPol::TH_RT; 7015 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" || 7016 Value == "TH_LOAD_NT_WB") { 7017 return Error(StringLoc, "invalid th value"); 7018 } else if (Value.consume_front("TH_ATOMIC_")) { 7019 TH = AMDGPU::CPol::TH_TYPE_ATOMIC; 7020 } else if (Value.consume_front("TH_LOAD_")) { 7021 TH = AMDGPU::CPol::TH_TYPE_LOAD; 7022 } else if (Value.consume_front("TH_STORE_")) { 7023 TH = AMDGPU::CPol::TH_TYPE_STORE; 7024 } else { 7025 return Error(StringLoc, "invalid th value"); 7026 } 7027 7028 if (Value == "BYPASS") 7029 TH |= AMDGPU::CPol::TH_REAL_BYPASS; 7030 7031 if (TH != 0) { 7032 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC) 7033 TH |= StringSwitch<int64_t>(Value) 7034 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 7035 .Case("RT", AMDGPU::CPol::TH_RT) 7036 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 7037 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT) 7038 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT | 7039 AMDGPU::CPol::TH_ATOMIC_RETURN) 7040 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE) 7041 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE | 7042 AMDGPU::CPol::TH_ATOMIC_NT) 7043 .Default(0xffffffff); 7044 else 7045 TH |= StringSwitch<int64_t>(Value) 7046 .Case("RT", AMDGPU::CPol::TH_RT) 7047 .Case("NT", AMDGPU::CPol::TH_NT) 7048 .Case("HT", AMDGPU::CPol::TH_HT) 7049 .Case("LU", AMDGPU::CPol::TH_LU) 7050 .Case("WB", AMDGPU::CPol::TH_WB) 7051 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT) 7052 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT) 7053 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT) 7054 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB) 7055 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS) 7056 .Default(0xffffffff); 7057 } 7058 7059 if (TH == 0xffffffff) 7060 return Error(StringLoc, "invalid th value"); 7061 7062 return ParseStatus::Success; 7063 } 7064 7065 static void 7066 addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, 7067 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, 7068 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0, 7069 std::optional<unsigned> InsertAt = std::nullopt) { 7070 auto i = OptionalIdx.find(ImmT); 7071 if (i != OptionalIdx.end()) { 7072 unsigned Idx = i->second; 7073 const AMDGPUOperand &Op = 7074 static_cast<const AMDGPUOperand &>(*Operands[Idx]); 7075 if (InsertAt) 7076 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm())); 7077 else 7078 Op.addImmOperands(Inst, 1); 7079 } else { 7080 
if (InsertAt.has_value()) 7081 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default)); 7082 else 7083 Inst.addOperand(MCOperand::createImm(Default)); 7084 } 7085 } 7086 7087 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 7088 StringRef &Value, 7089 SMLoc &StringLoc) { 7090 if (!trySkipId(Prefix, AsmToken::Colon)) 7091 return ParseStatus::NoMatch; 7092 7093 StringLoc = getLoc(); 7094 return parseId(Value, "expected an identifier") ? ParseStatus::Success 7095 : ParseStatus::Failure; 7096 } 7097 7098 ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix( 7099 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids, 7100 int64_t &IntVal) { 7101 if (!trySkipId(Name, AsmToken::Colon)) 7102 return ParseStatus::NoMatch; 7103 7104 SMLoc StringLoc = getLoc(); 7105 7106 StringRef Value; 7107 if (isToken(AsmToken::Identifier)) { 7108 Value = getTokenStr(); 7109 lex(); 7110 7111 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal) 7112 if (Value == Ids[IntVal]) 7113 break; 7114 } else if (!parseExpr(IntVal)) 7115 return ParseStatus::Failure; 7116 7117 if (IntVal < 0 || IntVal >= (int64_t)Ids.size()) 7118 return Error(StringLoc, "invalid " + Twine(Name) + " value"); 7119 7120 return ParseStatus::Success; 7121 } 7122 7123 ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix( 7124 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids, 7125 AMDGPUOperand::ImmTy Type) { 7126 SMLoc S = getLoc(); 7127 int64_t IntVal; 7128 7129 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal); 7130 if (Res.isSuccess()) 7131 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type)); 7132 7133 return Res; 7134 } 7135 7136 //===----------------------------------------------------------------------===// 7137 // MTBUF format 7138 //===----------------------------------------------------------------------===// 7139 7140 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 7141 int64_t MaxVal, 7142 int64_t &Fmt) { 7143 int64_t Val; 7144 SMLoc Loc = getLoc(); 7145 7146 auto Res = parseIntWithPrefix(Pref, Val); 7147 if (Res.isFailure()) 7148 return false; 7149 if (Res.isNoMatch()) 7150 return true; 7151 7152 if (Val < 0 || Val > MaxVal) { 7153 Error(Loc, Twine("out of range ", StringRef(Pref))); 7154 return false; 7155 } 7156 7157 Fmt = Val; 7158 return true; 7159 } 7160 7161 ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands, 7162 AMDGPUOperand::ImmTy ImmTy) { 7163 const char *Pref = "index_key"; 7164 int64_t ImmVal = 0; 7165 SMLoc Loc = getLoc(); 7166 auto Res = parseIntWithPrefix(Pref, ImmVal); 7167 if (!Res.isSuccess()) 7168 return Res; 7169 7170 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit || 7171 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) && 7172 (ImmVal < 0 || ImmVal > 1)) 7173 return Error(Loc, Twine("out of range ", StringRef(Pref))); 7174 7175 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3)) 7176 return Error(Loc, Twine("out of range ", StringRef(Pref))); 7177 7178 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy)); 7179 return ParseStatus::Success; 7180 } 7181 7182 ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) { 7183 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit); 7184 } 7185 7186 ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) { 7187 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit); 7188 } 7189 7190 ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector 
&Operands) { 7191 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit); 7192 } 7193 7194 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 7195 // values to live in a joint format operand in the MCInst encoding. 7196 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 7197 using namespace llvm::AMDGPU::MTBUFFormat; 7198 7199 int64_t Dfmt = DFMT_UNDEF; 7200 int64_t Nfmt = NFMT_UNDEF; 7201 7202 // dfmt and nfmt can appear in either order, and each is optional. 7203 for (int I = 0; I < 2; ++I) { 7204 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 7205 return ParseStatus::Failure; 7206 7207 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) 7208 return ParseStatus::Failure; 7209 7210 // Skip optional comma between dfmt/nfmt 7211 // but guard against 2 commas following each other. 7212 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 7213 !peekToken().is(AsmToken::Comma)) { 7214 trySkipToken(AsmToken::Comma); 7215 } 7216 } 7217 7218 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 7219 return ParseStatus::NoMatch; 7220 7221 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 7222 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 7223 7224 Format = encodeDfmtNfmt(Dfmt, Nfmt); 7225 return ParseStatus::Success; 7226 } 7227 7228 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) { 7229 using namespace llvm::AMDGPU::MTBUFFormat; 7230 7231 int64_t Fmt = UFMT_UNDEF; 7232 7233 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 7234 return ParseStatus::Failure; 7235 7236 if (Fmt == UFMT_UNDEF) 7237 return ParseStatus::NoMatch; 7238 7239 Format = Fmt; 7240 return ParseStatus::Success; 7241 } 7242 7243 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 7244 int64_t &Nfmt, 7245 StringRef FormatStr, 7246 SMLoc Loc) { 7247 using namespace llvm::AMDGPU::MTBUFFormat; 7248 int64_t Format; 7249 7250 Format = getDfmt(FormatStr); 7251 if (Format != DFMT_UNDEF) { 7252 Dfmt = Format; 7253 return true; 7254 } 7255 7256 Format = getNfmt(FormatStr, getSTI()); 7257 if (Format != NFMT_UNDEF) { 7258 Nfmt = Format; 7259 return true; 7260 } 7261 7262 Error(Loc, "unsupported format"); 7263 return false; 7264 } 7265 7266 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 7267 SMLoc FormatLoc, 7268 int64_t &Format) { 7269 using namespace llvm::AMDGPU::MTBUFFormat; 7270 7271 int64_t Dfmt = DFMT_UNDEF; 7272 int64_t Nfmt = NFMT_UNDEF; 7273 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 7274 return ParseStatus::Failure; 7275 7276 if (trySkipToken(AsmToken::Comma)) { 7277 StringRef Str; 7278 SMLoc Loc = getLoc(); 7279 if (!parseId(Str, "expected a format string") || 7280 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) 7281 return ParseStatus::Failure; 7282 if (Dfmt == DFMT_UNDEF) 7283 return Error(Loc, "duplicate numeric format"); 7284 if (Nfmt == NFMT_UNDEF) 7285 return Error(Loc, "duplicate data format"); 7286 } 7287 7288 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 7289 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 7290 7291 if (isGFX10Plus()) { 7292 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 7293 if (Ufmt == UFMT_UNDEF) 7294 return Error(FormatLoc, "unsupported format"); 7295 Format = Ufmt; 7296 } else { 7297 Format = encodeDfmtNfmt(Dfmt, Nfmt); 7298 } 7299 7300 return ParseStatus::Success; 7301 } 7302 7303 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 7304 SMLoc Loc, 7305 int64_t &Format) { 7306 using namespace llvm::AMDGPU::MTBUFFormat; 7307 7308 auto Id = getUnifiedFormat(FormatStr, getSTI()); 7309 if (Id == UFMT_UNDEF) 7310 return ParseStatus::NoMatch; 7311 7312 if (!isGFX10Plus()) 7313 return Error(Loc, "unified format is not supported on this GPU"); 7314 7315 Format = Id; 7316 return ParseStatus::Success; 7317 } 7318 7319 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 7320 using namespace llvm::AMDGPU::MTBUFFormat; 7321 SMLoc Loc = getLoc(); 7322 7323 if (!parseExpr(Format)) 7324 return ParseStatus::Failure; 7325 if (!isValidFormatEncoding(Format, getSTI())) 7326 return Error(Loc, "out of range format"); 7327 7328 return ParseStatus::Success; 7329 } 7330 7331 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 7332 using namespace llvm::AMDGPU::MTBUFFormat; 7333 7334 if (!trySkipId("format", AsmToken::Colon)) 7335 return ParseStatus::NoMatch; 7336 7337 if (trySkipToken(AsmToken::LBrac)) { 7338 StringRef FormatStr; 7339 SMLoc Loc = getLoc(); 7340 if (!parseId(FormatStr, "expected a format string")) 7341 return ParseStatus::Failure; 7342 7343 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 7344 if (Res.isNoMatch()) 7345 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 7346 if (!Res.isSuccess()) 7347 return Res; 7348 7349 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7350 return ParseStatus::Failure; 7351 7352 return ParseStatus::Success; 7353 } 7354 7355 return parseNumericFormat(Format); 7356 } 7357 7358 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 7359 using namespace llvm::AMDGPU::MTBUFFormat; 7360 7361 int64_t Format = getDefaultFormatEncoding(getSTI()); 7362 ParseStatus Res; 7363 SMLoc Loc = getLoc(); 7364 7365 // Parse legacy format syntax. 7366 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 7367 if (Res.isFailure()) 7368 return Res; 7369 7370 bool FormatFound = Res.isSuccess(); 7371 7372 Operands.push_back( 7373 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 7374 7375 if (FormatFound) 7376 trySkipToken(AsmToken::Comma); 7377 7378 if (isToken(AsmToken::EndOfStatement)) { 7379 // We are expecting an soffset operand, 7380 // but let matcher handle the error. 7381 return ParseStatus::Success; 7382 } 7383 7384 // Parse soffset. 
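// Illustrative placements of the format modifier relative to soffset
// (operand values are arbitrary examples, not from this file):
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:1, nfmt:2, 0            // legacy, pre-GFX10
//   tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]  // GFX10+, after soffset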
7385 Res = parseRegOrImm(Operands); 7386 if (!Res.isSuccess()) 7387 return Res; 7388 7389 trySkipToken(AsmToken::Comma); 7390 7391 if (!FormatFound) { 7392 Res = parseSymbolicOrNumericFormat(Format); 7393 if (Res.isFailure()) 7394 return Res; 7395 if (Res.isSuccess()) { 7396 auto Size = Operands.size(); 7397 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 7398 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 7399 Op.setImm(Format); 7400 } 7401 return ParseStatus::Success; 7402 } 7403 7404 if (isId("format") && peekToken().is(AsmToken::Colon)) 7405 return Error(getLoc(), "duplicate format"); 7406 return ParseStatus::Success; 7407 } 7408 7409 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) { 7410 ParseStatus Res = 7411 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); 7412 if (Res.isNoMatch()) { 7413 Res = parseIntWithPrefix("inst_offset", Operands, 7414 AMDGPUOperand::ImmTyInstOffset); 7415 } 7416 return Res; 7417 } 7418 7419 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) { 7420 ParseStatus Res = 7421 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16); 7422 if (Res.isNoMatch()) 7423 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16); 7424 return Res; 7425 } 7426 7427 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) { 7428 ParseStatus Res = 7429 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP); 7430 if (Res.isNoMatch()) { 7431 Res = 7432 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP); 7433 } 7434 return Res; 7435 } 7436 7437 //===----------------------------------------------------------------------===// 7438 // Exp 7439 //===----------------------------------------------------------------------===// 7440 7441 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 7442 OptionalImmIndexMap OptionalIdx; 7443 7444 unsigned OperandIdx[4]; 7445 unsigned EnMask = 0; 7446 int SrcIdx = 0; 7447 7448 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7449 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7450 7451 // Add the register arguments 7452 if (Op.isReg()) { 7453 assert(SrcIdx < 4); 7454 OperandIdx[SrcIdx] = Inst.size(); 7455 Op.addRegOperands(Inst, 1); 7456 ++SrcIdx; 7457 continue; 7458 } 7459 7460 if (Op.isOff()) { 7461 assert(SrcIdx < 4); 7462 OperandIdx[SrcIdx] = Inst.size(); 7463 Inst.addOperand(MCOperand::createReg(MCRegister())); 7464 ++SrcIdx; 7465 continue; 7466 } 7467 7468 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 7469 Op.addImmOperands(Inst, 1); 7470 continue; 7471 } 7472 7473 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 7474 continue; 7475 7476 // Handle optional arguments 7477 OptionalIdx[Op.getImmTy()] = i; 7478 } 7479 7480 assert(SrcIdx == 4); 7481 7482 bool Compr = false; 7483 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 7484 Compr = true; 7485 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 7486 Inst.getOperand(OperandIdx[2]).setReg(MCRegister()); 7487 Inst.getOperand(OperandIdx[3]).setReg(MCRegister()); 7488 } 7489 7490 for (auto i = 0; i < SrcIdx; ++i) { 7491 if (Inst.getOperand(OperandIdx[i]).getReg()) { 7492 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 7493 } 7494 } 7495 7496 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 7497 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 7498 7499 Inst.addOperand(MCOperand::createImm(EnMask)); 7500 } 7501 7502 //===----------------------------------------------------------------------===// 7503 // s_waitcnt 7504 //===----------------------------------------------------------------------===// 7505 7506 static bool 7507 encodeCnt( 7508 const AMDGPU::IsaVersion ISA, 7509 int64_t &IntVal, 7510 int64_t CntVal, 7511 bool Saturate, 7512 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 7513 unsigned (*decode)(const IsaVersion &Version, unsigned)) 7514 { 7515 bool Failed = false; 7516 7517 IntVal = encode(ISA, IntVal, CntVal); 7518 if (CntVal != decode(ISA, IntVal)) { 7519 if (Saturate) { 7520 IntVal = encode(ISA, IntVal, -1); 7521 } else { 7522 Failed = true; 7523 } 7524 } 7525 return Failed; 7526 } 7527 7528 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 7529 7530 SMLoc CntLoc = getLoc(); 7531 StringRef CntName = getTokenStr(); 7532 7533 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 7534 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 7535 return false; 7536 7537 int64_t CntVal; 7538 SMLoc ValLoc = getLoc(); 7539 if (!parseExpr(CntVal)) 7540 return false; 7541 7542 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 7543 7544 bool Failed = true; 7545 bool Sat = CntName.ends_with("_sat"); 7546 7547 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 7548 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 7549 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 7550 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 7551 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 7552 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 7553 } else { 7554 Error(CntLoc, "invalid counter name " + CntName); 7555 return false; 7556 } 7557 7558 if (Failed) { 7559 Error(ValLoc, "too large value for " + CntName); 7560 return false; 7561 } 7562 7563 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 7564 return false; 7565 7566 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 7567 if (isToken(AsmToken::EndOfStatement)) { 7568 Error(getLoc(), "expected a counter name"); 7569 return false; 7570 } 7571 } 7572 7573 return true; 7574 } 7575 7576 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) { 7577 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 7578 int64_t Waitcnt = getWaitcntBitMask(ISA); 7579 SMLoc S = getLoc(); 7580 7581 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 7582 while (!isToken(AsmToken::EndOfStatement)) { 7583 if (!parseCnt(Waitcnt)) 7584 return ParseStatus::Failure; 7585 } 7586 } else { 7587 if (!parseExpr(Waitcnt)) 7588 return ParseStatus::Failure; 7589 } 7590 7591 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 7592 return ParseStatus::Success; 7593 } 7594 7595 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 7596 SMLoc FieldLoc = getLoc(); 7597 StringRef FieldName = getTokenStr(); 7598 if (!skipToken(AsmToken::Identifier, "expected a field name") || 7599 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 7600 return false; 7601 7602 SMLoc ValueLoc = getLoc(); 7603 StringRef ValueName = getTokenStr(); 7604 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 7605 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 7606 return false; 7607 7608 unsigned Shift; 7609 if (FieldName == "instid0") { 7610 Shift = 0; 7611 } else if (FieldName == "instskip") { 7612 Shift = 4; 7613 } else if (FieldName == "instid1") { 7614 Shift = 7; 7615 } else { 7616 Error(FieldLoc, "invalid field name " + FieldName); 7617 return false; 7618 } 7619 7620 int Value; 7621 if (Shift == 4) { 7622 // Parse values for instskip. 7623 Value = StringSwitch<int>(ValueName) 7624 .Case("SAME", 0) 7625 .Case("NEXT", 1) 7626 .Case("SKIP_1", 2) 7627 .Case("SKIP_2", 3) 7628 .Case("SKIP_3", 4) 7629 .Case("SKIP_4", 5) 7630 .Default(-1); 7631 } else { 7632 // Parse values for instid0 and instid1. 7633 Value = StringSwitch<int>(ValueName) 7634 .Case("NO_DEP", 0) 7635 .Case("VALU_DEP_1", 1) 7636 .Case("VALU_DEP_2", 2) 7637 .Case("VALU_DEP_3", 3) 7638 .Case("VALU_DEP_4", 4) 7639 .Case("TRANS32_DEP_1", 5) 7640 .Case("TRANS32_DEP_2", 6) 7641 .Case("TRANS32_DEP_3", 7) 7642 .Case("FMA_ACCUM_CYCLE_1", 8) 7643 .Case("SALU_CYCLE_1", 9) 7644 .Case("SALU_CYCLE_2", 10) 7645 .Case("SALU_CYCLE_3", 11) 7646 .Default(-1); 7647 } 7648 if (Value < 0) { 7649 Error(ValueLoc, "invalid value name " + ValueName); 7650 return false; 7651 } 7652 7653 Delay |= Value << Shift; 7654 return true; 7655 } 7656 7657 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) { 7658 int64_t Delay = 0; 7659 SMLoc S = getLoc(); 7660 7661 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 7662 do { 7663 if (!parseDelay(Delay)) 7664 return ParseStatus::Failure; 7665 } while (trySkipToken(AsmToken::Pipe)); 7666 } else { 7667 if (!parseExpr(Delay)) 7668 return ParseStatus::Failure; 7669 } 7670 7671 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 7672 return ParseStatus::Success; 7673 } 7674 7675 bool 7676 AMDGPUOperand::isSWaitCnt() const { 7677 return isImm(); 7678 } 7679 7680 bool AMDGPUOperand::isSDelayALU() const { return isImm(); } 7681 7682 //===----------------------------------------------------------------------===// 7683 // DepCtr 7684 //===----------------------------------------------------------------------===// 7685 7686 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 7687 StringRef DepCtrName) { 7688 switch (ErrorId) { 7689 case OPR_ID_UNKNOWN: 7690 Error(Loc, Twine("invalid counter name ", DepCtrName)); 7691 return; 7692 case OPR_ID_UNSUPPORTED: 7693 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 7694 return; 7695 case OPR_ID_DUPLICATE: 7696 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 7697 return; 7698 case OPR_VAL_INVALID: 7699 Error(Loc, Twine("invalid value for ", DepCtrName)); 7700 return; 7701 default: 7702 assert(false); 7703 } 7704 } 7705 7706 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 7707 7708 using namespace llvm::AMDGPU::DepCtr; 7709 7710 SMLoc DepCtrLoc = getLoc(); 7711 StringRef DepCtrName = getTokenStr(); 7712 7713 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 7714 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 7715 return false; 7716 7717 int64_t ExprVal; 7718 if (!parseExpr(ExprVal)) 7719 return false; 7720 7721 unsigned PrevOprMask = UsedOprMask; 7722 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 7723 7724 if (CntVal < 0) { 7725 depCtrError(DepCtrLoc, CntVal, DepCtrName); 7726 return false; 7727 } 7728 7729 if (!skipToken(AsmToken::RParen, "expected 
a closing parenthesis")) 7730 return false; 7731 7732 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 7733 if (isToken(AsmToken::EndOfStatement)) { 7734 Error(getLoc(), "expected a counter name"); 7735 return false; 7736 } 7737 } 7738 7739 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 7740 DepCtr = (DepCtr & ~CntValMask) | CntVal; 7741 return true; 7742 } 7743 7744 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) { 7745 using namespace llvm::AMDGPU::DepCtr; 7746 7747 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 7748 SMLoc Loc = getLoc(); 7749 7750 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 7751 unsigned UsedOprMask = 0; 7752 while (!isToken(AsmToken::EndOfStatement)) { 7753 if (!parseDepCtr(DepCtr, UsedOprMask)) 7754 return ParseStatus::Failure; 7755 } 7756 } else { 7757 if (!parseExpr(DepCtr)) 7758 return ParseStatus::Failure; 7759 } 7760 7761 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 7762 return ParseStatus::Success; 7763 } 7764 7765 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 7766 7767 //===----------------------------------------------------------------------===// 7768 // hwreg 7769 //===----------------------------------------------------------------------===// 7770 7771 ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg, 7772 OperandInfoTy &Offset, 7773 OperandInfoTy &Width) { 7774 using namespace llvm::AMDGPU::Hwreg; 7775 7776 if (!trySkipId("hwreg", AsmToken::LParen)) 7777 return ParseStatus::NoMatch; 7778 7779 // The register may be specified by name or using a numeric code 7780 HwReg.Loc = getLoc(); 7781 if (isToken(AsmToken::Identifier) && 7782 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7783 HwReg.IsSymbolic = true; 7784 lex(); // skip register name 7785 } else if (!parseExpr(HwReg.Val, "a register name")) { 7786 return ParseStatus::Failure; 7787 } 7788 7789 if (trySkipToken(AsmToken::RParen)) 7790 return ParseStatus::Success; 7791 7792 // parse optional params 7793 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 7794 return ParseStatus::Failure; 7795 7796 Offset.Loc = getLoc(); 7797 if (!parseExpr(Offset.Val)) 7798 return ParseStatus::Failure; 7799 7800 if (!skipToken(AsmToken::Comma, "expected a comma")) 7801 return ParseStatus::Failure; 7802 7803 Width.Loc = getLoc(); 7804 if (!parseExpr(Width.Val) || 7805 !skipToken(AsmToken::RParen, "expected a closing parenthesis")) 7806 return ParseStatus::Failure; 7807 7808 return ParseStatus::Success; 7809 } 7810 7811 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 7812 using namespace llvm::AMDGPU::Hwreg; 7813 7814 int64_t ImmVal = 0; 7815 SMLoc Loc = getLoc(); 7816 7817 StructuredOpField HwReg("id", "hardware register", HwregId::Width, 7818 HwregId::Default); 7819 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width, 7820 HwregOffset::Default); 7821 struct : StructuredOpField { 7822 using StructuredOpField::StructuredOpField; 7823 bool validate(AMDGPUAsmParser &Parser) const override { 7824 if (!isUIntN(Width, Val - 1)) 7825 return Error(Parser, "only values from 1 to 32 are legal"); 7826 return true; 7827 } 7828 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default); 7829 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width}); 7830 7831 if (Res.isNoMatch()) 7832 Res = parseHwregFunc(HwReg, Offset, Width); 7833 7834 if (Res.isSuccess()) { 7835 if (!validateStructuredOpFields({&HwReg, 
&Offset, &Width})) 7836 return ParseStatus::Failure; 7837 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val); 7838 } 7839 7840 if (Res.isNoMatch() && 7841 parseExpr(ImmVal, "a hwreg macro, structured immediate")) 7842 Res = ParseStatus::Success; 7843 7844 if (!Res.isSuccess()) 7845 return ParseStatus::Failure; 7846 7847 if (!isUInt<16>(ImmVal)) 7848 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7849 Operands.push_back( 7850 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 7851 return ParseStatus::Success; 7852 } 7853 7854 bool AMDGPUOperand::isHwreg() const { 7855 return isImmTy(ImmTyHwreg); 7856 } 7857 7858 //===----------------------------------------------------------------------===// 7859 // sendmsg 7860 //===----------------------------------------------------------------------===// 7861 7862 bool 7863 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 7864 OperandInfoTy &Op, 7865 OperandInfoTy &Stream) { 7866 using namespace llvm::AMDGPU::SendMsg; 7867 7868 Msg.Loc = getLoc(); 7869 if (isToken(AsmToken::Identifier) && 7870 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7871 Msg.IsSymbolic = true; 7872 lex(); // skip message name 7873 } else if (!parseExpr(Msg.Val, "a message name")) { 7874 return false; 7875 } 7876 7877 if (trySkipToken(AsmToken::Comma)) { 7878 Op.IsDefined = true; 7879 Op.Loc = getLoc(); 7880 if (isToken(AsmToken::Identifier) && 7881 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) != 7882 OPR_ID_UNKNOWN) { 7883 lex(); // skip operation name 7884 } else if (!parseExpr(Op.Val, "an operation name")) { 7885 return false; 7886 } 7887 7888 if (trySkipToken(AsmToken::Comma)) { 7889 Stream.IsDefined = true; 7890 Stream.Loc = getLoc(); 7891 if (!parseExpr(Stream.Val)) 7892 return false; 7893 } 7894 } 7895 7896 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7897 } 7898 7899 bool 7900 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 7901 const OperandInfoTy &Op, 7902 const OperandInfoTy &Stream) { 7903 using namespace llvm::AMDGPU::SendMsg; 7904 7905 // Validation strictness depends on whether message is specified 7906 // in a symbolic or in a numeric form. In the latter case 7907 // only encoding possibility is checked. 
7908 bool Strict = Msg.IsSymbolic; 7909 7910 if (Strict) { 7911 if (Msg.Val == OPR_ID_UNSUPPORTED) { 7912 Error(Msg.Loc, "specified message id is not supported on this GPU"); 7913 return false; 7914 } 7915 } else { 7916 if (!isValidMsgId(Msg.Val, getSTI())) { 7917 Error(Msg.Loc, "invalid message id"); 7918 return false; 7919 } 7920 } 7921 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) { 7922 if (Op.IsDefined) { 7923 Error(Op.Loc, "message does not support operations"); 7924 } else { 7925 Error(Msg.Loc, "missing message operation"); 7926 } 7927 return false; 7928 } 7929 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) { 7930 if (Op.Val == OPR_ID_UNSUPPORTED) 7931 Error(Op.Loc, "specified operation id is not supported on this GPU"); 7932 else 7933 Error(Op.Loc, "invalid operation id"); 7934 return false; 7935 } 7936 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) && 7937 Stream.IsDefined) { 7938 Error(Stream.Loc, "message operation does not support streams"); 7939 return false; 7940 } 7941 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) { 7942 Error(Stream.Loc, "invalid message stream id"); 7943 return false; 7944 } 7945 return true; 7946 } 7947 7948 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) { 7949 using namespace llvm::AMDGPU::SendMsg; 7950 7951 int64_t ImmVal = 0; 7952 SMLoc Loc = getLoc(); 7953 7954 if (trySkipId("sendmsg", AsmToken::LParen)) { 7955 OperandInfoTy Msg(OPR_ID_UNKNOWN); 7956 OperandInfoTy Op(OP_NONE_); 7957 OperandInfoTy Stream(STREAM_ID_NONE_); 7958 if (parseSendMsgBody(Msg, Op, Stream) && 7959 validateSendMsg(Msg, Op, Stream)) { 7960 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val); 7961 } else { 7962 return ParseStatus::Failure; 7963 } 7964 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 7965 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 7966 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7967 } else { 7968 return ParseStatus::Failure; 7969 } 7970 7971 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 7972 return ParseStatus::Success; 7973 } 7974 7975 bool AMDGPUOperand::isSendMsg() const { 7976 return isImmTy(ImmTySendMsg); 7977 } 7978 7979 //===----------------------------------------------------------------------===// 7980 // v_interp 7981 //===----------------------------------------------------------------------===// 7982 7983 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 7984 StringRef Str; 7985 SMLoc S = getLoc(); 7986 7987 if (!parseId(Str)) 7988 return ParseStatus::NoMatch; 7989 7990 int Slot = StringSwitch<int>(Str) 7991 .Case("p10", 0) 7992 .Case("p20", 1) 7993 .Case("p0", 2) 7994 .Default(-1); 7995 7996 if (Slot == -1) 7997 return Error(S, "invalid interpolation slot"); 7998 7999 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 8000 AMDGPUOperand::ImmTyInterpSlot)); 8001 return ParseStatus::Success; 8002 } 8003 8004 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 8005 StringRef Str; 8006 SMLoc S = getLoc(); 8007 8008 if (!parseId(Str)) 8009 return ParseStatus::NoMatch; 8010 8011 if (!Str.starts_with("attr")) 8012 return Error(S, "invalid interpolation attribute"); 8013 8014 StringRef Chan = Str.take_back(2); 8015 int AttrChan = StringSwitch<int>(Chan) 8016 .Case(".x", 0) 8017 .Case(".y", 1) 8018 .Case(".z", 2) 8019 .Case(".w", 3) 8020 .Default(-1); 8021 if (AttrChan == -1) 8022 return Error(S, "invalid or missing interpolation attribute channel"); 8023 8024 
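// Strip the two-character channel suffix and the "attr" prefix so that only
// the attribute number remains, e.g. "attr3.x" -> "3".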
Str = Str.drop_back(2).drop_front(4); 8025 8026 uint8_t Attr; 8027 if (Str.getAsInteger(10, Attr)) 8028 return Error(S, "invalid or missing interpolation attribute number"); 8029 8030 if (Attr > 32) 8031 return Error(S, "out of bounds interpolation attribute number"); 8032 8033 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 8034 8035 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 8036 AMDGPUOperand::ImmTyInterpAttr)); 8037 Operands.push_back(AMDGPUOperand::CreateImm( 8038 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan)); 8039 return ParseStatus::Success; 8040 } 8041 8042 //===----------------------------------------------------------------------===// 8043 // exp 8044 //===----------------------------------------------------------------------===// 8045 8046 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 8047 using namespace llvm::AMDGPU::Exp; 8048 8049 StringRef Str; 8050 SMLoc S = getLoc(); 8051 8052 if (!parseId(Str)) 8053 return ParseStatus::NoMatch; 8054 8055 unsigned Id = getTgtId(Str); 8056 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) 8057 return Error(S, (Id == ET_INVALID) 8058 ? "invalid exp target" 8059 : "exp target is not supported on this GPU"); 8060 8061 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 8062 AMDGPUOperand::ImmTyExpTgt)); 8063 return ParseStatus::Success; 8064 } 8065 8066 //===----------------------------------------------------------------------===// 8067 // parser helpers 8068 //===----------------------------------------------------------------------===// 8069 8070 bool 8071 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 8072 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 8073 } 8074 8075 bool 8076 AMDGPUAsmParser::isId(const StringRef Id) const { 8077 return isId(getToken(), Id); 8078 } 8079 8080 bool 8081 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 8082 return getTokenKind() == Kind; 8083 } 8084 8085 StringRef AMDGPUAsmParser::getId() const { 8086 return isToken(AsmToken::Identifier) ? 
getTokenStr() : StringRef(); 8087 } 8088 8089 bool 8090 AMDGPUAsmParser::trySkipId(const StringRef Id) { 8091 if (isId(Id)) { 8092 lex(); 8093 return true; 8094 } 8095 return false; 8096 } 8097 8098 bool 8099 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 8100 if (isToken(AsmToken::Identifier)) { 8101 StringRef Tok = getTokenStr(); 8102 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) { 8103 lex(); 8104 return true; 8105 } 8106 } 8107 return false; 8108 } 8109 8110 bool 8111 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 8112 if (isId(Id) && peekToken().is(Kind)) { 8113 lex(); 8114 lex(); 8115 return true; 8116 } 8117 return false; 8118 } 8119 8120 bool 8121 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 8122 if (isToken(Kind)) { 8123 lex(); 8124 return true; 8125 } 8126 return false; 8127 } 8128 8129 bool 8130 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 8131 const StringRef ErrMsg) { 8132 if (!trySkipToken(Kind)) { 8133 Error(getLoc(), ErrMsg); 8134 return false; 8135 } 8136 return true; 8137 } 8138 8139 bool 8140 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 8141 SMLoc S = getLoc(); 8142 8143 const MCExpr *Expr; 8144 if (Parser.parseExpression(Expr)) 8145 return false; 8146 8147 if (Expr->evaluateAsAbsolute(Imm)) 8148 return true; 8149 8150 if (Expected.empty()) { 8151 Error(S, "expected absolute expression"); 8152 } else { 8153 Error(S, Twine("expected ", Expected) + 8154 Twine(" or an absolute expression")); 8155 } 8156 return false; 8157 } 8158 8159 bool 8160 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 8161 SMLoc S = getLoc(); 8162 8163 const MCExpr *Expr; 8164 if (Parser.parseExpression(Expr)) 8165 return false; 8166 8167 int64_t IntVal; 8168 if (Expr->evaluateAsAbsolute(IntVal)) { 8169 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 8170 } else { 8171 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 8172 } 8173 return true; 8174 } 8175 8176 bool 8177 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 8178 if (isToken(AsmToken::String)) { 8179 Val = getToken().getStringContents(); 8180 lex(); 8181 return true; 8182 } 8183 Error(getLoc(), ErrMsg); 8184 return false; 8185 } 8186 8187 bool 8188 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 8189 if (isToken(AsmToken::Identifier)) { 8190 Val = getTokenStr(); 8191 lex(); 8192 return true; 8193 } 8194 if (!ErrMsg.empty()) 8195 Error(getLoc(), ErrMsg); 8196 return false; 8197 } 8198 8199 AsmToken 8200 AMDGPUAsmParser::getToken() const { 8201 return Parser.getTok(); 8202 } 8203 8204 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 8205 return isToken(AsmToken::EndOfStatement) 8206 ? 
getToken() 8207 : getLexer().peekTok(ShouldSkipSpace); 8208 } 8209 8210 void 8211 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 8212 auto TokCount = getLexer().peekTokens(Tokens); 8213 8214 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 8215 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 8216 } 8217 8218 AsmToken::TokenKind 8219 AMDGPUAsmParser::getTokenKind() const { 8220 return getLexer().getKind(); 8221 } 8222 8223 SMLoc 8224 AMDGPUAsmParser::getLoc() const { 8225 return getToken().getLoc(); 8226 } 8227 8228 StringRef 8229 AMDGPUAsmParser::getTokenStr() const { 8230 return getToken().getString(); 8231 } 8232 8233 void 8234 AMDGPUAsmParser::lex() { 8235 Parser.Lex(); 8236 } 8237 8238 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const { 8239 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 8240 } 8241 8242 SMLoc 8243 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 8244 const OperandVector &Operands) const { 8245 for (unsigned i = Operands.size() - 1; i > 0; --i) { 8246 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8247 if (Test(Op)) 8248 return Op.getStartLoc(); 8249 } 8250 return getInstLoc(Operands); 8251 } 8252 8253 SMLoc 8254 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 8255 const OperandVector &Operands) const { 8256 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 8257 return getOperandLoc(Test, Operands); 8258 } 8259 8260 SMLoc AMDGPUAsmParser::getRegLoc(MCRegister Reg, 8261 const OperandVector &Operands) const { 8262 auto Test = [=](const AMDGPUOperand& Op) { 8263 return Op.isRegKind() && Op.getReg() == Reg; 8264 }; 8265 return getOperandLoc(Test, Operands); 8266 } 8267 8268 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands, 8269 bool SearchMandatoryLiterals) const { 8270 auto Test = [](const AMDGPUOperand& Op) { 8271 return Op.IsImmKindLiteral() || Op.isExpr(); 8272 }; 8273 SMLoc Loc = getOperandLoc(Test, Operands); 8274 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands)) 8275 Loc = getMandatoryLitLoc(Operands); 8276 return Loc; 8277 } 8278 8279 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const { 8280 auto Test = [](const AMDGPUOperand &Op) { 8281 return Op.IsImmKindMandatoryLiteral(); 8282 }; 8283 return getOperandLoc(Test, Operands); 8284 } 8285 8286 SMLoc 8287 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 8288 auto Test = [](const AMDGPUOperand& Op) { 8289 return Op.isImmKindConst(); 8290 }; 8291 return getOperandLoc(Test, Operands); 8292 } 8293 8294 ParseStatus 8295 AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) { 8296 if (!trySkipToken(AsmToken::LCurly)) 8297 return ParseStatus::NoMatch; 8298 8299 bool First = true; 8300 while (!trySkipToken(AsmToken::RCurly)) { 8301 if (!First && 8302 !skipToken(AsmToken::Comma, "comma or closing brace expected")) 8303 return ParseStatus::Failure; 8304 8305 StringRef Id = getTokenStr(); 8306 SMLoc IdLoc = getLoc(); 8307 if (!skipToken(AsmToken::Identifier, "field name expected") || 8308 !skipToken(AsmToken::Colon, "colon expected")) 8309 return ParseStatus::Failure; 8310 8311 const auto *I = 8312 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; }); 8313 if (I == Fields.end()) 8314 return Error(IdLoc, "unknown field"); 8315 if ((*I)->IsDefined) 8316 return Error(IdLoc, "duplicate field"); 8317 8318 // TODO: Support symbolic values. 
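// For now each field value must be an absolute expression, as in the
// (illustrative) form {id: 6, offset: 0, size: 32} accepted by parseHwreg.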
8319 (*I)->Loc = getLoc(); 8320 if (!parseExpr((*I)->Val)) 8321 return ParseStatus::Failure; 8322 (*I)->IsDefined = true; 8323 8324 First = false; 8325 } 8326 return ParseStatus::Success; 8327 } 8328 8329 bool AMDGPUAsmParser::validateStructuredOpFields( 8330 ArrayRef<const StructuredOpField *> Fields) { 8331 return all_of(Fields, [this](const StructuredOpField *F) { 8332 return F->validate(*this); 8333 }); 8334 } 8335 8336 //===----------------------------------------------------------------------===// 8337 // swizzle 8338 //===----------------------------------------------------------------------===// 8339 8340 LLVM_READNONE 8341 static unsigned 8342 encodeBitmaskPerm(const unsigned AndMask, 8343 const unsigned OrMask, 8344 const unsigned XorMask) { 8345 using namespace llvm::AMDGPU::Swizzle; 8346 8347 return BITMASK_PERM_ENC | 8348 (AndMask << BITMASK_AND_SHIFT) | 8349 (OrMask << BITMASK_OR_SHIFT) | 8350 (XorMask << BITMASK_XOR_SHIFT); 8351 } 8352 8353 bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal, 8354 const unsigned MaxVal, 8355 const Twine &ErrMsg, SMLoc &Loc) { 8356 if (!skipToken(AsmToken::Comma, "expected a comma")) { 8357 return false; 8358 } 8359 Loc = getLoc(); 8360 if (!parseExpr(Op)) { 8361 return false; 8362 } 8363 if (Op < MinVal || Op > MaxVal) { 8364 Error(Loc, ErrMsg); 8365 return false; 8366 } 8367 8368 return true; 8369 } 8370 8371 bool 8372 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 8373 const unsigned MinVal, 8374 const unsigned MaxVal, 8375 const StringRef ErrMsg) { 8376 SMLoc Loc; 8377 for (unsigned i = 0; i < OpNum; ++i) { 8378 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 8379 return false; 8380 } 8381 8382 return true; 8383 } 8384 8385 bool 8386 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 8387 using namespace llvm::AMDGPU::Swizzle; 8388 8389 int64_t Lane[LANE_NUM]; 8390 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 8391 "expected a 2-bit lane id")) { 8392 Imm = QUAD_PERM_ENC; 8393 for (unsigned I = 0; I < LANE_NUM; ++I) { 8394 Imm |= Lane[I] << (LANE_SHIFT * I); 8395 } 8396 return true; 8397 } 8398 return false; 8399 } 8400 8401 bool 8402 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 8403 using namespace llvm::AMDGPU::Swizzle; 8404 8405 SMLoc Loc; 8406 int64_t GroupSize; 8407 int64_t LaneIdx; 8408 8409 if (!parseSwizzleOperand(GroupSize, 8410 2, 32, 8411 "group size must be in the interval [2,32]", 8412 Loc)) { 8413 return false; 8414 } 8415 if (!isPowerOf2_64(GroupSize)) { 8416 Error(Loc, "group size must be a power of two"); 8417 return false; 8418 } 8419 if (parseSwizzleOperand(LaneIdx, 8420 0, GroupSize - 1, 8421 "lane id must be in the interval [0,group size - 1]", 8422 Loc)) { 8423 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 8424 return true; 8425 } 8426 return false; 8427 } 8428 8429 bool 8430 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 8431 using namespace llvm::AMDGPU::Swizzle; 8432 8433 SMLoc Loc; 8434 int64_t GroupSize; 8435 8436 if (!parseSwizzleOperand(GroupSize, 8437 2, 32, 8438 "group size must be in the interval [2,32]", 8439 Loc)) { 8440 return false; 8441 } 8442 if (!isPowerOf2_64(GroupSize)) { 8443 Error(Loc, "group size must be a power of two"); 8444 return false; 8445 } 8446 8447 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 8448 return true; 8449 } 8450 8451 bool 8452 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 8453 using namespace llvm::AMDGPU::Swizzle; 8454 8455 SMLoc Loc; 8456 int64_t 
GroupSize; 8457 8458 if (!parseSwizzleOperand(GroupSize, 8459 1, 16, 8460 "group size must be in the interval [1,16]", 8461 Loc)) { 8462 return false; 8463 } 8464 if (!isPowerOf2_64(GroupSize)) { 8465 Error(Loc, "group size must be a power of two"); 8466 return false; 8467 } 8468 8469 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 8470 return true; 8471 } 8472 8473 bool 8474 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 8475 using namespace llvm::AMDGPU::Swizzle; 8476 8477 if (!skipToken(AsmToken::Comma, "expected a comma")) { 8478 return false; 8479 } 8480 8481 StringRef Ctl; 8482 SMLoc StrLoc = getLoc(); 8483 if (!parseString(Ctl)) { 8484 return false; 8485 } 8486 if (Ctl.size() != BITMASK_WIDTH) { 8487 Error(StrLoc, "expected a 5-character mask"); 8488 return false; 8489 } 8490 8491 unsigned AndMask = 0; 8492 unsigned OrMask = 0; 8493 unsigned XorMask = 0; 8494 8495 for (size_t i = 0; i < Ctl.size(); ++i) { 8496 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 8497 switch(Ctl[i]) { 8498 default: 8499 Error(StrLoc, "invalid mask"); 8500 return false; 8501 case '0': 8502 break; 8503 case '1': 8504 OrMask |= Mask; 8505 break; 8506 case 'p': 8507 AndMask |= Mask; 8508 break; 8509 case 'i': 8510 AndMask |= Mask; 8511 XorMask |= Mask; 8512 break; 8513 } 8514 } 8515 8516 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 8517 return true; 8518 } 8519 8520 bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) { 8521 using namespace llvm::AMDGPU::Swizzle; 8522 8523 if (!AMDGPU::isGFX9Plus(getSTI())) { 8524 Error(getLoc(), "FFT mode swizzle not supported on this GPU"); 8525 return false; 8526 } 8527 8528 int64_t Swizzle; 8529 SMLoc Loc; 8530 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX, 8531 "FFT swizzle must be in the interval [0," + 8532 Twine(FFT_SWIZZLE_MAX) + Twine(']'), 8533 Loc)) 8534 return false; 8535 8536 Imm = FFT_MODE_ENC | Swizzle; 8537 return true; 8538 } 8539 8540 bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) { 8541 using namespace llvm::AMDGPU::Swizzle; 8542 8543 if (!AMDGPU::isGFX9Plus(getSTI())) { 8544 Error(getLoc(), "Rotate mode swizzle not supported on this GPU"); 8545 return false; 8546 } 8547 8548 SMLoc Loc; 8549 int64_t Direction; 8550 8551 if (!parseSwizzleOperand(Direction, 0, 1, 8552 "direction must be 0 (left) or 1 (right)", Loc)) 8553 return false; 8554 8555 int64_t RotateSize; 8556 if (!parseSwizzleOperand( 8557 RotateSize, 0, ROTATE_MAX_SIZE, 8558 "number of threads to rotate must be in the interval [0," + 8559 Twine(ROTATE_MAX_SIZE) + Twine(']'), 8560 Loc)) 8561 return false; 8562 8563 Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) | 8564 (RotateSize << ROTATE_SIZE_SHIFT); 8565 return true; 8566 } 8567 8568 bool 8569 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 8570 8571 SMLoc OffsetLoc = getLoc(); 8572 8573 if (!parseExpr(Imm, "a swizzle macro")) { 8574 return false; 8575 } 8576 if (!isUInt<16>(Imm)) { 8577 Error(OffsetLoc, "expected a 16-bit offset"); 8578 return false; 8579 } 8580 return true; 8581 } 8582 8583 bool 8584 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 8585 using namespace llvm::AMDGPU::Swizzle; 8586 8587 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 8588 8589 SMLoc ModeLoc = getLoc(); 8590 bool Ok = false; 8591 8592 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 8593 Ok = parseSwizzleQuadPerm(Imm); 8594 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 8595 Ok = parseSwizzleBitmaskPerm(Imm); 8596 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 8597 Ok = 
parseSwizzleBroadcast(Imm); 8598 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 8599 Ok = parseSwizzleSwap(Imm); 8600 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 8601 Ok = parseSwizzleReverse(Imm); 8602 } else if (trySkipId(IdSymbolic[ID_FFT])) { 8603 Ok = parseSwizzleFFT(Imm); 8604 } else if (trySkipId(IdSymbolic[ID_ROTATE])) { 8605 Ok = parseSwizzleRotate(Imm); 8606 } else { 8607 Error(ModeLoc, "expected a swizzle mode"); 8608 } 8609 8610 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 8611 } 8612 8613 return false; 8614 } 8615 8616 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) { 8617 SMLoc S = getLoc(); 8618 int64_t Imm = 0; 8619 8620 if (trySkipId("offset")) { 8621 8622 bool Ok = false; 8623 if (skipToken(AsmToken::Colon, "expected a colon")) { 8624 if (trySkipId("swizzle")) { 8625 Ok = parseSwizzleMacro(Imm); 8626 } else { 8627 Ok = parseSwizzleOffset(Imm); 8628 } 8629 } 8630 8631 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 8632 8633 return Ok ? ParseStatus::Success : ParseStatus::Failure; 8634 } 8635 return ParseStatus::NoMatch; 8636 } 8637 8638 bool 8639 AMDGPUOperand::isSwizzle() const { 8640 return isImmTy(ImmTySwizzle); 8641 } 8642 8643 //===----------------------------------------------------------------------===// 8644 // VGPR Index Mode 8645 //===----------------------------------------------------------------------===// 8646 8647 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 8648 8649 using namespace llvm::AMDGPU::VGPRIndexMode; 8650 8651 if (trySkipToken(AsmToken::RParen)) { 8652 return OFF; 8653 } 8654 8655 int64_t Imm = 0; 8656 8657 while (true) { 8658 unsigned Mode = 0; 8659 SMLoc S = getLoc(); 8660 8661 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 8662 if (trySkipId(IdSymbolic[ModeId])) { 8663 Mode = 1 << ModeId; 8664 break; 8665 } 8666 } 8667 8668 if (Mode == 0) { 8669 Error(S, (Imm == 0)? 
8670 "expected a VGPR index mode or a closing parenthesis" : 8671 "expected a VGPR index mode"); 8672 return UNDEF; 8673 } 8674 8675 if (Imm & Mode) { 8676 Error(S, "duplicate VGPR index mode"); 8677 return UNDEF; 8678 } 8679 Imm |= Mode; 8680 8681 if (trySkipToken(AsmToken::RParen)) 8682 break; 8683 if (!skipToken(AsmToken::Comma, 8684 "expected a comma or a closing parenthesis")) 8685 return UNDEF; 8686 } 8687 8688 return Imm; 8689 } 8690 8691 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 8692 8693 using namespace llvm::AMDGPU::VGPRIndexMode; 8694 8695 int64_t Imm = 0; 8696 SMLoc S = getLoc(); 8697 8698 if (trySkipId("gpr_idx", AsmToken::LParen)) { 8699 Imm = parseGPRIdxMacro(); 8700 if (Imm == UNDEF) 8701 return ParseStatus::Failure; 8702 } else { 8703 if (getParser().parseAbsoluteExpression(Imm)) 8704 return ParseStatus::Failure; 8705 if (Imm < 0 || !isUInt<4>(Imm)) 8706 return Error(S, "invalid immediate: only 4-bit values are legal"); 8707 } 8708 8709 Operands.push_back( 8710 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 8711 return ParseStatus::Success; 8712 } 8713 8714 bool AMDGPUOperand::isGPRIdxMode() const { 8715 return isImmTy(ImmTyGprIdxMode); 8716 } 8717 8718 //===----------------------------------------------------------------------===// 8719 // sopp branch targets 8720 //===----------------------------------------------------------------------===// 8721 8722 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) { 8723 8724 // Make sure we are not parsing something 8725 // that looks like a label or an expression but is not. 8726 // This will improve error messages. 8727 if (isRegister() || isModifier()) 8728 return ParseStatus::NoMatch; 8729 8730 if (!parseExpr(Operands)) 8731 return ParseStatus::Failure; 8732 8733 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 8734 assert(Opr.isImm() || Opr.isExpr()); 8735 SMLoc Loc = Opr.getStartLoc(); 8736 8737 // Currently we do not support arbitrary expressions as branch targets. 8738 // Only labels and absolute expressions are accepted. 8739 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 8740 Error(Loc, "expected an absolute expression or a label"); 8741 } else if (Opr.isImm() && !Opr.isS16Imm()) { 8742 Error(Loc, "expected a 16-bit signed jump offset"); 8743 } 8744 8745 return ParseStatus::Success; 8746 } 8747 8748 //===----------------------------------------------------------------------===// 8749 // Boolean holding registers 8750 //===----------------------------------------------------------------------===// 8751 8752 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 8753 return parseReg(Operands); 8754 } 8755 8756 //===----------------------------------------------------------------------===// 8757 // mubuf 8758 //===----------------------------------------------------------------------===// 8759 8760 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 8761 const OperandVector &Operands, 8762 bool IsAtomic) { 8763 OptionalImmIndexMap OptionalIdx; 8764 unsigned FirstOperandIdx = 1; 8765 bool IsAtomicReturn = false; 8766 8767 if (IsAtomic) { 8768 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 8769 SIInstrFlags::IsAtomicRet; 8770 } 8771 8772 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 8773 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8774 8775 // Add the register arguments 8776 if (Op.isReg()) { 8777 Op.addRegOperands(Inst, 1); 8778 // Insert a tied src for atomic return dst. 
8779 // This cannot be postponed as subsequent calls to 8780 // addImmOperands rely on correct number of MC operands. 8781 if (IsAtomicReturn && i == FirstOperandIdx) 8782 Op.addRegOperands(Inst, 1); 8783 continue; 8784 } 8785 8786 // Handle the case where soffset is an immediate 8787 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 8788 Op.addImmOperands(Inst, 1); 8789 continue; 8790 } 8791 8792 // Handle tokens like 'offen' which are sometimes hard-coded into the 8793 // asm string. There are no MCInst operands for these. 8794 if (Op.isToken()) { 8795 continue; 8796 } 8797 assert(Op.isImm()); 8798 8799 // Handle optional arguments 8800 OptionalIdx[Op.getImmTy()] = i; 8801 } 8802 8803 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 8804 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 8805 } 8806 8807 //===----------------------------------------------------------------------===// 8808 // smrd 8809 //===----------------------------------------------------------------------===// 8810 8811 bool AMDGPUOperand::isSMRDOffset8() const { 8812 return isImmLiteral() && isUInt<8>(getImm()); 8813 } 8814 8815 bool AMDGPUOperand::isSMEMOffset() const { 8816 // Offset range is checked later by validator. 8817 return isImmLiteral(); 8818 } 8819 8820 bool AMDGPUOperand::isSMRDLiteralOffset() const { 8821 // 32-bit literals are only supported on CI and we only want to use them 8822 // when the offset is > 8-bits. 8823 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 8824 } 8825 8826 //===----------------------------------------------------------------------===// 8827 // vop3 8828 //===----------------------------------------------------------------------===// 8829 8830 static bool ConvertOmodMul(int64_t &Mul) { 8831 if (Mul != 1 && Mul != 2 && Mul != 4) 8832 return false; 8833 8834 Mul >>= 1; 8835 return true; 8836 } 8837 8838 static bool ConvertOmodDiv(int64_t &Div) { 8839 if (Div == 1) { 8840 Div = 0; 8841 return true; 8842 } 8843 8844 if (Div == 2) { 8845 Div = 3; 8846 return true; 8847 } 8848 8849 return false; 8850 } 8851 8852 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 8853 // This is intentional and ensures compatibility with sp3. 8854 // See bug 35397 for details. 8855 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) { 8856 if (BoundCtrl == 0 || BoundCtrl == 1) { 8857 if (!isGFX11Plus()) 8858 BoundCtrl = 1; 8859 return true; 8860 } 8861 return false; 8862 } 8863 8864 void AMDGPUAsmParser::onBeginOfFile() { 8865 if (!getParser().getStreamer().getTargetStreamer() || 8866 getSTI().getTargetTriple().getArch() == Triple::r600) 8867 return; 8868 8869 if (!getTargetStreamer().getTargetID()) 8870 getTargetStreamer().initializeTargetID(getSTI(), 8871 getSTI().getFeatureString()); 8872 8873 if (isHsaAbi(getSTI())) 8874 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 8875 } 8876 8877 /// Parse AMDGPU specific expressions. 8878 /// 8879 /// expr ::= or(expr, ...) | 8880 /// max(expr, ...) 
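/// e.g. (illustrative): max(sym, or(flags, 3), 16), where each
/// argument is itself a full expression.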
8881 /// 8882 bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { 8883 using AGVK = AMDGPUMCExpr::VariantKind; 8884 8885 if (isToken(AsmToken::Identifier)) { 8886 StringRef TokenId = getTokenStr(); 8887 AGVK VK = StringSwitch<AGVK>(TokenId) 8888 .Case("max", AGVK::AGVK_Max) 8889 .Case("or", AGVK::AGVK_Or) 8890 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs) 8891 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs) 8892 .Case("alignto", AGVK::AGVK_AlignTo) 8893 .Case("occupancy", AGVK::AGVK_Occupancy) 8894 .Default(AGVK::AGVK_None); 8895 8896 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) { 8897 SmallVector<const MCExpr *, 4> Exprs; 8898 uint64_t CommaCount = 0; 8899 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.) 8900 lex(); // Eat '(' 8901 while (true) { 8902 if (trySkipToken(AsmToken::RParen)) { 8903 if (Exprs.empty()) { 8904 Error(getToken().getLoc(), 8905 "empty " + Twine(TokenId) + " expression"); 8906 return true; 8907 } 8908 if (CommaCount + 1 != Exprs.size()) { 8909 Error(getToken().getLoc(), 8910 "mismatch of commas in " + Twine(TokenId) + " expression"); 8911 return true; 8912 } 8913 Res = AMDGPUMCExpr::create(VK, Exprs, getContext()); 8914 return false; 8915 } 8916 const MCExpr *Expr; 8917 if (getParser().parseExpression(Expr, EndLoc)) 8918 return true; 8919 Exprs.push_back(Expr); 8920 bool LastTokenWasComma = trySkipToken(AsmToken::Comma); 8921 if (LastTokenWasComma) 8922 CommaCount++; 8923 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) { 8924 Error(getToken().getLoc(), 8925 "unexpected token in " + Twine(TokenId) + " expression"); 8926 return true; 8927 } 8928 } 8929 } 8930 } 8931 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr); 8932 } 8933 8934 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) { 8935 StringRef Name = getTokenStr(); 8936 if (Name == "mul") { 8937 return parseIntWithPrefix("mul", Operands, 8938 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8939 } 8940 8941 if (Name == "div") { 8942 return parseIntWithPrefix("div", Operands, 8943 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8944 } 8945 8946 return ParseStatus::NoMatch; 8947 } 8948 8949 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to 8950 // the number of src operands present, then copies that bit into src0_modifiers. 
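// E.g. with src0..src2 present, SrcNum is 3 and bit 3 of op_sel carries
// DST_OP_SEL; for a 16-bit destination register (VGPR_16 class) the hi/lo
// half of the register determines the bit instead.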
///
bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  using AGVK = AMDGPUMCExpr::VariantKind;

  if (isToken(AsmToken::Identifier)) {
    StringRef TokenId = getTokenStr();
    AGVK VK = StringSwitch<AGVK>(TokenId)
                  .Case("max", AGVK::AGVK_Max)
                  .Case("or", AGVK::AGVK_Or)
                  .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
                  .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
                  .Case("alignto", AGVK::AGVK_AlignTo)
                  .Case("occupancy", AGVK::AGVK_Occupancy)
                  .Default(AGVK::AGVK_None);

    if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
      SmallVector<const MCExpr *, 4> Exprs;
      uint64_t CommaCount = 0;
      lex(); // Eat the identifier ('or', 'max', 'occupancy', etc.)
      lex(); // Eat '('
      while (true) {
        if (trySkipToken(AsmToken::RParen)) {
          if (Exprs.empty()) {
            Error(getToken().getLoc(),
                  "empty " + Twine(TokenId) + " expression");
            return true;
          }
          if (CommaCount + 1 != Exprs.size()) {
            Error(getToken().getLoc(),
                  "mismatch of commas in " + Twine(TokenId) + " expression");
            return true;
          }
          Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
          return false;
        }
        const MCExpr *Expr;
        if (getParser().parseExpression(Expr, EndLoc))
          return true;
        Exprs.push_back(Expr);
        bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
        if (LastTokenWasComma)
          CommaCount++;
        if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
          Error(getToken().getLoc(),
                "unexpected token in " + Twine(TokenId) + " expression");
          return true;
        }
      }
    }
  }
  return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
}

ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
  StringRef Name = getTokenStr();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return ParseStatus::NoMatch;
}

// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
// the number of src operands present, then copies that bit into
// src0_modifiers.
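// For a two-source instruction, for example, op_sel bit 2 is the DST_OP_SEL
// bit.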
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
  int Opc = Inst.getOpcode();
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx == -1)
    return;

  int SrcNum;
  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
       ++SrcNum)
    ;
  assert(SrcNum > 0);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
  if (DstIdx == -1)
    return;

  const MCOperand &DstOp = Inst.getOperand(DstIdx);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
  if (DstOp.isReg() &&
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
    if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
      ModVal |= SISrcMods::DST_OP_SEL;
  } else {
    if ((OpSel & (1 << SrcNum)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;
  }
  Inst.getOperand(ModIdx).setImm(ModVal);
}

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}

static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  return
      // 1. This operand is an input-modifiers operand.
      Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand.
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class.
      && Desc.operands()[OpNum + 1].RegClass != -1
      // 4. The next register is not tied to any other operand.
      && Desc.getOperandConstraint(OpNum + 1,
                                   MCOI::OperandConstraint::TIED_TO) == -1;
}

void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
               Op.isInterpAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyHigh);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);
}

void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOpSel);

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyWaitEXP);

  if (OpSelIdx == -1)
    return;

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

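  // Fold the op_sel bits into the per-source modifier operands; bit 3 is the
  // destination bit and is carried in src0_modifiers as DST_OP_SEL.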
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;
    if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
        (OpSel & (1 << 3)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;

    Inst.getOperand(ModIdx).setImm(ModVal);
  }
}
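
// Convert parsed operands for scaled MFMA instructions. The MCInst operand
// order differs from the parsed order, so dummy cbsz/blgp and src_modifiers
// operands are materialized first and the real values are patched in from the
// parsed modifiers afterwards (see the placeholder operands below).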
void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  unsigned I = 1;
  int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);

  const MCInstrDesc &Desc = MII.get(Opc);

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
    static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
    int NumOperands = Inst.getNumOperands();
    // The MCInst operand order differs from the parsed operand order.
    // Add dummy cbsz and blgp operands at the corresponding MCInst operand
    // indices so that the scale values are parsed correctly.
    if (NumOperands == CbszOpIdx) {
      Inst.addOperand(MCOperand::createImm(0));
      Inst.addOperand(MCOperand::createImm(0));
    }
    if (isRegOrImmWithInputMods(Desc, NumOperands)) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, 1);
    }
  }

  // Insert CBSZ and BLGP operands for F8F6F4 variants.
  auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
  if (CbszIdx != OptionalIdx.end()) {
    int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
    Inst.getOperand(CbszOpIdx).setImm(CbszVal);
  }

  int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
  if (BlgpIdx != OptionalIdx.end()) {
    int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
    Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
  }

  // Add dummy src_modifiers.
  Inst.addOperand(MCOperand::createImm(0));
  Inst.addOperand(MCOperand::createImm(0));

  // Handle op_sel fields.
  unsigned OpSel = 0;
  auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
  if (OpselIdx != OptionalIdx.end()) {
    OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
                .getImm();
  }

  unsigned OpSelHi = 0;
  auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
  if (OpselHiIdx != OptionalIdx.end()) {
    OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
                  .getImm();
  }

  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers};

  for (unsigned J = 0; J < 2; ++J) {
    unsigned ModVal = 0;
    if (OpSel & (1 << J))
      ModVal |= SISrcMods::OP_SEL_0;
    if (OpSelHi & (1 << J))
      ModVal |= SISrcMods::OP_SEL_1;

    const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    Inst.getOperand(ModIdx).setImm(ModVal);
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, 1);
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
    if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
      Inst.addOperand(Inst.getOperand(0));
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyByteSel);
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
  if (isMAC(Opc)) {
    auto *it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
      Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
    Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
    Inst.addOperand(Inst.getOperand(0));
  }

  // Adding the vdst_in operand is already covered for these DPP instructions
  // in cvtVOP3DPP.
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
    Inst.addOperand(Inst.getOperand(0));
  }

  int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAReuse, 0);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBReuse, 0);

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);

  int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
  if (NegHiIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1)
    NegLo = Inst.getOperand(NegLoIdx).getImm();

  if (NegHiIdx != -1)
    NegHi = Inst.getOperand(NegHiIdx).getImm();

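  // Fold the parsed op_sel / op_sel_hi / neg_lo / neg_hi vectors into the
  // per-source modifier operands, e.g. op_sel:[1,0,0] sets OP_SEL_0 in
  // src0_modifiers.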
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    if (ModIdx == -1)
      continue;

    uint32_t ModVal = 0;

    const MCOperand &SrcOp = Inst.getOperand(OpIdx);
    if (SrcOp.isReg() &&
        getMRI()->getRegClass(AMDGPU::VGPR_16RegClassID)
            .contains(SrcOp.getReg())) {
      bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
      if (VGPRSuffixIsHi)
        ModVal |= SISrcMods::OP_SEL_0;
    } else {
      if ((OpSel & (1 << J)) != 0)
        ModVal |= SISrcMods::OP_SEL_0;
    }

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}

static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
                                  unsigned i, unsigned Opc,
                                  AMDGPU::OpName OpName) {
  if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
    ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
  else
    ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
}
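
// SWMMAC instructions have a dst operand that is also read, so the dst
// register (parsed operand 1) is added a second time as the tied source
// before src2.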
void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
  addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
  addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2

  OptionalImmIndexMap OptIdx;
  for (unsigned i = 5; i < Operands.size(); ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    OptIdx[Op.getImmTy()] = i;
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey8bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey16bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey32bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);

  cvtVOP3P(Inst, Operands, OptIdx);
}

//===----------------------------------------------------------------------===//
// VOPD
//===----------------------------------------------------------------------===//
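
// Parse the "::" separator and the trailing VOPDY mnemonic of a dual-issue
// instruction, e.g. "v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3".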
ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
  if (!hasVOPD(getSTI()))
    return ParseStatus::NoMatch;

  if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
    SMLoc S = getLoc();
    lex();
    lex();
    Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
    SMLoc OpYLoc = getLoc();
    StringRef OpYName;
    if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
      Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
      return ParseStatus::Success;
    }
    return Error(OpYLoc, "expected a VOPDY instruction after ::");
  }
  return ParseStatus::NoMatch;
}

// Create VOPD MCInst operands using parsed assembler operands.
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  auto addOp = [&](uint16_t ParsedOprIdx) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      return;
    }
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      return;
    }
    if (Op.isImm()) {
      Op.addImmOperands(Inst, 1);
      return;
    }
    llvm_unreachable("Unhandled operand type in cvtVOPD");
  };

  const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);

  // MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]

  for (auto CompIdx : VOPD::COMPONENTS) {
    addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
  }

  for (auto CompIdx : VOPD::COMPONENTS) {
    const auto &CInfo = InstInfo[CompIdx];
    auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
    for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
      addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
    if (CInfo.hasSrc2Acc())
      addOp(CInfo.getIndexOfDstInParsedOperands());
  }

  int BitOp3Idx =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    OptionalImmIndexMap OptIdx;
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
    if (Op.isImm())
      OptIdx[Op.getImmTy()] = Operands.size() - 1;

    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool Result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (Result) {
    int64_t Imm = getImm();
    return
        (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
        (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
        (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
        (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
        (Imm == DppCtrl::WAVE_SHL1) ||
        (Imm == DppCtrl::WAVE_ROL1) ||
        (Imm == DppCtrl::WAVE_SHR1) ||
        (Imm == DppCtrl::WAVE_ROR1) ||
        (Imm == DppCtrl::ROW_MIRROR) ||
        (Imm == DppCtrl::ROW_HALF_MIRROR) ||
        (Imm == DppCtrl::BCAST15) ||
        (Imm == DppCtrl::BCAST31) ||
        (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
        (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImmLiteral() && isUInt<16>(getImm());
}

//===----------------------------------------------------------------------===//
// dim
//===----------------------------------------------------------------------===//

bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  DimId.consume_front("SQ_RSRC_IMG_");

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}

ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return ParseStatus::NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return ParseStatus::NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding))
    return Error(Loc, "invalid dim value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return ParseStatus::Success;
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
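
// Parse a dpp8 lane-select list, e.g. "dpp8:[7,6,5,4,3,2,1,0]" selects the
// source lanes in reverse order within each group of eight lanes.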
ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return ParseStatus::NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return ParseStatus::Failure;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return ParseStatus::Failure;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return ParseStatus::Failure;
    if (0 > Sels[i] || 7 < Sels[i])
      return Error(Loc, "expected a 3-bit value");
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return ParseStatus::Failure;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return ParseStatus::Success;
}

bool AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                         const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

int64_t AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]
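  // Each element selects a source lane within the quad, e.g.
  // quad_perm:[1,0,3,2] swaps adjacent lane pairs, and the identity
  // quad_perm:[0,1,2,3] encodes as 0xe4.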
  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}

int64_t AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
                           .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
                           .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
                           .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
                           .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
                           .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
                           .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
                           .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
                           .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
                           .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
                           .Case("row_newbcast",
                                 {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
                           .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return ParseStatus::NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return ParseStatus::Failure;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return ParseStatus::Success;
}

void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                                 bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // MAC instructions are special because they have an 'old' operand which is
  // not tied to dst (but assumed to be). They also have a dummy, unused
  // src2_modifiers operand.
  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
  int Src2ModIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
               Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;

  unsigned I = 1;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
  bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;

  for (unsigned E = Operands.size(); I != E; ++I) {

    if (IsMAC) {
      int NumOperands = Inst.getNumOperands();
      if (OldIdx == NumOperands) {
        // Handle the old operand.
        constexpr int DST_IDX = 0;
        Inst.addOperand(Inst.getOperand(DST_IDX));
      } else if (Src2ModIdx == NumOperands) {
        // Add the unused dummy src2_modifiers.
        Inst.addOperand(MCOperand::createImm(0));
      }
    }

    if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
      Inst.addOperand(Inst.getOperand(0));
    }

    if (IsVOP3CvtSrDpp) {
      if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
        Inst.addOperand(MCOperand::createImm(0));
        Inst.addOperand(MCOperand::createReg(MCRegister()));
      }
    }

    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle tied old or src2 for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments.
    if (IsDPP8 && Op.isDppFI()) {
      Fi = Op.getImm();
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
      assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyByteSel);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);

  if (Desc.TSFlags & SIInstrFlags::VOP3P)
    cvtVOP3P(Inst, Operands, OptionalIdx);
  else if (Desc.TSFlags & SIInstrFlags::VOP3)
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);
  else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOpSel);
  }

  if (IsDPP8) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDPP8);
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
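    // The defaults encode an identity DPP: quad_perm:[0,1,2,3] (0xe4) with
    // all rows and banks enabled (0xf).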
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDppCtrl, 0xe4);
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDppBoundCtrl);

    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
  }
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands,
                             bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle tied old or src2 for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments.
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDppFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments.
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//
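
// The accepted strings map to SdwaSel encodings by position: BYTE_0..BYTE_3
// are 0..3, WORD_0/WORD_1 are 4/5, and DWORD is 6.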
ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
                                          StringRef Prefix,
                                          AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      Operands, Prefix,
      {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
      Type);
}

ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  return parseStringOrIntWithPrefix(
      Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
      AMDGPUOperand::ImmTySDWADstUnused);
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst,
                                   const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst,
                                   const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      }
      if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClamp, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstUnused,
                              DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClamp, 0);

      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWADstUnused,
                            DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClamp, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto *it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
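
// Map custom MatchClassKinds from the generated matcher onto the
// corresponding token / named-bit parsers, e.g. MCK_offen is matched as the
// bare "offen" token.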
ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
                                                unsigned MCK) {
  switch (MCK) {
  case MCK_addr64:
    return parseTokenOp("addr64", Operands);
  case MCK_done:
    return parseTokenOp("done", Operands);
  case MCK_idxen:
    return parseTokenOp("idxen", Operands);
  case MCK_lds:
    return parseTokenOp("lds", Operands);
  case MCK_offen:
    return parseTokenOp("offen", Operands);
  case MCK_off:
    return parseTokenOp("off", Operands);
  case MCK_row_95_en:
    return parseTokenOp("row_en", Operands);
  case MCK_gds:
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  case MCK_tfe:
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  }
  return tryCustomParseOperand(Operands, MCK);
}

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we are given an immediate operand but
  // expect to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand &)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_tfe:
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrc_b32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrc_f32:
    return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but
    // it should also be enabled with 64-bit operands or larger.
    // The following code enables it for SReg_64 and larger operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
  case MCK_SReg_96:
  case MCK_SReg_128:
  case MCK_SReg_256:
  case MCK_SReg_512:
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//
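
// Parse the optional immediate of s_endpgm, e.g. "s_endpgm 3"; when omitted,
// it defaults to 0.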
ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm))
    return Error(S, "expected a 16-bit value");

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// Split Barrier
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }