1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/CodeGen/MachineValueType.h" 25 #include "llvm/MC/MCAsmInfo.h" 26 #include "llvm/MC/MCContext.h" 27 #include "llvm/MC/MCExpr.h" 28 #include "llvm/MC/MCInst.h" 29 #include "llvm/MC/MCInstrDesc.h" 30 #include "llvm/MC/MCParser/MCAsmLexer.h" 31 #include "llvm/MC/MCParser/MCAsmParser.h" 32 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 33 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 34 #include "llvm/MC/MCSymbol.h" 35 #include "llvm/MC/TargetRegistry.h" 36 #include "llvm/Support/AMDGPUMetadata.h" 37 #include "llvm/Support/AMDHSAKernelDescriptor.h" 38 #include "llvm/Support/Casting.h" 39 #include "llvm/Support/MathExtras.h" 40 #include "llvm/TargetParser/TargetParser.h" 41 #include <optional> 42 43 using namespace llvm; 44 using namespace llvm::AMDGPU; 45 using namespace llvm::amdhsa; 46 47 namespace { 48 49 class AMDGPUAsmParser; 50 51 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 52 53 //===----------------------------------------------------------------------===// 54 // Operand 55 //===----------------------------------------------------------------------===// 56 57 class AMDGPUOperand : public MCParsedAsmOperand { 58 enum KindTy { 59 Token, 60 Immediate, 61 Register, 62 Expression 63 } Kind; 64 65 SMLoc StartLoc, EndLoc; 66 const AMDGPUAsmParser *AsmParser; 67 68 public: 69 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 70 : Kind(Kind_), AsmParser(AsmParser_) {} 71 72 using Ptr = std::unique_ptr<AMDGPUOperand>; 73 74 struct Modifiers { 75 bool Abs = false; 76 bool Neg = false; 77 bool Sext = false; 78 bool Lit = false; 79 80 bool hasFPModifiers() const { return Abs || Neg; } 81 bool hasIntModifiers() const { return Sext; } 82 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 83 84 int64_t getFPModifiersOperand() const { 85 int64_t Operand = 0; 86 Operand |= Abs ? SISrcMods::ABS : 0u; 87 Operand |= Neg ? SISrcMods::NEG : 0u; 88 return Operand; 89 } 90 91 int64_t getIntModifiersOperand() const { 92 int64_t Operand = 0; 93 Operand |= Sext ? 
SISrcMods::SEXT : 0u; 94 return Operand; 95 } 96 97 int64_t getModifiersOperand() const { 98 assert(!(hasFPModifiers() && hasIntModifiers()) 99 && "fp and int modifiers should not be used simultaneously"); 100 if (hasFPModifiers()) { 101 return getFPModifiersOperand(); 102 } else if (hasIntModifiers()) { 103 return getIntModifiersOperand(); 104 } else { 105 return 0; 106 } 107 } 108 109 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 110 }; 111 112 enum ImmTy { 113 ImmTyNone, 114 ImmTyGDS, 115 ImmTyLDS, 116 ImmTyOffen, 117 ImmTyIdxen, 118 ImmTyAddr64, 119 ImmTyOffset, 120 ImmTyInstOffset, 121 ImmTyOffset0, 122 ImmTyOffset1, 123 ImmTySMEMOffsetMod, 124 ImmTyCPol, 125 ImmTyTFE, 126 ImmTyD16, 127 ImmTyClampSI, 128 ImmTyOModSI, 129 ImmTySDWADstSel, 130 ImmTySDWASrc0Sel, 131 ImmTySDWASrc1Sel, 132 ImmTySDWADstUnused, 133 ImmTyDMask, 134 ImmTyDim, 135 ImmTyUNorm, 136 ImmTyDA, 137 ImmTyR128A16, 138 ImmTyA16, 139 ImmTyLWE, 140 ImmTyExpTgt, 141 ImmTyExpCompr, 142 ImmTyExpVM, 143 ImmTyFORMAT, 144 ImmTyHwreg, 145 ImmTyOff, 146 ImmTySendMsg, 147 ImmTyInterpSlot, 148 ImmTyInterpAttr, 149 ImmTyInterpAttrChan, 150 ImmTyOpSel, 151 ImmTyOpSelHi, 152 ImmTyNegLo, 153 ImmTyNegHi, 154 ImmTyDPP8, 155 ImmTyDppCtrl, 156 ImmTyDppRowMask, 157 ImmTyDppBankMask, 158 ImmTyDppBoundCtrl, 159 ImmTyDppFI, 160 ImmTySwizzle, 161 ImmTyGprIdxMode, 162 ImmTyHigh, 163 ImmTyBLGP, 164 ImmTyCBSZ, 165 ImmTyABID, 166 ImmTyEndpgm, 167 ImmTyWaitVDST, 168 ImmTyWaitEXP, 169 ImmTyWaitVAVDst, 170 ImmTyWaitVMVSrc, 171 }; 172 173 // Immediate operand kind. 174 // It helps to identify the location of an offending operand after an error. 175 // Note that regular literals and mandatory literals (KImm) must be handled 176 // differently. When looking for an offending operand, we should usually 177 // ignore mandatory literals because they are part of the instruction and 178 // cannot be changed. Report location of mandatory operands only for VOPD, 179 // when both OpX and OpY have a KImm and there are no other literals. 
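// Illustrative example (an editorial sketch, not tied to a specific check in
// this file): in "v_madmk_f32 v0, v1, 0x42f60000, v2" the trailing constant is
// a mandatory literal (KImm) that is part of the instruction encoding, whereas
// in "v_add_f32 v0, 0x42f60000, v1" the constant is a regular literal operand.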
180 enum ImmKindTy { 181 ImmKindTyNone, 182 ImmKindTyLiteral, 183 ImmKindTyMandatoryLiteral, 184 ImmKindTyConst, 185 }; 186 187 private: 188 struct TokOp { 189 const char *Data; 190 unsigned Length; 191 }; 192 193 struct ImmOp { 194 int64_t Val; 195 ImmTy Type; 196 bool IsFPImm; 197 mutable ImmKindTy Kind; 198 Modifiers Mods; 199 }; 200 201 struct RegOp { 202 unsigned RegNo; 203 Modifiers Mods; 204 }; 205 206 union { 207 TokOp Tok; 208 ImmOp Imm; 209 RegOp Reg; 210 const MCExpr *Expr; 211 }; 212 213 public: 214 bool isToken() const override { return Kind == Token; } 215 216 bool isSymbolRefExpr() const { 217 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 218 } 219 220 bool isImm() const override { 221 return Kind == Immediate; 222 } 223 224 void setImmKindNone() const { 225 assert(isImm()); 226 Imm.Kind = ImmKindTyNone; 227 } 228 229 void setImmKindLiteral() const { 230 assert(isImm()); 231 Imm.Kind = ImmKindTyLiteral; 232 } 233 234 void setImmKindMandatoryLiteral() const { 235 assert(isImm()); 236 Imm.Kind = ImmKindTyMandatoryLiteral; 237 } 238 239 void setImmKindConst() const { 240 assert(isImm()); 241 Imm.Kind = ImmKindTyConst; 242 } 243 244 bool IsImmKindLiteral() const { 245 return isImm() && Imm.Kind == ImmKindTyLiteral; 246 } 247 248 bool IsImmKindMandatoryLiteral() const { 249 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral; 250 } 251 252 bool isImmKindConst() const { 253 return isImm() && Imm.Kind == ImmKindTyConst; 254 } 255 256 bool isInlinableImm(MVT type) const; 257 bool isLiteralImm(MVT type) const; 258 259 bool isRegKind() const { 260 return Kind == Register; 261 } 262 263 bool isReg() const override { 264 return isRegKind() && !hasModifiers(); 265 } 266 267 bool isRegOrInline(unsigned RCID, MVT type) const { 268 return isRegClass(RCID) || isInlinableImm(type); 269 } 270 271 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 272 return isRegOrInline(RCID, type) || isLiteralImm(type); 273 } 274 275 bool isRegOrImmWithInt16InputMods() const { 276 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 277 } 278 279 bool isRegOrImmWithIntT16InputMods() const { 280 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16); 281 } 282 283 bool isRegOrImmWithInt32InputMods() const { 284 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 285 } 286 287 bool isRegOrInlineImmWithInt16InputMods() const { 288 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); 289 } 290 291 bool isRegOrInlineImmWithInt32InputMods() const { 292 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); 293 } 294 295 bool isRegOrImmWithInt64InputMods() const { 296 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 297 } 298 299 bool isRegOrImmWithFP16InputMods() const { 300 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 301 } 302 303 bool isRegOrImmWithFPT16InputMods() const { 304 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16); 305 } 306 307 bool isRegOrImmWithFP32InputMods() const { 308 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 309 } 310 311 bool isRegOrImmWithFP64InputMods() const { 312 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 313 } 314 315 bool isRegOrInlineImmWithFP16InputMods() const { 316 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); 317 } 318 319 bool isRegOrInlineImmWithFP32InputMods() const { 320 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); 321 } 322 323 324 bool isVReg() const { 325 return 
isRegClass(AMDGPU::VGPR_32RegClassID) || 326 isRegClass(AMDGPU::VReg_64RegClassID) || 327 isRegClass(AMDGPU::VReg_96RegClassID) || 328 isRegClass(AMDGPU::VReg_128RegClassID) || 329 isRegClass(AMDGPU::VReg_160RegClassID) || 330 isRegClass(AMDGPU::VReg_192RegClassID) || 331 isRegClass(AMDGPU::VReg_256RegClassID) || 332 isRegClass(AMDGPU::VReg_512RegClassID) || 333 isRegClass(AMDGPU::VReg_1024RegClassID); 334 } 335 336 bool isVReg32() const { 337 return isRegClass(AMDGPU::VGPR_32RegClassID); 338 } 339 340 bool isVReg32OrOff() const { 341 return isOff() || isVReg32(); 342 } 343 344 bool isNull() const { 345 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 346 } 347 348 bool isVRegWithInputMods() const; 349 bool isT16VRegWithInputMods() const; 350 351 bool isSDWAOperand(MVT type) const; 352 bool isSDWAFP16Operand() const; 353 bool isSDWAFP32Operand() const; 354 bool isSDWAInt16Operand() const; 355 bool isSDWAInt32Operand() const; 356 357 bool isImmTy(ImmTy ImmT) const { 358 return isImm() && Imm.Type == ImmT; 359 } 360 361 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); } 362 363 bool isImmLiteral() const { return isImmTy(ImmTyNone); } 364 365 bool isImmModifier() const { 366 return isImm() && Imm.Type != ImmTyNone; 367 } 368 369 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 370 bool isDMask() const { return isImmTy(ImmTyDMask); } 371 bool isDim() const { return isImmTy(ImmTyDim); } 372 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 373 bool isOff() const { return isImmTy(ImmTyOff); } 374 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 375 bool isOffen() const { return isImmTy(ImmTyOffen); } 376 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 377 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 378 bool isOffset() const { return isImmTy(ImmTyOffset); } 379 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 380 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 381 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); } 382 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 383 bool isGDS() const { return isImmTy(ImmTyGDS); } 384 bool isLDS() const { return isImmTy(ImmTyLDS); } 385 bool isCPol() const { return isImmTy(ImmTyCPol); } 386 bool isTFE() const { return isImmTy(ImmTyTFE); } 387 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 388 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); } 389 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); } 390 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 391 bool isDppFI() const { return isImmTy(ImmTyDppFI); } 392 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); } 393 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); } 394 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); } 395 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); } 396 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 397 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 398 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); } 399 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 400 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 401 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 402 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 403 404 bool isRegOrImm() const { 405 return isReg() || isImm(); 406 } 407 408 bool isRegClass(unsigned RCID) 
const; 409 410 bool isInlineValue() const; 411 412 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 413 return isRegOrInline(RCID, type) && !hasModifiers(); 414 } 415 416 bool isSCSrcB16() const { 417 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 418 } 419 420 bool isSCSrcV2B16() const { 421 return isSCSrcB16(); 422 } 423 424 bool isSCSrcB32() const { 425 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 426 } 427 428 bool isSCSrcB64() const { 429 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 430 } 431 432 bool isBoolReg() const; 433 434 bool isSCSrcF16() const { 435 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 436 } 437 438 bool isSCSrcV2F16() const { 439 return isSCSrcF16(); 440 } 441 442 bool isSCSrcF32() const { 443 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 444 } 445 446 bool isSCSrcF64() const { 447 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 448 } 449 450 bool isSSrcB32() const { 451 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 452 } 453 454 bool isSSrcB16() const { 455 return isSCSrcB16() || isLiteralImm(MVT::i16); 456 } 457 458 bool isSSrcV2B16() const { 459 llvm_unreachable("cannot happen"); 460 return isSSrcB16(); 461 } 462 463 bool isSSrcB64() const { 464 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 465 // See isVSrc64(). 466 return isSCSrcB64() || isLiteralImm(MVT::i64); 467 } 468 469 bool isSSrcF32() const { 470 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 471 } 472 473 bool isSSrcF64() const { 474 return isSCSrcB64() || isLiteralImm(MVT::f64); 475 } 476 477 bool isSSrcF16() const { 478 return isSCSrcB16() || isLiteralImm(MVT::f16); 479 } 480 481 bool isSSrcV2F16() const { 482 llvm_unreachable("cannot happen"); 483 return isSSrcF16(); 484 } 485 486 bool isSSrcV2FP32() const { 487 llvm_unreachable("cannot happen"); 488 return isSSrcF32(); 489 } 490 491 bool isSCSrcV2FP32() const { 492 llvm_unreachable("cannot happen"); 493 return isSCSrcF32(); 494 } 495 496 bool isSSrcV2INT32() const { 497 llvm_unreachable("cannot happen"); 498 return isSSrcB32(); 499 } 500 501 bool isSCSrcV2INT32() const { 502 llvm_unreachable("cannot happen"); 503 return isSCSrcB32(); 504 } 505 506 bool isSSrcOrLdsB32() const { 507 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 508 isLiteralImm(MVT::i32) || isExpr(); 509 } 510 511 bool isVCSrcB32() const { 512 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 513 } 514 515 bool isVCSrcB64() const { 516 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 517 } 518 519 bool isVCSrcTB16() const { 520 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16); 521 } 522 523 bool isVCSrcTB16_Lo128() const { 524 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16); 525 } 526 527 bool isVCSrcFake16B16_Lo128() const { 528 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16); 529 } 530 531 bool isVCSrcB16() const { 532 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 533 } 534 535 bool isVCSrcV2B16() const { 536 return isVCSrcB16(); 537 } 538 539 bool isVCSrcF32() const { 540 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 541 } 542 543 bool isVCSrcF64() const { 544 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 545 } 546 547 bool isVCSrcTF16() const { 548 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16); 549 } 550 551 bool isVCSrcTF16_Lo128() 
const { 552 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16); 553 } 554 555 bool isVCSrcFake16F16_Lo128() const { 556 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16); 557 } 558 559 bool isVCSrcF16() const { 560 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 561 } 562 563 bool isVCSrcV2F16() const { 564 return isVCSrcF16(); 565 } 566 567 bool isVSrcB32() const { 568 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 569 } 570 571 bool isVSrcB64() const { 572 return isVCSrcF64() || isLiteralImm(MVT::i64); 573 } 574 575 bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); } 576 577 bool isVSrcTB16_Lo128() const { 578 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16); 579 } 580 581 bool isVSrcFake16B16_Lo128() const { 582 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16); 583 } 584 585 bool isVSrcB16() const { 586 return isVCSrcB16() || isLiteralImm(MVT::i16); 587 } 588 589 bool isVSrcV2B16() const { 590 return isVSrcB16() || isLiteralImm(MVT::v2i16); 591 } 592 593 bool isVCSrcV2FP32() const { 594 return isVCSrcF64(); 595 } 596 597 bool isVSrcV2FP32() const { 598 return isVSrcF64() || isLiteralImm(MVT::v2f32); 599 } 600 601 bool isVCSrcV2INT32() const { 602 return isVCSrcB64(); 603 } 604 605 bool isVSrcV2INT32() const { 606 return isVSrcB64() || isLiteralImm(MVT::v2i32); 607 } 608 609 bool isVSrcF32() const { 610 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 611 } 612 613 bool isVSrcF64() const { 614 return isVCSrcF64() || isLiteralImm(MVT::f64); 615 } 616 617 bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); } 618 619 bool isVSrcTF16_Lo128() const { 620 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16); 621 } 622 623 bool isVSrcFake16F16_Lo128() const { 624 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16); 625 } 626 627 bool isVSrcF16() const { 628 return isVCSrcF16() || isLiteralImm(MVT::f16); 629 } 630 631 bool isVSrcV2F16() const { 632 return isVSrcF16() || isLiteralImm(MVT::v2f16); 633 } 634 635 bool isVISrcB32() const { 636 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 637 } 638 639 bool isVISrcB16() const { 640 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 641 } 642 643 bool isVISrcV2B16() const { 644 return isVISrcB16(); 645 } 646 647 bool isVISrcF32() const { 648 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 649 } 650 651 bool isVISrcF16() const { 652 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 653 } 654 655 bool isVISrcV2F16() const { 656 return isVISrcF16() || isVISrcB32(); 657 } 658 659 bool isVISrc_64B64() const { 660 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 661 } 662 663 bool isVISrc_64F64() const { 664 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 665 } 666 667 bool isVISrc_64V2FP32() const { 668 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 669 } 670 671 bool isVISrc_64V2INT32() const { 672 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 673 } 674 675 bool isVISrc_256B64() const { 676 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 677 } 678 679 bool isVISrc_256F64() const { 680 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 681 } 682 683 bool isVISrc_128B16() const { 684 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 685 } 686 687 bool isVISrc_128V2B16() const { 688 return isVISrc_128B16(); 689 } 690 691 bool isVISrc_128B32() 
const { 692 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 693 } 694 695 bool isVISrc_128F32() const { 696 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 697 } 698 699 bool isVISrc_256V2FP32() const { 700 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 701 } 702 703 bool isVISrc_256V2INT32() const { 704 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 705 } 706 707 bool isVISrc_512B32() const { 708 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 709 } 710 711 bool isVISrc_512B16() const { 712 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 713 } 714 715 bool isVISrc_512V2B16() const { 716 return isVISrc_512B16(); 717 } 718 719 bool isVISrc_512F32() const { 720 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 721 } 722 723 bool isVISrc_512F16() const { 724 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 725 } 726 727 bool isVISrc_512V2F16() const { 728 return isVISrc_512F16() || isVISrc_512B32(); 729 } 730 731 bool isVISrc_1024B32() const { 732 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 733 } 734 735 bool isVISrc_1024B16() const { 736 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 737 } 738 739 bool isVISrc_1024V2B16() const { 740 return isVISrc_1024B16(); 741 } 742 743 bool isVISrc_1024F32() const { 744 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 745 } 746 747 bool isVISrc_1024F16() const { 748 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 749 } 750 751 bool isVISrc_1024V2F16() const { 752 return isVISrc_1024F16() || isVISrc_1024B32(); 753 } 754 755 bool isAISrcB32() const { 756 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 757 } 758 759 bool isAISrcB16() const { 760 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 761 } 762 763 bool isAISrcV2B16() const { 764 return isAISrcB16(); 765 } 766 767 bool isAISrcF32() const { 768 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 769 } 770 771 bool isAISrcF16() const { 772 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 773 } 774 775 bool isAISrcV2F16() const { 776 return isAISrcF16() || isAISrcB32(); 777 } 778 779 bool isAISrc_64B64() const { 780 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 781 } 782 783 bool isAISrc_64F64() const { 784 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 785 } 786 787 bool isAISrc_128B32() const { 788 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 789 } 790 791 bool isAISrc_128B16() const { 792 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 793 } 794 795 bool isAISrc_128V2B16() const { 796 return isAISrc_128B16(); 797 } 798 799 bool isAISrc_128F32() const { 800 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 801 } 802 803 bool isAISrc_128F16() const { 804 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 805 } 806 807 bool isAISrc_128V2F16() const { 808 return isAISrc_128F16() || isAISrc_128B32(); 809 } 810 811 bool isVISrc_128F16() const { 812 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 813 } 814 815 bool isVISrc_128V2F16() const { 816 return isVISrc_128F16() || isVISrc_128B32(); 817 } 818 819 bool isAISrc_256B64() const { 820 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 821 } 822 823 bool isAISrc_256F64() const { 824 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, 
MVT::f64); 825 } 826 827 bool isAISrc_512B32() const { 828 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 829 } 830 831 bool isAISrc_512B16() const { 832 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 833 } 834 835 bool isAISrc_512V2B16() const { 836 return isAISrc_512B16(); 837 } 838 839 bool isAISrc_512F32() const { 840 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 841 } 842 843 bool isAISrc_512F16() const { 844 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 845 } 846 847 bool isAISrc_512V2F16() const { 848 return isAISrc_512F16() || isAISrc_512B32(); 849 } 850 851 bool isAISrc_1024B32() const { 852 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 853 } 854 855 bool isAISrc_1024B16() const { 856 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 857 } 858 859 bool isAISrc_1024V2B16() const { 860 return isAISrc_1024B16(); 861 } 862 863 bool isAISrc_1024F32() const { 864 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 865 } 866 867 bool isAISrc_1024F16() const { 868 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 869 } 870 871 bool isAISrc_1024V2F16() const { 872 return isAISrc_1024F16() || isAISrc_1024B32(); 873 } 874 875 bool isKImmFP32() const { 876 return isLiteralImm(MVT::f32); 877 } 878 879 bool isKImmFP16() const { 880 return isLiteralImm(MVT::f16); 881 } 882 883 bool isMem() const override { 884 return false; 885 } 886 887 bool isExpr() const { 888 return Kind == Expression; 889 } 890 891 bool isSOPPBrTarget() const { return isExpr() || isImm(); } 892 893 bool isSWaitCnt() const; 894 bool isDepCtr() const; 895 bool isSDelayALU() const; 896 bool isHwreg() const; 897 bool isSendMsg() const; 898 bool isSplitBarrier() const; 899 bool isSwizzle() const; 900 bool isSMRDOffset8() const; 901 bool isSMEMOffset() const; 902 bool isSMRDLiteralOffset() const; 903 bool isDPP8() const; 904 bool isDPPCtrl() const; 905 bool isBLGP() const; 906 bool isCBSZ() const; 907 bool isABID() const; 908 bool isGPRIdxMode() const; 909 bool isS16Imm() const; 910 bool isU16Imm() const; 911 bool isEndpgm() const; 912 bool isWaitVDST() const; 913 bool isWaitEXP() const; 914 bool isWaitVAVDst() const; 915 bool isWaitVMVSrc() const; 916 917 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const { 918 return std::bind(P, *this); 919 } 920 921 StringRef getToken() const { 922 assert(isToken()); 923 return StringRef(Tok.Data, Tok.Length); 924 } 925 926 int64_t getImm() const { 927 assert(isImm()); 928 return Imm.Val; 929 } 930 931 void setImm(int64_t Val) { 932 assert(isImm()); 933 Imm.Val = Val; 934 } 935 936 ImmTy getImmTy() const { 937 assert(isImm()); 938 return Imm.Type; 939 } 940 941 unsigned getReg() const override { 942 assert(isRegKind()); 943 return Reg.RegNo; 944 } 945 946 SMLoc getStartLoc() const override { 947 return StartLoc; 948 } 949 950 SMLoc getEndLoc() const override { 951 return EndLoc; 952 } 953 954 SMRange getLocRange() const { 955 return SMRange(StartLoc, EndLoc); 956 } 957 958 Modifiers getModifiers() const { 959 assert(isRegKind() || isImmTy(ImmTyNone)); 960 return isRegKind() ? 
Reg.Mods : Imm.Mods; 961 } 962 963 void setModifiers(Modifiers Mods) { 964 assert(isRegKind() || isImmTy(ImmTyNone)); 965 if (isRegKind()) 966 Reg.Mods = Mods; 967 else 968 Imm.Mods = Mods; 969 } 970 971 bool hasModifiers() const { 972 return getModifiers().hasModifiers(); 973 } 974 975 bool hasFPModifiers() const { 976 return getModifiers().hasFPModifiers(); 977 } 978 979 bool hasIntModifiers() const { 980 return getModifiers().hasIntModifiers(); 981 } 982 983 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 984 985 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 986 987 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 988 989 void addRegOperands(MCInst &Inst, unsigned N) const; 990 991 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 992 if (isRegKind()) 993 addRegOperands(Inst, N); 994 else 995 addImmOperands(Inst, N); 996 } 997 998 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 999 Modifiers Mods = getModifiers(); 1000 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 1001 if (isRegKind()) { 1002 addRegOperands(Inst, N); 1003 } else { 1004 addImmOperands(Inst, N, false); 1005 } 1006 } 1007 1008 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 1009 assert(!hasIntModifiers()); 1010 addRegOrImmWithInputModsOperands(Inst, N); 1011 } 1012 1013 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 1014 assert(!hasFPModifiers()); 1015 addRegOrImmWithInputModsOperands(Inst, N); 1016 } 1017 1018 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 1019 Modifiers Mods = getModifiers(); 1020 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 1021 assert(isRegKind()); 1022 addRegOperands(Inst, N); 1023 } 1024 1025 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 1026 assert(!hasIntModifiers()); 1027 addRegWithInputModsOperands(Inst, N); 1028 } 1029 1030 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 1031 assert(!hasFPModifiers()); 1032 addRegWithInputModsOperands(Inst, N); 1033 } 1034 1035 static void printImmTy(raw_ostream& OS, ImmTy Type) { 1036 // clang-format off 1037 switch (Type) { 1038 case ImmTyNone: OS << "None"; break; 1039 case ImmTyGDS: OS << "GDS"; break; 1040 case ImmTyLDS: OS << "LDS"; break; 1041 case ImmTyOffen: OS << "Offen"; break; 1042 case ImmTyIdxen: OS << "Idxen"; break; 1043 case ImmTyAddr64: OS << "Addr64"; break; 1044 case ImmTyOffset: OS << "Offset"; break; 1045 case ImmTyInstOffset: OS << "InstOffset"; break; 1046 case ImmTyOffset0: OS << "Offset0"; break; 1047 case ImmTyOffset1: OS << "Offset1"; break; 1048 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break; 1049 case ImmTyCPol: OS << "CPol"; break; 1050 case ImmTyTFE: OS << "TFE"; break; 1051 case ImmTyD16: OS << "D16"; break; 1052 case ImmTyFORMAT: OS << "FORMAT"; break; 1053 case ImmTyClampSI: OS << "ClampSI"; break; 1054 case ImmTyOModSI: OS << "OModSI"; break; 1055 case ImmTyDPP8: OS << "DPP8"; break; 1056 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1057 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1058 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1059 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1060 case ImmTyDppFI: OS << "DppFI"; break; 1061 case ImmTySDWADstSel: OS << "SDWADstSel"; break; 1062 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break; 1063 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break; 1064 case ImmTySDWADstUnused: OS << 
"SDWADstUnused"; break; 1065 case ImmTyDMask: OS << "DMask"; break; 1066 case ImmTyDim: OS << "Dim"; break; 1067 case ImmTyUNorm: OS << "UNorm"; break; 1068 case ImmTyDA: OS << "DA"; break; 1069 case ImmTyR128A16: OS << "R128A16"; break; 1070 case ImmTyA16: OS << "A16"; break; 1071 case ImmTyLWE: OS << "LWE"; break; 1072 case ImmTyOff: OS << "Off"; break; 1073 case ImmTyExpTgt: OS << "ExpTgt"; break; 1074 case ImmTyExpCompr: OS << "ExpCompr"; break; 1075 case ImmTyExpVM: OS << "ExpVM"; break; 1076 case ImmTyHwreg: OS << "Hwreg"; break; 1077 case ImmTySendMsg: OS << "SendMsg"; break; 1078 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1079 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1080 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break; 1081 case ImmTyOpSel: OS << "OpSel"; break; 1082 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1083 case ImmTyNegLo: OS << "NegLo"; break; 1084 case ImmTyNegHi: OS << "NegHi"; break; 1085 case ImmTySwizzle: OS << "Swizzle"; break; 1086 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1087 case ImmTyHigh: OS << "High"; break; 1088 case ImmTyBLGP: OS << "BLGP"; break; 1089 case ImmTyCBSZ: OS << "CBSZ"; break; 1090 case ImmTyABID: OS << "ABID"; break; 1091 case ImmTyEndpgm: OS << "Endpgm"; break; 1092 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1093 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1094 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break; 1095 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break; 1096 } 1097 // clang-format on 1098 } 1099 1100 void print(raw_ostream &OS) const override { 1101 switch (Kind) { 1102 case Register: 1103 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1104 break; 1105 case Immediate: 1106 OS << '<' << getImm(); 1107 if (getImmTy() != ImmTyNone) { 1108 OS << " type: "; printImmTy(OS, getImmTy()); 1109 } 1110 OS << " mods: " << Imm.Mods << '>'; 1111 break; 1112 case Token: 1113 OS << '\'' << getToken() << '\''; 1114 break; 1115 case Expression: 1116 OS << "<expr " << *Expr << '>'; 1117 break; 1118 } 1119 } 1120 1121 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1122 int64_t Val, SMLoc Loc, 1123 ImmTy Type = ImmTyNone, 1124 bool IsFPImm = false) { 1125 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1126 Op->Imm.Val = Val; 1127 Op->Imm.IsFPImm = IsFPImm; 1128 Op->Imm.Kind = ImmKindTyNone; 1129 Op->Imm.Type = Type; 1130 Op->Imm.Mods = Modifiers(); 1131 Op->StartLoc = Loc; 1132 Op->EndLoc = Loc; 1133 return Op; 1134 } 1135 1136 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1137 StringRef Str, SMLoc Loc, 1138 bool HasExplicitEncodingSize = true) { 1139 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1140 Res->Tok.Data = Str.data(); 1141 Res->Tok.Length = Str.size(); 1142 Res->StartLoc = Loc; 1143 Res->EndLoc = Loc; 1144 return Res; 1145 } 1146 1147 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1148 unsigned RegNo, SMLoc S, 1149 SMLoc E) { 1150 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1151 Op->Reg.RegNo = RegNo; 1152 Op->Reg.Mods = Modifiers(); 1153 Op->StartLoc = S; 1154 Op->EndLoc = E; 1155 return Op; 1156 } 1157 1158 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1159 const class MCExpr *Expr, SMLoc S) { 1160 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1161 Op->Expr = Expr; 1162 Op->StartLoc = S; 1163 Op->EndLoc = S; 1164 return Op; 1165 } 1166 }; 1167 1168 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1169 
OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1170 return OS; 1171 } 1172 1173 //===----------------------------------------------------------------------===// 1174 // AsmParser 1175 //===----------------------------------------------------------------------===// 1176 1177 // Holds info related to the current kernel, e.g. count of SGPRs used. 1178 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1179 // .amdgpu_hsa_kernel or at EOF. 1180 class KernelScopeInfo { 1181 int SgprIndexUnusedMin = -1; 1182 int VgprIndexUnusedMin = -1; 1183 int AgprIndexUnusedMin = -1; 1184 MCContext *Ctx = nullptr; 1185 MCSubtargetInfo const *MSTI = nullptr; 1186 1187 void usesSgprAt(int i) { 1188 if (i >= SgprIndexUnusedMin) { 1189 SgprIndexUnusedMin = ++i; 1190 if (Ctx) { 1191 MCSymbol* const Sym = 1192 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1193 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1194 } 1195 } 1196 } 1197 1198 void usesVgprAt(int i) { 1199 if (i >= VgprIndexUnusedMin) { 1200 VgprIndexUnusedMin = ++i; 1201 if (Ctx) { 1202 MCSymbol* const Sym = 1203 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1204 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1205 VgprIndexUnusedMin); 1206 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1207 } 1208 } 1209 } 1210 1211 void usesAgprAt(int i) { 1212 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1213 if (!hasMAIInsts(*MSTI)) 1214 return; 1215 1216 if (i >= AgprIndexUnusedMin) { 1217 AgprIndexUnusedMin = ++i; 1218 if (Ctx) { 1219 MCSymbol* const Sym = 1220 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1221 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1222 1223 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1224 MCSymbol* const vSym = 1225 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1226 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1227 VgprIndexUnusedMin); 1228 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1229 } 1230 } 1231 } 1232 1233 public: 1234 KernelScopeInfo() = default; 1235 1236 void initialize(MCContext &Context) { 1237 Ctx = &Context; 1238 MSTI = Ctx->getSubtargetInfo(); 1239 1240 usesSgprAt(SgprIndexUnusedMin = -1); 1241 usesVgprAt(VgprIndexUnusedMin = -1); 1242 if (hasMAIInsts(*MSTI)) { 1243 usesAgprAt(AgprIndexUnusedMin = -1); 1244 } 1245 } 1246 1247 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1248 unsigned RegWidth) { 1249 switch (RegKind) { 1250 case IS_SGPR: 1251 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1252 break; 1253 case IS_AGPR: 1254 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1255 break; 1256 case IS_VGPR: 1257 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1258 break; 1259 default: 1260 break; 1261 } 1262 } 1263 }; 1264 1265 class AMDGPUAsmParser : public MCTargetAsmParser { 1266 MCAsmParser &Parser; 1267 1268 unsigned ForcedEncodingSize = 0; 1269 bool ForcedDPP = false; 1270 bool ForcedSDWA = false; 1271 KernelScopeInfo KernelScope; 1272 1273 /// @name Auto-generated Match Functions 1274 /// { 1275 1276 #define GET_ASSEMBLER_HEADER 1277 #include "AMDGPUGenAsmMatcher.inc" 1278 1279 /// } 1280 1281 private: 1282 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1283 bool OutOfRangeError(SMRange Range); 1284 /// Calculate VGPR/SGPR blocks required for given target, reserved 1285 /// registers, and user-specified NextFreeXGPR 
values. 1286 /// 1287 /// \param Features [in] Target features, used for bug corrections. 1288 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1289 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1290 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1291 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1292 /// descriptor field, if valid. 1293 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1294 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1295 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1296 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1297 /// \param VGPRBlocks [out] Result VGPR block count. 1298 /// \param SGPRBlocks [out] Result SGPR block count. 1299 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1300 bool FlatScrUsed, bool XNACKUsed, 1301 std::optional<bool> EnableWavefrontSize32, 1302 unsigned NextFreeVGPR, SMRange VGPRRange, 1303 unsigned NextFreeSGPR, SMRange SGPRRange, 1304 unsigned &VGPRBlocks, unsigned &SGPRBlocks); 1305 bool ParseDirectiveAMDGCNTarget(); 1306 bool ParseDirectiveAMDHSAKernel(); 1307 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1308 bool ParseDirectiveHSACodeObjectVersion(); 1309 bool ParseDirectiveHSACodeObjectISA(); 1310 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1311 bool ParseDirectiveAMDKernelCodeT(); 1312 // TODO: Possibly make subtargetHasRegister const. 1313 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1314 bool ParseDirectiveAMDGPUHsaKernel(); 1315 1316 bool ParseDirectiveISAVersion(); 1317 bool ParseDirectiveHSAMetadata(); 1318 bool ParseDirectivePALMetadataBegin(); 1319 bool ParseDirectivePALMetadata(); 1320 bool ParseDirectiveAMDGPULDS(); 1321 1322 /// Common code to parse out a block of text (typically YAML) between start and 1323 /// end directives. 
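/// For example (in the HSA ABI), the metadata block collected between the
/// .amdgpu_metadata and .end_amdgpu_metadata directives.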
1324 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1325 const char *AssemblerDirectiveEnd, 1326 std::string &CollectString); 1327 1328 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1329 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1330 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1331 unsigned &RegNum, unsigned &RegWidth, 1332 bool RestoreOnFailure = false); 1333 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1334 unsigned &RegNum, unsigned &RegWidth, 1335 SmallVectorImpl<AsmToken> &Tokens); 1336 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1337 unsigned &RegWidth, 1338 SmallVectorImpl<AsmToken> &Tokens); 1339 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1340 unsigned &RegWidth, 1341 SmallVectorImpl<AsmToken> &Tokens); 1342 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1343 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1344 bool ParseRegRange(unsigned& Num, unsigned& Width); 1345 unsigned getRegularReg(RegisterKind RegKind, 1346 unsigned RegNum, 1347 unsigned RegWidth, 1348 SMLoc Loc); 1349 1350 bool isRegister(); 1351 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1352 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1353 void initializeGprCountSymbol(RegisterKind RegKind); 1354 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1355 unsigned RegWidth); 1356 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1357 bool IsAtomic); 1358 1359 public: 1360 enum AMDGPUMatchResultTy { 1361 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1362 }; 1363 enum OperandMode { 1364 OperandMode_Default, 1365 OperandMode_NSA, 1366 }; 1367 1368 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1369 1370 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1371 const MCInstrInfo &MII, 1372 const MCTargetOptions &Options) 1373 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1374 MCAsmParserExtension::Initialize(Parser); 1375 1376 if (getFeatureBits().none()) { 1377 // Set default features. 1378 copySTI().ToggleFeature("southern-islands"); 1379 } 1380 1381 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1382 1383 { 1384 // TODO: make those pre-defined variables read-only. 1385 // Currently there is none suitable machinery in the core llvm-mc for this. 1386 // MCSymbol::isRedefinable is intended for another purpose, and 1387 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
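// Usage sketch (editorial, not from this file): assembly sources can key off
// these pre-defined variables in expressions, for example
//   .if .amdgcn.gfx_generation_number >= 10
//   .endif
// which is one reason accidental redefinition by user code is undesirable.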
1388 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1389 MCContext &Ctx = getContext(); 1390 if (ISA.Major >= 6 && isHsaAbi(getSTI())) { 1391 MCSymbol *Sym = 1392 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1393 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1394 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1395 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1396 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1397 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1398 } else { 1399 MCSymbol *Sym = 1400 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1401 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1402 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1403 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1404 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1405 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1406 } 1407 if (ISA.Major >= 6 && isHsaAbi(getSTI())) { 1408 initializeGprCountSymbol(IS_VGPR); 1409 initializeGprCountSymbol(IS_SGPR); 1410 } else 1411 KernelScope.initialize(getContext()); 1412 } 1413 } 1414 1415 bool hasMIMG_R128() const { 1416 return AMDGPU::hasMIMG_R128(getSTI()); 1417 } 1418 1419 bool hasPackedD16() const { 1420 return AMDGPU::hasPackedD16(getSTI()); 1421 } 1422 1423 bool hasA16() const { return AMDGPU::hasA16(getSTI()); } 1424 1425 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1426 1427 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); } 1428 1429 bool isSI() const { 1430 return AMDGPU::isSI(getSTI()); 1431 } 1432 1433 bool isCI() const { 1434 return AMDGPU::isCI(getSTI()); 1435 } 1436 1437 bool isVI() const { 1438 return AMDGPU::isVI(getSTI()); 1439 } 1440 1441 bool isGFX9() const { 1442 return AMDGPU::isGFX9(getSTI()); 1443 } 1444 1445 // TODO: isGFX90A is also true for GFX940. We need to clean it. 
1446 bool isGFX90A() const { 1447 return AMDGPU::isGFX90A(getSTI()); 1448 } 1449 1450 bool isGFX940() const { 1451 return AMDGPU::isGFX940(getSTI()); 1452 } 1453 1454 bool isGFX9Plus() const { 1455 return AMDGPU::isGFX9Plus(getSTI()); 1456 } 1457 1458 bool isGFX10() const { 1459 return AMDGPU::isGFX10(getSTI()); 1460 } 1461 1462 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1463 1464 bool isGFX11() const { 1465 return AMDGPU::isGFX11(getSTI()); 1466 } 1467 1468 bool isGFX11Plus() const { 1469 return AMDGPU::isGFX11Plus(getSTI()); 1470 } 1471 1472 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); } 1473 1474 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); } 1475 1476 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); } 1477 1478 bool isGFX10_BEncoding() const { 1479 return AMDGPU::isGFX10_BEncoding(getSTI()); 1480 } 1481 1482 bool hasInv2PiInlineImm() const { 1483 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1484 } 1485 1486 bool hasFlatOffsets() const { 1487 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1488 } 1489 1490 bool hasArchitectedFlatScratch() const { 1491 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1492 } 1493 1494 bool hasSGPR102_SGPR103() const { 1495 return !isVI() && !isGFX9(); 1496 } 1497 1498 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1499 1500 bool hasIntClamp() const { 1501 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1502 } 1503 1504 bool hasPartialNSAEncoding() const { 1505 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding]; 1506 } 1507 1508 unsigned getNSAMaxSize(bool HasSampler = false) const { 1509 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler); 1510 } 1511 1512 unsigned getMaxNumUserSGPRs() const { 1513 return AMDGPU::getMaxNumUserSGPRs(getSTI()); 1514 } 1515 1516 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); } 1517 1518 AMDGPUTargetStreamer &getTargetStreamer() { 1519 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1520 return static_cast<AMDGPUTargetStreamer &>(TS); 1521 } 1522 1523 const MCRegisterInfo *getMRI() const { 1524 // We need this const_cast because for some reason getContext() is not const 1525 // in MCAsmParser. 
1526 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1527 } 1528 1529 const MCInstrInfo *getMII() const { 1530 return &MII; 1531 } 1532 1533 const FeatureBitset &getFeatureBits() const { 1534 return getSTI().getFeatureBits(); 1535 } 1536 1537 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1538 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1539 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1540 1541 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1542 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1543 bool isForcedDPP() const { return ForcedDPP; } 1544 bool isForcedSDWA() const { return ForcedSDWA; } 1545 ArrayRef<unsigned> getMatchedVariants() const; 1546 StringRef getMatchedVariantName() const; 1547 1548 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1549 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1550 bool RestoreOnFailure); 1551 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; 1552 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, 1553 SMLoc &EndLoc) override; 1554 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1555 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1556 unsigned Kind) override; 1557 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1558 OperandVector &Operands, MCStreamer &Out, 1559 uint64_t &ErrorInfo, 1560 bool MatchingInlineAsm) override; 1561 bool ParseDirective(AsmToken DirectiveID) override; 1562 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic, 1563 OperandMode Mode = OperandMode_Default); 1564 StringRef parseMnemonicSuffix(StringRef Name); 1565 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1566 SMLoc NameLoc, OperandVector &Operands) override; 1567 //bool ProcessInstruction(MCInst &Inst); 1568 1569 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands); 1570 1571 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int); 1572 1573 ParseStatus 1574 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1575 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1576 std::function<bool(int64_t &)> ConvertResult = nullptr); 1577 1578 ParseStatus parseOperandArrayWithPrefix( 1579 const char *Prefix, OperandVector &Operands, 1580 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1581 bool (*ConvertResult)(int64_t &) = nullptr); 1582 1583 ParseStatus 1584 parseNamedBit(StringRef Name, OperandVector &Operands, 1585 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1586 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const; 1587 ParseStatus parseCPol(OperandVector &Operands); 1588 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope); 1589 ParseStatus parseTH(OperandVector &Operands, int64_t &TH); 1590 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value, 1591 SMLoc &StringLoc); 1592 1593 bool isModifier(); 1594 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1595 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1596 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1597 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1598 bool parseSP3NegModifier(); 1599 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false, 1600 bool HasLit = 
false); 1601 ParseStatus parseReg(OperandVector &Operands); 1602 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false, 1603 bool HasLit = false); 1604 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands, 1605 bool AllowImm = true); 1606 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands, 1607 bool AllowImm = true); 1608 ParseStatus parseRegWithFPInputMods(OperandVector &Operands); 1609 ParseStatus parseRegWithIntInputMods(OperandVector &Operands); 1610 ParseStatus parseVReg32OrOff(OperandVector &Operands); 1611 ParseStatus parseDfmtNfmt(int64_t &Format); 1612 ParseStatus parseUfmt(int64_t &Format); 1613 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, 1614 int64_t &Format); 1615 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, 1616 int64_t &Format); 1617 ParseStatus parseFORMAT(OperandVector &Operands); 1618 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format); 1619 ParseStatus parseNumericFormat(int64_t &Format); 1620 ParseStatus parseFlatOffset(OperandVector &Operands); 1621 ParseStatus parseR128A16(OperandVector &Operands); 1622 ParseStatus parseBLGP(OperandVector &Operands); 1623 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1624 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1625 1626 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1627 1628 bool parseCnt(int64_t &IntVal); 1629 ParseStatus parseSWaitCnt(OperandVector &Operands); 1630 1631 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1632 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1633 ParseStatus parseDepCtr(OperandVector &Operands); 1634 1635 bool parseDelay(int64_t &Delay); 1636 ParseStatus parseSDelayALU(OperandVector &Operands); 1637 1638 ParseStatus parseHwreg(OperandVector &Operands); 1639 1640 private: 1641 struct OperandInfoTy { 1642 SMLoc Loc; 1643 int64_t Id; 1644 bool IsSymbolic = false; 1645 bool IsDefined = false; 1646 1647 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1648 }; 1649 1650 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1651 bool validateSendMsg(const OperandInfoTy &Msg, 1652 const OperandInfoTy &Op, 1653 const OperandInfoTy &Stream); 1654 1655 bool parseHwregBody(OperandInfoTy &HwReg, 1656 OperandInfoTy &Offset, 1657 OperandInfoTy &Width); 1658 bool validateHwreg(const OperandInfoTy &HwReg, 1659 const OperandInfoTy &Offset, 1660 const OperandInfoTy &Width); 1661 1662 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1663 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1664 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1665 1666 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1667 const OperandVector &Operands) const; 1668 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1669 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1670 SMLoc getLitLoc(const OperandVector &Operands, 1671 bool SearchMandatoryLiterals = false) const; 1672 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const; 1673 SMLoc getConstLoc(const OperandVector &Operands) const; 1674 SMLoc getInstLoc(const OperandVector &Operands) const; 1675 1676 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1677 bool validateOffset(const MCInst &Inst, const OperandVector &Operands); 1678 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1679 bool 
validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1680 bool validateSOPLiteral(const MCInst &Inst) const; 1681 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1682 bool validateVOPDRegBankConstraints(const MCInst &Inst, 1683 const OperandVector &Operands); 1684 bool validateIntClampSupported(const MCInst &Inst); 1685 bool validateMIMGAtomicDMask(const MCInst &Inst); 1686 bool validateMIMGGatherDMask(const MCInst &Inst); 1687 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1688 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc); 1689 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc); 1690 bool validateMIMGD16(const MCInst &Inst); 1691 bool validateMIMGMSAA(const MCInst &Inst); 1692 bool validateOpSel(const MCInst &Inst); 1693 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1694 bool validateVccOperand(unsigned Reg) const; 1695 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1696 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1697 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands); 1698 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1699 bool validateAGPRLdSt(const MCInst &Inst) const; 1700 bool validateVGPRAlign(const MCInst &Inst) const; 1701 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1702 bool validateDS(const MCInst &Inst, const OperandVector &Operands); 1703 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1704 bool validateDivScale(const MCInst &Inst); 1705 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands); 1706 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1707 const SMLoc &IDLoc); 1708 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands, 1709 const unsigned CPol); 1710 bool validateExeczVcczOperands(const OperandVector &Operands); 1711 bool validateTFE(const MCInst &Inst, const OperandVector &Operands); 1712 std::optional<StringRef> validateLdsDirect(const MCInst &Inst); 1713 unsigned getConstantBusLimit(unsigned Opcode) const; 1714 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1715 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1716 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1717 1718 bool isSupportedMnemo(StringRef Mnemo, 1719 const FeatureBitset &FBS); 1720 bool isSupportedMnemo(StringRef Mnemo, 1721 const FeatureBitset &FBS, 1722 ArrayRef<unsigned> Variants); 1723 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1724 1725 bool isId(const StringRef Id) const; 1726 bool isId(const AsmToken &Token, const StringRef Id) const; 1727 bool isToken(const AsmToken::TokenKind Kind) const; 1728 StringRef getId() const; 1729 bool trySkipId(const StringRef Id); 1730 bool trySkipId(const StringRef Pref, const StringRef Id); 1731 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1732 bool trySkipToken(const AsmToken::TokenKind Kind); 1733 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1734 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1735 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1736 1737 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1738 AsmToken::TokenKind getTokenKind() const; 1739 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1740 bool 
parseExpr(OperandVector &Operands); 1741 StringRef getTokenStr() const; 1742 AsmToken peekToken(bool ShouldSkipSpace = true); 1743 AsmToken getToken() const; 1744 SMLoc getLoc() const; 1745 void lex(); 1746 1747 public: 1748 void onBeginOfFile() override; 1749 1750 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK); 1751 1752 ParseStatus parseExpTgt(OperandVector &Operands); 1753 ParseStatus parseSendMsg(OperandVector &Operands); 1754 ParseStatus parseInterpSlot(OperandVector &Operands); 1755 ParseStatus parseInterpAttr(OperandVector &Operands); 1756 ParseStatus parseSOPPBrTarget(OperandVector &Operands); 1757 ParseStatus parseBoolReg(OperandVector &Operands); 1758 1759 bool parseSwizzleOperand(int64_t &Op, 1760 const unsigned MinVal, 1761 const unsigned MaxVal, 1762 const StringRef ErrMsg, 1763 SMLoc &Loc); 1764 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1765 const unsigned MinVal, 1766 const unsigned MaxVal, 1767 const StringRef ErrMsg); 1768 ParseStatus parseSwizzle(OperandVector &Operands); 1769 bool parseSwizzleOffset(int64_t &Imm); 1770 bool parseSwizzleMacro(int64_t &Imm); 1771 bool parseSwizzleQuadPerm(int64_t &Imm); 1772 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1773 bool parseSwizzleBroadcast(int64_t &Imm); 1774 bool parseSwizzleSwap(int64_t &Imm); 1775 bool parseSwizzleReverse(int64_t &Imm); 1776 1777 ParseStatus parseGPRIdxMode(OperandVector &Operands); 1778 int64_t parseGPRIdxMacro(); 1779 1780 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1781 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1782 1783 ParseStatus parseOModSI(OperandVector &Operands); 1784 1785 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1786 OptionalImmIndexMap &OptionalIdx); 1787 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1788 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1789 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1790 void cvtVOPD(MCInst &Inst, const OperandVector &Operands); 1791 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 1792 OptionalImmIndexMap &OptionalIdx); 1793 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1794 OptionalImmIndexMap &OptionalIdx); 1795 1796 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1797 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1798 1799 bool parseDimId(unsigned &Encoding); 1800 ParseStatus parseDim(OperandVector &Operands); 1801 bool convertDppBoundCtrl(int64_t &BoundCtrl); 1802 ParseStatus parseDPP8(OperandVector &Operands); 1803 ParseStatus parseDPPCtrl(OperandVector &Operands); 1804 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1805 int64_t parseDPPCtrlSel(StringRef Ctrl); 1806 int64_t parseDPPCtrlPerm(); 1807 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1808 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1809 cvtDPP(Inst, Operands, true); 1810 } 1811 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1812 bool IsDPP8 = false); 1813 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1814 cvtVOP3DPP(Inst, Operands, true); 1815 } 1816 1817 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix, 1818 AMDGPUOperand::ImmTy Type); 1819 ParseStatus parseSDWADstUnused(OperandVector &Operands); 1820 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1821 void cvtSdwaVOP2(MCInst &Inst, 
const OperandVector &Operands); 1822 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1823 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1824 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1825 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1826 uint64_t BasicInstType, 1827 bool SkipDstVcc = false, 1828 bool SkipSrcVcc = false); 1829 1830 ParseStatus parseEndpgm(OperandVector &Operands); 1831 1832 ParseStatus parseVOPD(OperandVector &Operands); 1833 }; 1834 1835 } // end anonymous namespace 1836 1837 // May be called with integer type with equivalent bitwidth. 1838 static const fltSemantics *getFltSemantics(unsigned Size) { 1839 switch (Size) { 1840 case 4: 1841 return &APFloat::IEEEsingle(); 1842 case 8: 1843 return &APFloat::IEEEdouble(); 1844 case 2: 1845 return &APFloat::IEEEhalf(); 1846 default: 1847 llvm_unreachable("unsupported fp type"); 1848 } 1849 } 1850 1851 static const fltSemantics *getFltSemantics(MVT VT) { 1852 return getFltSemantics(VT.getSizeInBits() / 8); 1853 } 1854 1855 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1856 switch (OperandType) { 1857 case AMDGPU::OPERAND_REG_IMM_INT32: 1858 case AMDGPU::OPERAND_REG_IMM_FP32: 1859 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1860 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1861 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1862 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1863 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1864 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1865 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1866 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1867 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1868 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1869 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1870 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1871 case AMDGPU::OPERAND_KIMM32: 1872 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 1873 return &APFloat::IEEEsingle(); 1874 case AMDGPU::OPERAND_REG_IMM_INT64: 1875 case AMDGPU::OPERAND_REG_IMM_FP64: 1876 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1877 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1878 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1879 return &APFloat::IEEEdouble(); 1880 case AMDGPU::OPERAND_REG_IMM_INT16: 1881 case AMDGPU::OPERAND_REG_IMM_FP16: 1882 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1883 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1884 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1885 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1886 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1887 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1888 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1889 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1890 case AMDGPU::OPERAND_KIMM16: 1891 return &APFloat::IEEEhalf(); 1892 default: 1893 llvm_unreachable("unsupported fp type"); 1894 } 1895 } 1896 1897 //===----------------------------------------------------------------------===// 1898 // Operand 1899 //===----------------------------------------------------------------------===// 1900 1901 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1902 bool Lost; 1903 1904 // Convert literal to single precision 1905 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1906 APFloat::rmNearestTiesToEven, 1907 &Lost); 1908 // We allow precision lost but not overflow or underflow 1909 if (Status != APFloat::opOK && 1910 Lost && 1911 ((Status & APFloat::opOverflow) != 0 || 1912 (Status & APFloat::opUnderflow) != 0)) { 1913 return false; 1914 } 1915 1916 return true; 1917 } 1918 1919 static bool isSafeTruncation(int64_t Val, unsigned Size) { 
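  // A value can be safely truncated to Size bits if it is representable either
  // as an unsigned or as a signed Size-bit integer. For instance, with Size == 16
  // both 0xFFFF (unsigned) and -1 (signed) are accepted, while 0x10000 is not.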
1920 return isUIntN(Size, Val) || isIntN(Size, Val); 1921 } 1922 1923 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1924 if (VT.getScalarType() == MVT::i16) { 1925 // FP immediate values are broken. 1926 return isInlinableIntLiteral(Val); 1927 } 1928 1929 // f16/v2f16 operands work correctly for all values. 1930 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1931 } 1932 1933 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1934 1935 // This is a hack to enable named inline values like 1936 // shared_base with both 32-bit and 64-bit operands. 1937 // Note that these values are defined as 1938 // 32-bit operands only. 1939 if (isInlineValue()) { 1940 return true; 1941 } 1942 1943 if (!isImmTy(ImmTyNone)) { 1944 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1945 return false; 1946 } 1947 // TODO: We should avoid using host float here. It would be better to 1948 // check the float bit values which is what a few other places do. 1949 // We've had bot failures before due to weird NaN support on mips hosts. 1950 1951 APInt Literal(64, Imm.Val); 1952 1953 if (Imm.IsFPImm) { // We got fp literal token 1954 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1955 return AMDGPU::isInlinableLiteral64(Imm.Val, 1956 AsmParser->hasInv2PiInlineImm()); 1957 } 1958 1959 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1960 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1961 return false; 1962 1963 if (type.getScalarSizeInBits() == 16) { 1964 return isInlineableLiteralOp16( 1965 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1966 type, AsmParser->hasInv2PiInlineImm()); 1967 } 1968 1969 // Check if single precision literal is inlinable 1970 return AMDGPU::isInlinableLiteral32( 1971 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1972 AsmParser->hasInv2PiInlineImm()); 1973 } 1974 1975 // We got int literal token. 1976 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1977 return AMDGPU::isInlinableLiteral64(Imm.Val, 1978 AsmParser->hasInv2PiInlineImm()); 1979 } 1980 1981 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1982 return false; 1983 } 1984 1985 if (type.getScalarSizeInBits() == 16) { 1986 return isInlineableLiteralOp16( 1987 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1988 type, AsmParser->hasInv2PiInlineImm()); 1989 } 1990 1991 return AMDGPU::isInlinableLiteral32( 1992 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1993 AsmParser->hasInv2PiInlineImm()); 1994 } 1995 1996 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1997 // Check that this immediate can be added as literal 1998 if (!isImmTy(ImmTyNone)) { 1999 return false; 2000 } 2001 2002 if (!Imm.IsFPImm) { 2003 // We got int literal token. 2004 2005 if (type == MVT::f64 && hasFPModifiers()) { 2006 // Cannot apply fp modifiers to int literals preserving the same semantics 2007 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 2008 // disable these cases. 2009 return false; 2010 } 2011 2012 unsigned Size = type.getSizeInBits(); 2013 if (Size == 64) 2014 Size = 32; 2015 2016 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 2017 // types. 
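    // Note that the encoded literal field is only 32 bits wide, which is why
    // Size is capped at 32 above; how the 32-bit literal is extended to 64 bits
    // is decided later, in addLiteralImmOperand().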
2018 return isSafeTruncation(Imm.Val, Size); 2019 } 2020 2021 // We got fp literal token 2022 if (type == MVT::f64) { // Expected 64-bit fp operand 2023 // We would set low 64-bits of literal to zeroes but we accept this literals 2024 return true; 2025 } 2026 2027 if (type == MVT::i64) { // Expected 64-bit int operand 2028 // We don't allow fp literals in 64-bit integer instructions. It is 2029 // unclear how we should encode them. 2030 return false; 2031 } 2032 2033 // We allow fp literals with f16x2 operands assuming that the specified 2034 // literal goes into the lower half and the upper half is zero. We also 2035 // require that the literal may be losslessly converted to f16. 2036 // 2037 // For i16x2 operands, we assume that the specified literal is encoded as a 2038 // single-precision float. This is pretty odd, but it matches SP3 and what 2039 // happens in hardware. 2040 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 2041 : (type == MVT::v2i16) ? MVT::f32 2042 : (type == MVT::v2f32) ? MVT::f32 2043 : type; 2044 2045 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2046 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2047 } 2048 2049 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2050 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2051 } 2052 2053 bool AMDGPUOperand::isVRegWithInputMods() const { 2054 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2055 // GFX90A allows DPP on 64-bit operands. 2056 (isRegClass(AMDGPU::VReg_64RegClassID) && 2057 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]); 2058 } 2059 2060 bool AMDGPUOperand::isT16VRegWithInputMods() const { 2061 return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID); 2062 } 2063 2064 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2065 if (AsmParser->isVI()) 2066 return isVReg32(); 2067 else if (AsmParser->isGFX9Plus()) 2068 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2069 else 2070 return false; 2071 } 2072 2073 bool AMDGPUOperand::isSDWAFP16Operand() const { 2074 return isSDWAOperand(MVT::f16); 2075 } 2076 2077 bool AMDGPUOperand::isSDWAFP32Operand() const { 2078 return isSDWAOperand(MVT::f32); 2079 } 2080 2081 bool AMDGPUOperand::isSDWAInt16Operand() const { 2082 return isSDWAOperand(MVT::i16); 2083 } 2084 2085 bool AMDGPUOperand::isSDWAInt32Operand() const { 2086 return isSDWAOperand(MVT::i32); 2087 } 2088 2089 bool AMDGPUOperand::isBoolReg() const { 2090 auto FB = AsmParser->getFeatureBits(); 2091 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2092 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2093 } 2094 2095 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2096 { 2097 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2098 assert(Size == 2 || Size == 4 || Size == 8); 2099 2100 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2101 2102 if (Imm.Mods.Abs) { 2103 Val &= ~FpSignMask; 2104 } 2105 if (Imm.Mods.Neg) { 2106 Val ^= FpSignMask; 2107 } 2108 2109 return Val; 2110 } 2111 2112 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2113 if (isExpr()) { 2114 Inst.addOperand(MCOperand::createExpr(Expr)); 2115 return; 2116 } 2117 2118 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2119 Inst.getNumOperands())) { 2120 addLiteralImmOperand(Inst, Imm.Val, 2121 ApplyModifiers & 2122 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2123 } else { 2124 assert(!isImmTy(ImmTyNone) || 
!hasModifiers()); 2125 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2126 setImmKindNone(); 2127 } 2128 } 2129 2130 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2131 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2132 auto OpNum = Inst.getNumOperands(); 2133 // Check that this operand accepts literals 2134 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2135 2136 if (ApplyModifiers) { 2137 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2138 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 2139 Val = applyInputFPModifiers(Val, Size); 2140 } 2141 2142 APInt Literal(64, Val); 2143 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType; 2144 2145 if (Imm.IsFPImm) { // We got fp literal token 2146 switch (OpTy) { 2147 case AMDGPU::OPERAND_REG_IMM_INT64: 2148 case AMDGPU::OPERAND_REG_IMM_FP64: 2149 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2150 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2151 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2152 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2153 AsmParser->hasInv2PiInlineImm())) { 2154 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2155 setImmKindConst(); 2156 return; 2157 } 2158 2159 // Non-inlineable 2160 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2161 // For fp operands we check if low 32 bits are zeros 2162 if (Literal.getLoBits(32) != 0) { 2163 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2164 "Can't encode literal as exact 64-bit floating-point operand. " 2165 "Low 32-bits will be set to zero"); 2166 Val &= 0xffffffff00000000u; 2167 } 2168 2169 Inst.addOperand(MCOperand::createImm(Val)); 2170 setImmKindLiteral(); 2171 return; 2172 } 2173 2174 // We don't allow fp literals in 64-bit integer instructions. It is 2175 // unclear how we should encode them. This case should be checked earlier 2176 // in predicate methods (isLiteralImm()) 2177 llvm_unreachable("fp literal in 64-bit integer instruction."); 2178 2179 case AMDGPU::OPERAND_REG_IMM_INT32: 2180 case AMDGPU::OPERAND_REG_IMM_FP32: 2181 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2182 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2183 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2184 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2185 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2186 case AMDGPU::OPERAND_REG_IMM_INT16: 2187 case AMDGPU::OPERAND_REG_IMM_FP16: 2188 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2189 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2190 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2191 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2192 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2193 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2194 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2195 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2196 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2197 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2198 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2199 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2200 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2201 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2202 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2203 case AMDGPU::OPERAND_KIMM32: 2204 case AMDGPU::OPERAND_KIMM16: 2205 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { 2206 bool lost; 2207 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2208 // Convert literal to single precision 2209 FPLiteral.convert(*getOpFltSemantics(OpTy), 2210 APFloat::rmNearestTiesToEven, &lost); 2211 // We allow precision lost but not overflow or underflow. 
This should be 2212 // checked earlier in isLiteralImm() 2213 2214 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2215 Inst.addOperand(MCOperand::createImm(ImmVal)); 2216 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) { 2217 setImmKindMandatoryLiteral(); 2218 } else { 2219 setImmKindLiteral(); 2220 } 2221 return; 2222 } 2223 default: 2224 llvm_unreachable("invalid operand size"); 2225 } 2226 2227 return; 2228 } 2229 2230 // We got int literal token. 2231 // Only sign extend inline immediates. 2232 switch (OpTy) { 2233 case AMDGPU::OPERAND_REG_IMM_INT32: 2234 case AMDGPU::OPERAND_REG_IMM_FP32: 2235 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2236 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2237 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2238 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2239 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2240 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2241 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2242 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2243 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2244 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2245 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2246 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 2247 if (isSafeTruncation(Val, 32) && 2248 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2249 AsmParser->hasInv2PiInlineImm())) { 2250 Inst.addOperand(MCOperand::createImm(Val)); 2251 setImmKindConst(); 2252 return; 2253 } 2254 2255 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2256 setImmKindLiteral(); 2257 return; 2258 2259 case AMDGPU::OPERAND_REG_IMM_INT64: 2260 case AMDGPU::OPERAND_REG_IMM_FP64: 2261 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2262 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2263 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2264 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2265 Inst.addOperand(MCOperand::createImm(Val)); 2266 setImmKindConst(); 2267 return; 2268 } 2269 2270 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? 
(uint64_t)Val << 32 2271 : Lo_32(Val); 2272 2273 Inst.addOperand(MCOperand::createImm(Val)); 2274 setImmKindLiteral(); 2275 return; 2276 2277 case AMDGPU::OPERAND_REG_IMM_INT16: 2278 case AMDGPU::OPERAND_REG_IMM_FP16: 2279 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2280 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2281 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2282 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2283 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2284 if (isSafeTruncation(Val, 16) && 2285 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2286 AsmParser->hasInv2PiInlineImm())) { 2287 Inst.addOperand(MCOperand::createImm(Val)); 2288 setImmKindConst(); 2289 return; 2290 } 2291 2292 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2293 setImmKindLiteral(); 2294 return; 2295 2296 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2297 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2298 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2299 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2300 assert(isSafeTruncation(Val, 16)); 2301 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2302 AsmParser->hasInv2PiInlineImm())); 2303 2304 Inst.addOperand(MCOperand::createImm(Val)); 2305 return; 2306 } 2307 case AMDGPU::OPERAND_KIMM32: 2308 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2309 setImmKindMandatoryLiteral(); 2310 return; 2311 case AMDGPU::OPERAND_KIMM16: 2312 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2313 setImmKindMandatoryLiteral(); 2314 return; 2315 default: 2316 llvm_unreachable("invalid operand size"); 2317 } 2318 } 2319 2320 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2321 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2322 } 2323 2324 bool AMDGPUOperand::isInlineValue() const { 2325 return isRegKind() && ::isInlineValue(getReg()); 2326 } 2327 2328 //===----------------------------------------------------------------------===// 2329 // AsmParser 2330 //===----------------------------------------------------------------------===// 2331 2332 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2333 if (Is == IS_VGPR) { 2334 switch (RegWidth) { 2335 default: return -1; 2336 case 32: 2337 return AMDGPU::VGPR_32RegClassID; 2338 case 64: 2339 return AMDGPU::VReg_64RegClassID; 2340 case 96: 2341 return AMDGPU::VReg_96RegClassID; 2342 case 128: 2343 return AMDGPU::VReg_128RegClassID; 2344 case 160: 2345 return AMDGPU::VReg_160RegClassID; 2346 case 192: 2347 return AMDGPU::VReg_192RegClassID; 2348 case 224: 2349 return AMDGPU::VReg_224RegClassID; 2350 case 256: 2351 return AMDGPU::VReg_256RegClassID; 2352 case 288: 2353 return AMDGPU::VReg_288RegClassID; 2354 case 320: 2355 return AMDGPU::VReg_320RegClassID; 2356 case 352: 2357 return AMDGPU::VReg_352RegClassID; 2358 case 384: 2359 return AMDGPU::VReg_384RegClassID; 2360 case 512: 2361 return AMDGPU::VReg_512RegClassID; 2362 case 1024: 2363 return AMDGPU::VReg_1024RegClassID; 2364 } 2365 } else if (Is == IS_TTMP) { 2366 switch (RegWidth) { 2367 default: return -1; 2368 case 32: 2369 return AMDGPU::TTMP_32RegClassID; 2370 case 64: 2371 return AMDGPU::TTMP_64RegClassID; 2372 case 128: 2373 return AMDGPU::TTMP_128RegClassID; 2374 case 256: 2375 return AMDGPU::TTMP_256RegClassID; 2376 case 512: 2377 return AMDGPU::TTMP_512RegClassID; 2378 } 2379 } else if (Is == IS_SGPR) { 2380 switch (RegWidth) { 2381 default: return -1; 2382 case 32: 2383 return AMDGPU::SGPR_32RegClassID; 2384 case 64: 2385 return 
AMDGPU::SGPR_64RegClassID; 2386 case 96: 2387 return AMDGPU::SGPR_96RegClassID; 2388 case 128: 2389 return AMDGPU::SGPR_128RegClassID; 2390 case 160: 2391 return AMDGPU::SGPR_160RegClassID; 2392 case 192: 2393 return AMDGPU::SGPR_192RegClassID; 2394 case 224: 2395 return AMDGPU::SGPR_224RegClassID; 2396 case 256: 2397 return AMDGPU::SGPR_256RegClassID; 2398 case 288: 2399 return AMDGPU::SGPR_288RegClassID; 2400 case 320: 2401 return AMDGPU::SGPR_320RegClassID; 2402 case 352: 2403 return AMDGPU::SGPR_352RegClassID; 2404 case 384: 2405 return AMDGPU::SGPR_384RegClassID; 2406 case 512: 2407 return AMDGPU::SGPR_512RegClassID; 2408 } 2409 } else if (Is == IS_AGPR) { 2410 switch (RegWidth) { 2411 default: return -1; 2412 case 32: 2413 return AMDGPU::AGPR_32RegClassID; 2414 case 64: 2415 return AMDGPU::AReg_64RegClassID; 2416 case 96: 2417 return AMDGPU::AReg_96RegClassID; 2418 case 128: 2419 return AMDGPU::AReg_128RegClassID; 2420 case 160: 2421 return AMDGPU::AReg_160RegClassID; 2422 case 192: 2423 return AMDGPU::AReg_192RegClassID; 2424 case 224: 2425 return AMDGPU::AReg_224RegClassID; 2426 case 256: 2427 return AMDGPU::AReg_256RegClassID; 2428 case 288: 2429 return AMDGPU::AReg_288RegClassID; 2430 case 320: 2431 return AMDGPU::AReg_320RegClassID; 2432 case 352: 2433 return AMDGPU::AReg_352RegClassID; 2434 case 384: 2435 return AMDGPU::AReg_384RegClassID; 2436 case 512: 2437 return AMDGPU::AReg_512RegClassID; 2438 case 1024: 2439 return AMDGPU::AReg_1024RegClassID; 2440 } 2441 } 2442 return -1; 2443 } 2444 2445 static unsigned getSpecialRegForName(StringRef RegName) { 2446 return StringSwitch<unsigned>(RegName) 2447 .Case("exec", AMDGPU::EXEC) 2448 .Case("vcc", AMDGPU::VCC) 2449 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2450 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2451 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2452 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2453 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2454 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2455 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2456 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2457 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2458 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2459 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2460 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2461 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2462 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2463 .Case("m0", AMDGPU::M0) 2464 .Case("vccz", AMDGPU::SRC_VCCZ) 2465 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2466 .Case("execz", AMDGPU::SRC_EXECZ) 2467 .Case("src_execz", AMDGPU::SRC_EXECZ) 2468 .Case("scc", AMDGPU::SRC_SCC) 2469 .Case("src_scc", AMDGPU::SRC_SCC) 2470 .Case("tba", AMDGPU::TBA) 2471 .Case("tma", AMDGPU::TMA) 2472 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2473 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2474 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2475 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2476 .Case("vcc_lo", AMDGPU::VCC_LO) 2477 .Case("vcc_hi", AMDGPU::VCC_HI) 2478 .Case("exec_lo", AMDGPU::EXEC_LO) 2479 .Case("exec_hi", AMDGPU::EXEC_HI) 2480 .Case("tma_lo", AMDGPU::TMA_LO) 2481 .Case("tma_hi", AMDGPU::TMA_HI) 2482 .Case("tba_lo", AMDGPU::TBA_LO) 2483 .Case("tba_hi", AMDGPU::TBA_HI) 2484 .Case("pc", AMDGPU::PC_REG) 2485 .Case("null", AMDGPU::SGPR_NULL) 2486 .Default(AMDGPU::NoRegister); 2487 } 2488 2489 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, 2490 SMLoc &EndLoc, bool RestoreOnFailure) { 2491 auto R = 
parseRegister(); 2492 if (!R) return true; 2493 assert(R->isReg()); 2494 RegNo = R->getReg(); 2495 StartLoc = R->getStartLoc(); 2496 EndLoc = R->getEndLoc(); 2497 return false; 2498 } 2499 2500 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, 2501 SMLoc &EndLoc) { 2502 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2503 } 2504 2505 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, 2506 SMLoc &EndLoc) { 2507 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2508 bool PendingErrors = getParser().hasPendingError(); 2509 getParser().clearPendingErrors(); 2510 if (PendingErrors) 2511 return ParseStatus::Failure; 2512 if (Result) 2513 return ParseStatus::NoMatch; 2514 return ParseStatus::Success; 2515 } 2516 2517 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2518 RegisterKind RegKind, unsigned Reg1, 2519 SMLoc Loc) { 2520 switch (RegKind) { 2521 case IS_SPECIAL: 2522 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2523 Reg = AMDGPU::EXEC; 2524 RegWidth = 64; 2525 return true; 2526 } 2527 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2528 Reg = AMDGPU::FLAT_SCR; 2529 RegWidth = 64; 2530 return true; 2531 } 2532 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2533 Reg = AMDGPU::XNACK_MASK; 2534 RegWidth = 64; 2535 return true; 2536 } 2537 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2538 Reg = AMDGPU::VCC; 2539 RegWidth = 64; 2540 return true; 2541 } 2542 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2543 Reg = AMDGPU::TBA; 2544 RegWidth = 64; 2545 return true; 2546 } 2547 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2548 Reg = AMDGPU::TMA; 2549 RegWidth = 64; 2550 return true; 2551 } 2552 Error(Loc, "register does not fit in the list"); 2553 return false; 2554 case IS_VGPR: 2555 case IS_SGPR: 2556 case IS_AGPR: 2557 case IS_TTMP: 2558 if (Reg1 != Reg + RegWidth / 32) { 2559 Error(Loc, "registers in a list must have consecutive indices"); 2560 return false; 2561 } 2562 RegWidth += 32; 2563 return true; 2564 default: 2565 llvm_unreachable("unexpected register kind"); 2566 } 2567 } 2568 2569 struct RegInfo { 2570 StringLiteral Name; 2571 RegisterKind Kind; 2572 }; 2573 2574 static constexpr RegInfo RegularRegisters[] = { 2575 {{"v"}, IS_VGPR}, 2576 {{"s"}, IS_SGPR}, 2577 {{"ttmp"}, IS_TTMP}, 2578 {{"acc"}, IS_AGPR}, 2579 {{"a"}, IS_AGPR}, 2580 }; 2581 2582 static bool isRegularReg(RegisterKind Kind) { 2583 return Kind == IS_VGPR || 2584 Kind == IS_SGPR || 2585 Kind == IS_TTMP || 2586 Kind == IS_AGPR; 2587 } 2588 2589 static const RegInfo* getRegularRegInfo(StringRef Str) { 2590 for (const RegInfo &Reg : RegularRegisters) 2591 if (Str.starts_with(Reg.Name)) 2592 return &Reg; 2593 return nullptr; 2594 } 2595 2596 static bool getRegNum(StringRef Str, unsigned& Num) { 2597 return !Str.getAsInteger(10, Num); 2598 } 2599 2600 bool 2601 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2602 const AsmToken &NextToken) const { 2603 2604 // A list of consecutive registers: [s0,s1,s2,s3] 2605 if (Token.is(AsmToken::LBrac)) 2606 return true; 2607 2608 if (!Token.is(AsmToken::Identifier)) 2609 return false; 2610 2611 // A single register like s0 or a range of registers like s[0:1] 2612 2613 StringRef Str = Token.getString(); 2614 const RegInfo *Reg = getRegularRegInfo(Str); 2615 if (Reg) { 2616 StringRef RegName = Reg->Name; 2617 StringRef RegSuffix = Str.substr(RegName.size()); 2618 if 
(!RegSuffix.empty()) { 2619 unsigned Num; 2620 // A single register with an index: rXX 2621 if (getRegNum(RegSuffix, Num)) 2622 return true; 2623 } else { 2624 // A range of registers: r[XX:YY]. 2625 if (NextToken.is(AsmToken::LBrac)) 2626 return true; 2627 } 2628 } 2629 2630 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2631 } 2632 2633 bool 2634 AMDGPUAsmParser::isRegister() 2635 { 2636 return isRegister(getToken(), peekToken()); 2637 } 2638 2639 unsigned 2640 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2641 unsigned RegNum, 2642 unsigned RegWidth, 2643 SMLoc Loc) { 2644 2645 assert(isRegularReg(RegKind)); 2646 2647 unsigned AlignSize = 1; 2648 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2649 // SGPR and TTMP registers must be aligned. 2650 // Max required alignment is 4 dwords. 2651 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u); 2652 } 2653 2654 if (RegNum % AlignSize != 0) { 2655 Error(Loc, "invalid register alignment"); 2656 return AMDGPU::NoRegister; 2657 } 2658 2659 unsigned RegIdx = RegNum / AlignSize; 2660 int RCID = getRegClass(RegKind, RegWidth); 2661 if (RCID == -1) { 2662 Error(Loc, "invalid or unsupported register size"); 2663 return AMDGPU::NoRegister; 2664 } 2665 2666 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2667 const MCRegisterClass RC = TRI->getRegClass(RCID); 2668 if (RegIdx >= RC.getNumRegs()) { 2669 Error(Loc, "register index is out of range"); 2670 return AMDGPU::NoRegister; 2671 } 2672 2673 return RC.getRegister(RegIdx); 2674 } 2675 2676 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2677 int64_t RegLo, RegHi; 2678 if (!skipToken(AsmToken::LBrac, "missing register index")) 2679 return false; 2680 2681 SMLoc FirstIdxLoc = getLoc(); 2682 SMLoc SecondIdxLoc; 2683 2684 if (!parseExpr(RegLo)) 2685 return false; 2686 2687 if (trySkipToken(AsmToken::Colon)) { 2688 SecondIdxLoc = getLoc(); 2689 if (!parseExpr(RegHi)) 2690 return false; 2691 } else { 2692 RegHi = RegLo; 2693 } 2694 2695 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2696 return false; 2697 2698 if (!isUInt<32>(RegLo)) { 2699 Error(FirstIdxLoc, "invalid register index"); 2700 return false; 2701 } 2702 2703 if (!isUInt<32>(RegHi)) { 2704 Error(SecondIdxLoc, "invalid register index"); 2705 return false; 2706 } 2707 2708 if (RegLo > RegHi) { 2709 Error(FirstIdxLoc, "first register index should not exceed second index"); 2710 return false; 2711 } 2712 2713 Num = static_cast<unsigned>(RegLo); 2714 RegWidth = 32 * ((RegHi - RegLo) + 1); 2715 return true; 2716 } 2717 2718 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2719 unsigned &RegNum, unsigned &RegWidth, 2720 SmallVectorImpl<AsmToken> &Tokens) { 2721 assert(isToken(AsmToken::Identifier)); 2722 unsigned Reg = getSpecialRegForName(getTokenStr()); 2723 if (Reg) { 2724 RegNum = 0; 2725 RegWidth = 32; 2726 RegKind = IS_SPECIAL; 2727 Tokens.push_back(getToken()); 2728 lex(); // skip register name 2729 } 2730 return Reg; 2731 } 2732 2733 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2734 unsigned &RegNum, unsigned &RegWidth, 2735 SmallVectorImpl<AsmToken> &Tokens) { 2736 assert(isToken(AsmToken::Identifier)); 2737 StringRef RegName = getTokenStr(); 2738 auto Loc = getLoc(); 2739 2740 const RegInfo *RI = getRegularRegInfo(RegName); 2741 if (!RI) { 2742 Error(Loc, "invalid register name"); 2743 return AMDGPU::NoRegister; 2744 } 2745 2746 Tokens.push_back(getToken()); 2747 lex(); // skip register name 2748 2749 RegKind = RI->Kind; 
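  // Two forms are accepted from here on, for example:
  //   v255    - a single 32-bit register, the suffix being its index
  //   s[4:7]  - a bracketed range, parsed by ParseRegRange() into RegNum = 4
  //             and RegWidth = 128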
2750   StringRef RegSuffix = RegName.substr(RI->Name.size());
2751   if (!RegSuffix.empty()) {
2752     // Single 32-bit register: vXX.
2753     if (!getRegNum(RegSuffix, RegNum)) {
2754       Error(Loc, "invalid register index");
2755       return AMDGPU::NoRegister;
2756     }
2757     RegWidth = 32;
2758   } else {
2759     // Range of registers: v[XX:YY]. ":YY" is optional.
2760     if (!ParseRegRange(RegNum, RegWidth))
2761       return AMDGPU::NoRegister;
2762   }
2763 
2764   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2765 }
2766 
2767 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2768                                        unsigned &RegWidth,
2769                                        SmallVectorImpl<AsmToken> &Tokens) {
2770   unsigned Reg = AMDGPU::NoRegister;
2771   auto ListLoc = getLoc();
2772 
2773   if (!skipToken(AsmToken::LBrac,
2774                  "expected a register or a list of registers")) {
2775     return AMDGPU::NoRegister;
2776   }
2777 
2778   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2779 
2780   auto Loc = getLoc();
2781   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2782     return AMDGPU::NoRegister;
2783   if (RegWidth != 32) {
2784     Error(Loc, "expected a single 32-bit register");
2785     return AMDGPU::NoRegister;
2786   }
2787 
2788   for (; trySkipToken(AsmToken::Comma); ) {
2789     RegisterKind NextRegKind;
2790     unsigned NextReg, NextRegNum, NextRegWidth;
2791     Loc = getLoc();
2792 
2793     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2794                              NextRegNum, NextRegWidth,
2795                              Tokens)) {
2796       return AMDGPU::NoRegister;
2797     }
2798     if (NextRegWidth != 32) {
2799       Error(Loc, "expected a single 32-bit register");
2800       return AMDGPU::NoRegister;
2801     }
2802     if (NextRegKind != RegKind) {
2803       Error(Loc, "registers in a list must be of the same kind");
2804       return AMDGPU::NoRegister;
2805     }
2806     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2807       return AMDGPU::NoRegister;
2808   }
2809 
2810   if (!skipToken(AsmToken::RBrac,
2811                  "expected a comma or a closing square bracket")) {
2812     return AMDGPU::NoRegister;
2813   }
2814 
2815   if (isRegularReg(RegKind))
2816     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2817 
2818   return Reg;
2819 }
2820 
2821 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2822                                           unsigned &RegNum, unsigned &RegWidth,
2823                                           SmallVectorImpl<AsmToken> &Tokens) {
2824   auto Loc = getLoc();
2825   Reg = AMDGPU::NoRegister;
2826 
2827   if (isToken(AsmToken::Identifier)) {
2828     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2829     if (Reg == AMDGPU::NoRegister)
2830       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2831   } else {
2832     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2833   }
2834 
2835   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2836   if (Reg == AMDGPU::NoRegister) {
2837     assert(Parser.hasPendingError());
2838     return false;
2839   }
2840 
2841   if (!subtargetHasRegister(*TRI, Reg)) {
2842     if (Reg == AMDGPU::SGPR_NULL) {
2843       Error(Loc, "'null' operand is not supported on this GPU");
2844     } else {
2845       Error(Loc, "register not available on this GPU");
2846     }
2847     return false;
2848   }
2849 
2850   return true;
2851 }
2852 
2853 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2854                                           unsigned &RegNum, unsigned &RegWidth,
2855                                           bool RestoreOnFailure /*=false*/) {
2856   Reg = AMDGPU::NoRegister;
2857 
2858   SmallVector<AsmToken, 1> Tokens;
2859   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2860     return true;
2861   }
2862   if (RestoreOnFailure) { // Failed: hand the consumed tokens back to the lexer.
2863     while (!Tokens.empty()) {
2864       getLexer().UnLex(Tokens.pop_back_val());
2865     }
2866   }
2867
return false; 2868 } 2869 2870 std::optional<StringRef> 2871 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2872 switch (RegKind) { 2873 case IS_VGPR: 2874 return StringRef(".amdgcn.next_free_vgpr"); 2875 case IS_SGPR: 2876 return StringRef(".amdgcn.next_free_sgpr"); 2877 default: 2878 return std::nullopt; 2879 } 2880 } 2881 2882 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2883 auto SymbolName = getGprCountSymbolName(RegKind); 2884 assert(SymbolName && "initializing invalid register kind"); 2885 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2886 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2887 } 2888 2889 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2890 unsigned DwordRegIndex, 2891 unsigned RegWidth) { 2892 // Symbols are only defined for GCN targets 2893 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2894 return true; 2895 2896 auto SymbolName = getGprCountSymbolName(RegKind); 2897 if (!SymbolName) 2898 return true; 2899 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2900 2901 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2902 int64_t OldCount; 2903 2904 if (!Sym->isVariable()) 2905 return !Error(getLoc(), 2906 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2907 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2908 return !Error( 2909 getLoc(), 2910 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2911 2912 if (OldCount <= NewMax) 2913 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2914 2915 return true; 2916 } 2917 2918 std::unique_ptr<AMDGPUOperand> 2919 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2920 const auto &Tok = getToken(); 2921 SMLoc StartLoc = Tok.getLoc(); 2922 SMLoc EndLoc = Tok.getEndLoc(); 2923 RegisterKind RegKind; 2924 unsigned Reg, RegNum, RegWidth; 2925 2926 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2927 return nullptr; 2928 } 2929 if (isHsaAbi(getSTI())) { 2930 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2931 return nullptr; 2932 } else 2933 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2934 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2935 } 2936 2937 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands, 2938 bool HasSP3AbsModifier, bool HasLit) { 2939 // TODO: add syntactic sugar for 1/(2*PI) 2940 2941 if (isRegister()) 2942 return ParseStatus::NoMatch; 2943 assert(!isModifier()); 2944 2945 if (!HasLit) { 2946 HasLit = trySkipId("lit"); 2947 if (HasLit) { 2948 if (!skipToken(AsmToken::LParen, "expected left paren after lit")) 2949 return ParseStatus::Failure; 2950 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit); 2951 if (S.isSuccess() && 2952 !skipToken(AsmToken::RParen, "expected closing parentheses")) 2953 return ParseStatus::Failure; 2954 return S; 2955 } 2956 } 2957 2958 const auto& Tok = getToken(); 2959 const auto& NextTok = peekToken(); 2960 bool IsReal = Tok.is(AsmToken::Real); 2961 SMLoc S = getLoc(); 2962 bool Negate = false; 2963 2964 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2965 lex(); 2966 IsReal = true; 2967 Negate = true; 2968 } 2969 2970 AMDGPUOperand::Modifiers Mods; 2971 Mods.Lit = HasLit; 2972 2973 if (IsReal) { 2974 // Floating-point expressions are not supported. 2975 // Can only allow floating-point literals with an 2976 // optional sign. 
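    // For example, "1.0", "0.5" and "-2.5" are accepted here; only the leading
    // sign is folded into the value, no other arithmetic is applied to the
    // literal.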
2977 2978 StringRef Num = getTokenStr(); 2979 lex(); 2980 2981 APFloat RealVal(APFloat::IEEEdouble()); 2982 auto roundMode = APFloat::rmNearestTiesToEven; 2983 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) 2984 return ParseStatus::Failure; 2985 if (Negate) 2986 RealVal.changeSign(); 2987 2988 Operands.push_back( 2989 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2990 AMDGPUOperand::ImmTyNone, true)); 2991 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2992 Op.setModifiers(Mods); 2993 2994 return ParseStatus::Success; 2995 2996 } else { 2997 int64_t IntVal; 2998 const MCExpr *Expr; 2999 SMLoc S = getLoc(); 3000 3001 if (HasSP3AbsModifier) { 3002 // This is a workaround for handling expressions 3003 // as arguments of SP3 'abs' modifier, for example: 3004 // |1.0| 3005 // |-1| 3006 // |1+x| 3007 // This syntax is not compatible with syntax of standard 3008 // MC expressions (due to the trailing '|'). 3009 SMLoc EndLoc; 3010 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 3011 return ParseStatus::Failure; 3012 } else { 3013 if (Parser.parseExpression(Expr)) 3014 return ParseStatus::Failure; 3015 } 3016 3017 if (Expr->evaluateAsAbsolute(IntVal)) { 3018 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 3019 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3020 Op.setModifiers(Mods); 3021 } else { 3022 if (HasLit) 3023 return ParseStatus::NoMatch; 3024 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3025 } 3026 3027 return ParseStatus::Success; 3028 } 3029 3030 return ParseStatus::NoMatch; 3031 } 3032 3033 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) { 3034 if (!isRegister()) 3035 return ParseStatus::NoMatch; 3036 3037 if (auto R = parseRegister()) { 3038 assert(R->isReg()); 3039 Operands.push_back(std::move(R)); 3040 return ParseStatus::Success; 3041 } 3042 return ParseStatus::Failure; 3043 } 3044 3045 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, 3046 bool HasSP3AbsMod, bool HasLit) { 3047 ParseStatus Res = parseReg(Operands); 3048 if (!Res.isNoMatch()) 3049 return Res; 3050 if (isModifier()) 3051 return ParseStatus::NoMatch; 3052 return parseImm(Operands, HasSP3AbsMod, HasLit); 3053 } 3054 3055 bool 3056 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3057 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3058 const auto &str = Token.getString(); 3059 return str == "abs" || str == "neg" || str == "sext"; 3060 } 3061 return false; 3062 } 3063 3064 bool 3065 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3066 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3067 } 3068 3069 bool 3070 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3071 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3072 } 3073 3074 bool 3075 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3076 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3077 } 3078 3079 // Check if this is an operand modifier or an opcode modifier 3080 // which may look like an expression but it is not. We should 3081 // avoid parsing these modifiers as expressions. Currently 3082 // recognized sequences are: 3083 // |...| 3084 // abs(...) 3085 // neg(...) 3086 // sext(...) 
3087 // -reg 3088 // -|...| 3089 // -abs(...) 3090 // name:... 3091 // 3092 bool 3093 AMDGPUAsmParser::isModifier() { 3094 3095 AsmToken Tok = getToken(); 3096 AsmToken NextToken[2]; 3097 peekTokens(NextToken); 3098 3099 return isOperandModifier(Tok, NextToken[0]) || 3100 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3101 isOpcodeModifierWithVal(Tok, NextToken[0]); 3102 } 3103 3104 // Check if the current token is an SP3 'neg' modifier. 3105 // Currently this modifier is allowed in the following context: 3106 // 3107 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3108 // 2. Before an 'abs' modifier: -abs(...) 3109 // 3. Before an SP3 'abs' modifier: -|...| 3110 // 3111 // In all other cases "-" is handled as a part 3112 // of an expression that follows the sign. 3113 // 3114 // Note: When "-" is followed by an integer literal, 3115 // this is interpreted as integer negation rather 3116 // than a floating-point NEG modifier applied to N. 3117 // Beside being contr-intuitive, such use of floating-point 3118 // NEG modifier would have resulted in different meaning 3119 // of integer literals used with VOP1/2/C and VOP3, 3120 // for example: 3121 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3122 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3123 // Negative fp literals with preceding "-" are 3124 // handled likewise for uniformity 3125 // 3126 bool 3127 AMDGPUAsmParser::parseSP3NegModifier() { 3128 3129 AsmToken NextToken[2]; 3130 peekTokens(NextToken); 3131 3132 if (isToken(AsmToken::Minus) && 3133 (isRegister(NextToken[0], NextToken[1]) || 3134 NextToken[0].is(AsmToken::Pipe) || 3135 isId(NextToken[0], "abs"))) { 3136 lex(); 3137 return true; 3138 } 3139 3140 return false; 3141 } 3142 3143 ParseStatus 3144 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3145 bool AllowImm) { 3146 bool Neg, SP3Neg; 3147 bool Abs, SP3Abs; 3148 bool Lit; 3149 SMLoc Loc; 3150 3151 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3152 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) 3153 return Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3154 3155 SP3Neg = parseSP3NegModifier(); 3156 3157 Loc = getLoc(); 3158 Neg = trySkipId("neg"); 3159 if (Neg && SP3Neg) 3160 return Error(Loc, "expected register or immediate"); 3161 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3162 return ParseStatus::Failure; 3163 3164 Abs = trySkipId("abs"); 3165 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3166 return ParseStatus::Failure; 3167 3168 Lit = trySkipId("lit"); 3169 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit")) 3170 return ParseStatus::Failure; 3171 3172 Loc = getLoc(); 3173 SP3Abs = trySkipToken(AsmToken::Pipe); 3174 if (Abs && SP3Abs) 3175 return Error(Loc, "expected register or immediate"); 3176 3177 ParseStatus Res; 3178 if (AllowImm) { 3179 Res = parseRegOrImm(Operands, SP3Abs, Lit); 3180 } else { 3181 Res = parseReg(Operands); 3182 } 3183 if (!Res.isSuccess()) 3184 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? 
ParseStatus::Failure : Res; 3185 3186 if (Lit && !Operands.back()->isImm()) 3187 Error(Loc, "expected immediate with lit modifier"); 3188 3189 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3190 return ParseStatus::Failure; 3191 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3192 return ParseStatus::Failure; 3193 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3194 return ParseStatus::Failure; 3195 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3196 return ParseStatus::Failure; 3197 3198 AMDGPUOperand::Modifiers Mods; 3199 Mods.Abs = Abs || SP3Abs; 3200 Mods.Neg = Neg || SP3Neg; 3201 Mods.Lit = Lit; 3202 3203 if (Mods.hasFPModifiers() || Lit) { 3204 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3205 if (Op.isExpr()) 3206 return Error(Op.getStartLoc(), "expected an absolute expression"); 3207 Op.setModifiers(Mods); 3208 } 3209 return ParseStatus::Success; 3210 } 3211 3212 ParseStatus 3213 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3214 bool AllowImm) { 3215 bool Sext = trySkipId("sext"); 3216 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3217 return ParseStatus::Failure; 3218 3219 ParseStatus Res; 3220 if (AllowImm) { 3221 Res = parseRegOrImm(Operands); 3222 } else { 3223 Res = parseReg(Operands); 3224 } 3225 if (!Res.isSuccess()) 3226 return Sext ? ParseStatus::Failure : Res; 3227 3228 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3229 return ParseStatus::Failure; 3230 3231 AMDGPUOperand::Modifiers Mods; 3232 Mods.Sext = Sext; 3233 3234 if (Mods.hasIntModifiers()) { 3235 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3236 if (Op.isExpr()) 3237 return Error(Op.getStartLoc(), "expected an absolute expression"); 3238 Op.setModifiers(Mods); 3239 } 3240 3241 return ParseStatus::Success; 3242 } 3243 3244 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3245 return parseRegOrImmWithFPInputMods(Operands, false); 3246 } 3247 3248 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3249 return parseRegOrImmWithIntInputMods(Operands, false); 3250 } 3251 3252 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3253 auto Loc = getLoc(); 3254 if (trySkipId("off")) { 3255 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3256 AMDGPUOperand::ImmTyOff, false)); 3257 return ParseStatus::Success; 3258 } 3259 3260 if (!isRegister()) 3261 return ParseStatus::NoMatch; 3262 3263 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3264 if (Reg) { 3265 Operands.push_back(std::move(Reg)); 3266 return ParseStatus::Success; 3267 } 3268 3269 return ParseStatus::Failure; 3270 } 3271 3272 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3273 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3274 3275 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3276 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3277 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3278 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3279 return Match_InvalidOperand; 3280 3281 if ((TSFlags & SIInstrFlags::VOP3) && 3282 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3283 getForcedEncodingSize() != 64) 3284 return Match_PreferE32; 3285 3286 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3287 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3288 // 
v_mac_f32/16 allow only dst_sel == DWORD; 3289 auto OpNum = 3290 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3291 const auto &Op = Inst.getOperand(OpNum); 3292 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3293 return Match_InvalidOperand; 3294 } 3295 } 3296 3297 return Match_Success; 3298 } 3299 3300 static ArrayRef<unsigned> getAllVariants() { 3301 static const unsigned Variants[] = { 3302 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3303 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3304 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3305 }; 3306 3307 return ArrayRef(Variants); 3308 } 3309 3310 // What asm variants we should check 3311 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3312 if (isForcedDPP() && isForcedVOP3()) { 3313 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3314 return ArrayRef(Variants); 3315 } 3316 if (getForcedEncodingSize() == 32) { 3317 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3318 return ArrayRef(Variants); 3319 } 3320 3321 if (isForcedVOP3()) { 3322 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3323 return ArrayRef(Variants); 3324 } 3325 3326 if (isForcedSDWA()) { 3327 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3328 AMDGPUAsmVariants::SDWA9}; 3329 return ArrayRef(Variants); 3330 } 3331 3332 if (isForcedDPP()) { 3333 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3334 return ArrayRef(Variants); 3335 } 3336 3337 return getAllVariants(); 3338 } 3339 3340 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3341 if (isForcedDPP() && isForcedVOP3()) 3342 return "e64_dpp"; 3343 3344 if (getForcedEncodingSize() == 32) 3345 return "e32"; 3346 3347 if (isForcedVOP3()) 3348 return "e64"; 3349 3350 if (isForcedSDWA()) 3351 return "sdwa"; 3352 3353 if (isForcedDPP()) 3354 return "dpp"; 3355 3356 return ""; 3357 } 3358 3359 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3360 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3361 for (MCPhysReg Reg : Desc.implicit_uses()) { 3362 switch (Reg) { 3363 case AMDGPU::FLAT_SCR: 3364 case AMDGPU::VCC: 3365 case AMDGPU::VCC_LO: 3366 case AMDGPU::VCC_HI: 3367 case AMDGPU::M0: 3368 return Reg; 3369 default: 3370 break; 3371 } 3372 } 3373 return AMDGPU::NoRegister; 3374 } 3375 3376 // NB: This code is correct only when used to check constant 3377 // bus limitations because GFX7 support no f16 inline constants. 3378 // Note that there are no cases when a GFX7 opcode violates 3379 // constant bus limitations due to the use of an f16 constant. 
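// Returns true if the immediate operand at OpIdx can be encoded as an inline
// constant. In usesConstantBus() below, immediates that fail this check are
// the ones that occupy a literal slot on the constant bus.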
3380 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3381 unsigned OpIdx) const { 3382 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3383 3384 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) || 3385 AMDGPU::isKImmOperand(Desc, OpIdx)) { 3386 return false; 3387 } 3388 3389 const MCOperand &MO = Inst.getOperand(OpIdx); 3390 3391 int64_t Val = MO.getImm(); 3392 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3393 3394 switch (OpSize) { // expected operand size 3395 case 8: 3396 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3397 case 4: 3398 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3399 case 2: { 3400 const unsigned OperandType = Desc.operands()[OpIdx].OperandType; 3401 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3402 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3403 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3404 return AMDGPU::isInlinableIntLiteral(Val); 3405 3406 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3407 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3408 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3409 return AMDGPU::isInlinableLiteralV2I16(Val); 3410 3411 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3412 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3413 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3414 return AMDGPU::isInlinableLiteralV2F16(Val); 3415 3416 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3417 } 3418 default: 3419 llvm_unreachable("invalid operand size"); 3420 } 3421 } 3422 3423 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3424 if (!isGFX10Plus()) 3425 return 1; 3426 3427 switch (Opcode) { 3428 // 64-bit shift instructions can use only one scalar value input 3429 case AMDGPU::V_LSHLREV_B64_e64: 3430 case AMDGPU::V_LSHLREV_B64_gfx10: 3431 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3432 case AMDGPU::V_LSHLREV_B64_e32_gfx12: 3433 case AMDGPU::V_LSHLREV_B64_e64_gfx12: 3434 case AMDGPU::V_LSHRREV_B64_e64: 3435 case AMDGPU::V_LSHRREV_B64_gfx10: 3436 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3437 case AMDGPU::V_LSHRREV_B64_e64_gfx12: 3438 case AMDGPU::V_ASHRREV_I64_e64: 3439 case AMDGPU::V_ASHRREV_I64_gfx10: 3440 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3441 case AMDGPU::V_ASHRREV_I64_e64_gfx12: 3442 case AMDGPU::V_LSHL_B64_e64: 3443 case AMDGPU::V_LSHR_B64_e64: 3444 case AMDGPU::V_ASHR_I64_e64: 3445 return 1; 3446 default: 3447 return 2; 3448 } 3449 } 3450 3451 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6; 3452 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>; 3453 3454 // Get regular operand indices in the same order as specified 3455 // in the instruction (but append mandatory literals to the end). 3456 static OperandIndices getSrcOperandIndices(unsigned Opcode, 3457 bool AddMandatoryLiterals = false) { 3458 3459 int16_t ImmIdx = 3460 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1; 3461 3462 if (isVOPD(Opcode)) { 3463 int16_t ImmDeferredIdx = 3464 AddMandatoryLiterals ? 
getNamedOperandIdx(Opcode, OpName::immDeferred) 3465 : -1; 3466 3467 return {getNamedOperandIdx(Opcode, OpName::src0X), 3468 getNamedOperandIdx(Opcode, OpName::vsrc1X), 3469 getNamedOperandIdx(Opcode, OpName::src0Y), 3470 getNamedOperandIdx(Opcode, OpName::vsrc1Y), 3471 ImmDeferredIdx, 3472 ImmIdx}; 3473 } 3474 3475 return {getNamedOperandIdx(Opcode, OpName::src0), 3476 getNamedOperandIdx(Opcode, OpName::src1), 3477 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx}; 3478 } 3479 3480 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3481 const MCOperand &MO = Inst.getOperand(OpIdx); 3482 if (MO.isImm()) { 3483 return !isInlineConstant(Inst, OpIdx); 3484 } else if (MO.isReg()) { 3485 auto Reg = MO.getReg(); 3486 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3487 auto PReg = mc2PseudoReg(Reg); 3488 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3489 } else { 3490 return true; 3491 } 3492 } 3493 3494 bool AMDGPUAsmParser::validateConstantBusLimitations( 3495 const MCInst &Inst, const OperandVector &Operands) { 3496 const unsigned Opcode = Inst.getOpcode(); 3497 const MCInstrDesc &Desc = MII.get(Opcode); 3498 unsigned LastSGPR = AMDGPU::NoRegister; 3499 unsigned ConstantBusUseCount = 0; 3500 unsigned NumLiterals = 0; 3501 unsigned LiteralSize; 3502 3503 if (!(Desc.TSFlags & 3504 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3505 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) && 3506 !isVOPD(Opcode)) 3507 return true; 3508 3509 // Check special imm operands (used by madmk, etc) 3510 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) { 3511 ++NumLiterals; 3512 LiteralSize = 4; 3513 } 3514 3515 SmallDenseSet<unsigned> SGPRsUsed; 3516 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3517 if (SGPRUsed != AMDGPU::NoRegister) { 3518 SGPRsUsed.insert(SGPRUsed); 3519 ++ConstantBusUseCount; 3520 } 3521 3522 OperandIndices OpIndices = getSrcOperandIndices(Opcode); 3523 3524 for (int OpIdx : OpIndices) { 3525 if (OpIdx == -1) 3526 continue; 3527 3528 const MCOperand &MO = Inst.getOperand(OpIdx); 3529 if (usesConstantBus(Inst, OpIdx)) { 3530 if (MO.isReg()) { 3531 LastSGPR = mc2PseudoReg(MO.getReg()); 3532 // Pairs of registers with a partial intersections like these 3533 // s0, s[0:1] 3534 // flat_scratch_lo, flat_scratch 3535 // flat_scratch_lo, flat_scratch_hi 3536 // are theoretically valid but they are disabled anyway. 3537 // Note that this code mimics SIInstrInfo::verifyInstruction 3538 if (SGPRsUsed.insert(LastSGPR).second) { 3539 ++ConstantBusUseCount; 3540 } 3541 } else { // Expression or a literal 3542 3543 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3544 continue; // special operand like VINTERP attr_chan 3545 3546 // An instruction may use only one literal. 3547 // This has been validated on the previous step. 3548 // See validateVOPLiteral. 3549 // This literal may be used as more than one operand. 3550 // If all these operands are of the same size, 3551 // this literal counts as one scalar value. 3552 // Otherwise it counts as 2 scalar values. 3553 // See "GFX10 Shader Programming", section 3.6.2.3. 
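          // For example, a 32-bit literal shared by two 32-bit sources occupies
          // a single slot, while the same literal read by a 32-bit and a 64-bit
          // source counts as two scalar values.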
3554 3555 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3556 if (Size < 4) 3557 Size = 4; 3558 3559 if (NumLiterals == 0) { 3560 NumLiterals = 1; 3561 LiteralSize = Size; 3562 } else if (LiteralSize != Size) { 3563 NumLiterals = 2; 3564 } 3565 } 3566 } 3567 } 3568 ConstantBusUseCount += NumLiterals; 3569 3570 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3571 return true; 3572 3573 SMLoc LitLoc = getLitLoc(Operands); 3574 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3575 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; 3576 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3577 return false; 3578 } 3579 3580 bool AMDGPUAsmParser::validateVOPDRegBankConstraints( 3581 const MCInst &Inst, const OperandVector &Operands) { 3582 3583 const unsigned Opcode = Inst.getOpcode(); 3584 if (!isVOPD(Opcode)) 3585 return true; 3586 3587 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3588 3589 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) { 3590 const MCOperand &Opr = Inst.getOperand(OperandIdx); 3591 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI)) 3592 ? Opr.getReg() 3593 : MCRegister::NoRegister; 3594 }; 3595 3596 // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. 3597 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; 3598 3599 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); 3600 auto InvalidCompOprIdx = 3601 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc); 3602 if (!InvalidCompOprIdx) 3603 return true; 3604 3605 auto CompOprIdx = *InvalidCompOprIdx; 3606 auto ParsedIdx = 3607 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), 3608 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); 3609 assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); 3610 3611 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); 3612 if (CompOprIdx == VOPD::Component::DST) { 3613 Error(Loc, "one dst register must be even and the other odd"); 3614 } else { 3615 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; 3616 Error(Loc, Twine("src") + Twine(CompSrcIdx) + 3617 " operands must use different VGPR banks"); 3618 } 3619 3620 return false; 3621 } 3622 3623 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3624 3625 const unsigned Opc = Inst.getOpcode(); 3626 const MCInstrDesc &Desc = MII.get(Opc); 3627 3628 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3629 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3630 assert(ClampIdx != -1); 3631 return Inst.getOperand(ClampIdx).getImm() == 0; 3632 } 3633 3634 return true; 3635 } 3636 3637 constexpr uint64_t MIMGFlags = 3638 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE; 3639 3640 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, 3641 const SMLoc &IDLoc) { 3642 3643 const unsigned Opc = Inst.getOpcode(); 3644 const MCInstrDesc &Desc = MII.get(Opc); 3645 3646 if ((Desc.TSFlags & MIMGFlags) == 0) 3647 return true; 3648 3649 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3650 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3651 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3652 3653 assert(VDataIdx != -1); 3654 3655 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray 3656 return true; 3657 3658 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3659 unsigned TFESize = (TFEIdx != 
-1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3660 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3661 if (DMask == 0) 3662 DMask = 1; 3663 3664 bool IsPackedD16 = false; 3665 unsigned DataSize = 3666 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask); 3667 if (hasPackedD16()) { 3668 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3669 IsPackedD16 = D16Idx >= 0; 3670 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm()) 3671 DataSize = (DataSize + 1) / 2; 3672 } 3673 3674 if ((VDataSize / 4) == DataSize + TFESize) 3675 return true; 3676 3677 StringRef Modifiers; 3678 if (isGFX90A()) 3679 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask"; 3680 else 3681 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe"; 3682 3683 Error(IDLoc, Twine("image data size does not match ") + Modifiers); 3684 return false; 3685 } 3686 3687 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, 3688 const SMLoc &IDLoc) { 3689 const unsigned Opc = Inst.getOpcode(); 3690 const MCInstrDesc &Desc = MII.get(Opc); 3691 3692 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus()) 3693 return true; 3694 3695 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3696 3697 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3698 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3699 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3700 int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc 3701 : AMDGPU::OpName::rsrc; 3702 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName); 3703 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3704 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3705 3706 assert(VAddr0Idx != -1); 3707 assert(SrsrcIdx != -1); 3708 assert(SrsrcIdx > VAddr0Idx); 3709 3710 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3711 if (BaseOpcode->BVH) { 3712 if (IsA16 == BaseOpcode->A16) 3713 return true; 3714 Error(IDLoc, "image address size does not match a16"); 3715 return false; 3716 } 3717 3718 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3719 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3720 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3721 unsigned ActualAddrSize = 3722 IsNSA ? SrsrcIdx - VAddr0Idx 3723 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3724 3725 unsigned ExpectedAddrSize = 3726 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3727 3728 if (IsNSA) { 3729 if (hasPartialNSAEncoding() && 3730 ExpectedAddrSize > 3731 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) { 3732 int VAddrLastIdx = SrsrcIdx - 1; 3733 unsigned VAddrLastSize = 3734 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4; 3735 3736 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; 3737 } 3738 } else { 3739 if (ExpectedAddrSize > 12) 3740 ExpectedAddrSize = 16; 3741 3742 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3743 // This provides backward compatibility for assembly created 3744 // before 160b/192b/224b types were directly supported. 
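// For example (illustrative): an image instruction with a contiguous vaddr
// tuple that only needs 6 address VGPRs may still be written with an
// 8-VGPR tuple such as v[0:7]; the check below accepts the oversized operand.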
3745 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3746 return true; 3747 } 3748 3749 if (ActualAddrSize == ExpectedAddrSize) 3750 return true; 3751 3752 Error(IDLoc, "image address size does not match dim and a16"); 3753 return false; 3754 } 3755 3756 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3757 3758 const unsigned Opc = Inst.getOpcode(); 3759 const MCInstrDesc &Desc = MII.get(Opc); 3760 3761 if ((Desc.TSFlags & MIMGFlags) == 0) 3762 return true; 3763 if (!Desc.mayLoad() || !Desc.mayStore()) 3764 return true; // Not atomic 3765 3766 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3767 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3768 3769 // This is an incomplete check because image_atomic_cmpswap 3770 // may only use 0x3 and 0xf while other atomic operations 3771 // may use 0x1 and 0x3. However these limitations are 3772 // verified when we check that dmask matches dst size. 3773 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3774 } 3775 3776 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3777 3778 const unsigned Opc = Inst.getOpcode(); 3779 const MCInstrDesc &Desc = MII.get(Opc); 3780 3781 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3782 return true; 3783 3784 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3785 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3786 3787 // GATHER4 instructions use dmask in a different fashion compared to 3788 // other MIMG instructions. The only useful DMASK values are 3789 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3790 // (red,red,red,red) etc.) The ISA document doesn't mention 3791 // this. 3792 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3793 } 3794 3795 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3796 const unsigned Opc = Inst.getOpcode(); 3797 const MCInstrDesc &Desc = MII.get(Opc); 3798 3799 if ((Desc.TSFlags & MIMGFlags) == 0) 3800 return true; 3801 3802 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3803 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3804 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3805 3806 if (!BaseOpcode->MSAA) 3807 return true; 3808 3809 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3810 assert(DimIdx != -1); 3811 3812 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3813 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3814 3815 return DimInfo->MSAA; 3816 } 3817 3818 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3819 { 3820 switch (Opcode) { 3821 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3822 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3823 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3824 return true; 3825 default: 3826 return false; 3827 } 3828 } 3829 3830 // movrels* opcodes should only allow VGPRS as src0. 3831 // This is specified in .td description for vop1/vop3, 3832 // but sdwa is handled differently. See isSDWAOperand. 
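// For example (illustrative): "v_movrels_b32_sdwa v0, s0" would be rejected
// below with "source operand must be a VGPR".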
3833 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3834 const OperandVector &Operands) { 3835 3836 const unsigned Opc = Inst.getOpcode(); 3837 const MCInstrDesc &Desc = MII.get(Opc); 3838 3839 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3840 return true; 3841 3842 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3843 assert(Src0Idx != -1); 3844 3845 SMLoc ErrLoc; 3846 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3847 if (Src0.isReg()) { 3848 auto Reg = mc2PseudoReg(Src0.getReg()); 3849 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3850 if (!isSGPR(Reg, TRI)) 3851 return true; 3852 ErrLoc = getRegLoc(Reg, Operands); 3853 } else { 3854 ErrLoc = getConstLoc(Operands); 3855 } 3856 3857 Error(ErrLoc, "source operand must be a VGPR"); 3858 return false; 3859 } 3860 3861 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3862 const OperandVector &Operands) { 3863 3864 const unsigned Opc = Inst.getOpcode(); 3865 3866 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3867 return true; 3868 3869 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3870 assert(Src0Idx != -1); 3871 3872 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3873 if (!Src0.isReg()) 3874 return true; 3875 3876 auto Reg = mc2PseudoReg(Src0.getReg()); 3877 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3878 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3879 Error(getRegLoc(Reg, Operands), 3880 "source operand must be either a VGPR or an inline constant"); 3881 return false; 3882 } 3883 3884 return true; 3885 } 3886 3887 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst, 3888 const OperandVector &Operands) { 3889 unsigned Opcode = Inst.getOpcode(); 3890 const MCInstrDesc &Desc = MII.get(Opcode); 3891 3892 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) || 3893 !getFeatureBits()[FeatureMFMAInlineLiteralBug]) 3894 return true; 3895 3896 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2); 3897 if (Src2Idx == -1) 3898 return true; 3899 3900 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) { 3901 Error(getConstLoc(Operands), 3902 "inline constants are not allowed for this operand"); 3903 return false; 3904 } 3905 3906 return true; 3907 } 3908 3909 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3910 const OperandVector &Operands) { 3911 const unsigned Opc = Inst.getOpcode(); 3912 const MCInstrDesc &Desc = MII.get(Opc); 3913 3914 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3915 return true; 3916 3917 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3918 if (Src2Idx == -1) 3919 return true; 3920 3921 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3922 if (!Src2.isReg()) 3923 return true; 3924 3925 MCRegister Src2Reg = Src2.getReg(); 3926 MCRegister DstReg = Inst.getOperand(0).getReg(); 3927 if (Src2Reg == DstReg) 3928 return true; 3929 3930 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3931 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128) 3932 return true; 3933 3934 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3935 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3936 "source 2 operand must not partially overlap with dst"); 3937 return false; 3938 } 3939 3940 return true; 3941 } 3942 3943 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3944 switch (Inst.getOpcode()) { 3945 default: 3946 return true; 3947 case V_DIV_SCALE_F32_gfx6_gfx7: 3948 case V_DIV_SCALE_F32_vi: 3949 case 
V_DIV_SCALE_F32_gfx10:
3950 case V_DIV_SCALE_F64_gfx6_gfx7:
3951 case V_DIV_SCALE_F64_vi:
3952 case V_DIV_SCALE_F64_gfx10:
3953 break;
3954 }
3955
3956 // TODO: Check that src0 = src1 or src2.
3957
3958 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3959 AMDGPU::OpName::src1_modifiers,
3960 AMDGPU::OpName::src2_modifiers}) {
3961 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3962 .getImm() &
3963 SISrcMods::ABS) {
3964 return false;
3965 }
3966 }
3967
3968 return true;
3969 }
3970
3971 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3972
3973 const unsigned Opc = Inst.getOpcode();
3974 const MCInstrDesc &Desc = MII.get(Opc);
3975
3976 if ((Desc.TSFlags & MIMGFlags) == 0)
3977 return true;
3978
3979 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3980 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3981 if (isCI() || isSI())
3982 return false;
3983 }
3984
3985 return true;
3986 }
3987
3988 static bool IsRevOpcode(const unsigned Opcode)
3989 {
3990 switch (Opcode) {
3991 case AMDGPU::V_SUBREV_F32_e32:
3992 case AMDGPU::V_SUBREV_F32_e64:
3993 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3994 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3995 case AMDGPU::V_SUBREV_F32_e32_vi:
3996 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3997 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3998 case AMDGPU::V_SUBREV_F32_e64_vi:
3999
4000 case AMDGPU::V_SUBREV_CO_U32_e32:
4001 case AMDGPU::V_SUBREV_CO_U32_e64:
4002 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4003 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4004
4005 case AMDGPU::V_SUBBREV_U32_e32:
4006 case AMDGPU::V_SUBBREV_U32_e64:
4007 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4008 case AMDGPU::V_SUBBREV_U32_e32_vi:
4009 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4010 case AMDGPU::V_SUBBREV_U32_e64_vi:
4011
4012 case AMDGPU::V_SUBREV_U32_e32:
4013 case AMDGPU::V_SUBREV_U32_e64:
4014 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4015 case AMDGPU::V_SUBREV_U32_e32_vi:
4016 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4017 case AMDGPU::V_SUBREV_U32_e64_vi:
4018
4019 case AMDGPU::V_SUBREV_F16_e32:
4020 case AMDGPU::V_SUBREV_F16_e64:
4021 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4022 case AMDGPU::V_SUBREV_F16_e32_vi:
4023 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4024 case AMDGPU::V_SUBREV_F16_e64_vi:
4025
4026 case AMDGPU::V_SUBREV_U16_e32:
4027 case AMDGPU::V_SUBREV_U16_e64:
4028 case AMDGPU::V_SUBREV_U16_e32_vi:
4029 case AMDGPU::V_SUBREV_U16_e64_vi:
4030
4031 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4032 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4033 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4034
4035 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4036 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4037
4038 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4039 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4040
4041 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4042 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4043
4044 case AMDGPU::V_LSHRREV_B32_e32:
4045 case AMDGPU::V_LSHRREV_B32_e64:
4046 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4047 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4048 case AMDGPU::V_LSHRREV_B32_e32_vi:
4049 case AMDGPU::V_LSHRREV_B32_e64_vi:
4050 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4051 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4052
4053 case AMDGPU::V_ASHRREV_I32_e32:
4054 case AMDGPU::V_ASHRREV_I32_e64:
4055 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4056 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4057 case AMDGPU::V_ASHRREV_I32_e32_vi:
4058 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4059 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4060 case AMDGPU::V_ASHRREV_I32_e64_vi:
4061
4062 case
AMDGPU::V_LSHLREV_B32_e32: 4063 case AMDGPU::V_LSHLREV_B32_e64: 4064 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 4065 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 4066 case AMDGPU::V_LSHLREV_B32_e32_vi: 4067 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 4068 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 4069 case AMDGPU::V_LSHLREV_B32_e64_vi: 4070 4071 case AMDGPU::V_LSHLREV_B16_e32: 4072 case AMDGPU::V_LSHLREV_B16_e64: 4073 case AMDGPU::V_LSHLREV_B16_e32_vi: 4074 case AMDGPU::V_LSHLREV_B16_e64_vi: 4075 case AMDGPU::V_LSHLREV_B16_gfx10: 4076 4077 case AMDGPU::V_LSHRREV_B16_e32: 4078 case AMDGPU::V_LSHRREV_B16_e64: 4079 case AMDGPU::V_LSHRREV_B16_e32_vi: 4080 case AMDGPU::V_LSHRREV_B16_e64_vi: 4081 case AMDGPU::V_LSHRREV_B16_gfx10: 4082 4083 case AMDGPU::V_ASHRREV_I16_e32: 4084 case AMDGPU::V_ASHRREV_I16_e64: 4085 case AMDGPU::V_ASHRREV_I16_e32_vi: 4086 case AMDGPU::V_ASHRREV_I16_e64_vi: 4087 case AMDGPU::V_ASHRREV_I16_gfx10: 4088 4089 case AMDGPU::V_LSHLREV_B64_e64: 4090 case AMDGPU::V_LSHLREV_B64_gfx10: 4091 case AMDGPU::V_LSHLREV_B64_vi: 4092 4093 case AMDGPU::V_LSHRREV_B64_e64: 4094 case AMDGPU::V_LSHRREV_B64_gfx10: 4095 case AMDGPU::V_LSHRREV_B64_vi: 4096 4097 case AMDGPU::V_ASHRREV_I64_e64: 4098 case AMDGPU::V_ASHRREV_I64_gfx10: 4099 case AMDGPU::V_ASHRREV_I64_vi: 4100 4101 case AMDGPU::V_PK_LSHLREV_B16: 4102 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 4103 case AMDGPU::V_PK_LSHLREV_B16_vi: 4104 4105 case AMDGPU::V_PK_LSHRREV_B16: 4106 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 4107 case AMDGPU::V_PK_LSHRREV_B16_vi: 4108 case AMDGPU::V_PK_ASHRREV_I16: 4109 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4110 case AMDGPU::V_PK_ASHRREV_I16_vi: 4111 return true; 4112 default: 4113 return false; 4114 } 4115 } 4116 4117 std::optional<StringRef> 4118 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4119 4120 using namespace SIInstrFlags; 4121 const unsigned Opcode = Inst.getOpcode(); 4122 const MCInstrDesc &Desc = MII.get(Opcode); 4123 4124 // lds_direct register is defined so that it can be used 4125 // with 9-bit operands only. Ignore encodings which do not accept these. 
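// For example (illustrative): "v_mov_b32 v0, lds_direct" passes these checks
// on targets that support lds_direct, while using it with an SDWA or *rev
// opcode, or in any position other than src0, is rejected below.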
4126 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4127 if ((Desc.TSFlags & Enc) == 0) 4128 return std::nullopt; 4129 4130 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4131 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4132 if (SrcIdx == -1) 4133 break; 4134 const auto &Src = Inst.getOperand(SrcIdx); 4135 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4136 4137 if (isGFX90A() || isGFX11Plus()) 4138 return StringRef("lds_direct is not supported on this GPU"); 4139 4140 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4141 return StringRef("lds_direct cannot be used with this instruction"); 4142 4143 if (SrcName != OpName::src0) 4144 return StringRef("lds_direct may be used as src0 only"); 4145 } 4146 } 4147 4148 return std::nullopt; 4149 } 4150 4151 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4152 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4153 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4154 if (Op.isFlatOffset()) 4155 return Op.getStartLoc(); 4156 } 4157 return getLoc(); 4158 } 4159 4160 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst, 4161 const OperandVector &Operands) { 4162 auto Opcode = Inst.getOpcode(); 4163 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4164 if (OpNum == -1) 4165 return true; 4166 4167 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4168 if ((TSFlags & SIInstrFlags::FLAT)) 4169 return validateFlatOffset(Inst, Operands); 4170 4171 if ((TSFlags & SIInstrFlags::SMRD)) 4172 return validateSMEMOffset(Inst, Operands); 4173 4174 const auto &Op = Inst.getOperand(OpNum); 4175 if (isGFX12Plus() && 4176 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 4177 const unsigned OffsetSize = 24; 4178 if (!isIntN(OffsetSize, Op.getImm())) { 4179 Error(getFlatOffsetLoc(Operands), 4180 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4181 return false; 4182 } 4183 } else { 4184 const unsigned OffsetSize = 16; 4185 if (!isUIntN(OffsetSize, Op.getImm())) { 4186 Error(getFlatOffsetLoc(Operands), 4187 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4188 return false; 4189 } 4190 } 4191 return true; 4192 } 4193 4194 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4195 const OperandVector &Operands) { 4196 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4197 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4198 return true; 4199 4200 auto Opcode = Inst.getOpcode(); 4201 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4202 assert(OpNum != -1); 4203 4204 const auto &Op = Inst.getOperand(OpNum); 4205 if (!hasFlatOffsets() && Op.getImm() != 0) { 4206 Error(getFlatOffsetLoc(Operands), 4207 "flat offset modifier is not supported on this GPU"); 4208 return false; 4209 } 4210 4211 // For pre-GFX12 FLAT instructions the offset must be positive; 4212 // MSB is ignored and forced to zero. 4213 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI()); 4214 bool AllowNegative = 4215 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) || 4216 isGFX12Plus(); 4217 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { 4218 Error(getFlatOffsetLoc(Operands), 4219 Twine("expected a ") + 4220 (AllowNegative ? 
Twine(OffsetSize) + "-bit signed offset" 4221 : Twine(OffsetSize - 1) + "-bit unsigned offset")); 4222 return false; 4223 } 4224 4225 return true; 4226 } 4227 4228 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4229 // Start with second operand because SMEM Offset cannot be dst or src0. 4230 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4231 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4232 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod()) 4233 return Op.getStartLoc(); 4234 } 4235 return getLoc(); 4236 } 4237 4238 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4239 const OperandVector &Operands) { 4240 if (isCI() || isSI()) 4241 return true; 4242 4243 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4244 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4245 return true; 4246 4247 auto Opcode = Inst.getOpcode(); 4248 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4249 if (OpNum == -1) 4250 return true; 4251 4252 const auto &Op = Inst.getOperand(OpNum); 4253 if (!Op.isImm()) 4254 return true; 4255 4256 uint64_t Offset = Op.getImm(); 4257 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4258 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4259 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4260 return true; 4261 4262 Error(getSMEMOffsetLoc(Operands), 4263 isGFX12Plus() ? "expected a 24-bit signed offset" 4264 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" 4265 : "expected a 21-bit signed offset"); 4266 4267 return false; 4268 } 4269 4270 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4271 unsigned Opcode = Inst.getOpcode(); 4272 const MCInstrDesc &Desc = MII.get(Opcode); 4273 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4274 return true; 4275 4276 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4277 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4278 4279 const int OpIndices[] = { Src0Idx, Src1Idx }; 4280 4281 unsigned NumExprs = 0; 4282 unsigned NumLiterals = 0; 4283 uint32_t LiteralValue; 4284 4285 for (int OpIdx : OpIndices) { 4286 if (OpIdx == -1) break; 4287 4288 const MCOperand &MO = Inst.getOperand(OpIdx); 4289 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4290 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4291 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4292 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4293 if (NumLiterals == 0 || LiteralValue != Value) { 4294 LiteralValue = Value; 4295 ++NumLiterals; 4296 } 4297 } else if (MO.isExpr()) { 4298 ++NumExprs; 4299 } 4300 } 4301 } 4302 4303 return NumLiterals + NumExprs <= 1; 4304 } 4305 4306 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4307 const unsigned Opc = Inst.getOpcode(); 4308 if (isPermlane16(Opc)) { 4309 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4310 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4311 4312 if (OpSel & ~3) 4313 return false; 4314 } 4315 4316 uint64_t TSFlags = MII.get(Opc).TSFlags; 4317 4318 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { 4319 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4320 if (OpSelIdx != -1) { 4321 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4322 return false; 4323 } 4324 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4325 if (OpSelHiIdx != -1) { 4326 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4327 return 
false; 4328 } 4329 } 4330 4331 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). 4332 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && 4333 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { 4334 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4335 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4336 if (OpSel & 3) 4337 return false; 4338 } 4339 4340 return true; 4341 } 4342 4343 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4344 const OperandVector &Operands) { 4345 const unsigned Opc = Inst.getOpcode(); 4346 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4347 if (DppCtrlIdx >= 0) { 4348 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4349 4350 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) && 4351 AMDGPU::isDPALU_DPP(MII.get(Opc))) { 4352 // DP ALU DPP is supported for row_newbcast only on GFX9* 4353 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4354 Error(S, "DP ALU dpp only supports row_newbcast"); 4355 return false; 4356 } 4357 } 4358 4359 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8); 4360 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0; 4361 4362 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) { 4363 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 4364 if (Src1Idx >= 0) { 4365 const MCOperand &Src1 = Inst.getOperand(Src1Idx); 4366 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4367 if (Src1.isImm() || 4368 (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) { 4369 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]); 4370 Error(Op.getStartLoc(), "invalid operand for instruction"); 4371 return false; 4372 } 4373 } 4374 } 4375 4376 return true; 4377 } 4378 4379 // Check if VCC register matches wavefront size 4380 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4381 auto FB = getFeatureBits(); 4382 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4383 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4384 } 4385 4386 // One unique literal can be used. 
VOP3 literal is only allowed in GFX10+ 4387 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4388 const OperandVector &Operands) { 4389 unsigned Opcode = Inst.getOpcode(); 4390 const MCInstrDesc &Desc = MII.get(Opcode); 4391 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1; 4392 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4393 !HasMandatoryLiteral && !isVOPD(Opcode)) 4394 return true; 4395 4396 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral); 4397 4398 unsigned NumExprs = 0; 4399 unsigned NumLiterals = 0; 4400 uint32_t LiteralValue; 4401 4402 for (int OpIdx : OpIndices) { 4403 if (OpIdx == -1) 4404 continue; 4405 4406 const MCOperand &MO = Inst.getOperand(OpIdx); 4407 if (!MO.isImm() && !MO.isExpr()) 4408 continue; 4409 if (!isSISrcOperand(Desc, OpIdx)) 4410 continue; 4411 4412 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4413 uint64_t Value = static_cast<uint64_t>(MO.getImm()); 4414 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) && 4415 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8; 4416 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64); 4417 4418 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) { 4419 Error(getLitLoc(Operands), "invalid operand for instruction"); 4420 return false; 4421 } 4422 4423 if (IsFP64 && IsValid32Op) 4424 Value = Hi_32(Value); 4425 4426 if (NumLiterals == 0 || LiteralValue != Value) { 4427 LiteralValue = Value; 4428 ++NumLiterals; 4429 } 4430 } else if (MO.isExpr()) { 4431 ++NumExprs; 4432 } 4433 } 4434 NumLiterals += NumExprs; 4435 4436 if (!NumLiterals) 4437 return true; 4438 4439 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) { 4440 Error(getLitLoc(Operands), "literal operands are not supported"); 4441 return false; 4442 } 4443 4444 if (NumLiterals > 1) { 4445 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed"); 4446 return false; 4447 } 4448 4449 return true; 4450 } 4451 4452 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4453 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4454 const MCRegisterInfo *MRI) { 4455 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4456 if (OpIdx < 0) 4457 return -1; 4458 4459 const MCOperand &Op = Inst.getOperand(OpIdx); 4460 if (!Op.isReg()) 4461 return -1; 4462 4463 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4464 auto Reg = Sub ? Sub : Op.getReg(); 4465 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4466 return AGPR32.contains(Reg) ? 1 : 0; 4467 } 4468 4469 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4470 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4471 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4472 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4473 SIInstrFlags::DS)) == 0) 4474 return true; 4475 4476 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? 
AMDGPU::OpName::data0 4477 : AMDGPU::OpName::vdata; 4478 4479 const MCRegisterInfo *MRI = getMRI(); 4480 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4481 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4482 4483 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4484 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4485 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4486 return false; 4487 } 4488 4489 auto FB = getFeatureBits(); 4490 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4491 if (DataAreg < 0 || DstAreg < 0) 4492 return true; 4493 return DstAreg == DataAreg; 4494 } 4495 4496 return DstAreg < 1 && DataAreg < 1; 4497 } 4498 4499 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4500 auto FB = getFeatureBits(); 4501 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4502 return true; 4503 4504 const MCRegisterInfo *MRI = getMRI(); 4505 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4506 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4507 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4508 const MCOperand &Op = Inst.getOperand(I); 4509 if (!Op.isReg()) 4510 continue; 4511 4512 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4513 if (!Sub) 4514 continue; 4515 4516 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4517 return false; 4518 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4519 return false; 4520 } 4521 4522 return true; 4523 } 4524 4525 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4526 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4527 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4528 if (Op.isBLGP()) 4529 return Op.getStartLoc(); 4530 } 4531 return SMLoc(); 4532 } 4533 4534 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4535 const OperandVector &Operands) { 4536 unsigned Opc = Inst.getOpcode(); 4537 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4538 if (BlgpIdx == -1) 4539 return true; 4540 SMLoc BLGPLoc = getBLGPLoc(Operands); 4541 if (!BLGPLoc.isValid()) 4542 return true; 4543 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:"); 4544 auto FB = getFeatureBits(); 4545 bool UsesNeg = false; 4546 if (FB[AMDGPU::FeatureGFX940Insts]) { 4547 switch (Opc) { 4548 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4549 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4550 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4551 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4552 UsesNeg = true; 4553 } 4554 } 4555 4556 if (IsNeg == UsesNeg) 4557 return true; 4558 4559 Error(BLGPLoc, 4560 UsesNeg ? 
"invalid modifier: blgp is not supported" 4561 : "invalid modifier: neg is not supported"); 4562 4563 return false; 4564 } 4565 4566 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst, 4567 const OperandVector &Operands) { 4568 if (!isGFX11Plus()) 4569 return true; 4570 4571 unsigned Opc = Inst.getOpcode(); 4572 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 && 4573 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 && 4574 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 && 4575 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11) 4576 return true; 4577 4578 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst); 4579 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg()); 4580 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()); 4581 if (Reg == AMDGPU::SGPR_NULL) 4582 return true; 4583 4584 SMLoc RegLoc = getRegLoc(Reg, Operands); 4585 Error(RegLoc, "src0 must be null"); 4586 return false; 4587 } 4588 4589 bool AMDGPUAsmParser::validateDS(const MCInst &Inst, 4590 const OperandVector &Operands) { 4591 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4592 if ((TSFlags & SIInstrFlags::DS) == 0) 4593 return true; 4594 if (TSFlags & SIInstrFlags::GWS) 4595 return validateGWS(Inst, Operands); 4596 // Only validate GDS for non-GWS instructions. 4597 if (hasGDS()) 4598 return true; 4599 int GDSIdx = 4600 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds); 4601 if (GDSIdx < 0) 4602 return true; 4603 unsigned GDS = Inst.getOperand(GDSIdx).getImm(); 4604 if (GDS) { 4605 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands); 4606 Error(S, "gds modifier is not supported on this GPU"); 4607 return false; 4608 } 4609 return true; 4610 } 4611 4612 // gfx90a has an undocumented limitation: 4613 // DS_GWS opcodes must use even aligned registers. 4614 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4615 const OperandVector &Operands) { 4616 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4617 return true; 4618 4619 int Opc = Inst.getOpcode(); 4620 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4621 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4622 return true; 4623 4624 const MCRegisterInfo *MRI = getMRI(); 4625 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4626 int Data0Pos = 4627 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4628 assert(Data0Pos != -1); 4629 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4630 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4631 if (RegIdx & 1) { 4632 SMLoc RegLoc = getRegLoc(Reg, Operands); 4633 Error(RegLoc, "vgpr must be even aligned"); 4634 return false; 4635 } 4636 4637 return true; 4638 } 4639 4640 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4641 const OperandVector &Operands, 4642 const SMLoc &IDLoc) { 4643 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4644 AMDGPU::OpName::cpol); 4645 if (CPolPos == -1) 4646 return true; 4647 4648 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4649 4650 if (isGFX12Plus()) 4651 return validateTHAndScopeBits(Inst, Operands, CPol); 4652 4653 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4654 if (TSFlags & SIInstrFlags::SMRD) { 4655 if (CPol && (isSI() || isCI())) { 4656 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4657 Error(S, "cache policy is not supported for SMRD instructions"); 4658 return false; 4659 } 4660 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4661 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4662 return false; 4663 } 4664 } 4665 4666 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4667 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF | 4668 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4669 SIInstrFlags::FLAT; 4670 if (!(TSFlags & AllowSCCModifier)) { 4671 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4672 StringRef CStr(S.getPointer()); 4673 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4674 Error(S, 4675 "scc modifier is not supported for this instruction on this GPU"); 4676 return false; 4677 } 4678 } 4679 4680 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4681 return true; 4682 4683 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4684 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4685 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4686 : "instruction must use glc"); 4687 return false; 4688 } 4689 } else { 4690 if (CPol & CPol::GLC) { 4691 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4692 StringRef CStr(S.getPointer()); 4693 S = SMLoc::getFromPointer( 4694 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4695 Error(S, isGFX940() ? 
"instruction must not use sc0" 4696 : "instruction must not use glc"); 4697 return false; 4698 } 4699 } 4700 4701 return true; 4702 } 4703 4704 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst, 4705 const OperandVector &Operands, 4706 const unsigned CPol) { 4707 const unsigned TH = CPol & AMDGPU::CPol::TH; 4708 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE; 4709 4710 const unsigned Opcode = Inst.getOpcode(); 4711 const MCInstrDesc &TID = MII.get(Opcode); 4712 4713 auto PrintError = [&](StringRef Msg) { 4714 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4715 Error(S, Msg); 4716 return false; 4717 }; 4718 4719 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) && 4720 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) && 4721 (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))) 4722 return PrintError("instruction must use th:TH_ATOMIC_RETURN"); 4723 4724 if (TH == 0) 4725 return true; 4726 4727 if ((TID.TSFlags & SIInstrFlags::SMRD) && 4728 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) || 4729 (TH == AMDGPU::CPol::TH_NT_HT))) 4730 return PrintError("invalid th value for SMEM instruction"); 4731 4732 if (TH == AMDGPU::CPol::TH_BYPASS) { 4733 if ((Scope != AMDGPU::CPol::SCOPE_SYS && 4734 CPol & AMDGPU::CPol::TH_REAL_BYPASS) || 4735 (Scope == AMDGPU::CPol::SCOPE_SYS && 4736 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS))) 4737 return PrintError("scope and th combination is not valid"); 4738 } 4739 4740 bool IsStore = TID.mayStore(); 4741 bool IsAtomic = 4742 TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet); 4743 4744 if (IsAtomic) { 4745 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC)) 4746 return PrintError("invalid th value for atomic instructions"); 4747 } else if (IsStore) { 4748 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE)) 4749 return PrintError("invalid th value for store instructions"); 4750 } else { 4751 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD)) 4752 return PrintError("invalid th value for load instructions"); 4753 } 4754 4755 return true; 4756 } 4757 4758 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4759 if (!isGFX11Plus()) 4760 return true; 4761 for (auto &Operand : Operands) { 4762 if (!Operand->isReg()) 4763 continue; 4764 unsigned Reg = Operand->getReg(); 4765 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4766 Error(getRegLoc(Reg, Operands), 4767 "execz and vccz are not supported on this GPU"); 4768 return false; 4769 } 4770 } 4771 return true; 4772 } 4773 4774 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, 4775 const OperandVector &Operands) { 4776 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4777 if (Desc.mayStore() && 4778 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 4779 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands); 4780 if (Loc != getInstLoc(Operands)) { 4781 Error(Loc, "TFE modifier has no meaning for store instructions"); 4782 return false; 4783 } 4784 } 4785 4786 return true; 4787 } 4788 4789 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4790 const SMLoc &IDLoc, 4791 const OperandVector &Operands) { 4792 if (auto ErrMsg = validateLdsDirect(Inst)) { 4793 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4794 return false; 4795 } 4796 if (!validateSOPLiteral(Inst)) { 4797 Error(getLitLoc(Operands), 4798 "only one unique literal operand is allowed"); 4799 return false; 4800 } 4801 if (!validateVOPLiteral(Inst, Operands)) { 4802 return false; 4803 } 4804 if (!validateConstantBusLimitations(Inst, Operands)) { 4805 return false; 4806 } 
4807 if (!validateVOPDRegBankConstraints(Inst, Operands)) { 4808 return false; 4809 } 4810 if (!validateIntClampSupported(Inst)) { 4811 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4812 "integer clamping is not supported on this GPU"); 4813 return false; 4814 } 4815 if (!validateOpSel(Inst)) { 4816 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4817 "invalid op_sel operand"); 4818 return false; 4819 } 4820 if (!validateDPP(Inst, Operands)) { 4821 return false; 4822 } 4823 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4824 if (!validateMIMGD16(Inst)) { 4825 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4826 "d16 modifier is not supported on this GPU"); 4827 return false; 4828 } 4829 if (!validateMIMGMSAA(Inst)) { 4830 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4831 "invalid dim; must be MSAA type"); 4832 return false; 4833 } 4834 if (!validateMIMGDataSize(Inst, IDLoc)) { 4835 return false; 4836 } 4837 if (!validateMIMGAddrSize(Inst, IDLoc)) 4838 return false; 4839 if (!validateMIMGAtomicDMask(Inst)) { 4840 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4841 "invalid atomic image dmask"); 4842 return false; 4843 } 4844 if (!validateMIMGGatherDMask(Inst)) { 4845 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4846 "invalid image_gather dmask: only one bit must be set"); 4847 return false; 4848 } 4849 if (!validateMovrels(Inst, Operands)) { 4850 return false; 4851 } 4852 if (!validateOffset(Inst, Operands)) { 4853 return false; 4854 } 4855 if (!validateMAIAccWrite(Inst, Operands)) { 4856 return false; 4857 } 4858 if (!validateMAISrc2(Inst, Operands)) { 4859 return false; 4860 } 4861 if (!validateMFMA(Inst, Operands)) { 4862 return false; 4863 } 4864 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4865 return false; 4866 } 4867 4868 if (!validateAGPRLdSt(Inst)) { 4869 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4870 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4871 : "invalid register class: agpr loads and stores not supported on this GPU" 4872 ); 4873 return false; 4874 } 4875 if (!validateVGPRAlign(Inst)) { 4876 Error(IDLoc, 4877 "invalid register class: vgpr tuples must be 64 bit aligned"); 4878 return false; 4879 } 4880 if (!validateDS(Inst, Operands)) { 4881 return false; 4882 } 4883 4884 if (!validateBLGP(Inst, Operands)) { 4885 return false; 4886 } 4887 4888 if (!validateDivScale(Inst)) { 4889 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4890 return false; 4891 } 4892 if (!validateWaitCnt(Inst, Operands)) { 4893 return false; 4894 } 4895 if (!validateExeczVcczOperands(Operands)) { 4896 return false; 4897 } 4898 if (!validateTFE(Inst, Operands)) { 4899 return false; 4900 } 4901 4902 return true; 4903 } 4904 4905 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4906 const FeatureBitset &FBS, 4907 unsigned VariantID = 0); 4908 4909 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4910 const FeatureBitset &AvailableFeatures, 4911 unsigned VariantID); 4912 4913 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4914 const FeatureBitset &FBS) { 4915 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4916 } 4917 4918 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4919 const FeatureBitset &FBS, 4920 ArrayRef<unsigned> Variants) { 4921 for (auto Variant : Variants) { 4922 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4923 return true; 4924 } 4925 4926 return false; 4927 } 4928 4929 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4930 const SMLoc &IDLoc) { 4931 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits()); 4932 4933 // Check if requested instruction variant is supported. 4934 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4935 return false; 4936 4937 // This instruction is not supported. 4938 // Clear any other pending errors because they are no longer relevant. 4939 getParser().clearPendingErrors(); 4940 4941 // Requested instruction variant is not supported. 4942 // Check if any other variants are supported. 4943 StringRef VariantName = getMatchedVariantName(); 4944 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4945 return Error(IDLoc, 4946 Twine(VariantName, 4947 " variant of this instruction is not supported")); 4948 } 4949 4950 // Check if this instruction may be used with a different wavesize. 4951 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && 4952 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { 4953 4954 FeatureBitset FeaturesWS32 = getFeatureBits(); 4955 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64) 4956 .flip(AMDGPU::FeatureWavefrontSize32); 4957 FeatureBitset AvailableFeaturesWS32 = 4958 ComputeAvailableFeatures(FeaturesWS32); 4959 4960 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants())) 4961 return Error(IDLoc, "instruction requires wavesize=32"); 4962 } 4963 4964 // Finally check if this instruction is supported on any other GPU. 4965 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4966 return Error(IDLoc, "instruction not supported on this GPU"); 4967 } 4968 4969 // Instruction not supported on any GPU. Probably a typo. 
4970 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4971 return Error(IDLoc, "invalid instruction" + Suggestion); 4972 } 4973 4974 static bool isInvalidVOPDY(const OperandVector &Operands, 4975 uint64_t InvalidOprIdx) { 4976 assert(InvalidOprIdx < Operands.size()); 4977 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]); 4978 if (Op.isToken() && InvalidOprIdx > 1) { 4979 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]); 4980 return PrevOp.isToken() && PrevOp.getToken() == "::"; 4981 } 4982 return false; 4983 } 4984 4985 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4986 OperandVector &Operands, 4987 MCStreamer &Out, 4988 uint64_t &ErrorInfo, 4989 bool MatchingInlineAsm) { 4990 MCInst Inst; 4991 unsigned Result = Match_Success; 4992 for (auto Variant : getMatchedVariants()) { 4993 uint64_t EI; 4994 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4995 Variant); 4996 // We order match statuses from least to most specific. We use most specific 4997 // status as resulting 4998 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4999 if ((R == Match_Success) || 5000 (R == Match_PreferE32) || 5001 (R == Match_MissingFeature && Result != Match_PreferE32) || 5002 (R == Match_InvalidOperand && Result != Match_MissingFeature 5003 && Result != Match_PreferE32) || 5004 (R == Match_MnemonicFail && Result != Match_InvalidOperand 5005 && Result != Match_MissingFeature 5006 && Result != Match_PreferE32)) { 5007 Result = R; 5008 ErrorInfo = EI; 5009 } 5010 if (R == Match_Success) 5011 break; 5012 } 5013 5014 if (Result == Match_Success) { 5015 if (!validateInstruction(Inst, IDLoc, Operands)) { 5016 return true; 5017 } 5018 Inst.setLoc(IDLoc); 5019 Out.emitInstruction(Inst, getSTI()); 5020 return false; 5021 } 5022 5023 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5024 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 5025 return true; 5026 } 5027 5028 switch (Result) { 5029 default: break; 5030 case Match_MissingFeature: 5031 // It has been verified that the specified instruction 5032 // mnemonic is valid. A match was found but it requires 5033 // features which are not supported on this GPU. 
5034 return Error(IDLoc, "operands are not valid for this GPU or mode"); 5035 5036 case Match_InvalidOperand: { 5037 SMLoc ErrorLoc = IDLoc; 5038 if (ErrorInfo != ~0ULL) { 5039 if (ErrorInfo >= Operands.size()) { 5040 return Error(IDLoc, "too few operands for instruction"); 5041 } 5042 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 5043 if (ErrorLoc == SMLoc()) 5044 ErrorLoc = IDLoc; 5045 5046 if (isInvalidVOPDY(Operands, ErrorInfo)) 5047 return Error(ErrorLoc, "invalid VOPDY instruction"); 5048 } 5049 return Error(ErrorLoc, "invalid operand for instruction"); 5050 } 5051 5052 case Match_PreferE32: 5053 return Error(IDLoc, "internal error: instruction without _e64 suffix " 5054 "should be encoded as e32"); 5055 case Match_MnemonicFail: 5056 llvm_unreachable("Invalid instructions should have been handled already"); 5057 } 5058 llvm_unreachable("Implement any new match types added!"); 5059 } 5060 5061 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 5062 int64_t Tmp = -1; 5063 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 5064 return true; 5065 } 5066 if (getParser().parseAbsoluteExpression(Tmp)) { 5067 return true; 5068 } 5069 Ret = static_cast<uint32_t>(Tmp); 5070 return false; 5071 } 5072 5073 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 5074 uint32_t &Minor) { 5075 if (ParseAsAbsoluteExpression(Major)) 5076 return TokError("invalid major version"); 5077 5078 if (!trySkipToken(AsmToken::Comma)) 5079 return TokError("minor version number required, comma expected"); 5080 5081 if (ParseAsAbsoluteExpression(Minor)) 5082 return TokError("invalid minor version"); 5083 5084 return false; 5085 } 5086 5087 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 5088 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 5089 return TokError("directive only supported for amdgcn architecture"); 5090 5091 std::string TargetIDDirective; 5092 SMLoc TargetStart = getTok().getLoc(); 5093 if (getParser().parseEscapedString(TargetIDDirective)) 5094 return true; 5095 5096 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 5097 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5098 return getParser().Error(TargetRange.Start, 5099 (Twine(".amdgcn_target directive's target id ") + 5100 Twine(TargetIDDirective) + 5101 Twine(" does not match the specified target id ") + 5102 Twine(getTargetStreamer().getTargetID()->toString())).str()); 5103 5104 return false; 5105 } 5106 5107 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 5108 return Error(Range.Start, "value out of range", Range); 5109 } 5110 5111 bool AMDGPUAsmParser::calculateGPRBlocks( 5112 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 5113 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32, 5114 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR, 5115 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 5116 // TODO(scott.linder): These calculations are duplicated from 5117 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
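// Rough sketch of what follows (granule sizes are target-dependent): the raw
// VGPR/SGPR counts are rounded up to the hardware allocation granule and
// re-encoded as the granulated block counts stored into compute_pgm_rsrc1.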
5118 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 5119 5120 unsigned NumVGPRs = NextFreeVGPR; 5121 unsigned NumSGPRs = NextFreeSGPR; 5122 5123 if (Version.Major >= 10) 5124 NumSGPRs = 0; 5125 else { 5126 unsigned MaxAddressableNumSGPRs = 5127 IsaInfo::getAddressableNumSGPRs(&getSTI()); 5128 5129 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 5130 NumSGPRs > MaxAddressableNumSGPRs) 5131 return OutOfRangeError(SGPRRange); 5132 5133 NumSGPRs += 5134 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 5135 5136 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 5137 NumSGPRs > MaxAddressableNumSGPRs) 5138 return OutOfRangeError(SGPRRange); 5139 5140 if (Features.test(FeatureSGPRInitBug)) 5141 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 5142 } 5143 5144 VGPRBlocks = 5145 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 5146 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 5147 5148 return false; 5149 } 5150 5151 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 5152 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 5153 return TokError("directive only supported for amdgcn architecture"); 5154 5155 if (!isHsaAbi(getSTI())) 5156 return TokError("directive only supported for amdhsa OS"); 5157 5158 StringRef KernelName; 5159 if (getParser().parseIdentifier(KernelName)) 5160 return true; 5161 5162 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 5163 5164 StringSet<> Seen; 5165 5166 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 5167 5168 SMRange VGPRRange; 5169 uint64_t NextFreeVGPR = 0; 5170 uint64_t AccumOffset = 0; 5171 uint64_t SharedVGPRCount = 0; 5172 uint64_t PreloadLength = 0; 5173 uint64_t PreloadOffset = 0; 5174 SMRange SGPRRange; 5175 uint64_t NextFreeSGPR = 0; 5176 5177 // Count the number of user SGPRs implied from the enabled feature bits. 5178 unsigned ImpliedUserSGPRCount = 0; 5179 5180 // Track if the asm explicitly contains the directive for the user SGPR 5181 // count. 
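// For example (illustrative): .amdhsa_user_sgpr_dispatch_ptr and
// .amdhsa_user_sgpr_queue_ptr each imply 2 user SGPRs; an explicit
// .amdhsa_user_sgpr_count smaller than the implied total is rejected once
// all directives have been parsed.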
5182 std::optional<unsigned> ExplicitUserSGPRCount; 5183 bool ReserveVCC = true; 5184 bool ReserveFlatScr = true; 5185 std::optional<bool> EnableWavefrontSize32; 5186 5187 while (true) { 5188 while (trySkipToken(AsmToken::EndOfStatement)); 5189 5190 StringRef ID; 5191 SMRange IDRange = getTok().getLocRange(); 5192 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 5193 return true; 5194 5195 if (ID == ".end_amdhsa_kernel") 5196 break; 5197 5198 if (!Seen.insert(ID).second) 5199 return TokError(".amdhsa_ directives cannot be repeated"); 5200 5201 SMLoc ValStart = getLoc(); 5202 int64_t IVal; 5203 if (getParser().parseAbsoluteExpression(IVal)) 5204 return true; 5205 SMLoc ValEnd = getLoc(); 5206 SMRange ValRange = SMRange(ValStart, ValEnd); 5207 5208 if (IVal < 0) 5209 return OutOfRangeError(ValRange); 5210 5211 uint64_t Val = IVal; 5212 5213 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 5214 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 5215 return OutOfRangeError(RANGE); \ 5216 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 5217 5218 if (ID == ".amdhsa_group_segment_fixed_size") { 5219 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 5220 return OutOfRangeError(ValRange); 5221 KD.group_segment_fixed_size = Val; 5222 } else if (ID == ".amdhsa_private_segment_fixed_size") { 5223 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 5224 return OutOfRangeError(ValRange); 5225 KD.private_segment_fixed_size = Val; 5226 } else if (ID == ".amdhsa_kernarg_size") { 5227 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 5228 return OutOfRangeError(ValRange); 5229 KD.kernarg_size = Val; 5230 } else if (ID == ".amdhsa_user_sgpr_count") { 5231 ExplicitUserSGPRCount = Val; 5232 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 5233 if (hasArchitectedFlatScratch()) 5234 return Error(IDRange.Start, 5235 "directive is not supported with architected flat scratch", 5236 IDRange); 5237 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5238 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 5239 Val, ValRange); 5240 if (Val) 5241 ImpliedUserSGPRCount += 4; 5242 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") { 5243 if (!hasKernargPreload()) 5244 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5245 5246 if (Val > getMaxNumUserSGPRs()) 5247 return OutOfRangeError(ValRange); 5248 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val, 5249 ValRange); 5250 if (Val) { 5251 ImpliedUserSGPRCount += Val; 5252 PreloadLength = Val; 5253 } 5254 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") { 5255 if (!hasKernargPreload()) 5256 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5257 5258 if (Val >= 1024) 5259 return OutOfRangeError(ValRange); 5260 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val, 5261 ValRange); 5262 if (Val) 5263 PreloadOffset = Val; 5264 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 5265 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5266 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 5267 ValRange); 5268 if (Val) 5269 ImpliedUserSGPRCount += 2; 5270 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 5271 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5272 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 5273 ValRange); 5274 if (Val) 5275 ImpliedUserSGPRCount += 2; 5276 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 5277 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5278 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 5279 
Val, ValRange); 5280 if (Val) 5281 ImpliedUserSGPRCount += 2; 5282 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 5283 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5284 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 5285 ValRange); 5286 if (Val) 5287 ImpliedUserSGPRCount += 2; 5288 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 5289 if (hasArchitectedFlatScratch()) 5290 return Error(IDRange.Start, 5291 "directive is not supported with architected flat scratch", 5292 IDRange); 5293 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5294 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 5295 ValRange); 5296 if (Val) 5297 ImpliedUserSGPRCount += 2; 5298 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 5299 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5300 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 5301 Val, ValRange); 5302 if (Val) 5303 ImpliedUserSGPRCount += 1; 5304 } else if (ID == ".amdhsa_wavefront_size32") { 5305 if (IVersion.Major < 10) 5306 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5307 EnableWavefrontSize32 = Val; 5308 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5309 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5310 Val, ValRange); 5311 } else if (ID == ".amdhsa_uses_dynamic_stack") { 5312 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5313 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange); 5314 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5315 if (hasArchitectedFlatScratch()) 5316 return Error(IDRange.Start, 5317 "directive is not supported with architected flat scratch", 5318 IDRange); 5319 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5320 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5321 } else if (ID == ".amdhsa_enable_private_segment") { 5322 if (!hasArchitectedFlatScratch()) 5323 return Error( 5324 IDRange.Start, 5325 "directive is not supported without architected flat scratch", 5326 IDRange); 5327 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5328 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5329 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5330 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5331 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5332 ValRange); 5333 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5334 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5335 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5336 ValRange); 5337 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5338 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5339 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5340 ValRange); 5341 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5342 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5343 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5344 ValRange); 5345 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5346 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5347 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5348 ValRange); 5349 } else if (ID == ".amdhsa_next_free_vgpr") { 5350 VGPRRange = ValRange; 5351 NextFreeVGPR = Val; 5352 } else if (ID == ".amdhsa_next_free_sgpr") { 5353 SGPRRange = ValRange; 5354 NextFreeSGPR = Val; 5355 } else if (ID == ".amdhsa_accum_offset") { 5356 if (!isGFX90A()) 5357 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5358 AccumOffset = Val; 5359 } else if (ID == ".amdhsa_reserve_vcc") { 5360 if (!isUInt<1>(Val)) 5361 return OutOfRangeError(ValRange); 5362 ReserveVCC = Val; 5363 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5364 if (IVersion.Major < 7) 5365 return 
Error(IDRange.Start, "directive requires gfx7+", IDRange); 5366 if (hasArchitectedFlatScratch()) 5367 return Error(IDRange.Start, 5368 "directive is not supported with architected flat scratch", 5369 IDRange); 5370 if (!isUInt<1>(Val)) 5371 return OutOfRangeError(ValRange); 5372 ReserveFlatScr = Val; 5373 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5374 if (IVersion.Major < 8) 5375 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5376 if (!isUInt<1>(Val)) 5377 return OutOfRangeError(ValRange); 5378 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5379 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5380 IDRange); 5381 } else if (ID == ".amdhsa_float_round_mode_32") { 5382 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5383 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5384 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5385 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5386 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5387 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5388 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5389 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5390 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5391 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5392 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5393 ValRange); 5394 } else if (ID == ".amdhsa_dx10_clamp") { 5395 if (IVersion.Major >= 12) 5396 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 5397 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5398 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val, 5399 ValRange); 5400 } else if (ID == ".amdhsa_ieee_mode") { 5401 if (IVersion.Major >= 12) 5402 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 5403 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5404 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val, 5405 ValRange); 5406 } else if (ID == ".amdhsa_fp16_overflow") { 5407 if (IVersion.Major < 9) 5408 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5409 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val, 5410 ValRange); 5411 } else if (ID == ".amdhsa_tg_split") { 5412 if (!isGFX90A()) 5413 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5414 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5415 ValRange); 5416 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5417 if (IVersion.Major < 10) 5418 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5419 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val, 5420 ValRange); 5421 } else if (ID == ".amdhsa_memory_ordered") { 5422 if (IVersion.Major < 10) 5423 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5424 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val, 5425 ValRange); 5426 } else if (ID == ".amdhsa_forward_progress") { 5427 if (IVersion.Major < 10) 5428 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5429 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val, 5430 ValRange); 5431 } else if (ID == ".amdhsa_shared_vgpr_count") { 5432 if (IVersion.Major < 10 || IVersion.Major >= 12) 5433 return Error(IDRange.Start, "directive requires gfx10 or gfx11", 5434 IDRange); 5435 SharedVGPRCount = Val; 5436 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5437 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val, 5438 ValRange); 5439 } else if (ID == 
".amdhsa_exception_fp_ieee_invalid_op") { 5440 PARSE_BITS_ENTRY( 5441 KD.compute_pgm_rsrc2, 5442 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5443 ValRange); 5444 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5445 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5446 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5447 Val, ValRange); 5448 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5449 PARSE_BITS_ENTRY( 5450 KD.compute_pgm_rsrc2, 5451 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5452 ValRange); 5453 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5454 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5455 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5456 Val, ValRange); 5457 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5458 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5459 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5460 Val, ValRange); 5461 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5462 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5463 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5464 Val, ValRange); 5465 } else if (ID == ".amdhsa_exception_int_div_zero") { 5466 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5467 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5468 Val, ValRange); 5469 } else if (ID == ".amdhsa_round_robin_scheduling") { 5470 if (IVersion.Major < 12) 5471 return Error(IDRange.Start, "directive requires gfx12+", IDRange); 5472 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5473 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val, 5474 ValRange); 5475 } else { 5476 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5477 } 5478 5479 #undef PARSE_BITS_ENTRY 5480 } 5481 5482 if (!Seen.contains(".amdhsa_next_free_vgpr")) 5483 return TokError(".amdhsa_next_free_vgpr directive is required"); 5484 5485 if (!Seen.contains(".amdhsa_next_free_sgpr")) 5486 return TokError(".amdhsa_next_free_sgpr directive is required"); 5487 5488 unsigned VGPRBlocks; 5489 unsigned SGPRBlocks; 5490 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5491 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5492 EnableWavefrontSize32, NextFreeVGPR, 5493 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5494 SGPRBlocks)) 5495 return true; 5496 5497 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5498 VGPRBlocks)) 5499 return OutOfRangeError(VGPRRange); 5500 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5501 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5502 5503 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5504 SGPRBlocks)) 5505 return OutOfRangeError(SGPRRange); 5506 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5507 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5508 SGPRBlocks); 5509 5510 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5511 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 5512 "enabled user SGPRs"); 5513 5514 unsigned UserSGPRCount = 5515 ExplicitUserSGPRCount ? 
*ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5516
5517 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5518 return TokError("too many user SGPRs enabled");
5519 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5520 UserSGPRCount);
5521
5522 if (PreloadLength && KD.kernarg_size &&
5523 (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
5524 return TokError("Kernarg preload length + offset is larger than the "
5525 "kernarg segment size");
5526
5527 if (isGFX90A()) {
5528 if (!Seen.contains(".amdhsa_accum_offset"))
5529 return TokError(".amdhsa_accum_offset directive is required");
5530 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5531 return TokError("accum_offset should be in range [4..256] in "
5532 "increments of 4");
5533 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5534 return TokError("accum_offset exceeds total VGPR allocation");
5535 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5536 (AccumOffset / 4 - 1));
5537 }
5538
5539 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5540 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5541 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5542 return TokError("shared_vgpr_count directive not valid on "
5543 "wavefront size 32");
5544 }
5545 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5546 return TokError("shared_vgpr_count*2 + "
5547 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5548 "exceed 63");
5549 }
5550 }
5551
5552 getTargetStreamer().EmitAmdhsaKernelDescriptor(
5553 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5554 ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion());
5555 return false;
5556 }
5557
5558 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5559 uint32_t Major;
5560 uint32_t Minor;
5561
5562 if (ParseDirectiveMajorMinor(Major, Minor))
5563 return true;
5564
5565 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5566 return false;
5567 }
5568
5569 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5570 uint32_t Major;
5571 uint32_t Minor;
5572 uint32_t Stepping;
5573 StringRef VendorName;
5574 StringRef ArchName;
5575
5576 // If this directive has no arguments, then use the ISA version for the
5577 // targeted GPU.
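// For illustration only (an assumed example, not taken from any particular
// test): an explicit form of this directive could look like
//   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// whereas the argument-less form handled below falls back to the target's own
// ISA version.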
5578 if (isToken(AsmToken::EndOfStatement)) { 5579 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5580 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5581 ISA.Stepping, 5582 "AMD", "AMDGPU"); 5583 return false; 5584 } 5585 5586 if (ParseDirectiveMajorMinor(Major, Minor)) 5587 return true; 5588 5589 if (!trySkipToken(AsmToken::Comma)) 5590 return TokError("stepping version number required, comma expected"); 5591 5592 if (ParseAsAbsoluteExpression(Stepping)) 5593 return TokError("invalid stepping version"); 5594 5595 if (!trySkipToken(AsmToken::Comma)) 5596 return TokError("vendor name required, comma expected"); 5597 5598 if (!parseString(VendorName, "invalid vendor name")) 5599 return true; 5600 5601 if (!trySkipToken(AsmToken::Comma)) 5602 return TokError("arch name required, comma expected"); 5603 5604 if (!parseString(ArchName, "invalid arch name")) 5605 return true; 5606 5607 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5608 VendorName, ArchName); 5609 return false; 5610 } 5611 5612 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5613 amd_kernel_code_t &Header) { 5614 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5615 // assembly for backwards compatibility. 5616 if (ID == "max_scratch_backing_memory_byte_size") { 5617 Parser.eatToEndOfStatement(); 5618 return false; 5619 } 5620 5621 SmallString<40> ErrStr; 5622 raw_svector_ostream Err(ErrStr); 5623 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5624 return TokError(Err.str()); 5625 } 5626 Lex(); 5627 5628 if (ID == "enable_dx10_clamp") { 5629 if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) && 5630 isGFX12Plus()) 5631 return TokError("enable_dx10_clamp=1 is not allowed on GFX12+"); 5632 } 5633 5634 if (ID == "enable_ieee_mode") { 5635 if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) && 5636 isGFX12Plus()) 5637 return TokError("enable_ieee_mode=1 is not allowed on GFX12+"); 5638 } 5639 5640 if (ID == "enable_wavefront_size32") { 5641 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5642 if (!isGFX10Plus()) 5643 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5644 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5645 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5646 } else { 5647 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5648 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5649 } 5650 } 5651 5652 if (ID == "wavefront_size") { 5653 if (Header.wavefront_size == 5) { 5654 if (!isGFX10Plus()) 5655 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5656 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5657 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5658 } else if (Header.wavefront_size == 6) { 5659 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5660 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5661 } 5662 } 5663 5664 if (ID == "enable_wgp_mode") { 5665 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5666 !isGFX10Plus()) 5667 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5668 } 5669 5670 if (ID == "enable_mem_ordered") { 5671 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5672 !isGFX10Plus()) 5673 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5674 } 5675 5676 if (ID == "enable_fwd_progress") { 5677 if 
(G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5678 !isGFX10Plus()) 5679 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5680 } 5681 5682 return false; 5683 } 5684 5685 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5686 amd_kernel_code_t Header; 5687 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5688 5689 while (true) { 5690 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5691 // will set the current token to EndOfStatement. 5692 while(trySkipToken(AsmToken::EndOfStatement)); 5693 5694 StringRef ID; 5695 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5696 return true; 5697 5698 if (ID == ".end_amd_kernel_code_t") 5699 break; 5700 5701 if (ParseAMDKernelCodeTValue(ID, Header)) 5702 return true; 5703 } 5704 5705 getTargetStreamer().EmitAMDKernelCodeT(Header); 5706 5707 return false; 5708 } 5709 5710 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5711 StringRef KernelName; 5712 if (!parseId(KernelName, "expected symbol name")) 5713 return true; 5714 5715 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5716 ELF::STT_AMDGPU_HSA_KERNEL); 5717 5718 KernelScope.initialize(getContext()); 5719 return false; 5720 } 5721 5722 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5723 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5724 return Error(getLoc(), 5725 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5726 "architectures"); 5727 } 5728 5729 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5730 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5731 return Error(getParser().getTok().getLoc(), "target id must match options"); 5732 5733 getTargetStreamer().EmitISAVersion(); 5734 Lex(); 5735 5736 return false; 5737 } 5738 5739 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5740 assert(isHsaAbi(getSTI())); 5741 5742 std::string HSAMetadataString; 5743 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin, 5744 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString)) 5745 return true; 5746 5747 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5748 return Error(getLoc(), "invalid HSA metadata"); 5749 5750 return false; 5751 } 5752 5753 /// Common code to parse out a block of text (typically YAML) between start and 5754 /// end directives. 5755 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5756 const char *AssemblerDirectiveEnd, 5757 std::string &CollectString) { 5758 5759 raw_string_ostream CollectStream(CollectString); 5760 5761 getLexer().setSkipSpace(false); 5762 5763 bool FoundEnd = false; 5764 while (!isToken(AsmToken::Eof)) { 5765 while (isToken(AsmToken::Space)) { 5766 CollectStream << getTokenStr(); 5767 Lex(); 5768 } 5769 5770 if (trySkipId(AssemblerDirectiveEnd)) { 5771 FoundEnd = true; 5772 break; 5773 } 5774 5775 CollectStream << Parser.parseStringToEndOfStatement() 5776 << getContext().getAsmInfo()->getSeparatorString(); 5777 5778 Parser.eatToEndOfStatement(); 5779 } 5780 5781 getLexer().setSkipSpace(true); 5782 5783 if (isToken(AsmToken::Eof) && !FoundEnd) { 5784 return TokError(Twine("expected directive ") + 5785 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5786 } 5787 5788 CollectStream.flush(); 5789 return false; 5790 } 5791 5792 /// Parse the assembler directive for new MsgPack-format PAL metadata. 
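/// A sketch of the expected layout (the exact directive spellings come from
/// AMDGPU::PALMD::AssemblerDirectiveBegin/End; shown here illustratively):
///   .amdgpu_pal_metadata
///     <msgpack/YAML document describing the PAL metadata>
///   .end_amdgpu_pal_metadata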
5793 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5794 std::string String;
5795 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5796 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5797 return true;
5798
5799 auto PALMetadata = getTargetStreamer().getPALMetadata();
5800 if (!PALMetadata->setFromString(String))
5801 return Error(getLoc(), "invalid PAL metadata");
5802 return false;
5803 }
5804
5805 /// Parse the assembler directive for old linear-format PAL metadata.
5806 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5807 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5808 return Error(getLoc(),
5809 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5810 "not available on non-amdpal OSes")).str());
5811 }
5812
5813 auto PALMetadata = getTargetStreamer().getPALMetadata();
5814 PALMetadata->setLegacy();
5815 for (;;) {
5816 uint32_t Key, Value;
5817 if (ParseAsAbsoluteExpression(Key)) {
5818 return TokError(Twine("invalid value in ") +
5819 Twine(PALMD::AssemblerDirective));
5820 }
5821 if (!trySkipToken(AsmToken::Comma)) {
5822 return TokError(Twine("expected an even number of values in ") +
5823 Twine(PALMD::AssemblerDirective));
5824 }
5825 if (ParseAsAbsoluteExpression(Value)) {
5826 return TokError(Twine("invalid value in ") +
5827 Twine(PALMD::AssemblerDirective));
5828 }
5829 PALMetadata->setRegister(Key, Value);
5830 if (!trySkipToken(AsmToken::Comma))
5831 break;
5832 }
5833 return false;
5834 }
5835
5836 /// ParseDirectiveAMDGPULDS
5837 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5838 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5839 if (getParser().checkForValidSection())
5840 return true;
5841
5842 StringRef Name;
5843 SMLoc NameLoc = getLoc();
5844 if (getParser().parseIdentifier(Name))
5845 return TokError("expected identifier in directive");
5846
5847 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5848 if (getParser().parseComma())
5849 return true;
5850
5851 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5852
5853 int64_t Size;
5854 SMLoc SizeLoc = getLoc();
5855 if (getParser().parseAbsoluteExpression(Size))
5856 return true;
5857 if (Size < 0)
5858 return Error(SizeLoc, "size must be non-negative");
5859 if (Size > LocalMemorySize)
5860 return Error(SizeLoc, "size is too large");
5861
5862 int64_t Alignment = 4;
5863 if (trySkipToken(AsmToken::Comma)) {
5864 SMLoc AlignLoc = getLoc();
5865 if (getParser().parseAbsoluteExpression(Alignment))
5866 return true;
5867 if (Alignment < 0 || !isPowerOf2_64(Alignment))
5868 return Error(AlignLoc, "alignment must be a power of two");
5869
5870 // Alignment larger than the size of LDS is possible in theory, as long
5871 // as the linker manages to place the symbol at address 0, but we do want
5872 // to make sure the alignment fits nicely into a 32-bit integer.
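// For illustration, a directive such as ".amdgpu_lds my_lds_var, 1024, 16"
// (hypothetical symbol name) requests 1024 bytes of LDS at 16-byte alignment;
// the size was validated above and the alignment is range-checked just below.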
5873 if (Alignment >= 1u << 31) 5874 return Error(AlignLoc, "alignment is too large"); 5875 } 5876 5877 if (parseEOL()) 5878 return true; 5879 5880 Symbol->redefineIfPossible(); 5881 if (!Symbol->isUndefined()) 5882 return Error(NameLoc, "invalid symbol redefinition"); 5883 5884 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5885 return false; 5886 } 5887 5888 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5889 StringRef IDVal = DirectiveID.getString(); 5890 5891 if (isHsaAbi(getSTI())) { 5892 if (IDVal == ".amdhsa_kernel") 5893 return ParseDirectiveAMDHSAKernel(); 5894 5895 // TODO: Restructure/combine with PAL metadata directive. 5896 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5897 return ParseDirectiveHSAMetadata(); 5898 } else { 5899 if (IDVal == ".hsa_code_object_version") 5900 return ParseDirectiveHSACodeObjectVersion(); 5901 5902 if (IDVal == ".hsa_code_object_isa") 5903 return ParseDirectiveHSACodeObjectISA(); 5904 5905 if (IDVal == ".amd_kernel_code_t") 5906 return ParseDirectiveAMDKernelCodeT(); 5907 5908 if (IDVal == ".amdgpu_hsa_kernel") 5909 return ParseDirectiveAMDGPUHsaKernel(); 5910 5911 if (IDVal == ".amd_amdgpu_isa") 5912 return ParseDirectiveISAVersion(); 5913 5914 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) { 5915 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) + 5916 Twine(" directive is " 5917 "not available on non-amdhsa OSes")) 5918 .str()); 5919 } 5920 } 5921 5922 if (IDVal == ".amdgcn_target") 5923 return ParseDirectiveAMDGCNTarget(); 5924 5925 if (IDVal == ".amdgpu_lds") 5926 return ParseDirectiveAMDGPULDS(); 5927 5928 if (IDVal == PALMD::AssemblerDirectiveBegin) 5929 return ParseDirectivePALMetadataBegin(); 5930 5931 if (IDVal == PALMD::AssemblerDirective) 5932 return ParseDirectivePALMetadata(); 5933 5934 return true; 5935 } 5936 5937 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5938 unsigned RegNo) { 5939 5940 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5941 return isGFX9Plus(); 5942 5943 // GFX10+ has 2 more SGPRs 104 and 105. 5944 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5945 return hasSGPR104_SGPR105(); 5946 5947 switch (RegNo) { 5948 case AMDGPU::SRC_SHARED_BASE_LO: 5949 case AMDGPU::SRC_SHARED_BASE: 5950 case AMDGPU::SRC_SHARED_LIMIT_LO: 5951 case AMDGPU::SRC_SHARED_LIMIT: 5952 case AMDGPU::SRC_PRIVATE_BASE_LO: 5953 case AMDGPU::SRC_PRIVATE_BASE: 5954 case AMDGPU::SRC_PRIVATE_LIMIT_LO: 5955 case AMDGPU::SRC_PRIVATE_LIMIT: 5956 return isGFX9Plus(); 5957 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5958 return isGFX9Plus() && !isGFX11Plus(); 5959 case AMDGPU::TBA: 5960 case AMDGPU::TBA_LO: 5961 case AMDGPU::TBA_HI: 5962 case AMDGPU::TMA: 5963 case AMDGPU::TMA_LO: 5964 case AMDGPU::TMA_HI: 5965 return !isGFX9Plus(); 5966 case AMDGPU::XNACK_MASK: 5967 case AMDGPU::XNACK_MASK_LO: 5968 case AMDGPU::XNACK_MASK_HI: 5969 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5970 case AMDGPU::SGPR_NULL: 5971 return isGFX10Plus(); 5972 default: 5973 break; 5974 } 5975 5976 if (isCI()) 5977 return true; 5978 5979 if (isSI() || isGFX10Plus()) { 5980 // No flat_scr on SI. 5981 // On GFX10Plus flat scratch is not a valid register operand and can only be 5982 // accessed with s_setreg/s_getreg. 
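// As a result, an operand such as "flat_scratch_lo" is rejected on those
// targets (illustrative example).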
5983 switch (RegNo) {
5984 case AMDGPU::FLAT_SCR:
5985 case AMDGPU::FLAT_SCR_LO:
5986 case AMDGPU::FLAT_SCR_HI:
5987 return false;
5988 default:
5989 return true;
5990 }
5991 }
5992
5993 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5994 // SI/CI have.
5995 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5996 return hasSGPR102_SGPR103();
5997
5998 return true;
5999 }
6000
6001 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6002 StringRef Mnemonic,
6003 OperandMode Mode) {
6004 ParseStatus Res = parseVOPD(Operands);
6005 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6006 return Res;
6007
6008 // Try to parse with a custom parser
6009 Res = MatchOperandParserImpl(Operands, Mnemonic);
6010
6011 // If we successfully parsed the operand or if there was an error parsing,
6012 // we are done.
6013 //
6014 // If we are parsing after we reach EndOfStatement then this means we
6015 // are appending default values to the Operands list. This is only done
6016 // by custom parser, so we shouldn't continue on to the generic parsing.
6017 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6018 return Res;
6019
6020 SMLoc RBraceLoc;
6021 SMLoc LBraceLoc = getLoc();
6022 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6023 unsigned Prefix = Operands.size();
6024
6025 for (;;) {
6026 auto Loc = getLoc();
6027 Res = parseReg(Operands);
6028 if (Res.isNoMatch())
6029 Error(Loc, "expected a register");
6030 if (!Res.isSuccess())
6031 return ParseStatus::Failure;
6032
6033 RBraceLoc = getLoc();
6034 if (trySkipToken(AsmToken::RBrac))
6035 break;
6036
6037 if (!skipToken(AsmToken::Comma,
6038 "expected a comma or a closing square bracket"))
6039 return ParseStatus::Failure;
6040 }
6041
6042 if (Operands.size() - Prefix > 1) {
6043 Operands.insert(Operands.begin() + Prefix,
6044 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6045 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6046 }
6047
6048 return ParseStatus::Success;
6049 }
6050
6051 return parseRegOrImm(Operands);
6052 }
6053
6054 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6055 // Clear any forced encodings from the previous instruction.
6056 setForcedEncodingSize(0);
6057 setForcedDPP(false);
6058 setForcedSDWA(false);
6059
6060 if (Name.ends_with("_e64_dpp")) {
6061 setForcedDPP(true);
6062 setForcedEncodingSize(64);
6063 return Name.substr(0, Name.size() - 8);
6064 } else if (Name.ends_with("_e64")) {
6065 setForcedEncodingSize(64);
6066 return Name.substr(0, Name.size() - 4);
6067 } else if (Name.ends_with("_e32")) {
6068 setForcedEncodingSize(32);
6069 return Name.substr(0, Name.size() - 4);
6070 } else if (Name.ends_with("_dpp")) {
6071 setForcedDPP(true);
6072 return Name.substr(0, Name.size() - 4);
6073 } else if (Name.ends_with("_sdwa")) {
6074 setForcedSDWA(true);
6075 return Name.substr(0, Name.size() - 5);
6076 }
6077 return Name;
6078 }
6079
6080 static void applyMnemonicAliases(StringRef &Mnemonic,
6081 const FeatureBitset &Features,
6082 unsigned VariantID);
6083
6084 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6085 StringRef Name,
6086 SMLoc NameLoc, OperandVector &Operands) {
6087 // Add the instruction mnemonic
6088 Name = parseMnemonicSuffix(Name);
6089
6090 // If the target architecture uses MnemonicAlias, call it here to parse
6091 // operands correctly.
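// (By this point a suffixed mnemonic such as "v_add_f32_e64" has already been
// reduced to "v_add_f32" by parseMnemonicSuffix above; the example is for
// illustration only.)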
6092 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 6093 6094 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 6095 6096 bool IsMIMG = Name.starts_with("image_"); 6097 6098 while (!trySkipToken(AsmToken::EndOfStatement)) { 6099 OperandMode Mode = OperandMode_Default; 6100 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 6101 Mode = OperandMode_NSA; 6102 ParseStatus Res = parseOperand(Operands, Name, Mode); 6103 6104 if (!Res.isSuccess()) { 6105 checkUnsupportedInstruction(Name, NameLoc); 6106 if (!Parser.hasPendingError()) { 6107 // FIXME: use real operand location rather than the current location. 6108 StringRef Msg = Res.isFailure() ? "failed parsing operand." 6109 : "not a valid operand."; 6110 Error(getLoc(), Msg); 6111 } 6112 while (!trySkipToken(AsmToken::EndOfStatement)) { 6113 lex(); 6114 } 6115 return true; 6116 } 6117 6118 // Eat the comma or space if there is one. 6119 trySkipToken(AsmToken::Comma); 6120 } 6121 6122 return false; 6123 } 6124 6125 //===----------------------------------------------------------------------===// 6126 // Utility functions 6127 //===----------------------------------------------------------------------===// 6128 6129 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name, 6130 OperandVector &Operands) { 6131 SMLoc S = getLoc(); 6132 if (!trySkipId(Name)) 6133 return ParseStatus::NoMatch; 6134 6135 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S)); 6136 return ParseStatus::Success; 6137 } 6138 6139 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, 6140 int64_t &IntVal) { 6141 6142 if (!trySkipId(Prefix, AsmToken::Colon)) 6143 return ParseStatus::NoMatch; 6144 6145 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure; 6146 } 6147 6148 ParseStatus AMDGPUAsmParser::parseIntWithPrefix( 6149 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6150 std::function<bool(int64_t &)> ConvertResult) { 6151 SMLoc S = getLoc(); 6152 int64_t Value = 0; 6153 6154 ParseStatus Res = parseIntWithPrefix(Prefix, Value); 6155 if (!Res.isSuccess()) 6156 return Res; 6157 6158 if (ConvertResult && !ConvertResult(Value)) { 6159 Error(S, "invalid " + StringRef(Prefix) + " value."); 6160 } 6161 6162 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 6163 return ParseStatus::Success; 6164 } 6165 6166 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix( 6167 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6168 bool (*ConvertResult)(int64_t &)) { 6169 SMLoc S = getLoc(); 6170 if (!trySkipId(Prefix, AsmToken::Colon)) 6171 return ParseStatus::NoMatch; 6172 6173 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 6174 return ParseStatus::Failure; 6175 6176 unsigned Val = 0; 6177 const unsigned MaxSize = 4; 6178 6179 // FIXME: How to verify the number of elements matches the number of src 6180 // operands? 
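// The accepted form is a short bracketed list of 0/1 values, for example
// "op_sel:[0,1,1]" or "neg_lo:[1,0,0]" (illustrative; the actual prefix is
// whatever the caller passes in).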
6181 for (int I = 0; ; ++I) { 6182 int64_t Op; 6183 SMLoc Loc = getLoc(); 6184 if (!parseExpr(Op)) 6185 return ParseStatus::Failure; 6186 6187 if (Op != 0 && Op != 1) 6188 return Error(Loc, "invalid " + StringRef(Prefix) + " value."); 6189 6190 Val |= (Op << I); 6191 6192 if (trySkipToken(AsmToken::RBrac)) 6193 break; 6194 6195 if (I + 1 == MaxSize) 6196 return Error(getLoc(), "expected a closing square bracket"); 6197 6198 if (!skipToken(AsmToken::Comma, "expected a comma")) 6199 return ParseStatus::Failure; 6200 } 6201 6202 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 6203 return ParseStatus::Success; 6204 } 6205 6206 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name, 6207 OperandVector &Operands, 6208 AMDGPUOperand::ImmTy ImmTy) { 6209 int64_t Bit; 6210 SMLoc S = getLoc(); 6211 6212 if (trySkipId(Name)) { 6213 Bit = 1; 6214 } else if (trySkipId("no", Name)) { 6215 Bit = 0; 6216 } else { 6217 return ParseStatus::NoMatch; 6218 } 6219 6220 if (Name == "r128" && !hasMIMG_R128()) 6221 return Error(S, "r128 modifier is not supported on this GPU"); 6222 if (Name == "a16" && !hasA16()) 6223 return Error(S, "a16 modifier is not supported on this GPU"); 6224 6225 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 6226 ImmTy = AMDGPUOperand::ImmTyR128A16; 6227 6228 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 6229 return ParseStatus::Success; 6230 } 6231 6232 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, 6233 bool &Disabling) const { 6234 Disabling = Id.consume_front("no"); 6235 6236 if (isGFX940() && !Mnemo.starts_with("s_")) { 6237 return StringSwitch<unsigned>(Id) 6238 .Case("nt", AMDGPU::CPol::NT) 6239 .Case("sc0", AMDGPU::CPol::SC0) 6240 .Case("sc1", AMDGPU::CPol::SC1) 6241 .Default(0); 6242 } 6243 6244 return StringSwitch<unsigned>(Id) 6245 .Case("dlc", AMDGPU::CPol::DLC) 6246 .Case("glc", AMDGPU::CPol::GLC) 6247 .Case("scc", AMDGPU::CPol::SCC) 6248 .Case("slc", AMDGPU::CPol::SLC) 6249 .Default(0); 6250 } 6251 6252 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 6253 if (isGFX12Plus()) { 6254 SMLoc StringLoc = getLoc(); 6255 6256 int64_t CPolVal = 0; 6257 ParseStatus ResTH = ParseStatus::NoMatch; 6258 ParseStatus ResScope = ParseStatus::NoMatch; 6259 6260 for (;;) { 6261 if (ResTH.isNoMatch()) { 6262 int64_t TH; 6263 ResTH = parseTH(Operands, TH); 6264 if (ResTH.isFailure()) 6265 return ResTH; 6266 if (ResTH.isSuccess()) { 6267 CPolVal |= TH; 6268 continue; 6269 } 6270 } 6271 6272 if (ResScope.isNoMatch()) { 6273 int64_t Scope; 6274 ResScope = parseScope(Operands, Scope); 6275 if (ResScope.isFailure()) 6276 return ResScope; 6277 if (ResScope.isSuccess()) { 6278 CPolVal |= Scope; 6279 continue; 6280 } 6281 } 6282 6283 break; 6284 } 6285 6286 if (ResTH.isNoMatch() && ResScope.isNoMatch()) 6287 return ParseStatus::NoMatch; 6288 6289 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc, 6290 AMDGPUOperand::ImmTyCPol)); 6291 return ParseStatus::Success; 6292 } 6293 6294 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 6295 SMLoc OpLoc = getLoc(); 6296 unsigned Enabled = 0, Seen = 0; 6297 for (;;) { 6298 SMLoc S = getLoc(); 6299 bool Disabling; 6300 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling); 6301 if (!CPol) 6302 break; 6303 6304 lex(); 6305 6306 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) 6307 return Error(S, "dlc modifier is not supported on this GPU"); 6308 6309 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) 6310 return Error(S, "scc modifier is not supported 
on this GPU"); 6311 6312 if (Seen & CPol) 6313 return Error(S, "duplicate cache policy modifier"); 6314 6315 if (!Disabling) 6316 Enabled |= CPol; 6317 6318 Seen |= CPol; 6319 } 6320 6321 if (!Seen) 6322 return ParseStatus::NoMatch; 6323 6324 Operands.push_back( 6325 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol)); 6326 return ParseStatus::Success; 6327 } 6328 6329 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands, 6330 int64_t &Scope) { 6331 Scope = AMDGPU::CPol::SCOPE_CU; // default; 6332 6333 StringRef Value; 6334 SMLoc StringLoc; 6335 ParseStatus Res; 6336 6337 Res = parseStringWithPrefix("scope", Value, StringLoc); 6338 if (!Res.isSuccess()) 6339 return Res; 6340 6341 Scope = StringSwitch<int64_t>(Value) 6342 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU) 6343 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE) 6344 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV) 6345 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS) 6346 .Default(0xffffffff); 6347 6348 if (Scope == 0xffffffff) 6349 return Error(StringLoc, "invalid scope value"); 6350 6351 return ParseStatus::Success; 6352 } 6353 6354 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { 6355 TH = AMDGPU::CPol::TH_RT; // default 6356 6357 StringRef Value; 6358 SMLoc StringLoc; 6359 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc); 6360 if (!Res.isSuccess()) 6361 return Res; 6362 6363 if (Value == "TH_DEFAULT") 6364 TH = AMDGPU::CPol::TH_RT; 6365 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" || 6366 Value == "TH_LOAD_NT_WB") { 6367 return Error(StringLoc, "invalid th value"); 6368 } else if (Value.starts_with("TH_ATOMIC_")) { 6369 Value = Value.drop_front(10); 6370 TH = AMDGPU::CPol::TH_TYPE_ATOMIC; 6371 } else if (Value.starts_with("TH_LOAD_")) { 6372 Value = Value.drop_front(8); 6373 TH = AMDGPU::CPol::TH_TYPE_LOAD; 6374 } else if (Value.starts_with("TH_STORE_")) { 6375 Value = Value.drop_front(9); 6376 TH = AMDGPU::CPol::TH_TYPE_STORE; 6377 } else { 6378 return Error(StringLoc, "invalid th value"); 6379 } 6380 6381 if (Value == "BYPASS") 6382 TH |= AMDGPU::CPol::TH_REAL_BYPASS; 6383 6384 if (TH != 0) { 6385 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC) 6386 TH |= StringSwitch<int64_t>(Value) 6387 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 6388 .Case("RT", AMDGPU::CPol::TH_RT) 6389 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 6390 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT) 6391 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT | 6392 AMDGPU::CPol::TH_ATOMIC_RETURN) 6393 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE) 6394 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE | 6395 AMDGPU::CPol::TH_ATOMIC_NT) 6396 .Default(0xffffffff); 6397 else 6398 TH |= StringSwitch<int64_t>(Value) 6399 .Case("RT", AMDGPU::CPol::TH_RT) 6400 .Case("NT", AMDGPU::CPol::TH_NT) 6401 .Case("HT", AMDGPU::CPol::TH_HT) 6402 .Case("LU", AMDGPU::CPol::TH_LU) 6403 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB) 6404 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT) 6405 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT) 6406 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT) 6407 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB) 6408 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS) 6409 .Default(0xffffffff); 6410 } 6411 6412 if (TH == 0xffffffff) 6413 return Error(StringLoc, "invalid th value"); 6414 6415 return ParseStatus::Success; 6416 } 6417 6418 static void addOptionalImmOperand( 6419 MCInst& Inst, const OperandVector& Operands, 6420 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 6421 AMDGPUOperand::ImmTy ImmT, 6422 int64_t Default = 0) { 6423 
auto i = OptionalIdx.find(ImmT); 6424 if (i != OptionalIdx.end()) { 6425 unsigned Idx = i->second; 6426 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 6427 } else { 6428 Inst.addOperand(MCOperand::createImm(Default)); 6429 } 6430 } 6431 6432 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 6433 StringRef &Value, 6434 SMLoc &StringLoc) { 6435 if (!trySkipId(Prefix, AsmToken::Colon)) 6436 return ParseStatus::NoMatch; 6437 6438 StringLoc = getLoc(); 6439 return parseId(Value, "expected an identifier") ? ParseStatus::Success 6440 : ParseStatus::Failure; 6441 } 6442 6443 //===----------------------------------------------------------------------===// 6444 // MTBUF format 6445 //===----------------------------------------------------------------------===// 6446 6447 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6448 int64_t MaxVal, 6449 int64_t &Fmt) { 6450 int64_t Val; 6451 SMLoc Loc = getLoc(); 6452 6453 auto Res = parseIntWithPrefix(Pref, Val); 6454 if (Res.isFailure()) 6455 return false; 6456 if (Res.isNoMatch()) 6457 return true; 6458 6459 if (Val < 0 || Val > MaxVal) { 6460 Error(Loc, Twine("out of range ", StringRef(Pref))); 6461 return false; 6462 } 6463 6464 Fmt = Val; 6465 return true; 6466 } 6467 6468 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6469 // values to live in a joint format operand in the MCInst encoding. 6470 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6471 using namespace llvm::AMDGPU::MTBUFFormat; 6472 6473 int64_t Dfmt = DFMT_UNDEF; 6474 int64_t Nfmt = NFMT_UNDEF; 6475 6476 // dfmt and nfmt can appear in either order, and each is optional. 6477 for (int I = 0; I < 2; ++I) { 6478 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6479 return ParseStatus::Failure; 6480 6481 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) 6482 return ParseStatus::Failure; 6483 6484 // Skip optional comma between dfmt/nfmt 6485 // but guard against 2 commas following each other. 6486 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6487 !peekToken().is(AsmToken::Comma)) { 6488 trySkipToken(AsmToken::Comma); 6489 } 6490 } 6491 6492 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6493 return ParseStatus::NoMatch; 6494 6495 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6496 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6497 6498 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6499 return ParseStatus::Success; 6500 } 6501 6502 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6503 using namespace llvm::AMDGPU::MTBUFFormat; 6504 6505 int64_t Fmt = UFMT_UNDEF; 6506 6507 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6508 return ParseStatus::Failure; 6509 6510 if (Fmt == UFMT_UNDEF) 6511 return ParseStatus::NoMatch; 6512 6513 Format = Fmt; 6514 return ParseStatus::Success; 6515 } 6516 6517 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6518 int64_t &Nfmt, 6519 StringRef FormatStr, 6520 SMLoc Loc) { 6521 using namespace llvm::AMDGPU::MTBUFFormat; 6522 int64_t Format; 6523 6524 Format = getDfmt(FormatStr); 6525 if (Format != DFMT_UNDEF) { 6526 Dfmt = Format; 6527 return true; 6528 } 6529 6530 Format = getNfmt(FormatStr, getSTI()); 6531 if (Format != NFMT_UNDEF) { 6532 Nfmt = Format; 6533 return true; 6534 } 6535 6536 Error(Loc, "unsupported format"); 6537 return false; 6538 } 6539 6540 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6541 SMLoc FormatLoc, 6542 int64_t &Format) { 6543 using namespace llvm::AMDGPU::MTBUFFormat; 6544 6545 int64_t Dfmt = DFMT_UNDEF; 6546 int64_t Nfmt = NFMT_UNDEF; 6547 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6548 return ParseStatus::Failure; 6549 6550 if (trySkipToken(AsmToken::Comma)) { 6551 StringRef Str; 6552 SMLoc Loc = getLoc(); 6553 if (!parseId(Str, "expected a format string") || 6554 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) 6555 return ParseStatus::Failure; 6556 if (Dfmt == DFMT_UNDEF) 6557 return Error(Loc, "duplicate numeric format"); 6558 if (Nfmt == NFMT_UNDEF) 6559 return Error(Loc, "duplicate data format"); 6560 } 6561 6562 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6563 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6564 6565 if (isGFX10Plus()) { 6566 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6567 if (Ufmt == UFMT_UNDEF) 6568 return Error(FormatLoc, "unsupported format"); 6569 Format = Ufmt; 6570 } else { 6571 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6572 } 6573 6574 return ParseStatus::Success; 6575 } 6576 6577 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6578 SMLoc Loc, 6579 int64_t &Format) { 6580 using namespace llvm::AMDGPU::MTBUFFormat; 6581 6582 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6583 if (Id == UFMT_UNDEF) 6584 return ParseStatus::NoMatch; 6585 6586 if (!isGFX10Plus()) 6587 return Error(Loc, "unified format is not supported on this GPU"); 6588 6589 Format = Id; 6590 return ParseStatus::Success; 6591 } 6592 6593 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6594 using namespace llvm::AMDGPU::MTBUFFormat; 6595 SMLoc Loc = getLoc(); 6596 6597 if (!parseExpr(Format)) 6598 return ParseStatus::Failure; 6599 if (!isValidFormatEncoding(Format, getSTI())) 6600 return Error(Loc, "out of range format"); 6601 6602 return ParseStatus::Success; 6603 } 6604 6605 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6606 using namespace llvm::AMDGPU::MTBUFFormat; 6607 6608 if (!trySkipId("format", AsmToken::Colon)) 6609 return ParseStatus::NoMatch; 6610 6611 if (trySkipToken(AsmToken::LBrac)) { 6612 StringRef FormatStr; 6613 SMLoc Loc = getLoc(); 6614 if (!parseId(FormatStr, "expected a format string")) 6615 return ParseStatus::Failure; 6616 6617 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6618 if (Res.isNoMatch()) 6619 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6620 if (!Res.isSuccess()) 6621 return Res; 6622 6623 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6624 return ParseStatus::Failure; 6625 6626 return ParseStatus::Success; 6627 } 6628 6629 return parseNumericFormat(Format); 6630 } 6631 6632 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6633 using namespace llvm::AMDGPU::MTBUFFormat; 6634 6635 int64_t Format = getDefaultFormatEncoding(getSTI()); 6636 ParseStatus Res; 6637 SMLoc Loc = getLoc(); 6638 6639 // Parse legacy format syntax. 6640 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6641 if (Res.isFailure()) 6642 return Res; 6643 6644 bool FormatFound = Res.isSuccess(); 6645 6646 Operands.push_back( 6647 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6648 6649 if (FormatFound) 6650 trySkipToken(AsmToken::Comma); 6651 6652 if (isToken(AsmToken::EndOfStatement)) { 6653 // We are expecting an soffset operand, 6654 // but let matcher handle the error. 6655 return ParseStatus::Success; 6656 } 6657 6658 // Parse soffset. 
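// (On GFX10+ the format may also follow the soffset operand, e.g. something
// like "tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]";
// the example is illustrative only.)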
6659 Res = parseRegOrImm(Operands); 6660 if (!Res.isSuccess()) 6661 return Res; 6662 6663 trySkipToken(AsmToken::Comma); 6664 6665 if (!FormatFound) { 6666 Res = parseSymbolicOrNumericFormat(Format); 6667 if (Res.isFailure()) 6668 return Res; 6669 if (Res.isSuccess()) { 6670 auto Size = Operands.size(); 6671 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6672 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6673 Op.setImm(Format); 6674 } 6675 return ParseStatus::Success; 6676 } 6677 6678 if (isId("format") && peekToken().is(AsmToken::Colon)) 6679 return Error(getLoc(), "duplicate format"); 6680 return ParseStatus::Success; 6681 } 6682 6683 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) { 6684 ParseStatus Res = 6685 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); 6686 if (Res.isNoMatch()) { 6687 Res = parseIntWithPrefix("inst_offset", Operands, 6688 AMDGPUOperand::ImmTyInstOffset); 6689 } 6690 return Res; 6691 } 6692 6693 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) { 6694 ParseStatus Res = 6695 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16); 6696 if (Res.isNoMatch()) 6697 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16); 6698 return Res; 6699 } 6700 6701 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) { 6702 ParseStatus Res = 6703 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP); 6704 if (Res.isNoMatch()) { 6705 Res = 6706 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP); 6707 } 6708 return Res; 6709 } 6710 6711 //===----------------------------------------------------------------------===// 6712 // Exp 6713 //===----------------------------------------------------------------------===// 6714 6715 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6716 OptionalImmIndexMap OptionalIdx; 6717 6718 unsigned OperandIdx[4]; 6719 unsigned EnMask = 0; 6720 int SrcIdx = 0; 6721 6722 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6723 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6724 6725 // Add the register arguments 6726 if (Op.isReg()) { 6727 assert(SrcIdx < 4); 6728 OperandIdx[SrcIdx] = Inst.size(); 6729 Op.addRegOperands(Inst, 1); 6730 ++SrcIdx; 6731 continue; 6732 } 6733 6734 if (Op.isOff()) { 6735 assert(SrcIdx < 4); 6736 OperandIdx[SrcIdx] = Inst.size(); 6737 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6738 ++SrcIdx; 6739 continue; 6740 } 6741 6742 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6743 Op.addImmOperands(Inst, 1); 6744 continue; 6745 } 6746 6747 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6748 continue; 6749 6750 // Handle optional arguments 6751 OptionalIdx[Op.getImmTy()] = i; 6752 } 6753 6754 assert(SrcIdx == 4); 6755 6756 bool Compr = false; 6757 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6758 Compr = true; 6759 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6760 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6761 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6762 } 6763 6764 for (auto i = 0; i < SrcIdx; ++i) { 6765 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6766 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6767 } 6768 } 6769 6770 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6771 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6772 6773 Inst.addOperand(MCOperand::createImm(EnMask)); 6774 } 6775 6776 //===----------------------------------------------------------------------===// 6777 // s_waitcnt 6778 //===----------------------------------------------------------------------===// 6779 6780 static bool 6781 encodeCnt( 6782 const AMDGPU::IsaVersion ISA, 6783 int64_t &IntVal, 6784 int64_t CntVal, 6785 bool Saturate, 6786 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6787 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6788 { 6789 bool Failed = false; 6790 6791 IntVal = encode(ISA, IntVal, CntVal); 6792 if (CntVal != decode(ISA, IntVal)) { 6793 if (Saturate) { 6794 IntVal = encode(ISA, IntVal, -1); 6795 } else { 6796 Failed = true; 6797 } 6798 } 6799 return Failed; 6800 } 6801 6802 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6803 6804 SMLoc CntLoc = getLoc(); 6805 StringRef CntName = getTokenStr(); 6806 6807 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6808 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6809 return false; 6810 6811 int64_t CntVal; 6812 SMLoc ValLoc = getLoc(); 6813 if (!parseExpr(CntVal)) 6814 return false; 6815 6816 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6817 6818 bool Failed = true; 6819 bool Sat = CntName.ends_with("_sat"); 6820 6821 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6822 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6823 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6824 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6825 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6826 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6827 } else { 6828 Error(CntLoc, "invalid counter name " + CntName); 6829 return false; 6830 } 6831 6832 if (Failed) { 6833 Error(ValLoc, "too large value for " + CntName); 6834 return false; 6835 } 6836 6837 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6838 return false; 6839 6840 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6841 if (isToken(AsmToken::EndOfStatement)) { 6842 Error(getLoc(), "expected a counter name"); 6843 return false; 6844 } 6845 } 6846 6847 return true; 6848 } 6849 6850 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) { 6851 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6852 int64_t Waitcnt = getWaitcntBitMask(ISA); 6853 SMLoc S = getLoc(); 6854 6855 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6856 while (!isToken(AsmToken::EndOfStatement)) { 6857 if (!parseCnt(Waitcnt)) 6858 return ParseStatus::Failure; 6859 } 6860 } else { 6861 if (!parseExpr(Waitcnt)) 6862 return ParseStatus::Failure; 6863 } 6864 6865 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6866 return ParseStatus::Success; 6867 } 6868 6869 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6870 SMLoc FieldLoc = getLoc(); 6871 StringRef FieldName = getTokenStr(); 6872 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6873 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6874 return false; 6875 6876 SMLoc ValueLoc = getLoc(); 6877 StringRef ValueName = getTokenStr(); 6878 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6879 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6880 return false; 6881 6882 unsigned Shift; 6883 if (FieldName == "instid0") { 6884 Shift = 0; 6885 } else if (FieldName == "instskip") { 6886 Shift = 4; 6887 } else if (FieldName == "instid1") { 6888 Shift = 7; 6889 } else { 6890 Error(FieldLoc, "invalid field name " + FieldName); 6891 return false; 6892 } 6893 6894 int Value; 6895 if (Shift == 4) { 6896 // Parse values for instskip. 6897 Value = StringSwitch<int>(ValueName) 6898 .Case("SAME", 0) 6899 .Case("NEXT", 1) 6900 .Case("SKIP_1", 2) 6901 .Case("SKIP_2", 3) 6902 .Case("SKIP_3", 4) 6903 .Case("SKIP_4", 5) 6904 .Default(-1); 6905 } else { 6906 // Parse values for instid0 and instid1. 6907 Value = StringSwitch<int>(ValueName) 6908 .Case("NO_DEP", 0) 6909 .Case("VALU_DEP_1", 1) 6910 .Case("VALU_DEP_2", 2) 6911 .Case("VALU_DEP_3", 3) 6912 .Case("VALU_DEP_4", 4) 6913 .Case("TRANS32_DEP_1", 5) 6914 .Case("TRANS32_DEP_2", 6) 6915 .Case("TRANS32_DEP_3", 7) 6916 .Case("FMA_ACCUM_CYCLE_1", 8) 6917 .Case("SALU_CYCLE_1", 9) 6918 .Case("SALU_CYCLE_2", 10) 6919 .Case("SALU_CYCLE_3", 11) 6920 .Default(-1); 6921 } 6922 if (Value < 0) { 6923 Error(ValueLoc, "invalid value name " + ValueName); 6924 return false; 6925 } 6926 6927 Delay |= Value << Shift; 6928 return true; 6929 } 6930 6931 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) { 6932 int64_t Delay = 0; 6933 SMLoc S = getLoc(); 6934 6935 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6936 do { 6937 if (!parseDelay(Delay)) 6938 return ParseStatus::Failure; 6939 } while (trySkipToken(AsmToken::Pipe)); 6940 } else { 6941 if (!parseExpr(Delay)) 6942 return ParseStatus::Failure; 6943 } 6944 6945 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6946 return ParseStatus::Success; 6947 } 6948 6949 bool 6950 AMDGPUOperand::isSWaitCnt() const { 6951 return isImm(); 6952 } 6953 6954 bool AMDGPUOperand::isSDelayALU() const { return isImm(); } 6955 6956 //===----------------------------------------------------------------------===// 6957 // DepCtr 6958 //===----------------------------------------------------------------------===// 6959 6960 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6961 StringRef DepCtrName) { 6962 switch (ErrorId) { 6963 case OPR_ID_UNKNOWN: 6964 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6965 return; 6966 case OPR_ID_UNSUPPORTED: 6967 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6968 return; 6969 case OPR_ID_DUPLICATE: 6970 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6971 return; 6972 case OPR_VAL_INVALID: 6973 Error(Loc, Twine("invalid value for ", DepCtrName)); 6974 return; 6975 default: 6976 assert(false); 6977 } 6978 } 6979 6980 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6981 6982 using namespace llvm::AMDGPU::DepCtr; 6983 6984 SMLoc DepCtrLoc = getLoc(); 6985 StringRef DepCtrName = getTokenStr(); 6986 6987 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6988 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6989 return false; 6990 6991 int64_t ExprVal; 6992 if (!parseExpr(ExprVal)) 6993 return false; 6994 6995 unsigned PrevOprMask = UsedOprMask; 6996 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6997 6998 if (CntVal < 0) { 6999 depCtrError(DepCtrLoc, CntVal, DepCtrName); 7000 return false; 7001 } 7002 7003 if (!skipToken(AsmToken::RParen, "expected 
a closing parenthesis")) 7004 return false; 7005 7006 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 7007 if (isToken(AsmToken::EndOfStatement)) { 7008 Error(getLoc(), "expected a counter name"); 7009 return false; 7010 } 7011 } 7012 7013 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 7014 DepCtr = (DepCtr & ~CntValMask) | CntVal; 7015 return true; 7016 } 7017 7018 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) { 7019 using namespace llvm::AMDGPU::DepCtr; 7020 7021 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 7022 SMLoc Loc = getLoc(); 7023 7024 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 7025 unsigned UsedOprMask = 0; 7026 while (!isToken(AsmToken::EndOfStatement)) { 7027 if (!parseDepCtr(DepCtr, UsedOprMask)) 7028 return ParseStatus::Failure; 7029 } 7030 } else { 7031 if (!parseExpr(DepCtr)) 7032 return ParseStatus::Failure; 7033 } 7034 7035 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 7036 return ParseStatus::Success; 7037 } 7038 7039 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 7040 7041 //===----------------------------------------------------------------------===// 7042 // hwreg 7043 //===----------------------------------------------------------------------===// 7044 7045 bool 7046 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 7047 OperandInfoTy &Offset, 7048 OperandInfoTy &Width) { 7049 using namespace llvm::AMDGPU::Hwreg; 7050 7051 // The register may be specified by name or using a numeric code 7052 HwReg.Loc = getLoc(); 7053 if (isToken(AsmToken::Identifier) && 7054 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7055 HwReg.IsSymbolic = true; 7056 lex(); // skip register name 7057 } else if (!parseExpr(HwReg.Id, "a register name")) { 7058 return false; 7059 } 7060 7061 if (trySkipToken(AsmToken::RParen)) 7062 return true; 7063 7064 // parse optional params 7065 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 7066 return false; 7067 7068 Offset.Loc = getLoc(); 7069 if (!parseExpr(Offset.Id)) 7070 return false; 7071 7072 if (!skipToken(AsmToken::Comma, "expected a comma")) 7073 return false; 7074 7075 Width.Loc = getLoc(); 7076 return parseExpr(Width.Id) && 7077 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7078 } 7079 7080 bool 7081 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 7082 const OperandInfoTy &Offset, 7083 const OperandInfoTy &Width) { 7084 7085 using namespace llvm::AMDGPU::Hwreg; 7086 7087 if (HwReg.IsSymbolic) { 7088 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 7089 Error(HwReg.Loc, 7090 "specified hardware register is not supported on this GPU"); 7091 return false; 7092 } 7093 } else { 7094 if (!isValidHwreg(HwReg.Id)) { 7095 Error(HwReg.Loc, 7096 "invalid code of hardware register: only 6-bit values are legal"); 7097 return false; 7098 } 7099 } 7100 if (!isValidHwregOffset(Offset.Id)) { 7101 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 7102 return false; 7103 } 7104 if (!isValidHwregWidth(Width.Id)) { 7105 Error(Width.Loc, 7106 "invalid bitfield width: only values from 1 to 32 are legal"); 7107 return false; 7108 } 7109 return true; 7110 } 7111 7112 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 7113 using namespace llvm::AMDGPU::Hwreg; 7114 7115 int64_t ImmVal = 0; 7116 SMLoc Loc = getLoc(); 7117 7118 if (trySkipId("hwreg", AsmToken::LParen)) { 7119 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 7120 OperandInfoTy 
Offset(OFFSET_DEFAULT_); 7121 OperandInfoTy Width(WIDTH_DEFAULT_); 7122 if (parseHwregBody(HwReg, Offset, Width) && 7123 validateHwreg(HwReg, Offset, Width)) { 7124 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 7125 } else { 7126 return ParseStatus::Failure; 7127 } 7128 } else if (parseExpr(ImmVal, "a hwreg macro")) { 7129 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 7130 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7131 } else { 7132 return ParseStatus::Failure; 7133 } 7134 7135 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 7136 return ParseStatus::Success; 7137 } 7138 7139 bool AMDGPUOperand::isHwreg() const { 7140 return isImmTy(ImmTyHwreg); 7141 } 7142 7143 //===----------------------------------------------------------------------===// 7144 // sendmsg 7145 //===----------------------------------------------------------------------===// 7146 7147 bool 7148 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 7149 OperandInfoTy &Op, 7150 OperandInfoTy &Stream) { 7151 using namespace llvm::AMDGPU::SendMsg; 7152 7153 Msg.Loc = getLoc(); 7154 if (isToken(AsmToken::Identifier) && 7155 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7156 Msg.IsSymbolic = true; 7157 lex(); // skip message name 7158 } else if (!parseExpr(Msg.Id, "a message name")) { 7159 return false; 7160 } 7161 7162 if (trySkipToken(AsmToken::Comma)) { 7163 Op.IsDefined = true; 7164 Op.Loc = getLoc(); 7165 if (isToken(AsmToken::Identifier) && 7166 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 7167 lex(); // skip operation name 7168 } else if (!parseExpr(Op.Id, "an operation name")) { 7169 return false; 7170 } 7171 7172 if (trySkipToken(AsmToken::Comma)) { 7173 Stream.IsDefined = true; 7174 Stream.Loc = getLoc(); 7175 if (!parseExpr(Stream.Id)) 7176 return false; 7177 } 7178 } 7179 7180 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7181 } 7182 7183 bool 7184 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 7185 const OperandInfoTy &Op, 7186 const OperandInfoTy &Stream) { 7187 using namespace llvm::AMDGPU::SendMsg; 7188 7189 // Validation strictness depends on whether message is specified 7190 // in a symbolic or in a numeric form. In the latter case 7191 // only encoding possibility is checked. 
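// For example, a symbolic form such as "sendmsg(MSG_GS_DONE, GS_OP_NOP)" is
// validated against the rules below, while a plain integer only has to fit
// the encoding.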
7192 bool Strict = Msg.IsSymbolic; 7193 7194 if (Strict) { 7195 if (Msg.Id == OPR_ID_UNSUPPORTED) { 7196 Error(Msg.Loc, "specified message id is not supported on this GPU"); 7197 return false; 7198 } 7199 } else { 7200 if (!isValidMsgId(Msg.Id, getSTI())) { 7201 Error(Msg.Loc, "invalid message id"); 7202 return false; 7203 } 7204 } 7205 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 7206 if (Op.IsDefined) { 7207 Error(Op.Loc, "message does not support operations"); 7208 } else { 7209 Error(Msg.Loc, "missing message operation"); 7210 } 7211 return false; 7212 } 7213 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 7214 Error(Op.Loc, "invalid operation id"); 7215 return false; 7216 } 7217 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 7218 Stream.IsDefined) { 7219 Error(Stream.Loc, "message operation does not support streams"); 7220 return false; 7221 } 7222 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 7223 Error(Stream.Loc, "invalid message stream id"); 7224 return false; 7225 } 7226 return true; 7227 } 7228 7229 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) { 7230 using namespace llvm::AMDGPU::SendMsg; 7231 7232 int64_t ImmVal = 0; 7233 SMLoc Loc = getLoc(); 7234 7235 if (trySkipId("sendmsg", AsmToken::LParen)) { 7236 OperandInfoTy Msg(OPR_ID_UNKNOWN); 7237 OperandInfoTy Op(OP_NONE_); 7238 OperandInfoTy Stream(STREAM_ID_NONE_); 7239 if (parseSendMsgBody(Msg, Op, Stream) && 7240 validateSendMsg(Msg, Op, Stream)) { 7241 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 7242 } else { 7243 return ParseStatus::Failure; 7244 } 7245 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 7246 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 7247 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7248 } else { 7249 return ParseStatus::Failure; 7250 } 7251 7252 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 7253 return ParseStatus::Success; 7254 } 7255 7256 bool AMDGPUOperand::isSendMsg() const { 7257 return isImmTy(ImmTySendMsg); 7258 } 7259 7260 //===----------------------------------------------------------------------===// 7261 // v_interp 7262 //===----------------------------------------------------------------------===// 7263 7264 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 7265 StringRef Str; 7266 SMLoc S = getLoc(); 7267 7268 if (!parseId(Str)) 7269 return ParseStatus::NoMatch; 7270 7271 int Slot = StringSwitch<int>(Str) 7272 .Case("p10", 0) 7273 .Case("p20", 1) 7274 .Case("p0", 2) 7275 .Default(-1); 7276 7277 if (Slot == -1) 7278 return Error(S, "invalid interpolation slot"); 7279 7280 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 7281 AMDGPUOperand::ImmTyInterpSlot)); 7282 return ParseStatus::Success; 7283 } 7284 7285 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 7286 StringRef Str; 7287 SMLoc S = getLoc(); 7288 7289 if (!parseId(Str)) 7290 return ParseStatus::NoMatch; 7291 7292 if (!Str.starts_with("attr")) 7293 return Error(S, "invalid interpolation attribute"); 7294 7295 StringRef Chan = Str.take_back(2); 7296 int AttrChan = StringSwitch<int>(Chan) 7297 .Case(".x", 0) 7298 .Case(".y", 1) 7299 .Case(".z", 2) 7300 .Case(".w", 3) 7301 .Default(-1); 7302 if (AttrChan == -1) 7303 return Error(S, "invalid or missing interpolation attribute channel"); 7304 7305 Str = Str.drop_back(2).drop_front(4); 7306 7307 uint8_t Attr; 7308 if (Str.getAsInteger(10, Attr)) 7309 return Error(S, "invalid or 
missing interpolation attribute number"); 7310 7311 if (Attr > 32) 7312 return Error(S, "out of bounds interpolation attribute number"); 7313 7314 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 7315 7316 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 7317 AMDGPUOperand::ImmTyInterpAttr)); 7318 Operands.push_back(AMDGPUOperand::CreateImm( 7319 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan)); 7320 return ParseStatus::Success; 7321 } 7322 7323 //===----------------------------------------------------------------------===// 7324 // exp 7325 //===----------------------------------------------------------------------===// 7326 7327 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 7328 using namespace llvm::AMDGPU::Exp; 7329 7330 StringRef Str; 7331 SMLoc S = getLoc(); 7332 7333 if (!parseId(Str)) 7334 return ParseStatus::NoMatch; 7335 7336 unsigned Id = getTgtId(Str); 7337 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) 7338 return Error(S, (Id == ET_INVALID) 7339 ? "invalid exp target" 7340 : "exp target is not supported on this GPU"); 7341 7342 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 7343 AMDGPUOperand::ImmTyExpTgt)); 7344 return ParseStatus::Success; 7345 } 7346 7347 //===----------------------------------------------------------------------===// 7348 // parser helpers 7349 //===----------------------------------------------------------------------===// 7350 7351 bool 7352 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 7353 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 7354 } 7355 7356 bool 7357 AMDGPUAsmParser::isId(const StringRef Id) const { 7358 return isId(getToken(), Id); 7359 } 7360 7361 bool 7362 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 7363 return getTokenKind() == Kind; 7364 } 7365 7366 StringRef AMDGPUAsmParser::getId() const { 7367 return isToken(AsmToken::Identifier) ? 
getTokenStr() : StringRef(); 7368 } 7369 7370 bool 7371 AMDGPUAsmParser::trySkipId(const StringRef Id) { 7372 if (isId(Id)) { 7373 lex(); 7374 return true; 7375 } 7376 return false; 7377 } 7378 7379 bool 7380 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7381 if (isToken(AsmToken::Identifier)) { 7382 StringRef Tok = getTokenStr(); 7383 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) { 7384 lex(); 7385 return true; 7386 } 7387 } 7388 return false; 7389 } 7390 7391 bool 7392 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7393 if (isId(Id) && peekToken().is(Kind)) { 7394 lex(); 7395 lex(); 7396 return true; 7397 } 7398 return false; 7399 } 7400 7401 bool 7402 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7403 if (isToken(Kind)) { 7404 lex(); 7405 return true; 7406 } 7407 return false; 7408 } 7409 7410 bool 7411 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7412 const StringRef ErrMsg) { 7413 if (!trySkipToken(Kind)) { 7414 Error(getLoc(), ErrMsg); 7415 return false; 7416 } 7417 return true; 7418 } 7419 7420 bool 7421 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7422 SMLoc S = getLoc(); 7423 7424 const MCExpr *Expr; 7425 if (Parser.parseExpression(Expr)) 7426 return false; 7427 7428 if (Expr->evaluateAsAbsolute(Imm)) 7429 return true; 7430 7431 if (Expected.empty()) { 7432 Error(S, "expected absolute expression"); 7433 } else { 7434 Error(S, Twine("expected ", Expected) + 7435 Twine(" or an absolute expression")); 7436 } 7437 return false; 7438 } 7439 7440 bool 7441 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7442 SMLoc S = getLoc(); 7443 7444 const MCExpr *Expr; 7445 if (Parser.parseExpression(Expr)) 7446 return false; 7447 7448 int64_t IntVal; 7449 if (Expr->evaluateAsAbsolute(IntVal)) { 7450 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7451 } else { 7452 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7453 } 7454 return true; 7455 } 7456 7457 bool 7458 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7459 if (isToken(AsmToken::String)) { 7460 Val = getToken().getStringContents(); 7461 lex(); 7462 return true; 7463 } else { 7464 Error(getLoc(), ErrMsg); 7465 return false; 7466 } 7467 } 7468 7469 bool 7470 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7471 if (isToken(AsmToken::Identifier)) { 7472 Val = getTokenStr(); 7473 lex(); 7474 return true; 7475 } else { 7476 if (!ErrMsg.empty()) 7477 Error(getLoc(), ErrMsg); 7478 return false; 7479 } 7480 } 7481 7482 AsmToken 7483 AMDGPUAsmParser::getToken() const { 7484 return Parser.getTok(); 7485 } 7486 7487 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 7488 return isToken(AsmToken::EndOfStatement) 7489 ? 
getToken() 7490 : getLexer().peekTok(ShouldSkipSpace); 7491 } 7492 7493 void 7494 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7495 auto TokCount = getLexer().peekTokens(Tokens); 7496 7497 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7498 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7499 } 7500 7501 AsmToken::TokenKind 7502 AMDGPUAsmParser::getTokenKind() const { 7503 return getLexer().getKind(); 7504 } 7505 7506 SMLoc 7507 AMDGPUAsmParser::getLoc() const { 7508 return getToken().getLoc(); 7509 } 7510 7511 StringRef 7512 AMDGPUAsmParser::getTokenStr() const { 7513 return getToken().getString(); 7514 } 7515 7516 void 7517 AMDGPUAsmParser::lex() { 7518 Parser.Lex(); 7519 } 7520 7521 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const { 7522 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7523 } 7524 7525 SMLoc 7526 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7527 const OperandVector &Operands) const { 7528 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7529 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7530 if (Test(Op)) 7531 return Op.getStartLoc(); 7532 } 7533 return getInstLoc(Operands); 7534 } 7535 7536 SMLoc 7537 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7538 const OperandVector &Operands) const { 7539 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7540 return getOperandLoc(Test, Operands); 7541 } 7542 7543 SMLoc 7544 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7545 const OperandVector &Operands) const { 7546 auto Test = [=](const AMDGPUOperand& Op) { 7547 return Op.isRegKind() && Op.getReg() == Reg; 7548 }; 7549 return getOperandLoc(Test, Operands); 7550 } 7551 7552 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands, 7553 bool SearchMandatoryLiterals) const { 7554 auto Test = [](const AMDGPUOperand& Op) { 7555 return Op.IsImmKindLiteral() || Op.isExpr(); 7556 }; 7557 SMLoc Loc = getOperandLoc(Test, Operands); 7558 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands)) 7559 Loc = getMandatoryLitLoc(Operands); 7560 return Loc; 7561 } 7562 7563 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const { 7564 auto Test = [](const AMDGPUOperand &Op) { 7565 return Op.IsImmKindMandatoryLiteral(); 7566 }; 7567 return getOperandLoc(Test, Operands); 7568 } 7569 7570 SMLoc 7571 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7572 auto Test = [](const AMDGPUOperand& Op) { 7573 return Op.isImmKindConst(); 7574 }; 7575 return getOperandLoc(Test, Operands); 7576 } 7577 7578 //===----------------------------------------------------------------------===// 7579 // swizzle 7580 //===----------------------------------------------------------------------===// 7581 7582 LLVM_READNONE 7583 static unsigned 7584 encodeBitmaskPerm(const unsigned AndMask, 7585 const unsigned OrMask, 7586 const unsigned XorMask) { 7587 using namespace llvm::AMDGPU::Swizzle; 7588 7589 return BITMASK_PERM_ENC | 7590 (AndMask << BITMASK_AND_SHIFT) | 7591 (OrMask << BITMASK_OR_SHIFT) | 7592 (XorMask << BITMASK_XOR_SHIFT); 7593 } 7594 7595 bool 7596 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7597 const unsigned MinVal, 7598 const unsigned MaxVal, 7599 const StringRef ErrMsg, 7600 SMLoc &Loc) { 7601 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7602 return false; 7603 } 7604 Loc = getLoc(); 7605 if (!parseExpr(Op)) { 7606 return false; 7607 } 7608 if (Op < MinVal || Op > MaxVal) { 7609 Error(Loc, ErrMsg); 7610 return false; 7611 } 
7612 7613 return true; 7614 } 7615 7616 bool 7617 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7618 const unsigned MinVal, 7619 const unsigned MaxVal, 7620 const StringRef ErrMsg) { 7621 SMLoc Loc; 7622 for (unsigned i = 0; i < OpNum; ++i) { 7623 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7624 return false; 7625 } 7626 7627 return true; 7628 } 7629 7630 bool 7631 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7632 using namespace llvm::AMDGPU::Swizzle; 7633 7634 int64_t Lane[LANE_NUM]; 7635 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7636 "expected a 2-bit lane id")) { 7637 Imm = QUAD_PERM_ENC; 7638 for (unsigned I = 0; I < LANE_NUM; ++I) { 7639 Imm |= Lane[I] << (LANE_SHIFT * I); 7640 } 7641 return true; 7642 } 7643 return false; 7644 } 7645 7646 bool 7647 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7648 using namespace llvm::AMDGPU::Swizzle; 7649 7650 SMLoc Loc; 7651 int64_t GroupSize; 7652 int64_t LaneIdx; 7653 7654 if (!parseSwizzleOperand(GroupSize, 7655 2, 32, 7656 "group size must be in the interval [2,32]", 7657 Loc)) { 7658 return false; 7659 } 7660 if (!isPowerOf2_64(GroupSize)) { 7661 Error(Loc, "group size must be a power of two"); 7662 return false; 7663 } 7664 if (parseSwizzleOperand(LaneIdx, 7665 0, GroupSize - 1, 7666 "lane id must be in the interval [0,group size - 1]", 7667 Loc)) { 7668 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7669 return true; 7670 } 7671 return false; 7672 } 7673 7674 bool 7675 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7676 using namespace llvm::AMDGPU::Swizzle; 7677 7678 SMLoc Loc; 7679 int64_t GroupSize; 7680 7681 if (!parseSwizzleOperand(GroupSize, 7682 2, 32, 7683 "group size must be in the interval [2,32]", 7684 Loc)) { 7685 return false; 7686 } 7687 if (!isPowerOf2_64(GroupSize)) { 7688 Error(Loc, "group size must be a power of two"); 7689 return false; 7690 } 7691 7692 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7693 return true; 7694 } 7695 7696 bool 7697 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7698 using namespace llvm::AMDGPU::Swizzle; 7699 7700 SMLoc Loc; 7701 int64_t GroupSize; 7702 7703 if (!parseSwizzleOperand(GroupSize, 7704 1, 16, 7705 "group size must be in the interval [1,16]", 7706 Loc)) { 7707 return false; 7708 } 7709 if (!isPowerOf2_64(GroupSize)) { 7710 Error(Loc, "group size must be a power of two"); 7711 return false; 7712 } 7713 7714 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7715 return true; 7716 } 7717 7718 bool 7719 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7720 using namespace llvm::AMDGPU::Swizzle; 7721 7722 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7723 return false; 7724 } 7725 7726 StringRef Ctl; 7727 SMLoc StrLoc = getLoc(); 7728 if (!parseString(Ctl)) { 7729 return false; 7730 } 7731 if (Ctl.size() != BITMASK_WIDTH) { 7732 Error(StrLoc, "expected a 5-character mask"); 7733 return false; 7734 } 7735 7736 unsigned AndMask = 0; 7737 unsigned OrMask = 0; 7738 unsigned XorMask = 0; 7739 7740 for (size_t i = 0; i < Ctl.size(); ++i) { 7741 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7742 switch(Ctl[i]) { 7743 default: 7744 Error(StrLoc, "invalid mask"); 7745 return false; 7746 case '0': 7747 break; 7748 case '1': 7749 OrMask |= Mask; 7750 break; 7751 case 'p': 7752 AndMask |= Mask; 7753 break; 7754 case 'i': 7755 AndMask |= Mask; 7756 XorMask |= Mask; 7757 break; 7758 } 7759 } 7760 7761 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7762 return true; 
7763 } 7764 7765 bool 7766 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7767 7768 SMLoc OffsetLoc = getLoc(); 7769 7770 if (!parseExpr(Imm, "a swizzle macro")) { 7771 return false; 7772 } 7773 if (!isUInt<16>(Imm)) { 7774 Error(OffsetLoc, "expected a 16-bit offset"); 7775 return false; 7776 } 7777 return true; 7778 } 7779 7780 bool 7781 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7782 using namespace llvm::AMDGPU::Swizzle; 7783 7784 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7785 7786 SMLoc ModeLoc = getLoc(); 7787 bool Ok = false; 7788 7789 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7790 Ok = parseSwizzleQuadPerm(Imm); 7791 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7792 Ok = parseSwizzleBitmaskPerm(Imm); 7793 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7794 Ok = parseSwizzleBroadcast(Imm); 7795 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7796 Ok = parseSwizzleSwap(Imm); 7797 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7798 Ok = parseSwizzleReverse(Imm); 7799 } else { 7800 Error(ModeLoc, "expected a swizzle mode"); 7801 } 7802 7803 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7804 } 7805 7806 return false; 7807 } 7808 7809 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) { 7810 SMLoc S = getLoc(); 7811 int64_t Imm = 0; 7812 7813 if (trySkipId("offset")) { 7814 7815 bool Ok = false; 7816 if (skipToken(AsmToken::Colon, "expected a colon")) { 7817 if (trySkipId("swizzle")) { 7818 Ok = parseSwizzleMacro(Imm); 7819 } else { 7820 Ok = parseSwizzleOffset(Imm); 7821 } 7822 } 7823 7824 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7825 7826 return Ok ? ParseStatus::Success : ParseStatus::Failure; 7827 } 7828 return ParseStatus::NoMatch; 7829 } 7830 7831 bool 7832 AMDGPUOperand::isSwizzle() const { 7833 return isImmTy(ImmTySwizzle); 7834 } 7835 7836 //===----------------------------------------------------------------------===// 7837 // VGPR Index Mode 7838 //===----------------------------------------------------------------------===// 7839 7840 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7841 7842 using namespace llvm::AMDGPU::VGPRIndexMode; 7843 7844 if (trySkipToken(AsmToken::RParen)) { 7845 return OFF; 7846 } 7847 7848 int64_t Imm = 0; 7849 7850 while (true) { 7851 unsigned Mode = 0; 7852 SMLoc S = getLoc(); 7853 7854 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7855 if (trySkipId(IdSymbolic[ModeId])) { 7856 Mode = 1 << ModeId; 7857 break; 7858 } 7859 } 7860 7861 if (Mode == 0) { 7862 Error(S, (Imm == 0)? 
7863 "expected a VGPR index mode or a closing parenthesis" : 7864 "expected a VGPR index mode"); 7865 return UNDEF; 7866 } 7867 7868 if (Imm & Mode) { 7869 Error(S, "duplicate VGPR index mode"); 7870 return UNDEF; 7871 } 7872 Imm |= Mode; 7873 7874 if (trySkipToken(AsmToken::RParen)) 7875 break; 7876 if (!skipToken(AsmToken::Comma, 7877 "expected a comma or a closing parenthesis")) 7878 return UNDEF; 7879 } 7880 7881 return Imm; 7882 } 7883 7884 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7885 7886 using namespace llvm::AMDGPU::VGPRIndexMode; 7887 7888 int64_t Imm = 0; 7889 SMLoc S = getLoc(); 7890 7891 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7892 Imm = parseGPRIdxMacro(); 7893 if (Imm == UNDEF) 7894 return ParseStatus::Failure; 7895 } else { 7896 if (getParser().parseAbsoluteExpression(Imm)) 7897 return ParseStatus::Failure; 7898 if (Imm < 0 || !isUInt<4>(Imm)) 7899 return Error(S, "invalid immediate: only 4-bit values are legal"); 7900 } 7901 7902 Operands.push_back( 7903 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7904 return ParseStatus::Success; 7905 } 7906 7907 bool AMDGPUOperand::isGPRIdxMode() const { 7908 return isImmTy(ImmTyGprIdxMode); 7909 } 7910 7911 //===----------------------------------------------------------------------===// 7912 // sopp branch targets 7913 //===----------------------------------------------------------------------===// 7914 7915 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) { 7916 7917 // Make sure we are not parsing something 7918 // that looks like a label or an expression but is not. 7919 // This will improve error messages. 7920 if (isRegister() || isModifier()) 7921 return ParseStatus::NoMatch; 7922 7923 if (!parseExpr(Operands)) 7924 return ParseStatus::Failure; 7925 7926 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7927 assert(Opr.isImm() || Opr.isExpr()); 7928 SMLoc Loc = Opr.getStartLoc(); 7929 7930 // Currently we do not support arbitrary expressions as branch targets. 7931 // Only labels and absolute expressions are accepted. 
7932 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7933 Error(Loc, "expected an absolute expression or a label"); 7934 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7935 Error(Loc, "expected a 16-bit signed jump offset"); 7936 } 7937 7938 return ParseStatus::Success; 7939 } 7940 7941 //===----------------------------------------------------------------------===// 7942 // Boolean holding registers 7943 //===----------------------------------------------------------------------===// 7944 7945 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7946 return parseReg(Operands); 7947 } 7948 7949 //===----------------------------------------------------------------------===// 7950 // mubuf 7951 //===----------------------------------------------------------------------===// 7952 7953 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7954 const OperandVector &Operands, 7955 bool IsAtomic) { 7956 OptionalImmIndexMap OptionalIdx; 7957 unsigned FirstOperandIdx = 1; 7958 bool IsAtomicReturn = false; 7959 7960 if (IsAtomic) { 7961 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7962 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7963 if (!Op.isCPol()) 7964 continue; 7965 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7966 break; 7967 } 7968 7969 if (!IsAtomicReturn) { 7970 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7971 if (NewOpc != -1) 7972 Inst.setOpcode(NewOpc); 7973 } 7974 7975 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7976 SIInstrFlags::IsAtomicRet; 7977 } 7978 7979 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7980 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7981 7982 // Add the register arguments 7983 if (Op.isReg()) { 7984 Op.addRegOperands(Inst, 1); 7985 // Insert a tied src for atomic return dst. 7986 // This cannot be postponed as subsequent calls to 7987 // addImmOperands rely on correct number of MC operands. 7988 if (IsAtomicReturn && i == FirstOperandIdx) 7989 Op.addRegOperands(Inst, 1); 7990 continue; 7991 } 7992 7993 // Handle the case where soffset is an immediate 7994 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7995 Op.addImmOperands(Inst, 1); 7996 continue; 7997 } 7998 7999 // Handle tokens like 'offen' which are sometimes hard-coded into the 8000 // asm string. There are no MCInst operands for these. 8001 if (Op.isToken()) { 8002 continue; 8003 } 8004 assert(Op.isImm()); 8005 8006 // Handle optional arguments 8007 OptionalIdx[Op.getImmTy()] = i; 8008 } 8009 8010 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 8011 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 8012 } 8013 8014 //===----------------------------------------------------------------------===// 8015 // smrd 8016 //===----------------------------------------------------------------------===// 8017 8018 bool AMDGPUOperand::isSMRDOffset8() const { 8019 return isImmLiteral() && isUInt<8>(getImm()); 8020 } 8021 8022 bool AMDGPUOperand::isSMEMOffset() const { 8023 // Offset range is checked later by validator. 8024 return isImmLiteral(); 8025 } 8026 8027 bool AMDGPUOperand::isSMRDLiteralOffset() const { 8028 // 32-bit literals are only supported on CI and we only want to use them 8029 // when the offset is > 8-bits. 
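  // Hypothetical example (not from the original comments):
  //   s_load_dword s0, s[0:1], 0x1ff
  // would use the 32-bit literal form on CI because 0x1ff does not fit in
  // 8 bits.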
8030 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 8031 } 8032 8033 //===----------------------------------------------------------------------===// 8034 // vop3 8035 //===----------------------------------------------------------------------===// 8036 8037 static bool ConvertOmodMul(int64_t &Mul) { 8038 if (Mul != 1 && Mul != 2 && Mul != 4) 8039 return false; 8040 8041 Mul >>= 1; 8042 return true; 8043 } 8044 8045 static bool ConvertOmodDiv(int64_t &Div) { 8046 if (Div == 1) { 8047 Div = 0; 8048 return true; 8049 } 8050 8051 if (Div == 2) { 8052 Div = 3; 8053 return true; 8054 } 8055 8056 return false; 8057 } 8058 8059 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 8060 // This is intentional and ensures compatibility with sp3. 8061 // See bug 35397 for details. 8062 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) { 8063 if (BoundCtrl == 0 || BoundCtrl == 1) { 8064 if (!isGFX11Plus()) 8065 BoundCtrl = 1; 8066 return true; 8067 } 8068 return false; 8069 } 8070 8071 void AMDGPUAsmParser::onBeginOfFile() { 8072 if (!getParser().getStreamer().getTargetStreamer() || 8073 getSTI().getTargetTriple().getArch() == Triple::r600) 8074 return; 8075 8076 if (!getTargetStreamer().getTargetID()) 8077 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(), 8078 // TODO: Should try to check code object version from directive??? 8079 AMDGPU::getAmdhsaCodeObjectVersion()); 8080 8081 if (isHsaAbi(getSTI())) 8082 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 8083 } 8084 8085 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) { 8086 StringRef Name = getTokenStr(); 8087 if (Name == "mul") { 8088 return parseIntWithPrefix("mul", Operands, 8089 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8090 } 8091 8092 if (Name == "div") { 8093 return parseIntWithPrefix("div", Operands, 8094 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8095 } 8096 8097 return ParseStatus::NoMatch; 8098 } 8099 8100 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to 8101 // the number of src operands present, then copies that bit into src0_modifiers. 8102 void cvtVOP3DstOpSelOnly(MCInst &Inst) { 8103 int Opc = Inst.getOpcode(); 8104 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8105 if (OpSelIdx == -1) 8106 return; 8107 8108 int SrcNum; 8109 const int Ops[] = { AMDGPU::OpName::src0, 8110 AMDGPU::OpName::src1, 8111 AMDGPU::OpName::src2 }; 8112 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]); 8113 ++SrcNum) 8114 ; 8115 assert(SrcNum > 0); 8116 8117 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8118 8119 if ((OpSel & (1 << SrcNum)) != 0) { 8120 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8121 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8122 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8123 } 8124 } 8125 8126 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, 8127 const OperandVector &Operands) { 8128 cvtVOP3P(Inst, Operands); 8129 cvtVOP3DstOpSelOnly(Inst); 8130 } 8131 8132 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 8133 OptionalImmIndexMap &OptionalIdx) { 8134 cvtVOP3P(Inst, Operands, OptionalIdx); 8135 cvtVOP3DstOpSelOnly(Inst); 8136 } 8137 8138 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8139 return 8140 // 1. This operand is input modifiers 8141 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8142 // 2. 
This is not last operand 8143 && Desc.NumOperands > (OpNum + 1) 8144 // 3. Next operand is register class 8145 && Desc.operands()[OpNum + 1].RegClass != -1 8146 // 4. Next register is not tied to any other operand 8147 && Desc.getOperandConstraint(OpNum + 1, 8148 MCOI::OperandConstraint::TIED_TO) == -1; 8149 } 8150 8151 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8152 { 8153 OptionalImmIndexMap OptionalIdx; 8154 unsigned Opc = Inst.getOpcode(); 8155 8156 unsigned I = 1; 8157 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8158 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8159 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8160 } 8161 8162 for (unsigned E = Operands.size(); I != E; ++I) { 8163 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8164 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8165 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8166 } else if (Op.isInterpSlot() || Op.isInterpAttr() || 8167 Op.isInterpAttrChan()) { 8168 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8169 } else if (Op.isImmModifier()) { 8170 OptionalIdx[Op.getImmTy()] = I; 8171 } else { 8172 llvm_unreachable("unhandled operand type"); 8173 } 8174 } 8175 8176 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high)) 8177 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8178 AMDGPUOperand::ImmTyHigh); 8179 8180 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8181 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8182 AMDGPUOperand::ImmTyClampSI); 8183 8184 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8185 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8186 AMDGPUOperand::ImmTyOModSI); 8187 } 8188 8189 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8190 { 8191 OptionalImmIndexMap OptionalIdx; 8192 unsigned Opc = Inst.getOpcode(); 8193 8194 unsigned I = 1; 8195 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8196 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8197 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8198 } 8199 8200 for (unsigned E = Operands.size(); I != E; ++I) { 8201 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8202 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8203 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8204 } else if (Op.isImmModifier()) { 8205 OptionalIdx[Op.getImmTy()] = I; 8206 } else { 8207 llvm_unreachable("unhandled operand type"); 8208 } 8209 } 8210 8211 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8212 8213 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8214 if (OpSelIdx != -1) 8215 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8216 8217 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8218 8219 if (OpSelIdx == -1) 8220 return; 8221 8222 const int Ops[] = { AMDGPU::OpName::src0, 8223 AMDGPU::OpName::src1, 8224 AMDGPU::OpName::src2 }; 8225 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8226 AMDGPU::OpName::src1_modifiers, 8227 AMDGPU::OpName::src2_modifiers }; 8228 8229 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8230 8231 for (int J = 0; J < 3; ++J) { 8232 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8233 if (OpIdx == -1) 8234 break; 8235 8236 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8237 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8238 8239 if ((OpSel & (1 << J)) != 0) 8240 ModVal |= SISrcMods::OP_SEL_0; 8241 if (ModOps[J] == 
AMDGPU::OpName::src0_modifiers && 8242 (OpSel & (1 << 3)) != 0) 8243 ModVal |= SISrcMods::DST_OP_SEL; 8244 8245 Inst.getOperand(ModIdx).setImm(ModVal); 8246 } 8247 } 8248 8249 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8250 OptionalImmIndexMap &OptionalIdx) { 8251 unsigned Opc = Inst.getOpcode(); 8252 8253 unsigned I = 1; 8254 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8255 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8256 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8257 } 8258 8259 for (unsigned E = Operands.size(); I != E; ++I) { 8260 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8261 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8262 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8263 } else if (Op.isImmModifier()) { 8264 OptionalIdx[Op.getImmTy()] = I; 8265 } else if (Op.isRegOrImm()) { 8266 Op.addRegOrImmOperands(Inst, 1); 8267 } else { 8268 llvm_unreachable("unhandled operand type"); 8269 } 8270 } 8271 8272 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8273 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8274 AMDGPUOperand::ImmTyClampSI); 8275 8276 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8277 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8278 AMDGPUOperand::ImmTyOModSI); 8279 8280 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8281 // it has src2 register operand that is tied to dst operand 8282 // we don't allow modifiers for this operand in assembler so src2_modifiers 8283 // should be 0. 8284 if (isMAC(Opc)) { 8285 auto it = Inst.begin(); 8286 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8287 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8288 ++it; 8289 // Copy the operand to ensure it's not invalidated when Inst grows. 8290 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8291 } 8292 } 8293 8294 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8295 OptionalImmIndexMap OptionalIdx; 8296 cvtVOP3(Inst, Operands, OptionalIdx); 8297 } 8298 8299 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8300 OptionalImmIndexMap &OptIdx) { 8301 const int Opc = Inst.getOpcode(); 8302 const MCInstrDesc &Desc = MII.get(Opc); 8303 8304 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8305 8306 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || 8307 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) { 8308 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods 8309 Inst.addOperand(Inst.getOperand(0)); 8310 } 8311 8312 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) { 8313 assert(!IsPacked); 8314 Inst.addOperand(Inst.getOperand(0)); 8315 } 8316 8317 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8318 // instruction, and then figure out where to actually put the modifiers 8319 8320 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8321 if (OpSelIdx != -1) { 8322 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8323 } 8324 8325 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8326 if (OpSelHiIdx != -1) { 8327 int DefaultVal = IsPacked ? 
-1 : 0; 8328 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8329 DefaultVal); 8330 } 8331 8332 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8333 if (NegLoIdx != -1) { 8334 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8335 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8336 } 8337 8338 const int Ops[] = { AMDGPU::OpName::src0, 8339 AMDGPU::OpName::src1, 8340 AMDGPU::OpName::src2 }; 8341 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8342 AMDGPU::OpName::src1_modifiers, 8343 AMDGPU::OpName::src2_modifiers }; 8344 8345 unsigned OpSel = 0; 8346 unsigned OpSelHi = 0; 8347 unsigned NegLo = 0; 8348 unsigned NegHi = 0; 8349 8350 if (OpSelIdx != -1) 8351 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8352 8353 if (OpSelHiIdx != -1) 8354 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8355 8356 if (NegLoIdx != -1) { 8357 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8358 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8359 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8360 } 8361 8362 for (int J = 0; J < 3; ++J) { 8363 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8364 if (OpIdx == -1) 8365 break; 8366 8367 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8368 8369 if (ModIdx == -1) 8370 continue; 8371 8372 uint32_t ModVal = 0; 8373 8374 if ((OpSel & (1 << J)) != 0) 8375 ModVal |= SISrcMods::OP_SEL_0; 8376 8377 if ((OpSelHi & (1 << J)) != 0) 8378 ModVal |= SISrcMods::OP_SEL_1; 8379 8380 if ((NegLo & (1 << J)) != 0) 8381 ModVal |= SISrcMods::NEG; 8382 8383 if ((NegHi & (1 << J)) != 0) 8384 ModVal |= SISrcMods::NEG_HI; 8385 8386 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8387 } 8388 } 8389 8390 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8391 OptionalImmIndexMap OptIdx; 8392 cvtVOP3(Inst, Operands, OptIdx); 8393 cvtVOP3P(Inst, Operands, OptIdx); 8394 } 8395 8396 //===----------------------------------------------------------------------===// 8397 // VOPD 8398 //===----------------------------------------------------------------------===// 8399 8400 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { 8401 if (!hasVOPD(getSTI())) 8402 return ParseStatus::NoMatch; 8403 8404 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { 8405 SMLoc S = getLoc(); 8406 lex(); 8407 lex(); 8408 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); 8409 SMLoc OpYLoc = getLoc(); 8410 StringRef OpYName; 8411 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) { 8412 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc)); 8413 return ParseStatus::Success; 8414 } 8415 return Error(OpYLoc, "expected a VOPDY instruction after ::"); 8416 } 8417 return ParseStatus::NoMatch; 8418 } 8419 8420 // Create VOPD MCInst operands using parsed assembler operands. 
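// For illustration only (the syntax shown is an assumption, not taken from
// the original comments), a VOPD instruction pairs an OpX and an OpY
// component separated by "::", e.g.:
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4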
8421 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { 8422 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer 8423 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); 8424 if (Op.isReg()) { 8425 Op.addRegOperands(Inst, 1); 8426 return; 8427 } 8428 if (Op.isImm()) { 8429 Op.addImmOperands(Inst, 1); 8430 return; 8431 } 8432 llvm_unreachable("Unhandled operand type in cvtVOPD"); 8433 }; 8434 8435 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII); 8436 8437 // MCInst operands are ordered as follows: 8438 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 8439 8440 for (auto CompIdx : VOPD::COMPONENTS) { 8441 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands()); 8442 } 8443 8444 for (auto CompIdx : VOPD::COMPONENTS) { 8445 const auto &CInfo = InstInfo[CompIdx]; 8446 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum(); 8447 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx) 8448 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx)); 8449 if (CInfo.hasSrc2Acc()) 8450 addOp(CInfo.getIndexOfDstInParsedOperands()); 8451 } 8452 } 8453 8454 //===----------------------------------------------------------------------===// 8455 // dpp 8456 //===----------------------------------------------------------------------===// 8457 8458 bool AMDGPUOperand::isDPP8() const { 8459 return isImmTy(ImmTyDPP8); 8460 } 8461 8462 bool AMDGPUOperand::isDPPCtrl() const { 8463 using namespace AMDGPU::DPP; 8464 8465 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8466 if (result) { 8467 int64_t Imm = getImm(); 8468 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8469 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8470 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8471 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8472 (Imm == DppCtrl::WAVE_SHL1) || 8473 (Imm == DppCtrl::WAVE_ROL1) || 8474 (Imm == DppCtrl::WAVE_SHR1) || 8475 (Imm == DppCtrl::WAVE_ROR1) || 8476 (Imm == DppCtrl::ROW_MIRROR) || 8477 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8478 (Imm == DppCtrl::BCAST15) || 8479 (Imm == DppCtrl::BCAST31) || 8480 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8481 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8482 } 8483 return false; 8484 } 8485 8486 //===----------------------------------------------------------------------===// 8487 // mAI 8488 //===----------------------------------------------------------------------===// 8489 8490 bool AMDGPUOperand::isBLGP() const { 8491 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8492 } 8493 8494 bool AMDGPUOperand::isCBSZ() const { 8495 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8496 } 8497 8498 bool AMDGPUOperand::isABID() const { 8499 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8500 } 8501 8502 bool AMDGPUOperand::isS16Imm() const { 8503 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8504 } 8505 8506 bool AMDGPUOperand::isU16Imm() const { 8507 return isImmLiteral() && isUInt<16>(getImm()); 8508 } 8509 8510 //===----------------------------------------------------------------------===// 8511 // dim 8512 //===----------------------------------------------------------------------===// 8513 8514 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8515 // We want to allow "dim:1D" etc., 8516 // 
but the initial 1 is tokenized as an integer. 8517 std::string Token; 8518 if (isToken(AsmToken::Integer)) { 8519 SMLoc Loc = getToken().getEndLoc(); 8520 Token = std::string(getTokenStr()); 8521 lex(); 8522 if (getLoc() != Loc) 8523 return false; 8524 } 8525 8526 StringRef Suffix; 8527 if (!parseId(Suffix)) 8528 return false; 8529 Token += Suffix; 8530 8531 StringRef DimId = Token; 8532 if (DimId.starts_with("SQ_RSRC_IMG_")) 8533 DimId = DimId.drop_front(12); 8534 8535 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8536 if (!DimInfo) 8537 return false; 8538 8539 Encoding = DimInfo->Encoding; 8540 return true; 8541 } 8542 8543 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8544 if (!isGFX10Plus()) 8545 return ParseStatus::NoMatch; 8546 8547 SMLoc S = getLoc(); 8548 8549 if (!trySkipId("dim", AsmToken::Colon)) 8550 return ParseStatus::NoMatch; 8551 8552 unsigned Encoding; 8553 SMLoc Loc = getLoc(); 8554 if (!parseDimId(Encoding)) 8555 return Error(Loc, "invalid dim value"); 8556 8557 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8558 AMDGPUOperand::ImmTyDim)); 8559 return ParseStatus::Success; 8560 } 8561 8562 //===----------------------------------------------------------------------===// 8563 // dpp 8564 //===----------------------------------------------------------------------===// 8565 8566 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8567 SMLoc S = getLoc(); 8568 8569 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8570 return ParseStatus::NoMatch; 8571 8572 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8573 8574 int64_t Sels[8]; 8575 8576 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8577 return ParseStatus::Failure; 8578 8579 for (size_t i = 0; i < 8; ++i) { 8580 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8581 return ParseStatus::Failure; 8582 8583 SMLoc Loc = getLoc(); 8584 if (getParser().parseAbsoluteExpression(Sels[i])) 8585 return ParseStatus::Failure; 8586 if (0 > Sels[i] || 7 < Sels[i]) 8587 return Error(Loc, "expected a 3-bit value"); 8588 } 8589 8590 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8591 return ParseStatus::Failure; 8592 8593 unsigned DPP8 = 0; 8594 for (size_t i = 0; i < 8; ++i) 8595 DPP8 |= (Sels[i] << (i * 3)); 8596 8597 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8598 return ParseStatus::Success; 8599 } 8600 8601 bool 8602 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8603 const OperandVector &Operands) { 8604 if (Ctrl == "row_newbcast") 8605 return isGFX90A(); 8606 8607 if (Ctrl == "row_share" || 8608 Ctrl == "row_xmask") 8609 return isGFX10Plus(); 8610 8611 if (Ctrl == "wave_shl" || 8612 Ctrl == "wave_shr" || 8613 Ctrl == "wave_rol" || 8614 Ctrl == "wave_ror" || 8615 Ctrl == "row_bcast") 8616 return isVI() || isGFX9(); 8617 8618 return Ctrl == "row_mirror" || 8619 Ctrl == "row_half_mirror" || 8620 Ctrl == "quad_perm" || 8621 Ctrl == "row_shl" || 8622 Ctrl == "row_shr" || 8623 Ctrl == "row_ror"; 8624 } 8625 8626 int64_t 8627 AMDGPUAsmParser::parseDPPCtrlPerm() { 8628 // quad_perm:[%d,%d,%d,%d] 8629 8630 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8631 return -1; 8632 8633 int64_t Val = 0; 8634 for (int i = 0; i < 4; ++i) { 8635 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8636 return -1; 8637 8638 int64_t Temp; 8639 SMLoc Loc = getLoc(); 8640 if (getParser().parseAbsoluteExpression(Temp)) 8641 return -1; 8642 if 
(Temp < 0 || Temp > 3) { 8643 Error(Loc, "expected a 2-bit value"); 8644 return -1; 8645 } 8646 8647 Val += (Temp << i * 2); 8648 } 8649 8650 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8651 return -1; 8652 8653 return Val; 8654 } 8655 8656 int64_t 8657 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8658 using namespace AMDGPU::DPP; 8659 8660 // sel:%d 8661 8662 int64_t Val; 8663 SMLoc Loc = getLoc(); 8664 8665 if (getParser().parseAbsoluteExpression(Val)) 8666 return -1; 8667 8668 struct DppCtrlCheck { 8669 int64_t Ctrl; 8670 int Lo; 8671 int Hi; 8672 }; 8673 8674 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8675 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8676 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8677 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8678 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8679 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8680 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8681 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8682 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8683 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8684 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8685 .Default({-1, 0, 0}); 8686 8687 bool Valid; 8688 if (Check.Ctrl == -1) { 8689 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8690 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8691 } else { 8692 Valid = Check.Lo <= Val && Val <= Check.Hi; 8693 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 8694 } 8695 8696 if (!Valid) { 8697 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8698 return -1; 8699 } 8700 8701 return Val; 8702 } 8703 8704 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8705 using namespace AMDGPU::DPP; 8706 8707 if (!isToken(AsmToken::Identifier) || 8708 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8709 return ParseStatus::NoMatch; 8710 8711 SMLoc S = getLoc(); 8712 int64_t Val = -1; 8713 StringRef Ctrl; 8714 8715 parseId(Ctrl); 8716 8717 if (Ctrl == "row_mirror") { 8718 Val = DppCtrl::ROW_MIRROR; 8719 } else if (Ctrl == "row_half_mirror") { 8720 Val = DppCtrl::ROW_HALF_MIRROR; 8721 } else { 8722 if (skipToken(AsmToken::Colon, "expected a colon")) { 8723 if (Ctrl == "quad_perm") { 8724 Val = parseDPPCtrlPerm(); 8725 } else { 8726 Val = parseDPPCtrlSel(Ctrl); 8727 } 8728 } 8729 } 8730 8731 if (Val == -1) 8732 return ParseStatus::Failure; 8733 8734 Operands.push_back( 8735 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8736 return ParseStatus::Success; 8737 } 8738 8739 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 8740 bool IsDPP8) { 8741 OptionalImmIndexMap OptionalIdx; 8742 unsigned Opc = Inst.getOpcode(); 8743 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8744 8745 // MAC instructions are special because they have 'old' 8746 // operand which is not tied to dst (but assumed to be). 8747 // They also have dummy unused src2_modifiers. 
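  // The loop below compensates for this: when the 'old' operand slot is
  // reached, the dst register is duplicated into it, and a zero immediate is
  // inserted for the unused src2_modifiers.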
8748 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old); 8749 int Src2ModIdx = 8750 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); 8751 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 && 8752 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1; 8753 8754 unsigned I = 1; 8755 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8756 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8757 } 8758 8759 int Fi = 0; 8760 for (unsigned E = Operands.size(); I != E; ++I) { 8761 8762 if (IsMAC) { 8763 int NumOperands = Inst.getNumOperands(); 8764 if (OldIdx == NumOperands) { 8765 // Handle old operand 8766 constexpr int DST_IDX = 0; 8767 Inst.addOperand(Inst.getOperand(DST_IDX)); 8768 } else if (Src2ModIdx == NumOperands) { 8769 // Add unused dummy src2_modifiers 8770 Inst.addOperand(MCOperand::createImm(0)); 8771 } 8772 } 8773 8774 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8775 MCOI::TIED_TO); 8776 if (TiedTo != -1) { 8777 assert((unsigned)TiedTo < Inst.getNumOperands()); 8778 // handle tied old or src2 for MAC instructions 8779 Inst.addOperand(Inst.getOperand(TiedTo)); 8780 } 8781 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8782 // Add the register arguments 8783 if (IsDPP8 && Op.isDppFI()) { 8784 Fi = Op.getImm(); 8785 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8786 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8787 } else if (Op.isReg()) { 8788 Op.addRegOperands(Inst, 1); 8789 } else if (Op.isImm() && 8790 Desc.operands()[Inst.getNumOperands()].RegClass != -1) { 8791 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8792 Op.addImmOperands(Inst, 1); 8793 } else if (Op.isImm()) { 8794 OptionalIdx[Op.getImmTy()] = I; 8795 } else { 8796 llvm_unreachable("unhandled operand type"); 8797 } 8798 } 8799 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8800 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8801 8802 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8803 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8804 8805 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8806 cvtVOP3P(Inst, Operands, OptionalIdx); 8807 else if (Desc.TSFlags & SIInstrFlags::VOP3) 8808 cvtVOP3OpSel(Inst, Operands, OptionalIdx); 8809 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { 8810 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8811 } 8812 8813 if (IsDPP8) { 8814 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8815 using namespace llvm::AMDGPU::DPP; 8816 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8817 } else { 8818 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8819 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8820 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8821 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8822 8823 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) 8824 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8825 AMDGPUOperand::ImmTyDppFI); 8826 } 8827 } 8828 8829 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8830 OptionalImmIndexMap OptionalIdx; 8831 8832 unsigned I = 1; 8833 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8834 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8835 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8836 } 8837 8838 int Fi = 0; 8839 for (unsigned E = Operands.size(); I != E; ++I) { 8840 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8841 MCOI::TIED_TO); 8842 if (TiedTo != -1) { 8843 assert((unsigned)TiedTo < Inst.getNumOperands()); 8844 // handle tied old or src2 for MAC instructions 8845 Inst.addOperand(Inst.getOperand(TiedTo)); 8846 } 8847 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8848 // Add the register arguments 8849 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8850 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8851 // Skip it. 8852 continue; 8853 } 8854 8855 if (IsDPP8) { 8856 if (Op.isDPP8()) { 8857 Op.addImmOperands(Inst, 1); 8858 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8859 Op.addRegWithFPInputModsOperands(Inst, 2); 8860 } else if (Op.isDppFI()) { 8861 Fi = Op.getImm(); 8862 } else if (Op.isReg()) { 8863 Op.addRegOperands(Inst, 1); 8864 } else { 8865 llvm_unreachable("Invalid operand type"); 8866 } 8867 } else { 8868 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8869 Op.addRegWithFPInputModsOperands(Inst, 2); 8870 } else if (Op.isReg()) { 8871 Op.addRegOperands(Inst, 1); 8872 } else if (Op.isDPPCtrl()) { 8873 Op.addImmOperands(Inst, 1); 8874 } else if (Op.isImm()) { 8875 // Handle optional arguments 8876 OptionalIdx[Op.getImmTy()] = I; 8877 } else { 8878 llvm_unreachable("Invalid operand type"); 8879 } 8880 } 8881 } 8882 8883 if (IsDPP8) { 8884 using namespace llvm::AMDGPU::DPP; 8885 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8886 } else { 8887 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8888 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8889 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8890 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) { 8891 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8892 AMDGPUOperand::ImmTyDppFI); 8893 } 8894 } 8895 } 8896 8897 //===----------------------------------------------------------------------===// 8898 // sdwa 8899 //===----------------------------------------------------------------------===// 8900 8901 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, 8902 StringRef Prefix, 8903 AMDGPUOperand::ImmTy Type) { 8904 using namespace llvm::AMDGPU::SDWA; 8905 8906 SMLoc S = getLoc(); 8907 StringRef Value; 8908 8909 SMLoc StringLoc; 8910 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc); 8911 if (!Res.isSuccess()) 8912 return Res; 8913 8914 int64_t Int; 8915 Int = StringSwitch<int64_t>(Value) 8916 .Case("BYTE_0", SdwaSel::BYTE_0) 8917 .Case("BYTE_1", SdwaSel::BYTE_1) 8918 .Case("BYTE_2", SdwaSel::BYTE_2) 8919 .Case("BYTE_3", SdwaSel::BYTE_3) 8920 .Case("WORD_0", SdwaSel::WORD_0) 8921 .Case("WORD_1", SdwaSel::WORD_1) 8922 .Case("DWORD", SdwaSel::DWORD) 8923 .Default(0xffffffff); 8924 8925 if (Int == 0xffffffff) 8926 return Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8927 8928 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8929 return ParseStatus::Success; 8930 } 8931 8932 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8933 using namespace llvm::AMDGPU::SDWA; 8934 8935 SMLoc S = getLoc(); 8936 StringRef Value; 8937 8938 SMLoc StringLoc; 8939 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8940 if (!Res.isSuccess()) 8941 return Res; 8942 8943 int64_t Int; 8944 Int = StringSwitch<int64_t>(Value) 8945 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8946 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8947 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8948 .Default(0xffffffff); 8949 8950 if (Int == 0xffffffff) 8951 return Error(StringLoc, "invalid dst_unused value"); 8952 8953 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused)); 8954 return ParseStatus::Success; 8955 } 8956 8957 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8958 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8959 } 8960 8961 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8962 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8963 } 8964 8965 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8966 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8967 } 8968 8969 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8970 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8971 } 8972 8973 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8974 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8975 } 8976 8977 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8978 uint64_t BasicInstType, 8979 bool SkipDstVcc, 8980 bool SkipSrcVcc) { 8981 using namespace llvm::AMDGPU::SDWA; 8982 8983 OptionalImmIndexMap OptionalIdx; 8984 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8985 bool SkippedVcc = false; 8986 8987 
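  // Defs are converted first, then the remaining parsed operands; optional
  // SDWA modifiers (clamp, omod, dst_sel, dst_unused, src0_sel, src1_sel) are
  // appended afterwards with default values when they were omitted.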
unsigned I = 1; 8988 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8989 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8990 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8991 } 8992 8993 for (unsigned E = Operands.size(); I != E; ++I) { 8994 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8995 if (SkipVcc && !SkippedVcc && Op.isReg() && 8996 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8997 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8998 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8999 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 9000 // Skip VCC only if we didn't skip it on previous iteration. 9001 // Note that src0 and src1 occupy 2 slots each because of modifiers. 9002 if (BasicInstType == SIInstrFlags::VOP2 && 9003 ((SkipDstVcc && Inst.getNumOperands() == 1) || 9004 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 9005 SkippedVcc = true; 9006 continue; 9007 } else if (BasicInstType == SIInstrFlags::VOPC && 9008 Inst.getNumOperands() == 0) { 9009 SkippedVcc = true; 9010 continue; 9011 } 9012 } 9013 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 9014 Op.addRegOrImmWithInputModsOperands(Inst, 2); 9015 } else if (Op.isImm()) { 9016 // Handle optional arguments 9017 OptionalIdx[Op.getImmTy()] = I; 9018 } else { 9019 llvm_unreachable("Invalid operand type"); 9020 } 9021 SkippedVcc = false; 9022 } 9023 9024 const unsigned Opc = Inst.getOpcode(); 9025 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 && 9026 Opc != AMDGPU::V_NOP_sdwa_vi) { 9027 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 9028 switch (BasicInstType) { 9029 case SIInstrFlags::VOP1: 9030 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 9031 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9032 AMDGPUOperand::ImmTyClampSI, 0); 9033 9034 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 9035 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9036 AMDGPUOperand::ImmTyOModSI, 0); 9037 9038 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel)) 9039 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9040 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); 9041 9042 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused)) 9043 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9044 AMDGPUOperand::ImmTySDWADstUnused, 9045 DstUnused::UNUSED_PRESERVE); 9046 9047 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 9048 break; 9049 9050 case SIInstrFlags::VOP2: 9051 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 9052 9053 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod)) 9054 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 9055 9056 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); 9057 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE); 9058 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 9059 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD); 9060 break; 9061 9062 case SIInstrFlags::VOPC: 9063 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp)) 9064 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 9065 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClampSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstUnused,
                              DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);

      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
                                                unsigned MCK) {
  switch (MCK) {
  case MCK_addr64:
    return parseTokenOp("addr64", Operands);
  case MCK_done:
    return parseTokenOp("done", Operands);
  case MCK_idxen:
    return parseTokenOp("idxen", Operands);
  case MCK_lds:
    return parseTokenOp("lds", Operands);
  case MCK_offen:
    return parseTokenOp("offen", Operands);
  case MCK_off:
    return parseTokenOp("off", Operands);
  case MCK_row_95_en:
    return parseTokenOp("row_en", Operands);
  case MCK_gds:
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  case MCK_tfe:
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  }
  return tryCustomParseOperand(Operands, MCK);
}

// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we were given an immediate operand but
  // are expected to provide the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_tfe:
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be usable
    // with 64-bit operands. The following code enables it for SReg_64
    // operands used as source and destination. Remaining source operands
    // are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if it is not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm))
    return Error(S, "expected a 16-bit value");

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

bool AMDGPUOperand::isWaitVAVDst() const {
  return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
}

bool AMDGPUOperand::isWaitVMVSrc() const {
  return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}

//===----------------------------------------------------------------------===//
// Split Barrier
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }