//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;
    bool Lit = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
  };

  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report location of mandatory operands only for VOPD,
  // when both OpX and OpY have a KImm and there are no other literals.
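  // For instance, if an instruction has both a mandatory KImm and a regular
  // 32-bit literal and literal validation fails, the diagnostic should point
  // at the regular literal, since the KImm is fixed by the instruction format.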
180 enum ImmKindTy { 181 ImmKindTyNone, 182 ImmKindTyLiteral, 183 ImmKindTyMandatoryLiteral, 184 ImmKindTyConst, 185 }; 186 187 private: 188 struct TokOp { 189 const char *Data; 190 unsigned Length; 191 }; 192 193 struct ImmOp { 194 int64_t Val; 195 ImmTy Type; 196 bool IsFPImm; 197 mutable ImmKindTy Kind; 198 Modifiers Mods; 199 }; 200 201 struct RegOp { 202 unsigned RegNo; 203 Modifiers Mods; 204 }; 205 206 union { 207 TokOp Tok; 208 ImmOp Imm; 209 RegOp Reg; 210 const MCExpr *Expr; 211 }; 212 213 public: 214 bool isToken() const override { return Kind == Token; } 215 216 bool isSymbolRefExpr() const { 217 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 218 } 219 220 bool isImm() const override { 221 return Kind == Immediate; 222 } 223 224 void setImmKindNone() const { 225 assert(isImm()); 226 Imm.Kind = ImmKindTyNone; 227 } 228 229 void setImmKindLiteral() const { 230 assert(isImm()); 231 Imm.Kind = ImmKindTyLiteral; 232 } 233 234 void setImmKindMandatoryLiteral() const { 235 assert(isImm()); 236 Imm.Kind = ImmKindTyMandatoryLiteral; 237 } 238 239 void setImmKindConst() const { 240 assert(isImm()); 241 Imm.Kind = ImmKindTyConst; 242 } 243 244 bool IsImmKindLiteral() const { 245 return isImm() && Imm.Kind == ImmKindTyLiteral; 246 } 247 248 bool IsImmKindMandatoryLiteral() const { 249 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral; 250 } 251 252 bool isImmKindConst() const { 253 return isImm() && Imm.Kind == ImmKindTyConst; 254 } 255 256 bool isInlinableImm(MVT type) const; 257 bool isLiteralImm(MVT type) const; 258 259 bool isRegKind() const { 260 return Kind == Register; 261 } 262 263 bool isReg() const override { 264 return isRegKind() && !hasModifiers(); 265 } 266 267 bool isRegOrInline(unsigned RCID, MVT type) const { 268 return isRegClass(RCID) || isInlinableImm(type); 269 } 270 271 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 272 return isRegOrInline(RCID, type) || isLiteralImm(type); 273 } 274 275 bool isRegOrImmWithInt16InputMods() const { 276 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 277 } 278 279 bool isRegOrImmWithIntT16InputMods() const { 280 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16); 281 } 282 283 bool isRegOrImmWithInt32InputMods() const { 284 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 285 } 286 287 bool isRegOrInlineImmWithInt16InputMods() const { 288 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); 289 } 290 291 bool isRegOrInlineImmWithInt32InputMods() const { 292 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); 293 } 294 295 bool isRegOrImmWithInt64InputMods() const { 296 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 297 } 298 299 bool isRegOrImmWithFP16InputMods() const { 300 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 301 } 302 303 bool isRegOrImmWithFPT16InputMods() const { 304 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16); 305 } 306 307 bool isRegOrImmWithFP32InputMods() const { 308 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 309 } 310 311 bool isRegOrImmWithFP64InputMods() const { 312 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 313 } 314 315 bool isRegOrInlineImmWithFP16InputMods() const { 316 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); 317 } 318 319 bool isRegOrInlineImmWithFP32InputMods() const { 320 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); 321 } 322 323 324 bool isVReg() const { 325 return 
isRegClass(AMDGPU::VGPR_32RegClassID) || 326 isRegClass(AMDGPU::VReg_64RegClassID) || 327 isRegClass(AMDGPU::VReg_96RegClassID) || 328 isRegClass(AMDGPU::VReg_128RegClassID) || 329 isRegClass(AMDGPU::VReg_160RegClassID) || 330 isRegClass(AMDGPU::VReg_192RegClassID) || 331 isRegClass(AMDGPU::VReg_256RegClassID) || 332 isRegClass(AMDGPU::VReg_512RegClassID) || 333 isRegClass(AMDGPU::VReg_1024RegClassID); 334 } 335 336 bool isVReg32() const { 337 return isRegClass(AMDGPU::VGPR_32RegClassID); 338 } 339 340 bool isVReg32OrOff() const { 341 return isOff() || isVReg32(); 342 } 343 344 bool isNull() const { 345 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 346 } 347 348 bool isVRegWithInputMods() const; 349 bool isT16VRegWithInputMods() const; 350 351 bool isSDWAOperand(MVT type) const; 352 bool isSDWAFP16Operand() const; 353 bool isSDWAFP32Operand() const; 354 bool isSDWAInt16Operand() const; 355 bool isSDWAInt32Operand() const; 356 357 bool isImmTy(ImmTy ImmT) const { 358 return isImm() && Imm.Type == ImmT; 359 } 360 361 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); } 362 363 bool isImmLiteral() const { return isImmTy(ImmTyNone); } 364 365 bool isImmModifier() const { 366 return isImm() && Imm.Type != ImmTyNone; 367 } 368 369 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 370 bool isDMask() const { return isImmTy(ImmTyDMask); } 371 bool isDim() const { return isImmTy(ImmTyDim); } 372 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 373 bool isOff() const { return isImmTy(ImmTyOff); } 374 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 375 bool isOffen() const { return isImmTy(ImmTyOffen); } 376 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 377 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 378 bool isOffset() const { return isImmTy(ImmTyOffset); } 379 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 380 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 381 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); } 382 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 383 bool isGDS() const { return isImmTy(ImmTyGDS); } 384 bool isLDS() const { return isImmTy(ImmTyLDS); } 385 bool isCPol() const { return isImmTy(ImmTyCPol); } 386 bool isTFE() const { return isImmTy(ImmTyTFE); } 387 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 388 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); } 389 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); } 390 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 391 bool isDppFI() const { return isImmTy(ImmTyDppFI); } 392 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); } 393 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); } 394 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); } 395 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); } 396 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 397 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 398 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); } 399 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 400 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 401 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 402 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 403 404 bool isRegOrImm() const { 405 return isReg() || isImm(); 406 } 407 408 bool isRegClass(unsigned RCID) 
const; 409 410 bool isInlineValue() const; 411 412 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 413 return isRegOrInline(RCID, type) && !hasModifiers(); 414 } 415 416 bool isSCSrcB16() const { 417 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 418 } 419 420 bool isSCSrcV2B16() const { 421 return isSCSrcB16(); 422 } 423 424 bool isSCSrcB32() const { 425 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 426 } 427 428 bool isSCSrcB64() const { 429 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 430 } 431 432 bool isBoolReg() const; 433 434 bool isSCSrcF16() const { 435 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 436 } 437 438 bool isSCSrcV2F16() const { 439 return isSCSrcF16(); 440 } 441 442 bool isSCSrcF32() const { 443 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 444 } 445 446 bool isSCSrcF64() const { 447 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 448 } 449 450 bool isSSrcB32() const { 451 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 452 } 453 454 bool isSSrcB16() const { 455 return isSCSrcB16() || isLiteralImm(MVT::i16); 456 } 457 458 bool isSSrcV2B16() const { 459 llvm_unreachable("cannot happen"); 460 return isSSrcB16(); 461 } 462 463 bool isSSrcB64() const { 464 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 465 // See isVSrc64(). 466 return isSCSrcB64() || isLiteralImm(MVT::i64); 467 } 468 469 bool isSSrcF32() const { 470 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 471 } 472 473 bool isSSrcF64() const { 474 return isSCSrcB64() || isLiteralImm(MVT::f64); 475 } 476 477 bool isSSrcF16() const { 478 return isSCSrcB16() || isLiteralImm(MVT::f16); 479 } 480 481 bool isSSrcV2F16() const { 482 llvm_unreachable("cannot happen"); 483 return isSSrcF16(); 484 } 485 486 bool isSSrcV2FP32() const { 487 llvm_unreachable("cannot happen"); 488 return isSSrcF32(); 489 } 490 491 bool isSCSrcV2FP32() const { 492 llvm_unreachable("cannot happen"); 493 return isSCSrcF32(); 494 } 495 496 bool isSSrcV2INT32() const { 497 llvm_unreachable("cannot happen"); 498 return isSSrcB32(); 499 } 500 501 bool isSCSrcV2INT32() const { 502 llvm_unreachable("cannot happen"); 503 return isSCSrcB32(); 504 } 505 506 bool isSSrcOrLdsB32() const { 507 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 508 isLiteralImm(MVT::i32) || isExpr(); 509 } 510 511 bool isVCSrcB32() const { 512 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 513 } 514 515 bool isVCSrcB64() const { 516 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 517 } 518 519 bool isVCSrcTB16() const { 520 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16); 521 } 522 523 bool isVCSrcTB16_Lo128() const { 524 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16); 525 } 526 527 bool isVCSrcFake16B16_Lo128() const { 528 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16); 529 } 530 531 bool isVCSrcB16() const { 532 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 533 } 534 535 bool isVCSrcV2B16() const { 536 return isVCSrcB16(); 537 } 538 539 bool isVCSrcF32() const { 540 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 541 } 542 543 bool isVCSrcF64() const { 544 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 545 } 546 547 bool isVCSrcTF16() const { 548 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16); 549 } 550 551 bool isVCSrcTF16_Lo128() 
const { 552 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16); 553 } 554 555 bool isVCSrcFake16F16_Lo128() const { 556 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16); 557 } 558 559 bool isVCSrcF16() const { 560 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 561 } 562 563 bool isVCSrcV2F16() const { 564 return isVCSrcF16(); 565 } 566 567 bool isVSrcB32() const { 568 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 569 } 570 571 bool isVSrcB64() const { 572 return isVCSrcF64() || isLiteralImm(MVT::i64); 573 } 574 575 bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); } 576 577 bool isVSrcTB16_Lo128() const { 578 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16); 579 } 580 581 bool isVSrcFake16B16_Lo128() const { 582 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16); 583 } 584 585 bool isVSrcB16() const { 586 return isVCSrcB16() || isLiteralImm(MVT::i16); 587 } 588 589 bool isVSrcV2B16() const { 590 return isVSrcB16() || isLiteralImm(MVT::v2i16); 591 } 592 593 bool isVCSrcV2FP32() const { 594 return isVCSrcF64(); 595 } 596 597 bool isVSrcV2FP32() const { 598 return isVSrcF64() || isLiteralImm(MVT::v2f32); 599 } 600 601 bool isVCSrcV2INT32() const { 602 return isVCSrcB64(); 603 } 604 605 bool isVSrcV2INT32() const { 606 return isVSrcB64() || isLiteralImm(MVT::v2i32); 607 } 608 609 bool isVSrcF32() const { 610 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 611 } 612 613 bool isVSrcF64() const { 614 return isVCSrcF64() || isLiteralImm(MVT::f64); 615 } 616 617 bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); } 618 619 bool isVSrcTF16_Lo128() const { 620 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16); 621 } 622 623 bool isVSrcFake16F16_Lo128() const { 624 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16); 625 } 626 627 bool isVSrcF16() const { 628 return isVCSrcF16() || isLiteralImm(MVT::f16); 629 } 630 631 bool isVSrcV2F16() const { 632 return isVSrcF16() || isLiteralImm(MVT::v2f16); 633 } 634 635 bool isVISrcB32() const { 636 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 637 } 638 639 bool isVISrcB16() const { 640 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 641 } 642 643 bool isVISrcV2B16() const { 644 return isVISrcB16(); 645 } 646 647 bool isVISrcF32() const { 648 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 649 } 650 651 bool isVISrcF16() const { 652 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 653 } 654 655 bool isVISrcV2F16() const { 656 return isVISrcF16() || isVISrcB32(); 657 } 658 659 bool isVISrc_64B64() const { 660 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 661 } 662 663 bool isVISrc_64F64() const { 664 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 665 } 666 667 bool isVISrc_64V2FP32() const { 668 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 669 } 670 671 bool isVISrc_64V2INT32() const { 672 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 673 } 674 675 bool isVISrc_256B64() const { 676 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 677 } 678 679 bool isVISrc_256F64() const { 680 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 681 } 682 683 bool isVISrc_128B16() const { 684 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 685 } 686 687 bool isVISrc_128V2B16() const { 688 return isVISrc_128B16(); 689 } 690 691 bool isVISrc_128B32() 
const { 692 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 693 } 694 695 bool isVISrc_128F32() const { 696 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 697 } 698 699 bool isVISrc_256V2FP32() const { 700 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 701 } 702 703 bool isVISrc_256V2INT32() const { 704 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 705 } 706 707 bool isVISrc_512B32() const { 708 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 709 } 710 711 bool isVISrc_512B16() const { 712 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 713 } 714 715 bool isVISrc_512V2B16() const { 716 return isVISrc_512B16(); 717 } 718 719 bool isVISrc_512F32() const { 720 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 721 } 722 723 bool isVISrc_512F16() const { 724 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 725 } 726 727 bool isVISrc_512V2F16() const { 728 return isVISrc_512F16() || isVISrc_512B32(); 729 } 730 731 bool isVISrc_1024B32() const { 732 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 733 } 734 735 bool isVISrc_1024B16() const { 736 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 737 } 738 739 bool isVISrc_1024V2B16() const { 740 return isVISrc_1024B16(); 741 } 742 743 bool isVISrc_1024F32() const { 744 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 745 } 746 747 bool isVISrc_1024F16() const { 748 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 749 } 750 751 bool isVISrc_1024V2F16() const { 752 return isVISrc_1024F16() || isVISrc_1024B32(); 753 } 754 755 bool isAISrcB32() const { 756 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 757 } 758 759 bool isAISrcB16() const { 760 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 761 } 762 763 bool isAISrcV2B16() const { 764 return isAISrcB16(); 765 } 766 767 bool isAISrcF32() const { 768 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 769 } 770 771 bool isAISrcF16() const { 772 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 773 } 774 775 bool isAISrcV2F16() const { 776 return isAISrcF16() || isAISrcB32(); 777 } 778 779 bool isAISrc_64B64() const { 780 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 781 } 782 783 bool isAISrc_64F64() const { 784 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 785 } 786 787 bool isAISrc_128B32() const { 788 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 789 } 790 791 bool isAISrc_128B16() const { 792 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 793 } 794 795 bool isAISrc_128V2B16() const { 796 return isAISrc_128B16(); 797 } 798 799 bool isAISrc_128F32() const { 800 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 801 } 802 803 bool isAISrc_128F16() const { 804 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 805 } 806 807 bool isAISrc_128V2F16() const { 808 return isAISrc_128F16() || isAISrc_128B32(); 809 } 810 811 bool isVISrc_128F16() const { 812 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 813 } 814 815 bool isVISrc_128V2F16() const { 816 return isVISrc_128F16() || isVISrc_128B32(); 817 } 818 819 bool isAISrc_256B64() const { 820 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 821 } 822 823 bool isAISrc_256F64() const { 824 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, 
MVT::f64); 825 } 826 827 bool isAISrc_512B32() const { 828 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 829 } 830 831 bool isAISrc_512B16() const { 832 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 833 } 834 835 bool isAISrc_512V2B16() const { 836 return isAISrc_512B16(); 837 } 838 839 bool isAISrc_512F32() const { 840 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 841 } 842 843 bool isAISrc_512F16() const { 844 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 845 } 846 847 bool isAISrc_512V2F16() const { 848 return isAISrc_512F16() || isAISrc_512B32(); 849 } 850 851 bool isAISrc_1024B32() const { 852 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 853 } 854 855 bool isAISrc_1024B16() const { 856 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 857 } 858 859 bool isAISrc_1024V2B16() const { 860 return isAISrc_1024B16(); 861 } 862 863 bool isAISrc_1024F32() const { 864 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 865 } 866 867 bool isAISrc_1024F16() const { 868 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 869 } 870 871 bool isAISrc_1024V2F16() const { 872 return isAISrc_1024F16() || isAISrc_1024B32(); 873 } 874 875 bool isKImmFP32() const { 876 return isLiteralImm(MVT::f32); 877 } 878 879 bool isKImmFP16() const { 880 return isLiteralImm(MVT::f16); 881 } 882 883 bool isMem() const override { 884 return false; 885 } 886 887 bool isExpr() const { 888 return Kind == Expression; 889 } 890 891 bool isSOPPBrTarget() const { return isExpr() || isImm(); } 892 893 bool isSWaitCnt() const; 894 bool isDepCtr() const; 895 bool isSDelayALU() const; 896 bool isHwreg() const; 897 bool isSendMsg() const; 898 bool isSplitBarrier() const; 899 bool isSwizzle() const; 900 bool isSMRDOffset8() const; 901 bool isSMEMOffset() const; 902 bool isSMRDLiteralOffset() const; 903 bool isDPP8() const; 904 bool isDPPCtrl() const; 905 bool isBLGP() const; 906 bool isCBSZ() const; 907 bool isABID() const; 908 bool isGPRIdxMode() const; 909 bool isS16Imm() const; 910 bool isU16Imm() const; 911 bool isEndpgm() const; 912 bool isWaitVDST() const; 913 bool isWaitEXP() const; 914 bool isWaitVAVDst() const; 915 bool isWaitVMVSrc() const; 916 917 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const { 918 return std::bind(P, *this); 919 } 920 921 StringRef getToken() const { 922 assert(isToken()); 923 return StringRef(Tok.Data, Tok.Length); 924 } 925 926 int64_t getImm() const { 927 assert(isImm()); 928 return Imm.Val; 929 } 930 931 void setImm(int64_t Val) { 932 assert(isImm()); 933 Imm.Val = Val; 934 } 935 936 ImmTy getImmTy() const { 937 assert(isImm()); 938 return Imm.Type; 939 } 940 941 unsigned getReg() const override { 942 assert(isRegKind()); 943 return Reg.RegNo; 944 } 945 946 SMLoc getStartLoc() const override { 947 return StartLoc; 948 } 949 950 SMLoc getEndLoc() const override { 951 return EndLoc; 952 } 953 954 SMRange getLocRange() const { 955 return SMRange(StartLoc, EndLoc); 956 } 957 958 Modifiers getModifiers() const { 959 assert(isRegKind() || isImmTy(ImmTyNone)); 960 return isRegKind() ? 
Reg.Mods : Imm.Mods; 961 } 962 963 void setModifiers(Modifiers Mods) { 964 assert(isRegKind() || isImmTy(ImmTyNone)); 965 if (isRegKind()) 966 Reg.Mods = Mods; 967 else 968 Imm.Mods = Mods; 969 } 970 971 bool hasModifiers() const { 972 return getModifiers().hasModifiers(); 973 } 974 975 bool hasFPModifiers() const { 976 return getModifiers().hasFPModifiers(); 977 } 978 979 bool hasIntModifiers() const { 980 return getModifiers().hasIntModifiers(); 981 } 982 983 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 984 985 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 986 987 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 988 989 void addRegOperands(MCInst &Inst, unsigned N) const; 990 991 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 992 if (isRegKind()) 993 addRegOperands(Inst, N); 994 else 995 addImmOperands(Inst, N); 996 } 997 998 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 999 Modifiers Mods = getModifiers(); 1000 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 1001 if (isRegKind()) { 1002 addRegOperands(Inst, N); 1003 } else { 1004 addImmOperands(Inst, N, false); 1005 } 1006 } 1007 1008 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 1009 assert(!hasIntModifiers()); 1010 addRegOrImmWithInputModsOperands(Inst, N); 1011 } 1012 1013 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 1014 assert(!hasFPModifiers()); 1015 addRegOrImmWithInputModsOperands(Inst, N); 1016 } 1017 1018 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 1019 Modifiers Mods = getModifiers(); 1020 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 1021 assert(isRegKind()); 1022 addRegOperands(Inst, N); 1023 } 1024 1025 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 1026 assert(!hasIntModifiers()); 1027 addRegWithInputModsOperands(Inst, N); 1028 } 1029 1030 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 1031 assert(!hasFPModifiers()); 1032 addRegWithInputModsOperands(Inst, N); 1033 } 1034 1035 static void printImmTy(raw_ostream& OS, ImmTy Type) { 1036 // clang-format off 1037 switch (Type) { 1038 case ImmTyNone: OS << "None"; break; 1039 case ImmTyGDS: OS << "GDS"; break; 1040 case ImmTyLDS: OS << "LDS"; break; 1041 case ImmTyOffen: OS << "Offen"; break; 1042 case ImmTyIdxen: OS << "Idxen"; break; 1043 case ImmTyAddr64: OS << "Addr64"; break; 1044 case ImmTyOffset: OS << "Offset"; break; 1045 case ImmTyInstOffset: OS << "InstOffset"; break; 1046 case ImmTyOffset0: OS << "Offset0"; break; 1047 case ImmTyOffset1: OS << "Offset1"; break; 1048 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break; 1049 case ImmTyCPol: OS << "CPol"; break; 1050 case ImmTyTFE: OS << "TFE"; break; 1051 case ImmTyD16: OS << "D16"; break; 1052 case ImmTyFORMAT: OS << "FORMAT"; break; 1053 case ImmTyClampSI: OS << "ClampSI"; break; 1054 case ImmTyOModSI: OS << "OModSI"; break; 1055 case ImmTyDPP8: OS << "DPP8"; break; 1056 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1057 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1058 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1059 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1060 case ImmTyDppFI: OS << "DppFI"; break; 1061 case ImmTySDWADstSel: OS << "SDWADstSel"; break; 1062 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break; 1063 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break; 1064 case ImmTySDWADstUnused: OS << 
"SDWADstUnused"; break; 1065 case ImmTyDMask: OS << "DMask"; break; 1066 case ImmTyDim: OS << "Dim"; break; 1067 case ImmTyUNorm: OS << "UNorm"; break; 1068 case ImmTyDA: OS << "DA"; break; 1069 case ImmTyR128A16: OS << "R128A16"; break; 1070 case ImmTyA16: OS << "A16"; break; 1071 case ImmTyLWE: OS << "LWE"; break; 1072 case ImmTyOff: OS << "Off"; break; 1073 case ImmTyExpTgt: OS << "ExpTgt"; break; 1074 case ImmTyExpCompr: OS << "ExpCompr"; break; 1075 case ImmTyExpVM: OS << "ExpVM"; break; 1076 case ImmTyHwreg: OS << "Hwreg"; break; 1077 case ImmTySendMsg: OS << "SendMsg"; break; 1078 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1079 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1080 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break; 1081 case ImmTyOpSel: OS << "OpSel"; break; 1082 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1083 case ImmTyNegLo: OS << "NegLo"; break; 1084 case ImmTyNegHi: OS << "NegHi"; break; 1085 case ImmTySwizzle: OS << "Swizzle"; break; 1086 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1087 case ImmTyHigh: OS << "High"; break; 1088 case ImmTyBLGP: OS << "BLGP"; break; 1089 case ImmTyCBSZ: OS << "CBSZ"; break; 1090 case ImmTyABID: OS << "ABID"; break; 1091 case ImmTyEndpgm: OS << "Endpgm"; break; 1092 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1093 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1094 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break; 1095 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break; 1096 } 1097 // clang-format on 1098 } 1099 1100 void print(raw_ostream &OS) const override { 1101 switch (Kind) { 1102 case Register: 1103 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1104 break; 1105 case Immediate: 1106 OS << '<' << getImm(); 1107 if (getImmTy() != ImmTyNone) { 1108 OS << " type: "; printImmTy(OS, getImmTy()); 1109 } 1110 OS << " mods: " << Imm.Mods << '>'; 1111 break; 1112 case Token: 1113 OS << '\'' << getToken() << '\''; 1114 break; 1115 case Expression: 1116 OS << "<expr " << *Expr << '>'; 1117 break; 1118 } 1119 } 1120 1121 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1122 int64_t Val, SMLoc Loc, 1123 ImmTy Type = ImmTyNone, 1124 bool IsFPImm = false) { 1125 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1126 Op->Imm.Val = Val; 1127 Op->Imm.IsFPImm = IsFPImm; 1128 Op->Imm.Kind = ImmKindTyNone; 1129 Op->Imm.Type = Type; 1130 Op->Imm.Mods = Modifiers(); 1131 Op->StartLoc = Loc; 1132 Op->EndLoc = Loc; 1133 return Op; 1134 } 1135 1136 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1137 StringRef Str, SMLoc Loc, 1138 bool HasExplicitEncodingSize = true) { 1139 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1140 Res->Tok.Data = Str.data(); 1141 Res->Tok.Length = Str.size(); 1142 Res->StartLoc = Loc; 1143 Res->EndLoc = Loc; 1144 return Res; 1145 } 1146 1147 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1148 unsigned RegNo, SMLoc S, 1149 SMLoc E) { 1150 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1151 Op->Reg.RegNo = RegNo; 1152 Op->Reg.Mods = Modifiers(); 1153 Op->StartLoc = S; 1154 Op->EndLoc = E; 1155 return Op; 1156 } 1157 1158 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1159 const class MCExpr *Expr, SMLoc S) { 1160 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1161 Op->Expr = Expr; 1162 Op->StartLoc = S; 1163 Op->EndLoc = S; 1164 return Op; 1165 } 1166 }; 1167 1168 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1169 
OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1170 return OS; 1171 } 1172 1173 //===----------------------------------------------------------------------===// 1174 // AsmParser 1175 //===----------------------------------------------------------------------===// 1176 1177 // Holds info related to the current kernel, e.g. count of SGPRs used. 1178 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1179 // .amdgpu_hsa_kernel or at EOF. 1180 class KernelScopeInfo { 1181 int SgprIndexUnusedMin = -1; 1182 int VgprIndexUnusedMin = -1; 1183 int AgprIndexUnusedMin = -1; 1184 MCContext *Ctx = nullptr; 1185 MCSubtargetInfo const *MSTI = nullptr; 1186 1187 void usesSgprAt(int i) { 1188 if (i >= SgprIndexUnusedMin) { 1189 SgprIndexUnusedMin = ++i; 1190 if (Ctx) { 1191 MCSymbol* const Sym = 1192 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1193 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1194 } 1195 } 1196 } 1197 1198 void usesVgprAt(int i) { 1199 if (i >= VgprIndexUnusedMin) { 1200 VgprIndexUnusedMin = ++i; 1201 if (Ctx) { 1202 MCSymbol* const Sym = 1203 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1204 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1205 VgprIndexUnusedMin); 1206 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1207 } 1208 } 1209 } 1210 1211 void usesAgprAt(int i) { 1212 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1213 if (!hasMAIInsts(*MSTI)) 1214 return; 1215 1216 if (i >= AgprIndexUnusedMin) { 1217 AgprIndexUnusedMin = ++i; 1218 if (Ctx) { 1219 MCSymbol* const Sym = 1220 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1221 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1222 1223 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1224 MCSymbol* const vSym = 1225 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1226 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1227 VgprIndexUnusedMin); 1228 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1229 } 1230 } 1231 } 1232 1233 public: 1234 KernelScopeInfo() = default; 1235 1236 void initialize(MCContext &Context) { 1237 Ctx = &Context; 1238 MSTI = Ctx->getSubtargetInfo(); 1239 1240 usesSgprAt(SgprIndexUnusedMin = -1); 1241 usesVgprAt(VgprIndexUnusedMin = -1); 1242 if (hasMAIInsts(*MSTI)) { 1243 usesAgprAt(AgprIndexUnusedMin = -1); 1244 } 1245 } 1246 1247 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1248 unsigned RegWidth) { 1249 switch (RegKind) { 1250 case IS_SGPR: 1251 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1252 break; 1253 case IS_AGPR: 1254 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1255 break; 1256 case IS_VGPR: 1257 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1258 break; 1259 default: 1260 break; 1261 } 1262 } 1263 }; 1264 1265 class AMDGPUAsmParser : public MCTargetAsmParser { 1266 MCAsmParser &Parser; 1267 1268 unsigned ForcedEncodingSize = 0; 1269 bool ForcedDPP = false; 1270 bool ForcedSDWA = false; 1271 KernelScopeInfo KernelScope; 1272 1273 /// @name Auto-generated Match Functions 1274 /// { 1275 1276 #define GET_ASSEMBLER_HEADER 1277 #include "AMDGPUGenAsmMatcher.inc" 1278 1279 /// } 1280 1281 private: 1282 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1283 bool OutOfRangeError(SMRange Range); 1284 /// Calculate VGPR/SGPR blocks required for given target, reserved 1285 /// registers, and user-specified NextFreeXGPR 
values. 1286 /// 1287 /// \param Features [in] Target features, used for bug corrections. 1288 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1289 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1290 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1291 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1292 /// descriptor field, if valid. 1293 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1294 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1295 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1296 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1297 /// \param VGPRBlocks [out] Result VGPR block count. 1298 /// \param SGPRBlocks [out] Result SGPR block count. 1299 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1300 bool FlatScrUsed, bool XNACKUsed, 1301 std::optional<bool> EnableWavefrontSize32, 1302 unsigned NextFreeVGPR, SMRange VGPRRange, 1303 unsigned NextFreeSGPR, SMRange SGPRRange, 1304 unsigned &VGPRBlocks, unsigned &SGPRBlocks); 1305 bool ParseDirectiveAMDGCNTarget(); 1306 bool ParseDirectiveAMDHSAKernel(); 1307 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1308 bool ParseDirectiveHSACodeObjectVersion(); 1309 bool ParseDirectiveHSACodeObjectISA(); 1310 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1311 bool ParseDirectiveAMDKernelCodeT(); 1312 // TODO: Possibly make subtargetHasRegister const. 1313 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1314 bool ParseDirectiveAMDGPUHsaKernel(); 1315 1316 bool ParseDirectiveISAVersion(); 1317 bool ParseDirectiveHSAMetadata(); 1318 bool ParseDirectivePALMetadataBegin(); 1319 bool ParseDirectivePALMetadata(); 1320 bool ParseDirectiveAMDGPULDS(); 1321 1322 /// Common code to parse out a block of text (typically YAML) between start and 1323 /// end directives. 
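  /// The text between the two directives is accumulated into \p CollectString;
  /// this is used, for example, by the HSA metadata and PAL metadata
  /// directives.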
1324 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1325 const char *AssemblerDirectiveEnd, 1326 std::string &CollectString); 1327 1328 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1329 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1330 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1331 unsigned &RegNum, unsigned &RegWidth, 1332 bool RestoreOnFailure = false); 1333 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1334 unsigned &RegNum, unsigned &RegWidth, 1335 SmallVectorImpl<AsmToken> &Tokens); 1336 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1337 unsigned &RegWidth, 1338 SmallVectorImpl<AsmToken> &Tokens); 1339 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1340 unsigned &RegWidth, 1341 SmallVectorImpl<AsmToken> &Tokens); 1342 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1343 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1344 bool ParseRegRange(unsigned& Num, unsigned& Width); 1345 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg, 1346 unsigned RegWidth, SMLoc Loc); 1347 1348 bool isRegister(); 1349 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1350 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1351 void initializeGprCountSymbol(RegisterKind RegKind); 1352 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1353 unsigned RegWidth); 1354 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1355 bool IsAtomic); 1356 1357 public: 1358 enum AMDGPUMatchResultTy { 1359 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1360 }; 1361 enum OperandMode { 1362 OperandMode_Default, 1363 OperandMode_NSA, 1364 }; 1365 1366 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1367 1368 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1369 const MCInstrInfo &MII, 1370 const MCTargetOptions &Options) 1371 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1372 MCAsmParserExtension::Initialize(Parser); 1373 1374 if (getFeatureBits().none()) { 1375 // Set default features. 1376 copySTI().ToggleFeature("southern-islands"); 1377 } 1378 1379 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1380 1381 { 1382 // TODO: make those pre-defined variables read-only. 1383 // Currently there is none suitable machinery in the core llvm-mc for this. 1384 // MCSymbol::isRedefinable is intended for another purpose, and 1385 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
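      // The version symbols defined below are ordinary absolute symbols, so an
      // assembly source can, for example, use them for conditional assembly:
      //   .if .option.machine_version_major >= 9
      //     ; gfx9-and-later code
      //   .endif
      // (Illustrative sketch; the HSA ABI defines the
      // .amdgcn.gfx_generation_* names instead, as selected below.)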
1386 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1387 MCContext &Ctx = getContext(); 1388 if (ISA.Major >= 6 && isHsaAbi(getSTI())) { 1389 MCSymbol *Sym = 1390 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1391 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1392 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1393 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1394 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1395 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1396 } else { 1397 MCSymbol *Sym = 1398 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1399 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1400 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1401 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1402 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1403 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1404 } 1405 if (ISA.Major >= 6 && isHsaAbi(getSTI())) { 1406 initializeGprCountSymbol(IS_VGPR); 1407 initializeGprCountSymbol(IS_SGPR); 1408 } else 1409 KernelScope.initialize(getContext()); 1410 } 1411 } 1412 1413 bool hasMIMG_R128() const { 1414 return AMDGPU::hasMIMG_R128(getSTI()); 1415 } 1416 1417 bool hasPackedD16() const { 1418 return AMDGPU::hasPackedD16(getSTI()); 1419 } 1420 1421 bool hasA16() const { return AMDGPU::hasA16(getSTI()); } 1422 1423 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1424 1425 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); } 1426 1427 bool isSI() const { 1428 return AMDGPU::isSI(getSTI()); 1429 } 1430 1431 bool isCI() const { 1432 return AMDGPU::isCI(getSTI()); 1433 } 1434 1435 bool isVI() const { 1436 return AMDGPU::isVI(getSTI()); 1437 } 1438 1439 bool isGFX9() const { 1440 return AMDGPU::isGFX9(getSTI()); 1441 } 1442 1443 // TODO: isGFX90A is also true for GFX940. We need to clean it. 
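  // Until then, callers that must distinguish the two can pair this check
  // with isGFX940().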
1444 bool isGFX90A() const { 1445 return AMDGPU::isGFX90A(getSTI()); 1446 } 1447 1448 bool isGFX940() const { 1449 return AMDGPU::isGFX940(getSTI()); 1450 } 1451 1452 bool isGFX9Plus() const { 1453 return AMDGPU::isGFX9Plus(getSTI()); 1454 } 1455 1456 bool isGFX10() const { 1457 return AMDGPU::isGFX10(getSTI()); 1458 } 1459 1460 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1461 1462 bool isGFX11() const { 1463 return AMDGPU::isGFX11(getSTI()); 1464 } 1465 1466 bool isGFX11Plus() const { 1467 return AMDGPU::isGFX11Plus(getSTI()); 1468 } 1469 1470 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); } 1471 1472 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); } 1473 1474 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); } 1475 1476 bool isGFX10_BEncoding() const { 1477 return AMDGPU::isGFX10_BEncoding(getSTI()); 1478 } 1479 1480 bool hasInv2PiInlineImm() const { 1481 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1482 } 1483 1484 bool hasFlatOffsets() const { 1485 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1486 } 1487 1488 bool hasArchitectedFlatScratch() const { 1489 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1490 } 1491 1492 bool hasSGPR102_SGPR103() const { 1493 return !isVI() && !isGFX9(); 1494 } 1495 1496 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1497 1498 bool hasIntClamp() const { 1499 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1500 } 1501 1502 bool hasPartialNSAEncoding() const { 1503 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding]; 1504 } 1505 1506 unsigned getNSAMaxSize(bool HasSampler = false) const { 1507 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler); 1508 } 1509 1510 unsigned getMaxNumUserSGPRs() const { 1511 return AMDGPU::getMaxNumUserSGPRs(getSTI()); 1512 } 1513 1514 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); } 1515 1516 AMDGPUTargetStreamer &getTargetStreamer() { 1517 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1518 return static_cast<AMDGPUTargetStreamer &>(TS); 1519 } 1520 1521 const MCRegisterInfo *getMRI() const { 1522 // We need this const_cast because for some reason getContext() is not const 1523 // in MCAsmParser. 
1524 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1525 } 1526 1527 const MCInstrInfo *getMII() const { 1528 return &MII; 1529 } 1530 1531 const FeatureBitset &getFeatureBits() const { 1532 return getSTI().getFeatureBits(); 1533 } 1534 1535 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1536 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1537 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1538 1539 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1540 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1541 bool isForcedDPP() const { return ForcedDPP; } 1542 bool isForcedSDWA() const { return ForcedSDWA; } 1543 ArrayRef<unsigned> getMatchedVariants() const; 1544 StringRef getMatchedVariantName() const; 1545 1546 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1547 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1548 bool RestoreOnFailure); 1549 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; 1550 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, 1551 SMLoc &EndLoc) override; 1552 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1553 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1554 unsigned Kind) override; 1555 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1556 OperandVector &Operands, MCStreamer &Out, 1557 uint64_t &ErrorInfo, 1558 bool MatchingInlineAsm) override; 1559 bool ParseDirective(AsmToken DirectiveID) override; 1560 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic, 1561 OperandMode Mode = OperandMode_Default); 1562 StringRef parseMnemonicSuffix(StringRef Name); 1563 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1564 SMLoc NameLoc, OperandVector &Operands) override; 1565 //bool ProcessInstruction(MCInst &Inst); 1566 1567 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands); 1568 1569 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int); 1570 1571 ParseStatus 1572 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1573 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1574 std::function<bool(int64_t &)> ConvertResult = nullptr); 1575 1576 ParseStatus parseOperandArrayWithPrefix( 1577 const char *Prefix, OperandVector &Operands, 1578 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1579 bool (*ConvertResult)(int64_t &) = nullptr); 1580 1581 ParseStatus 1582 parseNamedBit(StringRef Name, OperandVector &Operands, 1583 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1584 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const; 1585 ParseStatus parseCPol(OperandVector &Operands); 1586 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope); 1587 ParseStatus parseTH(OperandVector &Operands, int64_t &TH); 1588 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value, 1589 SMLoc &StringLoc); 1590 1591 bool isModifier(); 1592 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1593 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1594 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1595 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1596 bool parseSP3NegModifier(); 1597 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false, 1598 bool HasLit = 
false); 1599 ParseStatus parseReg(OperandVector &Operands); 1600 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false, 1601 bool HasLit = false); 1602 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands, 1603 bool AllowImm = true); 1604 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands, 1605 bool AllowImm = true); 1606 ParseStatus parseRegWithFPInputMods(OperandVector &Operands); 1607 ParseStatus parseRegWithIntInputMods(OperandVector &Operands); 1608 ParseStatus parseVReg32OrOff(OperandVector &Operands); 1609 ParseStatus parseDfmtNfmt(int64_t &Format); 1610 ParseStatus parseUfmt(int64_t &Format); 1611 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, 1612 int64_t &Format); 1613 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, 1614 int64_t &Format); 1615 ParseStatus parseFORMAT(OperandVector &Operands); 1616 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format); 1617 ParseStatus parseNumericFormat(int64_t &Format); 1618 ParseStatus parseFlatOffset(OperandVector &Operands); 1619 ParseStatus parseR128A16(OperandVector &Operands); 1620 ParseStatus parseBLGP(OperandVector &Operands); 1621 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1622 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1623 1624 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1625 1626 bool parseCnt(int64_t &IntVal); 1627 ParseStatus parseSWaitCnt(OperandVector &Operands); 1628 1629 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1630 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1631 ParseStatus parseDepCtr(OperandVector &Operands); 1632 1633 bool parseDelay(int64_t &Delay); 1634 ParseStatus parseSDelayALU(OperandVector &Operands); 1635 1636 ParseStatus parseHwreg(OperandVector &Operands); 1637 1638 private: 1639 struct OperandInfoTy { 1640 SMLoc Loc; 1641 int64_t Id; 1642 bool IsSymbolic = false; 1643 bool IsDefined = false; 1644 1645 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1646 }; 1647 1648 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1649 bool validateSendMsg(const OperandInfoTy &Msg, 1650 const OperandInfoTy &Op, 1651 const OperandInfoTy &Stream); 1652 1653 bool parseHwregBody(OperandInfoTy &HwReg, 1654 OperandInfoTy &Offset, 1655 OperandInfoTy &Width); 1656 bool validateHwreg(const OperandInfoTy &HwReg, 1657 const OperandInfoTy &Offset, 1658 const OperandInfoTy &Width); 1659 1660 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1661 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1662 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1663 1664 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1665 const OperandVector &Operands) const; 1666 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1667 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1668 SMLoc getLitLoc(const OperandVector &Operands, 1669 bool SearchMandatoryLiterals = false) const; 1670 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const; 1671 SMLoc getConstLoc(const OperandVector &Operands) const; 1672 SMLoc getInstLoc(const OperandVector &Operands) const; 1673 1674 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1675 bool validateOffset(const MCInst &Inst, const OperandVector &Operands); 1676 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1677 bool 
validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1678 bool validateSOPLiteral(const MCInst &Inst) const; 1679 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1680 bool validateVOPDRegBankConstraints(const MCInst &Inst, 1681 const OperandVector &Operands); 1682 bool validateIntClampSupported(const MCInst &Inst); 1683 bool validateMIMGAtomicDMask(const MCInst &Inst); 1684 bool validateMIMGGatherDMask(const MCInst &Inst); 1685 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1686 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc); 1687 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc); 1688 bool validateMIMGD16(const MCInst &Inst); 1689 bool validateMIMGMSAA(const MCInst &Inst); 1690 bool validateOpSel(const MCInst &Inst); 1691 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1692 bool validateVccOperand(unsigned Reg) const; 1693 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1694 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1695 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands); 1696 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1697 bool validateAGPRLdSt(const MCInst &Inst) const; 1698 bool validateVGPRAlign(const MCInst &Inst) const; 1699 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1700 bool validateDS(const MCInst &Inst, const OperandVector &Operands); 1701 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1702 bool validateDivScale(const MCInst &Inst); 1703 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands); 1704 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1705 const SMLoc &IDLoc); 1706 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands, 1707 const unsigned CPol); 1708 bool validateExeczVcczOperands(const OperandVector &Operands); 1709 bool validateTFE(const MCInst &Inst, const OperandVector &Operands); 1710 std::optional<StringRef> validateLdsDirect(const MCInst &Inst); 1711 unsigned getConstantBusLimit(unsigned Opcode) const; 1712 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1713 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1714 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1715 1716 bool isSupportedMnemo(StringRef Mnemo, 1717 const FeatureBitset &FBS); 1718 bool isSupportedMnemo(StringRef Mnemo, 1719 const FeatureBitset &FBS, 1720 ArrayRef<unsigned> Variants); 1721 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1722 1723 bool isId(const StringRef Id) const; 1724 bool isId(const AsmToken &Token, const StringRef Id) const; 1725 bool isToken(const AsmToken::TokenKind Kind) const; 1726 StringRef getId() const; 1727 bool trySkipId(const StringRef Id); 1728 bool trySkipId(const StringRef Pref, const StringRef Id); 1729 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1730 bool trySkipToken(const AsmToken::TokenKind Kind); 1731 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1732 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1733 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1734 1735 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1736 AsmToken::TokenKind getTokenKind() const; 1737 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1738 bool 
parseExpr(OperandVector &Operands); 1739 StringRef getTokenStr() const; 1740 AsmToken peekToken(bool ShouldSkipSpace = true); 1741 AsmToken getToken() const; 1742 SMLoc getLoc() const; 1743 void lex(); 1744 1745 public: 1746 void onBeginOfFile() override; 1747 1748 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK); 1749 1750 ParseStatus parseExpTgt(OperandVector &Operands); 1751 ParseStatus parseSendMsg(OperandVector &Operands); 1752 ParseStatus parseInterpSlot(OperandVector &Operands); 1753 ParseStatus parseInterpAttr(OperandVector &Operands); 1754 ParseStatus parseSOPPBrTarget(OperandVector &Operands); 1755 ParseStatus parseBoolReg(OperandVector &Operands); 1756 1757 bool parseSwizzleOperand(int64_t &Op, 1758 const unsigned MinVal, 1759 const unsigned MaxVal, 1760 const StringRef ErrMsg, 1761 SMLoc &Loc); 1762 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1763 const unsigned MinVal, 1764 const unsigned MaxVal, 1765 const StringRef ErrMsg); 1766 ParseStatus parseSwizzle(OperandVector &Operands); 1767 bool parseSwizzleOffset(int64_t &Imm); 1768 bool parseSwizzleMacro(int64_t &Imm); 1769 bool parseSwizzleQuadPerm(int64_t &Imm); 1770 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1771 bool parseSwizzleBroadcast(int64_t &Imm); 1772 bool parseSwizzleSwap(int64_t &Imm); 1773 bool parseSwizzleReverse(int64_t &Imm); 1774 1775 ParseStatus parseGPRIdxMode(OperandVector &Operands); 1776 int64_t parseGPRIdxMacro(); 1777 1778 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1779 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1780 1781 ParseStatus parseOModSI(OperandVector &Operands); 1782 1783 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1784 OptionalImmIndexMap &OptionalIdx); 1785 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1786 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1787 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1788 void cvtVOPD(MCInst &Inst, const OperandVector &Operands); 1789 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 1790 OptionalImmIndexMap &OptionalIdx); 1791 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1792 OptionalImmIndexMap &OptionalIdx); 1793 1794 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1795 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1796 1797 bool parseDimId(unsigned &Encoding); 1798 ParseStatus parseDim(OperandVector &Operands); 1799 bool convertDppBoundCtrl(int64_t &BoundCtrl); 1800 ParseStatus parseDPP8(OperandVector &Operands); 1801 ParseStatus parseDPPCtrl(OperandVector &Operands); 1802 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1803 int64_t parseDPPCtrlSel(StringRef Ctrl); 1804 int64_t parseDPPCtrlPerm(); 1805 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1806 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1807 cvtDPP(Inst, Operands, true); 1808 } 1809 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1810 bool IsDPP8 = false); 1811 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1812 cvtVOP3DPP(Inst, Operands, true); 1813 } 1814 1815 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix, 1816 AMDGPUOperand::ImmTy Type); 1817 ParseStatus parseSDWADstUnused(OperandVector &Operands); 1818 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1819 void cvtSdwaVOP2(MCInst &Inst, 
const OperandVector &Operands); 1820 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1821 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1822 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1823 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1824 uint64_t BasicInstType, 1825 bool SkipDstVcc = false, 1826 bool SkipSrcVcc = false); 1827 1828 ParseStatus parseEndpgm(OperandVector &Operands); 1829 1830 ParseStatus parseVOPD(OperandVector &Operands); 1831 }; 1832 1833 } // end anonymous namespace 1834 1835 // May be called with integer type with equivalent bitwidth. 1836 static const fltSemantics *getFltSemantics(unsigned Size) { 1837 switch (Size) { 1838 case 4: 1839 return &APFloat::IEEEsingle(); 1840 case 8: 1841 return &APFloat::IEEEdouble(); 1842 case 2: 1843 return &APFloat::IEEEhalf(); 1844 default: 1845 llvm_unreachable("unsupported fp type"); 1846 } 1847 } 1848 1849 static const fltSemantics *getFltSemantics(MVT VT) { 1850 return getFltSemantics(VT.getSizeInBits() / 8); 1851 } 1852 1853 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1854 switch (OperandType) { 1855 case AMDGPU::OPERAND_REG_IMM_INT32: 1856 case AMDGPU::OPERAND_REG_IMM_FP32: 1857 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1858 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1859 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1860 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1861 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1862 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1863 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1864 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1865 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1866 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1867 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1868 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1869 case AMDGPU::OPERAND_KIMM32: 1870 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 1871 return &APFloat::IEEEsingle(); 1872 case AMDGPU::OPERAND_REG_IMM_INT64: 1873 case AMDGPU::OPERAND_REG_IMM_FP64: 1874 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1875 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1876 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1877 return &APFloat::IEEEdouble(); 1878 case AMDGPU::OPERAND_REG_IMM_INT16: 1879 case AMDGPU::OPERAND_REG_IMM_FP16: 1880 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1881 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1882 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1883 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1884 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1885 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1886 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1887 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1888 case AMDGPU::OPERAND_KIMM16: 1889 return &APFloat::IEEEhalf(); 1890 default: 1891 llvm_unreachable("unsupported fp type"); 1892 } 1893 } 1894 1895 //===----------------------------------------------------------------------===// 1896 // Operand 1897 //===----------------------------------------------------------------------===// 1898 1899 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1900 bool Lost; 1901 1902 // Convert literal to single precision 1903 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1904 APFloat::rmNearestTiesToEven, 1905 &Lost); 1906 // We allow precision lost but not overflow or underflow 1907 if (Status != APFloat::opOK && 1908 Lost && 1909 ((Status & APFloat::opOverflow) != 0 || 1910 (Status & APFloat::opUnderflow) != 0)) { 1911 return false; 1912 } 1913 1914 return true; 1915 } 1916 1917 static bool isSafeTruncation(int64_t Val, unsigned Size) { 
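  // True when Val fits in Size bits as either an unsigned or a signed value,
  // i.e. truncating it to Size bits loses no information.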
1918 return isUIntN(Size, Val) || isIntN(Size, Val); 1919 } 1920 1921 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1922 if (VT.getScalarType() == MVT::i16) { 1923 // FP immediate values are broken. 1924 return isInlinableIntLiteral(Val); 1925 } 1926 1927 // f16/v2f16 operands work correctly for all values. 1928 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1929 } 1930 1931 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1932 1933 // This is a hack to enable named inline values like 1934 // shared_base with both 32-bit and 64-bit operands. 1935 // Note that these values are defined as 1936 // 32-bit operands only. 1937 if (isInlineValue()) { 1938 return true; 1939 } 1940 1941 if (!isImmTy(ImmTyNone)) { 1942 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1943 return false; 1944 } 1945 // TODO: We should avoid using host float here. It would be better to 1946 // check the float bit values which is what a few other places do. 1947 // We've had bot failures before due to weird NaN support on mips hosts. 1948 1949 APInt Literal(64, Imm.Val); 1950 1951 if (Imm.IsFPImm) { // We got fp literal token 1952 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1953 return AMDGPU::isInlinableLiteral64(Imm.Val, 1954 AsmParser->hasInv2PiInlineImm()); 1955 } 1956 1957 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1958 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1959 return false; 1960 1961 if (type.getScalarSizeInBits() == 16) { 1962 return isInlineableLiteralOp16( 1963 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1964 type, AsmParser->hasInv2PiInlineImm()); 1965 } 1966 1967 // Check if single precision literal is inlinable 1968 return AMDGPU::isInlinableLiteral32( 1969 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1970 AsmParser->hasInv2PiInlineImm()); 1971 } 1972 1973 // We got int literal token. 1974 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1975 return AMDGPU::isInlinableLiteral64(Imm.Val, 1976 AsmParser->hasInv2PiInlineImm()); 1977 } 1978 1979 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1980 return false; 1981 } 1982 1983 if (type.getScalarSizeInBits() == 16) { 1984 return isInlineableLiteralOp16( 1985 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1986 type, AsmParser->hasInv2PiInlineImm()); 1987 } 1988 1989 return AMDGPU::isInlinableLiteral32( 1990 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1991 AsmParser->hasInv2PiInlineImm()); 1992 } 1993 1994 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1995 // Check that this immediate can be added as literal 1996 if (!isImmTy(ImmTyNone)) { 1997 return false; 1998 } 1999 2000 if (!Imm.IsFPImm) { 2001 // We got int literal token. 2002 2003 if (type == MVT::f64 && hasFPModifiers()) { 2004 // Cannot apply fp modifiers to int literals preserving the same semantics 2005 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 2006 // disable these cases. 2007 return false; 2008 } 2009 2010 unsigned Size = type.getSizeInBits(); 2011 if (Size == 64) 2012 Size = 32; 2013 2014 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 2015 // types. 
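  // Illustration: an integer literal used with a 64-bit operand is checked
  // against 32 bits here; addLiteralImmOperand below encodes only a single
  // 32-bit literal dword for such operands.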
2016 return isSafeTruncation(Imm.Val, Size); 2017 } 2018 2019 // We got fp literal token 2020 if (type == MVT::f64) { // Expected 64-bit fp operand 2021 // We would set low 64-bits of literal to zeroes but we accept this literals 2022 return true; 2023 } 2024 2025 if (type == MVT::i64) { // Expected 64-bit int operand 2026 // We don't allow fp literals in 64-bit integer instructions. It is 2027 // unclear how we should encode them. 2028 return false; 2029 } 2030 2031 // We allow fp literals with f16x2 operands assuming that the specified 2032 // literal goes into the lower half and the upper half is zero. We also 2033 // require that the literal may be losslessly converted to f16. 2034 // 2035 // For i16x2 operands, we assume that the specified literal is encoded as a 2036 // single-precision float. This is pretty odd, but it matches SP3 and what 2037 // happens in hardware. 2038 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 2039 : (type == MVT::v2i16) ? MVT::f32 2040 : (type == MVT::v2f32) ? MVT::f32 2041 : type; 2042 2043 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2044 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2045 } 2046 2047 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2048 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2049 } 2050 2051 bool AMDGPUOperand::isVRegWithInputMods() const { 2052 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2053 // GFX90A allows DPP on 64-bit operands. 2054 (isRegClass(AMDGPU::VReg_64RegClassID) && 2055 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]); 2056 } 2057 2058 bool AMDGPUOperand::isT16VRegWithInputMods() const { 2059 return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID); 2060 } 2061 2062 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2063 if (AsmParser->isVI()) 2064 return isVReg32(); 2065 else if (AsmParser->isGFX9Plus()) 2066 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2067 else 2068 return false; 2069 } 2070 2071 bool AMDGPUOperand::isSDWAFP16Operand() const { 2072 return isSDWAOperand(MVT::f16); 2073 } 2074 2075 bool AMDGPUOperand::isSDWAFP32Operand() const { 2076 return isSDWAOperand(MVT::f32); 2077 } 2078 2079 bool AMDGPUOperand::isSDWAInt16Operand() const { 2080 return isSDWAOperand(MVT::i16); 2081 } 2082 2083 bool AMDGPUOperand::isSDWAInt32Operand() const { 2084 return isSDWAOperand(MVT::i32); 2085 } 2086 2087 bool AMDGPUOperand::isBoolReg() const { 2088 auto FB = AsmParser->getFeatureBits(); 2089 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2090 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2091 } 2092 2093 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2094 { 2095 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2096 assert(Size == 2 || Size == 4 || Size == 8); 2097 2098 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2099 2100 if (Imm.Mods.Abs) { 2101 Val &= ~FpSignMask; 2102 } 2103 if (Imm.Mods.Neg) { 2104 Val ^= FpSignMask; 2105 } 2106 2107 return Val; 2108 } 2109 2110 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2111 if (isExpr()) { 2112 Inst.addOperand(MCOperand::createExpr(Expr)); 2113 return; 2114 } 2115 2116 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2117 Inst.getNumOperands())) { 2118 addLiteralImmOperand(Inst, Imm.Val, 2119 ApplyModifiers & 2120 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2121 } else { 2122 assert(!isImmTy(ImmTyNone) || 
!hasModifiers()); 2123 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2124 setImmKindNone(); 2125 } 2126 } 2127 2128 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2129 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2130 auto OpNum = Inst.getNumOperands(); 2131 // Check that this operand accepts literals 2132 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2133 2134 if (ApplyModifiers) { 2135 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2136 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 2137 Val = applyInputFPModifiers(Val, Size); 2138 } 2139 2140 APInt Literal(64, Val); 2141 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType; 2142 2143 if (Imm.IsFPImm) { // We got fp literal token 2144 switch (OpTy) { 2145 case AMDGPU::OPERAND_REG_IMM_INT64: 2146 case AMDGPU::OPERAND_REG_IMM_FP64: 2147 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2148 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2149 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2150 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2151 AsmParser->hasInv2PiInlineImm())) { 2152 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2153 setImmKindConst(); 2154 return; 2155 } 2156 2157 // Non-inlineable 2158 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2159 // For fp operands we check if low 32 bits are zeros 2160 if (Literal.getLoBits(32) != 0) { 2161 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2162 "Can't encode literal as exact 64-bit floating-point operand. " 2163 "Low 32-bits will be set to zero"); 2164 Val &= 0xffffffff00000000u; 2165 } 2166 2167 Inst.addOperand(MCOperand::createImm(Val)); 2168 setImmKindLiteral(); 2169 return; 2170 } 2171 2172 // We don't allow fp literals in 64-bit integer instructions. It is 2173 // unclear how we should encode them. This case should be checked earlier 2174 // in predicate methods (isLiteralImm()) 2175 llvm_unreachable("fp literal in 64-bit integer instruction."); 2176 2177 case AMDGPU::OPERAND_REG_IMM_INT32: 2178 case AMDGPU::OPERAND_REG_IMM_FP32: 2179 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2180 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2181 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2182 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2183 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2184 case AMDGPU::OPERAND_REG_IMM_INT16: 2185 case AMDGPU::OPERAND_REG_IMM_FP16: 2186 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2187 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2188 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2189 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2190 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2191 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2192 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2193 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2194 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2195 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2196 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2197 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2198 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2199 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2200 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2201 case AMDGPU::OPERAND_KIMM32: 2202 case AMDGPU::OPERAND_KIMM16: 2203 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { 2204 bool lost; 2205 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2206 // Convert literal to single precision 2207 FPLiteral.convert(*getOpFltSemantics(OpTy), 2208 APFloat::rmNearestTiesToEven, &lost); 2209 // We allow precision lost but not overflow or underflow. 
This should be 2210 // checked earlier in isLiteralImm() 2211 2212 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2213 Inst.addOperand(MCOperand::createImm(ImmVal)); 2214 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) { 2215 setImmKindMandatoryLiteral(); 2216 } else { 2217 setImmKindLiteral(); 2218 } 2219 return; 2220 } 2221 default: 2222 llvm_unreachable("invalid operand size"); 2223 } 2224 2225 return; 2226 } 2227 2228 // We got int literal token. 2229 // Only sign extend inline immediates. 2230 switch (OpTy) { 2231 case AMDGPU::OPERAND_REG_IMM_INT32: 2232 case AMDGPU::OPERAND_REG_IMM_FP32: 2233 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2234 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2235 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2236 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2237 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2238 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2239 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2240 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2241 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2242 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2243 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2244 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 2245 if (isSafeTruncation(Val, 32) && 2246 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2247 AsmParser->hasInv2PiInlineImm())) { 2248 Inst.addOperand(MCOperand::createImm(Val)); 2249 setImmKindConst(); 2250 return; 2251 } 2252 2253 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2254 setImmKindLiteral(); 2255 return; 2256 2257 case AMDGPU::OPERAND_REG_IMM_INT64: 2258 case AMDGPU::OPERAND_REG_IMM_FP64: 2259 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2260 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2261 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2262 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2263 Inst.addOperand(MCOperand::createImm(Val)); 2264 setImmKindConst(); 2265 return; 2266 } 2267 2268 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? 
(uint64_t)Val << 32 2269 : Lo_32(Val); 2270 2271 Inst.addOperand(MCOperand::createImm(Val)); 2272 setImmKindLiteral(); 2273 return; 2274 2275 case AMDGPU::OPERAND_REG_IMM_INT16: 2276 case AMDGPU::OPERAND_REG_IMM_FP16: 2277 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2278 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2279 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2280 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2281 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2282 if (isSafeTruncation(Val, 16) && 2283 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2284 AsmParser->hasInv2PiInlineImm())) { 2285 Inst.addOperand(MCOperand::createImm(Val)); 2286 setImmKindConst(); 2287 return; 2288 } 2289 2290 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2291 setImmKindLiteral(); 2292 return; 2293 2294 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2295 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2296 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2297 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2298 assert(isSafeTruncation(Val, 16)); 2299 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2300 AsmParser->hasInv2PiInlineImm())); 2301 2302 Inst.addOperand(MCOperand::createImm(Val)); 2303 return; 2304 } 2305 case AMDGPU::OPERAND_KIMM32: 2306 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2307 setImmKindMandatoryLiteral(); 2308 return; 2309 case AMDGPU::OPERAND_KIMM16: 2310 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2311 setImmKindMandatoryLiteral(); 2312 return; 2313 default: 2314 llvm_unreachable("invalid operand size"); 2315 } 2316 } 2317 2318 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2319 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2320 } 2321 2322 bool AMDGPUOperand::isInlineValue() const { 2323 return isRegKind() && ::isInlineValue(getReg()); 2324 } 2325 2326 //===----------------------------------------------------------------------===// 2327 // AsmParser 2328 //===----------------------------------------------------------------------===// 2329 2330 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2331 if (Is == IS_VGPR) { 2332 switch (RegWidth) { 2333 default: return -1; 2334 case 32: 2335 return AMDGPU::VGPR_32RegClassID; 2336 case 64: 2337 return AMDGPU::VReg_64RegClassID; 2338 case 96: 2339 return AMDGPU::VReg_96RegClassID; 2340 case 128: 2341 return AMDGPU::VReg_128RegClassID; 2342 case 160: 2343 return AMDGPU::VReg_160RegClassID; 2344 case 192: 2345 return AMDGPU::VReg_192RegClassID; 2346 case 224: 2347 return AMDGPU::VReg_224RegClassID; 2348 case 256: 2349 return AMDGPU::VReg_256RegClassID; 2350 case 288: 2351 return AMDGPU::VReg_288RegClassID; 2352 case 320: 2353 return AMDGPU::VReg_320RegClassID; 2354 case 352: 2355 return AMDGPU::VReg_352RegClassID; 2356 case 384: 2357 return AMDGPU::VReg_384RegClassID; 2358 case 512: 2359 return AMDGPU::VReg_512RegClassID; 2360 case 1024: 2361 return AMDGPU::VReg_1024RegClassID; 2362 } 2363 } else if (Is == IS_TTMP) { 2364 switch (RegWidth) { 2365 default: return -1; 2366 case 32: 2367 return AMDGPU::TTMP_32RegClassID; 2368 case 64: 2369 return AMDGPU::TTMP_64RegClassID; 2370 case 128: 2371 return AMDGPU::TTMP_128RegClassID; 2372 case 256: 2373 return AMDGPU::TTMP_256RegClassID; 2374 case 512: 2375 return AMDGPU::TTMP_512RegClassID; 2376 } 2377 } else if (Is == IS_SGPR) { 2378 switch (RegWidth) { 2379 default: return -1; 2380 case 32: 2381 return AMDGPU::SGPR_32RegClassID; 2382 case 64: 2383 return 
AMDGPU::SGPR_64RegClassID; 2384 case 96: 2385 return AMDGPU::SGPR_96RegClassID; 2386 case 128: 2387 return AMDGPU::SGPR_128RegClassID; 2388 case 160: 2389 return AMDGPU::SGPR_160RegClassID; 2390 case 192: 2391 return AMDGPU::SGPR_192RegClassID; 2392 case 224: 2393 return AMDGPU::SGPR_224RegClassID; 2394 case 256: 2395 return AMDGPU::SGPR_256RegClassID; 2396 case 288: 2397 return AMDGPU::SGPR_288RegClassID; 2398 case 320: 2399 return AMDGPU::SGPR_320RegClassID; 2400 case 352: 2401 return AMDGPU::SGPR_352RegClassID; 2402 case 384: 2403 return AMDGPU::SGPR_384RegClassID; 2404 case 512: 2405 return AMDGPU::SGPR_512RegClassID; 2406 } 2407 } else if (Is == IS_AGPR) { 2408 switch (RegWidth) { 2409 default: return -1; 2410 case 32: 2411 return AMDGPU::AGPR_32RegClassID; 2412 case 64: 2413 return AMDGPU::AReg_64RegClassID; 2414 case 96: 2415 return AMDGPU::AReg_96RegClassID; 2416 case 128: 2417 return AMDGPU::AReg_128RegClassID; 2418 case 160: 2419 return AMDGPU::AReg_160RegClassID; 2420 case 192: 2421 return AMDGPU::AReg_192RegClassID; 2422 case 224: 2423 return AMDGPU::AReg_224RegClassID; 2424 case 256: 2425 return AMDGPU::AReg_256RegClassID; 2426 case 288: 2427 return AMDGPU::AReg_288RegClassID; 2428 case 320: 2429 return AMDGPU::AReg_320RegClassID; 2430 case 352: 2431 return AMDGPU::AReg_352RegClassID; 2432 case 384: 2433 return AMDGPU::AReg_384RegClassID; 2434 case 512: 2435 return AMDGPU::AReg_512RegClassID; 2436 case 1024: 2437 return AMDGPU::AReg_1024RegClassID; 2438 } 2439 } 2440 return -1; 2441 } 2442 2443 static unsigned getSpecialRegForName(StringRef RegName) { 2444 return StringSwitch<unsigned>(RegName) 2445 .Case("exec", AMDGPU::EXEC) 2446 .Case("vcc", AMDGPU::VCC) 2447 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2448 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2449 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2450 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2451 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2452 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2453 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2454 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2455 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2456 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2457 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2458 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2459 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2460 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2461 .Case("m0", AMDGPU::M0) 2462 .Case("vccz", AMDGPU::SRC_VCCZ) 2463 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2464 .Case("execz", AMDGPU::SRC_EXECZ) 2465 .Case("src_execz", AMDGPU::SRC_EXECZ) 2466 .Case("scc", AMDGPU::SRC_SCC) 2467 .Case("src_scc", AMDGPU::SRC_SCC) 2468 .Case("tba", AMDGPU::TBA) 2469 .Case("tma", AMDGPU::TMA) 2470 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2471 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2472 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2473 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2474 .Case("vcc_lo", AMDGPU::VCC_LO) 2475 .Case("vcc_hi", AMDGPU::VCC_HI) 2476 .Case("exec_lo", AMDGPU::EXEC_LO) 2477 .Case("exec_hi", AMDGPU::EXEC_HI) 2478 .Case("tma_lo", AMDGPU::TMA_LO) 2479 .Case("tma_hi", AMDGPU::TMA_HI) 2480 .Case("tba_lo", AMDGPU::TBA_LO) 2481 .Case("tba_hi", AMDGPU::TBA_HI) 2482 .Case("pc", AMDGPU::PC_REG) 2483 .Case("null", AMDGPU::SGPR_NULL) 2484 .Default(AMDGPU::NoRegister); 2485 } 2486 2487 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, 2488 SMLoc &EndLoc, bool RestoreOnFailure) { 2489 auto R = 
parseRegister(); 2490 if (!R) return true; 2491 assert(R->isReg()); 2492 RegNo = R->getReg(); 2493 StartLoc = R->getStartLoc(); 2494 EndLoc = R->getEndLoc(); 2495 return false; 2496 } 2497 2498 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, 2499 SMLoc &EndLoc) { 2500 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2501 } 2502 2503 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, 2504 SMLoc &EndLoc) { 2505 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2506 bool PendingErrors = getParser().hasPendingError(); 2507 getParser().clearPendingErrors(); 2508 if (PendingErrors) 2509 return ParseStatus::Failure; 2510 if (Result) 2511 return ParseStatus::NoMatch; 2512 return ParseStatus::Success; 2513 } 2514 2515 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2516 RegisterKind RegKind, unsigned Reg1, 2517 SMLoc Loc) { 2518 switch (RegKind) { 2519 case IS_SPECIAL: 2520 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2521 Reg = AMDGPU::EXEC; 2522 RegWidth = 64; 2523 return true; 2524 } 2525 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2526 Reg = AMDGPU::FLAT_SCR; 2527 RegWidth = 64; 2528 return true; 2529 } 2530 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2531 Reg = AMDGPU::XNACK_MASK; 2532 RegWidth = 64; 2533 return true; 2534 } 2535 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2536 Reg = AMDGPU::VCC; 2537 RegWidth = 64; 2538 return true; 2539 } 2540 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2541 Reg = AMDGPU::TBA; 2542 RegWidth = 64; 2543 return true; 2544 } 2545 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2546 Reg = AMDGPU::TMA; 2547 RegWidth = 64; 2548 return true; 2549 } 2550 Error(Loc, "register does not fit in the list"); 2551 return false; 2552 case IS_VGPR: 2553 case IS_SGPR: 2554 case IS_AGPR: 2555 case IS_TTMP: 2556 if (Reg1 != Reg + RegWidth / 32) { 2557 Error(Loc, "registers in a list must have consecutive indices"); 2558 return false; 2559 } 2560 RegWidth += 32; 2561 return true; 2562 default: 2563 llvm_unreachable("unexpected register kind"); 2564 } 2565 } 2566 2567 struct RegInfo { 2568 StringLiteral Name; 2569 RegisterKind Kind; 2570 }; 2571 2572 static constexpr RegInfo RegularRegisters[] = { 2573 {{"v"}, IS_VGPR}, 2574 {{"s"}, IS_SGPR}, 2575 {{"ttmp"}, IS_TTMP}, 2576 {{"acc"}, IS_AGPR}, 2577 {{"a"}, IS_AGPR}, 2578 }; 2579 2580 static bool isRegularReg(RegisterKind Kind) { 2581 return Kind == IS_VGPR || 2582 Kind == IS_SGPR || 2583 Kind == IS_TTMP || 2584 Kind == IS_AGPR; 2585 } 2586 2587 static const RegInfo* getRegularRegInfo(StringRef Str) { 2588 for (const RegInfo &Reg : RegularRegisters) 2589 if (Str.starts_with(Reg.Name)) 2590 return &Reg; 2591 return nullptr; 2592 } 2593 2594 static bool getRegNum(StringRef Str, unsigned& Num) { 2595 return !Str.getAsInteger(10, Num); 2596 } 2597 2598 bool 2599 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2600 const AsmToken &NextToken) const { 2601 2602 // A list of consecutive registers: [s0,s1,s2,s3] 2603 if (Token.is(AsmToken::LBrac)) 2604 return true; 2605 2606 if (!Token.is(AsmToken::Identifier)) 2607 return false; 2608 2609 // A single register like s0 or a range of registers like s[0:1] 2610 2611 StringRef Str = Token.getString(); 2612 const RegInfo *Reg = getRegularRegInfo(Str); 2613 if (Reg) { 2614 StringRef RegName = Reg->Name; 2615 StringRef RegSuffix = Str.substr(RegName.size()); 2616 if 
(!RegSuffix.empty()) { 2617 RegSuffix.consume_back(".l"); 2618 RegSuffix.consume_back(".h"); 2619 unsigned Num; 2620 // A single register with an index: rXX 2621 if (getRegNum(RegSuffix, Num)) 2622 return true; 2623 } else { 2624 // A range of registers: r[XX:YY]. 2625 if (NextToken.is(AsmToken::LBrac)) 2626 return true; 2627 } 2628 } 2629 2630 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2631 } 2632 2633 bool 2634 AMDGPUAsmParser::isRegister() 2635 { 2636 return isRegister(getToken(), peekToken()); 2637 } 2638 2639 unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum, 2640 unsigned SubReg, unsigned RegWidth, 2641 SMLoc Loc) { 2642 assert(isRegularReg(RegKind)); 2643 2644 unsigned AlignSize = 1; 2645 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2646 // SGPR and TTMP registers must be aligned. 2647 // Max required alignment is 4 dwords. 2648 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u); 2649 } 2650 2651 if (RegNum % AlignSize != 0) { 2652 Error(Loc, "invalid register alignment"); 2653 return AMDGPU::NoRegister; 2654 } 2655 2656 unsigned RegIdx = RegNum / AlignSize; 2657 int RCID = getRegClass(RegKind, RegWidth); 2658 if (RCID == -1) { 2659 Error(Loc, "invalid or unsupported register size"); 2660 return AMDGPU::NoRegister; 2661 } 2662 2663 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2664 const MCRegisterClass RC = TRI->getRegClass(RCID); 2665 if (RegIdx >= RC.getNumRegs()) { 2666 Error(Loc, "register index is out of range"); 2667 return AMDGPU::NoRegister; 2668 } 2669 2670 unsigned Reg = RC.getRegister(RegIdx); 2671 2672 if (SubReg) { 2673 Reg = TRI->getSubReg(Reg, SubReg); 2674 2675 // Currently all regular registers have their .l and .h subregisters, so 2676 // we should never need to generate an error here. 
2677 assert(Reg && "Invalid subregister!"); 2678 } 2679 2680 return Reg; 2681 } 2682 2683 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2684 int64_t RegLo, RegHi; 2685 if (!skipToken(AsmToken::LBrac, "missing register index")) 2686 return false; 2687 2688 SMLoc FirstIdxLoc = getLoc(); 2689 SMLoc SecondIdxLoc; 2690 2691 if (!parseExpr(RegLo)) 2692 return false; 2693 2694 if (trySkipToken(AsmToken::Colon)) { 2695 SecondIdxLoc = getLoc(); 2696 if (!parseExpr(RegHi)) 2697 return false; 2698 } else { 2699 RegHi = RegLo; 2700 } 2701 2702 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2703 return false; 2704 2705 if (!isUInt<32>(RegLo)) { 2706 Error(FirstIdxLoc, "invalid register index"); 2707 return false; 2708 } 2709 2710 if (!isUInt<32>(RegHi)) { 2711 Error(SecondIdxLoc, "invalid register index"); 2712 return false; 2713 } 2714 2715 if (RegLo > RegHi) { 2716 Error(FirstIdxLoc, "first register index should not exceed second index"); 2717 return false; 2718 } 2719 2720 Num = static_cast<unsigned>(RegLo); 2721 RegWidth = 32 * ((RegHi - RegLo) + 1); 2722 return true; 2723 } 2724 2725 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2726 unsigned &RegNum, unsigned &RegWidth, 2727 SmallVectorImpl<AsmToken> &Tokens) { 2728 assert(isToken(AsmToken::Identifier)); 2729 unsigned Reg = getSpecialRegForName(getTokenStr()); 2730 if (Reg) { 2731 RegNum = 0; 2732 RegWidth = 32; 2733 RegKind = IS_SPECIAL; 2734 Tokens.push_back(getToken()); 2735 lex(); // skip register name 2736 } 2737 return Reg; 2738 } 2739 2740 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2741 unsigned &RegNum, unsigned &RegWidth, 2742 SmallVectorImpl<AsmToken> &Tokens) { 2743 assert(isToken(AsmToken::Identifier)); 2744 StringRef RegName = getTokenStr(); 2745 auto Loc = getLoc(); 2746 2747 const RegInfo *RI = getRegularRegInfo(RegName); 2748 if (!RI) { 2749 Error(Loc, "invalid register name"); 2750 return AMDGPU::NoRegister; 2751 } 2752 2753 Tokens.push_back(getToken()); 2754 lex(); // skip register name 2755 2756 RegKind = RI->Kind; 2757 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2758 unsigned SubReg = NoSubRegister; 2759 if (!RegSuffix.empty()) { 2760 // We don't know the opcode till we are done parsing, so we don't know if 2761 // registers should be 16 or 32 bit. It is therefore mandatory to put .l or 2762 // .h to correctly specify 16 bit registers. We also can't determine class 2763 // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16. 2764 if (RegSuffix.consume_back(".l")) 2765 SubReg = AMDGPU::lo16; 2766 else if (RegSuffix.consume_back(".h")) 2767 SubReg = AMDGPU::hi16; 2768 2769 // Single 32-bit register: vXX. 2770 if (!getRegNum(RegSuffix, RegNum)) { 2771 Error(Loc, "invalid register index"); 2772 return AMDGPU::NoRegister; 2773 } 2774 RegWidth = 32; 2775 } else { 2776 // Range of registers: v[XX:YY]. ":YY" is optional. 
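    // For example, "v[4:7]" yields RegNum = 4 and RegWidth = 128, while "v[4]"
    // is also accepted, with the high index defaulting to the low one
    // (RegWidth = 32).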
2777 if (!ParseRegRange(RegNum, RegWidth)) 2778 return AMDGPU::NoRegister; 2779 } 2780 2781 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc); 2782 } 2783 2784 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2785 unsigned &RegWidth, 2786 SmallVectorImpl<AsmToken> &Tokens) { 2787 unsigned Reg = AMDGPU::NoRegister; 2788 auto ListLoc = getLoc(); 2789 2790 if (!skipToken(AsmToken::LBrac, 2791 "expected a register or a list of registers")) { 2792 return AMDGPU::NoRegister; 2793 } 2794 2795 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2796 2797 auto Loc = getLoc(); 2798 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2799 return AMDGPU::NoRegister; 2800 if (RegWidth != 32) { 2801 Error(Loc, "expected a single 32-bit register"); 2802 return AMDGPU::NoRegister; 2803 } 2804 2805 for (; trySkipToken(AsmToken::Comma); ) { 2806 RegisterKind NextRegKind; 2807 unsigned NextReg, NextRegNum, NextRegWidth; 2808 Loc = getLoc(); 2809 2810 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2811 NextRegNum, NextRegWidth, 2812 Tokens)) { 2813 return AMDGPU::NoRegister; 2814 } 2815 if (NextRegWidth != 32) { 2816 Error(Loc, "expected a single 32-bit register"); 2817 return AMDGPU::NoRegister; 2818 } 2819 if (NextRegKind != RegKind) { 2820 Error(Loc, "registers in a list must be of the same kind"); 2821 return AMDGPU::NoRegister; 2822 } 2823 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2824 return AMDGPU::NoRegister; 2825 } 2826 2827 if (!skipToken(AsmToken::RBrac, 2828 "expected a comma or a closing square bracket")) { 2829 return AMDGPU::NoRegister; 2830 } 2831 2832 if (isRegularReg(RegKind)) 2833 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc); 2834 2835 return Reg; 2836 } 2837 2838 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2839 unsigned &RegNum, unsigned &RegWidth, 2840 SmallVectorImpl<AsmToken> &Tokens) { 2841 auto Loc = getLoc(); 2842 Reg = AMDGPU::NoRegister; 2843 2844 if (isToken(AsmToken::Identifier)) { 2845 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2846 if (Reg == AMDGPU::NoRegister) 2847 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2848 } else { 2849 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2850 } 2851 2852 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2853 if (Reg == AMDGPU::NoRegister) { 2854 assert(Parser.hasPendingError()); 2855 return false; 2856 } 2857 2858 if (!subtargetHasRegister(*TRI, Reg)) { 2859 if (Reg == AMDGPU::SGPR_NULL) { 2860 Error(Loc, "'null' operand is not supported on this GPU"); 2861 } else { 2862 Error(Loc, "register not available on this GPU"); 2863 } 2864 return false; 2865 } 2866 2867 return true; 2868 } 2869 2870 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2871 unsigned &RegNum, unsigned &RegWidth, 2872 bool RestoreOnFailure /*=false*/) { 2873 Reg = AMDGPU::NoRegister; 2874 2875 SmallVector<AsmToken, 1> Tokens; 2876 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2877 if (RestoreOnFailure) { 2878 while (!Tokens.empty()) { 2879 getLexer().UnLex(Tokens.pop_back_val()); 2880 } 2881 } 2882 return true; 2883 } 2884 return false; 2885 } 2886 2887 std::optional<StringRef> 2888 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2889 switch (RegKind) { 2890 case IS_VGPR: 2891 return StringRef(".amdgcn.next_free_vgpr"); 2892 case IS_SGPR: 2893 return StringRef(".amdgcn.next_free_sgpr"); 2894 default: 2895 return std::nullopt; 
2896 } 2897 } 2898 2899 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2900 auto SymbolName = getGprCountSymbolName(RegKind); 2901 assert(SymbolName && "initializing invalid register kind"); 2902 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2903 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2904 } 2905 2906 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2907 unsigned DwordRegIndex, 2908 unsigned RegWidth) { 2909 // Symbols are only defined for GCN targets 2910 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2911 return true; 2912 2913 auto SymbolName = getGprCountSymbolName(RegKind); 2914 if (!SymbolName) 2915 return true; 2916 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2917 2918 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2919 int64_t OldCount; 2920 2921 if (!Sym->isVariable()) 2922 return !Error(getLoc(), 2923 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2924 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2925 return !Error( 2926 getLoc(), 2927 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2928 2929 if (OldCount <= NewMax) 2930 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2931 2932 return true; 2933 } 2934 2935 std::unique_ptr<AMDGPUOperand> 2936 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2937 const auto &Tok = getToken(); 2938 SMLoc StartLoc = Tok.getLoc(); 2939 SMLoc EndLoc = Tok.getEndLoc(); 2940 RegisterKind RegKind; 2941 unsigned Reg, RegNum, RegWidth; 2942 2943 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2944 return nullptr; 2945 } 2946 if (isHsaAbi(getSTI())) { 2947 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2948 return nullptr; 2949 } else 2950 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2951 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2952 } 2953 2954 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands, 2955 bool HasSP3AbsModifier, bool HasLit) { 2956 // TODO: add syntactic sugar for 1/(2*PI) 2957 2958 if (isRegister()) 2959 return ParseStatus::NoMatch; 2960 assert(!isModifier()); 2961 2962 if (!HasLit) { 2963 HasLit = trySkipId("lit"); 2964 if (HasLit) { 2965 if (!skipToken(AsmToken::LParen, "expected left paren after lit")) 2966 return ParseStatus::Failure; 2967 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit); 2968 if (S.isSuccess() && 2969 !skipToken(AsmToken::RParen, "expected closing parentheses")) 2970 return ParseStatus::Failure; 2971 return S; 2972 } 2973 } 2974 2975 const auto& Tok = getToken(); 2976 const auto& NextTok = peekToken(); 2977 bool IsReal = Tok.is(AsmToken::Real); 2978 SMLoc S = getLoc(); 2979 bool Negate = false; 2980 2981 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2982 lex(); 2983 IsReal = true; 2984 Negate = true; 2985 } 2986 2987 AMDGPUOperand::Modifiers Mods; 2988 Mods.Lit = HasLit; 2989 2990 if (IsReal) { 2991 // Floating-point expressions are not supported. 2992 // Can only allow floating-point literals with an 2993 // optional sign. 
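    // For example, "1.5" and "-2.5" are accepted here; the leading '-' merely
    // flips the sign of the parsed value below.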
2994 2995 StringRef Num = getTokenStr(); 2996 lex(); 2997 2998 APFloat RealVal(APFloat::IEEEdouble()); 2999 auto roundMode = APFloat::rmNearestTiesToEven; 3000 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) 3001 return ParseStatus::Failure; 3002 if (Negate) 3003 RealVal.changeSign(); 3004 3005 Operands.push_back( 3006 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 3007 AMDGPUOperand::ImmTyNone, true)); 3008 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3009 Op.setModifiers(Mods); 3010 3011 return ParseStatus::Success; 3012 3013 } else { 3014 int64_t IntVal; 3015 const MCExpr *Expr; 3016 SMLoc S = getLoc(); 3017 3018 if (HasSP3AbsModifier) { 3019 // This is a workaround for handling expressions 3020 // as arguments of SP3 'abs' modifier, for example: 3021 // |1.0| 3022 // |-1| 3023 // |1+x| 3024 // This syntax is not compatible with syntax of standard 3025 // MC expressions (due to the trailing '|'). 3026 SMLoc EndLoc; 3027 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 3028 return ParseStatus::Failure; 3029 } else { 3030 if (Parser.parseExpression(Expr)) 3031 return ParseStatus::Failure; 3032 } 3033 3034 if (Expr->evaluateAsAbsolute(IntVal)) { 3035 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 3036 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3037 Op.setModifiers(Mods); 3038 } else { 3039 if (HasLit) 3040 return ParseStatus::NoMatch; 3041 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3042 } 3043 3044 return ParseStatus::Success; 3045 } 3046 3047 return ParseStatus::NoMatch; 3048 } 3049 3050 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) { 3051 if (!isRegister()) 3052 return ParseStatus::NoMatch; 3053 3054 if (auto R = parseRegister()) { 3055 assert(R->isReg()); 3056 Operands.push_back(std::move(R)); 3057 return ParseStatus::Success; 3058 } 3059 return ParseStatus::Failure; 3060 } 3061 3062 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, 3063 bool HasSP3AbsMod, bool HasLit) { 3064 ParseStatus Res = parseReg(Operands); 3065 if (!Res.isNoMatch()) 3066 return Res; 3067 if (isModifier()) 3068 return ParseStatus::NoMatch; 3069 return parseImm(Operands, HasSP3AbsMod, HasLit); 3070 } 3071 3072 bool 3073 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3074 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3075 const auto &str = Token.getString(); 3076 return str == "abs" || str == "neg" || str == "sext"; 3077 } 3078 return false; 3079 } 3080 3081 bool 3082 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3083 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3084 } 3085 3086 bool 3087 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3088 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3089 } 3090 3091 bool 3092 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3093 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3094 } 3095 3096 // Check if this is an operand modifier or an opcode modifier 3097 // which may look like an expression but it is not. We should 3098 // avoid parsing these modifiers as expressions. Currently 3099 // recognized sequences are: 3100 // |...| 3101 // abs(...) 3102 // neg(...) 3103 // sext(...) 
3104 // -reg
3105 // -|...|
3106 // -abs(...)
3107 // name:...
3108 //
3109 bool
3110 AMDGPUAsmParser::isModifier() {
3111
3112   AsmToken Tok = getToken();
3113   AsmToken NextToken[2];
3114   peekTokens(NextToken);
3115
3116   return isOperandModifier(Tok, NextToken[0]) ||
3117          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3118          isOpcodeModifierWithVal(Tok, NextToken[0]);
3119 }
3120
3121 // Check if the current token is an SP3 'neg' modifier.
3122 // Currently this modifier is allowed in the following context:
3123 //
3124 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3125 // 2. Before an 'abs' modifier: -abs(...)
3126 // 3. Before an SP3 'abs' modifier: -|...|
3127 //
3128 // In all other cases "-" is handled as a part
3129 // of an expression that follows the sign.
3130 //
3131 // Note: When "-" is followed by an integer literal,
3132 // this is interpreted as integer negation rather
3133 // than a floating-point NEG modifier applied to the literal.
3134 // Besides being counter-intuitive, such use of the floating-point
3135 // NEG modifier would have resulted in different meanings
3136 // of integer literals used with VOP1/2/C and VOP3,
3137 // for example:
3138 //   v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3139 //   v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3140 // Negative fp literals with a preceding "-" are
3141 // handled likewise for uniformity.
3142 //
3143 bool
3144 AMDGPUAsmParser::parseSP3NegModifier() {
3145
3146   AsmToken NextToken[2];
3147   peekTokens(NextToken);
3148
3149   if (isToken(AsmToken::Minus) &&
3150       (isRegister(NextToken[0], NextToken[1]) ||
3151        NextToken[0].is(AsmToken::Pipe) ||
3152        isId(NextToken[0], "abs"))) {
3153     lex();
3154     return true;
3155   }
3156
3157   return false;
3158 }
3159
3160 ParseStatus
3161 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3162                                               bool AllowImm) {
3163   bool Neg, SP3Neg;
3164   bool Abs, SP3Abs;
3165   bool Lit;
3166   SMLoc Loc;
3167
3168   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3169   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3170     return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3171
3172   SP3Neg = parseSP3NegModifier();
3173
3174   Loc = getLoc();
3175   Neg = trySkipId("neg");
3176   if (Neg && SP3Neg)
3177     return Error(Loc, "expected register or immediate");
3178   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3179     return ParseStatus::Failure;
3180
3181   Abs = trySkipId("abs");
3182   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3183     return ParseStatus::Failure;
3184
3185   Lit = trySkipId("lit");
3186   if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3187     return ParseStatus::Failure;
3188
3189   Loc = getLoc();
3190   SP3Abs = trySkipToken(AsmToken::Pipe);
3191   if (Abs && SP3Abs)
3192     return Error(Loc, "expected register or immediate");
3193
3194   ParseStatus Res;
3195   if (AllowImm) {
3196     Res = parseRegOrImm(Operands, SP3Abs, Lit);
3197   } else {
3198     Res = parseReg(Operands);
3199   }
3200   if (!Res.isSuccess())
3201     return (SP3Neg || Neg || SP3Abs || Abs || Lit) ?
ParseStatus::Failure : Res; 3202 3203 if (Lit && !Operands.back()->isImm()) 3204 Error(Loc, "expected immediate with lit modifier"); 3205 3206 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3207 return ParseStatus::Failure; 3208 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3209 return ParseStatus::Failure; 3210 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3211 return ParseStatus::Failure; 3212 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3213 return ParseStatus::Failure; 3214 3215 AMDGPUOperand::Modifiers Mods; 3216 Mods.Abs = Abs || SP3Abs; 3217 Mods.Neg = Neg || SP3Neg; 3218 Mods.Lit = Lit; 3219 3220 if (Mods.hasFPModifiers() || Lit) { 3221 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3222 if (Op.isExpr()) 3223 return Error(Op.getStartLoc(), "expected an absolute expression"); 3224 Op.setModifiers(Mods); 3225 } 3226 return ParseStatus::Success; 3227 } 3228 3229 ParseStatus 3230 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3231 bool AllowImm) { 3232 bool Sext = trySkipId("sext"); 3233 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3234 return ParseStatus::Failure; 3235 3236 ParseStatus Res; 3237 if (AllowImm) { 3238 Res = parseRegOrImm(Operands); 3239 } else { 3240 Res = parseReg(Operands); 3241 } 3242 if (!Res.isSuccess()) 3243 return Sext ? ParseStatus::Failure : Res; 3244 3245 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3246 return ParseStatus::Failure; 3247 3248 AMDGPUOperand::Modifiers Mods; 3249 Mods.Sext = Sext; 3250 3251 if (Mods.hasIntModifiers()) { 3252 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3253 if (Op.isExpr()) 3254 return Error(Op.getStartLoc(), "expected an absolute expression"); 3255 Op.setModifiers(Mods); 3256 } 3257 3258 return ParseStatus::Success; 3259 } 3260 3261 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3262 return parseRegOrImmWithFPInputMods(Operands, false); 3263 } 3264 3265 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3266 return parseRegOrImmWithIntInputMods(Operands, false); 3267 } 3268 3269 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3270 auto Loc = getLoc(); 3271 if (trySkipId("off")) { 3272 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3273 AMDGPUOperand::ImmTyOff, false)); 3274 return ParseStatus::Success; 3275 } 3276 3277 if (!isRegister()) 3278 return ParseStatus::NoMatch; 3279 3280 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3281 if (Reg) { 3282 Operands.push_back(std::move(Reg)); 3283 return ParseStatus::Success; 3284 } 3285 3286 return ParseStatus::Failure; 3287 } 3288 3289 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3290 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3291 3292 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3293 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3294 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3295 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3296 return Match_InvalidOperand; 3297 3298 if ((TSFlags & SIInstrFlags::VOP3) && 3299 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3300 getForcedEncodingSize() != 64) 3301 return Match_PreferE32; 3302 3303 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3304 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3305 // 
v_mac_f32/16 allow only dst_sel == DWORD; 3306 auto OpNum = 3307 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3308 const auto &Op = Inst.getOperand(OpNum); 3309 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3310 return Match_InvalidOperand; 3311 } 3312 } 3313 3314 return Match_Success; 3315 } 3316 3317 static ArrayRef<unsigned> getAllVariants() { 3318 static const unsigned Variants[] = { 3319 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3320 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3321 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3322 }; 3323 3324 return ArrayRef(Variants); 3325 } 3326 3327 // What asm variants we should check 3328 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3329 if (isForcedDPP() && isForcedVOP3()) { 3330 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3331 return ArrayRef(Variants); 3332 } 3333 if (getForcedEncodingSize() == 32) { 3334 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3335 return ArrayRef(Variants); 3336 } 3337 3338 if (isForcedVOP3()) { 3339 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3340 return ArrayRef(Variants); 3341 } 3342 3343 if (isForcedSDWA()) { 3344 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3345 AMDGPUAsmVariants::SDWA9}; 3346 return ArrayRef(Variants); 3347 } 3348 3349 if (isForcedDPP()) { 3350 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3351 return ArrayRef(Variants); 3352 } 3353 3354 return getAllVariants(); 3355 } 3356 3357 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3358 if (isForcedDPP() && isForcedVOP3()) 3359 return "e64_dpp"; 3360 3361 if (getForcedEncodingSize() == 32) 3362 return "e32"; 3363 3364 if (isForcedVOP3()) 3365 return "e64"; 3366 3367 if (isForcedSDWA()) 3368 return "sdwa"; 3369 3370 if (isForcedDPP()) 3371 return "dpp"; 3372 3373 return ""; 3374 } 3375 3376 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3377 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3378 for (MCPhysReg Reg : Desc.implicit_uses()) { 3379 switch (Reg) { 3380 case AMDGPU::FLAT_SCR: 3381 case AMDGPU::VCC: 3382 case AMDGPU::VCC_LO: 3383 case AMDGPU::VCC_HI: 3384 case AMDGPU::M0: 3385 return Reg; 3386 default: 3387 break; 3388 } 3389 } 3390 return AMDGPU::NoRegister; 3391 } 3392 3393 // NB: This code is correct only when used to check constant 3394 // bus limitations because GFX7 support no f16 inline constants. 3395 // Note that there are no cases when a GFX7 opcode violates 3396 // constant bus limitations due to the use of an f16 constant. 
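// For instance, a 32-bit operand holding the integer 64 or the bit pattern
// of 1.0f is reported as inlinable here and does not consume a constant bus
// slot, while an arbitrary value such as 0x12345678 is not.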
3397 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3398 unsigned OpIdx) const { 3399 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3400 3401 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) || 3402 AMDGPU::isKImmOperand(Desc, OpIdx)) { 3403 return false; 3404 } 3405 3406 const MCOperand &MO = Inst.getOperand(OpIdx); 3407 3408 int64_t Val = MO.getImm(); 3409 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3410 3411 switch (OpSize) { // expected operand size 3412 case 8: 3413 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3414 case 4: 3415 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3416 case 2: { 3417 const unsigned OperandType = Desc.operands()[OpIdx].OperandType; 3418 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3419 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3420 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3421 return AMDGPU::isInlinableIntLiteral(Val); 3422 3423 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3424 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3425 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3426 return AMDGPU::isInlinableLiteralV2I16(Val); 3427 3428 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3429 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3430 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3431 return AMDGPU::isInlinableLiteralV2F16(Val); 3432 3433 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3434 } 3435 default: 3436 llvm_unreachable("invalid operand size"); 3437 } 3438 } 3439 3440 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3441 if (!isGFX10Plus()) 3442 return 1; 3443 3444 switch (Opcode) { 3445 // 64-bit shift instructions can use only one scalar value input 3446 case AMDGPU::V_LSHLREV_B64_e64: 3447 case AMDGPU::V_LSHLREV_B64_gfx10: 3448 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3449 case AMDGPU::V_LSHLREV_B64_e32_gfx12: 3450 case AMDGPU::V_LSHLREV_B64_e64_gfx12: 3451 case AMDGPU::V_LSHRREV_B64_e64: 3452 case AMDGPU::V_LSHRREV_B64_gfx10: 3453 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3454 case AMDGPU::V_LSHRREV_B64_e64_gfx12: 3455 case AMDGPU::V_ASHRREV_I64_e64: 3456 case AMDGPU::V_ASHRREV_I64_gfx10: 3457 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3458 case AMDGPU::V_ASHRREV_I64_e64_gfx12: 3459 case AMDGPU::V_LSHL_B64_e64: 3460 case AMDGPU::V_LSHR_B64_e64: 3461 case AMDGPU::V_ASHR_I64_e64: 3462 return 1; 3463 default: 3464 return 2; 3465 } 3466 } 3467 3468 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6; 3469 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>; 3470 3471 // Get regular operand indices in the same order as specified 3472 // in the instruction (but append mandatory literals to the end). 3473 static OperandIndices getSrcOperandIndices(unsigned Opcode, 3474 bool AddMandatoryLiterals = false) { 3475 3476 int16_t ImmIdx = 3477 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1; 3478 3479 if (isVOPD(Opcode)) { 3480 int16_t ImmDeferredIdx = 3481 AddMandatoryLiterals ? 
getNamedOperandIdx(Opcode, OpName::immDeferred)
3482                               : -1;
3483
3484     return {getNamedOperandIdx(Opcode, OpName::src0X),
3485             getNamedOperandIdx(Opcode, OpName::vsrc1X),
3486             getNamedOperandIdx(Opcode, OpName::src0Y),
3487             getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3488             ImmDeferredIdx,
3489             ImmIdx};
3490   }
3491
3492   return {getNamedOperandIdx(Opcode, OpName::src0),
3493           getNamedOperandIdx(Opcode, OpName::src1),
3494           getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3495 }
3496
3497 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3498   const MCOperand &MO = Inst.getOperand(OpIdx);
3499   if (MO.isImm()) {
3500     return !isInlineConstant(Inst, OpIdx);
3501   } else if (MO.isReg()) {
3502     auto Reg = MO.getReg();
3503     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3504     auto PReg = mc2PseudoReg(Reg);
3505     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3506   } else {
3507     return true;
3508   }
3509 }
3510
3511 bool AMDGPUAsmParser::validateConstantBusLimitations(
3512     const MCInst &Inst, const OperandVector &Operands) {
3513   const unsigned Opcode = Inst.getOpcode();
3514   const MCInstrDesc &Desc = MII.get(Opcode);
3515   unsigned LastSGPR = AMDGPU::NoRegister;
3516   unsigned ConstantBusUseCount = 0;
3517   unsigned NumLiterals = 0;
3518   unsigned LiteralSize;
3519
3520   if (!(Desc.TSFlags &
3521         (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3522          SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3523       !isVOPD(Opcode))
3524     return true;
3525
3526   // Check special imm operands (used by madmk, etc)
3527   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3528     ++NumLiterals;
3529     LiteralSize = 4;
3530   }
3531
3532   SmallDenseSet<unsigned> SGPRsUsed;
3533   unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3534   if (SGPRUsed != AMDGPU::NoRegister) {
3535     SGPRsUsed.insert(SGPRUsed);
3536     ++ConstantBusUseCount;
3537   }
3538
3539   OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3540
3541   for (int OpIdx : OpIndices) {
3542     if (OpIdx == -1)
3543       continue;
3544
3545     const MCOperand &MO = Inst.getOperand(OpIdx);
3546     if (usesConstantBus(Inst, OpIdx)) {
3547       if (MO.isReg()) {
3548         LastSGPR = mc2PseudoReg(MO.getReg());
3549         // Pairs of registers with a partial intersection, like these:
3550         //   s0, s[0:1]
3551         //   flat_scratch_lo, flat_scratch
3552         //   flat_scratch_lo, flat_scratch_hi
3553         // are theoretically valid but are disabled anyway.
3554         // Note that this code mimics SIInstrInfo::verifyInstruction.
3555         if (SGPRsUsed.insert(LastSGPR).second) {
3556           ++ConstantBusUseCount;
3557         }
3558       } else { // Expression or a literal
3559
3560         if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3561           continue; // special operand like VINTERP attr_chan
3562
3563         // An instruction may use only one literal.
3564         // This has been validated in the previous step.
3565         // See validateVOPLiteral.
3566         // This literal may be used as more than one operand.
3567         // If all these operands are of the same size,
3568         // this literal counts as one scalar value.
3569         // Otherwise it counts as 2 scalar values.
3570         // See "GFX10 Shader Programming", section 3.6.2.3.
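        // Illustration: one 32-bit literal shared by two 32-bit sources keeps
        // NumLiterals at 1 below; if the same literal also feeds an operand of
        // a different size, NumLiterals becomes 2, and the total is added to
        // ConstantBusUseCount.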
3571 3572 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3573 if (Size < 4) 3574 Size = 4; 3575 3576 if (NumLiterals == 0) { 3577 NumLiterals = 1; 3578 LiteralSize = Size; 3579 } else if (LiteralSize != Size) { 3580 NumLiterals = 2; 3581 } 3582 } 3583 } 3584 } 3585 ConstantBusUseCount += NumLiterals; 3586 3587 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3588 return true; 3589 3590 SMLoc LitLoc = getLitLoc(Operands); 3591 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3592 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; 3593 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3594 return false; 3595 } 3596 3597 bool AMDGPUAsmParser::validateVOPDRegBankConstraints( 3598 const MCInst &Inst, const OperandVector &Operands) { 3599 3600 const unsigned Opcode = Inst.getOpcode(); 3601 if (!isVOPD(Opcode)) 3602 return true; 3603 3604 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3605 3606 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) { 3607 const MCOperand &Opr = Inst.getOperand(OperandIdx); 3608 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI)) 3609 ? Opr.getReg() 3610 : MCRegister::NoRegister; 3611 }; 3612 3613 // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. 3614 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; 3615 3616 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); 3617 auto InvalidCompOprIdx = 3618 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc); 3619 if (!InvalidCompOprIdx) 3620 return true; 3621 3622 auto CompOprIdx = *InvalidCompOprIdx; 3623 auto ParsedIdx = 3624 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), 3625 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); 3626 assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); 3627 3628 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); 3629 if (CompOprIdx == VOPD::Component::DST) { 3630 Error(Loc, "one dst register must be even and the other odd"); 3631 } else { 3632 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; 3633 Error(Loc, Twine("src") + Twine(CompSrcIdx) + 3634 " operands must use different VGPR banks"); 3635 } 3636 3637 return false; 3638 } 3639 3640 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3641 3642 const unsigned Opc = Inst.getOpcode(); 3643 const MCInstrDesc &Desc = MII.get(Opc); 3644 3645 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3646 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3647 assert(ClampIdx != -1); 3648 return Inst.getOperand(ClampIdx).getImm() == 0; 3649 } 3650 3651 return true; 3652 } 3653 3654 constexpr uint64_t MIMGFlags = 3655 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE; 3656 3657 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, 3658 const SMLoc &IDLoc) { 3659 3660 const unsigned Opc = Inst.getOpcode(); 3661 const MCInstrDesc &Desc = MII.get(Opc); 3662 3663 if ((Desc.TSFlags & MIMGFlags) == 0) 3664 return true; 3665 3666 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3667 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3668 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3669 3670 assert(VDataIdx != -1); 3671 3672 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray 3673 return true; 3674 3675 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3676 unsigned TFESize = (TFEIdx != 
-1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3677 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3678 if (DMask == 0) 3679 DMask = 1; 3680 3681 bool IsPackedD16 = false; 3682 unsigned DataSize = 3683 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask); 3684 if (hasPackedD16()) { 3685 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3686 IsPackedD16 = D16Idx >= 0; 3687 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm()) 3688 DataSize = (DataSize + 1) / 2; 3689 } 3690 3691 if ((VDataSize / 4) == DataSize + TFESize) 3692 return true; 3693 3694 StringRef Modifiers; 3695 if (isGFX90A()) 3696 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask"; 3697 else 3698 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe"; 3699 3700 Error(IDLoc, Twine("image data size does not match ") + Modifiers); 3701 return false; 3702 } 3703 3704 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, 3705 const SMLoc &IDLoc) { 3706 const unsigned Opc = Inst.getOpcode(); 3707 const MCInstrDesc &Desc = MII.get(Opc); 3708 3709 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus()) 3710 return true; 3711 3712 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3713 3714 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3715 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3716 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3717 int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc 3718 : AMDGPU::OpName::rsrc; 3719 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName); 3720 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3721 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3722 3723 assert(VAddr0Idx != -1); 3724 assert(SrsrcIdx != -1); 3725 assert(SrsrcIdx > VAddr0Idx); 3726 3727 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3728 if (BaseOpcode->BVH) { 3729 if (IsA16 == BaseOpcode->A16) 3730 return true; 3731 Error(IDLoc, "image address size does not match a16"); 3732 return false; 3733 } 3734 3735 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3736 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3737 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3738 unsigned ActualAddrSize = 3739 IsNSA ? SrsrcIdx - VAddr0Idx 3740 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3741 3742 unsigned ExpectedAddrSize = 3743 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3744 3745 if (IsNSA) { 3746 if (hasPartialNSAEncoding() && 3747 ExpectedAddrSize > 3748 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) { 3749 int VAddrLastIdx = SrsrcIdx - 1; 3750 unsigned VAddrLastSize = 3751 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4; 3752 3753 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; 3754 } 3755 } else { 3756 if (ExpectedAddrSize > 12) 3757 ExpectedAddrSize = 16; 3758 3759 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3760 // This provides backward compatibility for assembly created 3761 // before 160b/192b/224b types were directly supported. 
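    // Illustrative example (operand counts assumed): if dim and a16 imply
    // ExpectedAddrSize = 6 but the source was written with an 8-VGPR vaddr
    // tuple (ActualAddrSize = 8), the check below still accepts it.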
3762 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3763 return true; 3764 } 3765 3766 if (ActualAddrSize == ExpectedAddrSize) 3767 return true; 3768 3769 Error(IDLoc, "image address size does not match dim and a16"); 3770 return false; 3771 } 3772 3773 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3774 3775 const unsigned Opc = Inst.getOpcode(); 3776 const MCInstrDesc &Desc = MII.get(Opc); 3777 3778 if ((Desc.TSFlags & MIMGFlags) == 0) 3779 return true; 3780 if (!Desc.mayLoad() || !Desc.mayStore()) 3781 return true; // Not atomic 3782 3783 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3784 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3785 3786 // This is an incomplete check because image_atomic_cmpswap 3787 // may only use 0x3 and 0xf while other atomic operations 3788 // may use 0x1 and 0x3. However these limitations are 3789 // verified when we check that dmask matches dst size. 3790 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3791 } 3792 3793 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3794 3795 const unsigned Opc = Inst.getOpcode(); 3796 const MCInstrDesc &Desc = MII.get(Opc); 3797 3798 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3799 return true; 3800 3801 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3802 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3803 3804 // GATHER4 instructions use dmask in a different fashion compared to 3805 // other MIMG instructions. The only useful DMASK values are 3806 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3807 // (red,red,red,red) etc.) The ISA document doesn't mention 3808 // this. 3809 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3810 } 3811 3812 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3813 const unsigned Opc = Inst.getOpcode(); 3814 const MCInstrDesc &Desc = MII.get(Opc); 3815 3816 if ((Desc.TSFlags & MIMGFlags) == 0) 3817 return true; 3818 3819 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3820 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3821 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3822 3823 if (!BaseOpcode->MSAA) 3824 return true; 3825 3826 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3827 assert(DimIdx != -1); 3828 3829 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3830 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3831 3832 return DimInfo->MSAA; 3833 } 3834 3835 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3836 { 3837 switch (Opcode) { 3838 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3839 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3840 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3841 return true; 3842 default: 3843 return false; 3844 } 3845 } 3846 3847 // movrels* opcodes should only allow VGPRS as src0. 3848 // This is specified in .td description for vop1/vop3, 3849 // but sdwa is handled differently. See isSDWAOperand. 
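// Illustrative example (register choices assumed): an SDWA form such as
//   v_movrels_b32_sdwa v0, v1
// passes this check, while using an SGPR or a constant as src0 is rejected
// below with "source operand must be a VGPR".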
3850 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3851 const OperandVector &Operands) { 3852 3853 const unsigned Opc = Inst.getOpcode(); 3854 const MCInstrDesc &Desc = MII.get(Opc); 3855 3856 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3857 return true; 3858 3859 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3860 assert(Src0Idx != -1); 3861 3862 SMLoc ErrLoc; 3863 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3864 if (Src0.isReg()) { 3865 auto Reg = mc2PseudoReg(Src0.getReg()); 3866 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3867 if (!isSGPR(Reg, TRI)) 3868 return true; 3869 ErrLoc = getRegLoc(Reg, Operands); 3870 } else { 3871 ErrLoc = getConstLoc(Operands); 3872 } 3873 3874 Error(ErrLoc, "source operand must be a VGPR"); 3875 return false; 3876 } 3877 3878 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3879 const OperandVector &Operands) { 3880 3881 const unsigned Opc = Inst.getOpcode(); 3882 3883 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3884 return true; 3885 3886 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3887 assert(Src0Idx != -1); 3888 3889 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3890 if (!Src0.isReg()) 3891 return true; 3892 3893 auto Reg = mc2PseudoReg(Src0.getReg()); 3894 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3895 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3896 Error(getRegLoc(Reg, Operands), 3897 "source operand must be either a VGPR or an inline constant"); 3898 return false; 3899 } 3900 3901 return true; 3902 } 3903 3904 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst, 3905 const OperandVector &Operands) { 3906 unsigned Opcode = Inst.getOpcode(); 3907 const MCInstrDesc &Desc = MII.get(Opcode); 3908 3909 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) || 3910 !getFeatureBits()[FeatureMFMAInlineLiteralBug]) 3911 return true; 3912 3913 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2); 3914 if (Src2Idx == -1) 3915 return true; 3916 3917 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) { 3918 Error(getConstLoc(Operands), 3919 "inline constants are not allowed for this operand"); 3920 return false; 3921 } 3922 3923 return true; 3924 } 3925 3926 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3927 const OperandVector &Operands) { 3928 const unsigned Opc = Inst.getOpcode(); 3929 const MCInstrDesc &Desc = MII.get(Opc); 3930 3931 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3932 return true; 3933 3934 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3935 if (Src2Idx == -1) 3936 return true; 3937 3938 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3939 if (!Src2.isReg()) 3940 return true; 3941 3942 MCRegister Src2Reg = Src2.getReg(); 3943 MCRegister DstReg = Inst.getOperand(0).getReg(); 3944 if (Src2Reg == DstReg) 3945 return true; 3946 3947 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3948 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128) 3949 return true; 3950 3951 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3952 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3953 "source 2 operand must not partially overlap with dst"); 3954 return false; 3955 } 3956 3957 return true; 3958 } 3959 3960 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3961 switch (Inst.getOpcode()) { 3962 default: 3963 return true; 3964 case V_DIV_SCALE_F32_gfx6_gfx7: 3965 case V_DIV_SCALE_F32_vi: 3966 case 
V_DIV_SCALE_F32_gfx10: 3967 case V_DIV_SCALE_F64_gfx6_gfx7: 3968 case V_DIV_SCALE_F64_vi: 3969 case V_DIV_SCALE_F64_gfx10: 3970 break; 3971 } 3972 3973 // TODO: Check that src0 = src1 or src2. 3974 3975 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3976 AMDGPU::OpName::src2_modifiers, 3977 AMDGPU::OpName::src2_modifiers}) { 3978 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3979 .getImm() & 3980 SISrcMods::ABS) { 3981 return false; 3982 } 3983 } 3984 3985 return true; 3986 } 3987 3988 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3989 3990 const unsigned Opc = Inst.getOpcode(); 3991 const MCInstrDesc &Desc = MII.get(Opc); 3992 3993 if ((Desc.TSFlags & MIMGFlags) == 0) 3994 return true; 3995 3996 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3997 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3998 if (isCI() || isSI()) 3999 return false; 4000 } 4001 4002 return true; 4003 } 4004 4005 static bool IsRevOpcode(const unsigned Opcode) 4006 { 4007 switch (Opcode) { 4008 case AMDGPU::V_SUBREV_F32_e32: 4009 case AMDGPU::V_SUBREV_F32_e64: 4010 case AMDGPU::V_SUBREV_F32_e32_gfx10: 4011 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 4012 case AMDGPU::V_SUBREV_F32_e32_vi: 4013 case AMDGPU::V_SUBREV_F32_e64_gfx10: 4014 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 4015 case AMDGPU::V_SUBREV_F32_e64_vi: 4016 4017 case AMDGPU::V_SUBREV_CO_U32_e32: 4018 case AMDGPU::V_SUBREV_CO_U32_e64: 4019 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 4020 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 4021 4022 case AMDGPU::V_SUBBREV_U32_e32: 4023 case AMDGPU::V_SUBBREV_U32_e64: 4024 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 4025 case AMDGPU::V_SUBBREV_U32_e32_vi: 4026 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 4027 case AMDGPU::V_SUBBREV_U32_e64_vi: 4028 4029 case AMDGPU::V_SUBREV_U32_e32: 4030 case AMDGPU::V_SUBREV_U32_e64: 4031 case AMDGPU::V_SUBREV_U32_e32_gfx9: 4032 case AMDGPU::V_SUBREV_U32_e32_vi: 4033 case AMDGPU::V_SUBREV_U32_e64_gfx9: 4034 case AMDGPU::V_SUBREV_U32_e64_vi: 4035 4036 case AMDGPU::V_SUBREV_F16_e32: 4037 case AMDGPU::V_SUBREV_F16_e64: 4038 case AMDGPU::V_SUBREV_F16_e32_gfx10: 4039 case AMDGPU::V_SUBREV_F16_e32_vi: 4040 case AMDGPU::V_SUBREV_F16_e64_gfx10: 4041 case AMDGPU::V_SUBREV_F16_e64_vi: 4042 4043 case AMDGPU::V_SUBREV_U16_e32: 4044 case AMDGPU::V_SUBREV_U16_e64: 4045 case AMDGPU::V_SUBREV_U16_e32_vi: 4046 case AMDGPU::V_SUBREV_U16_e64_vi: 4047 4048 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 4049 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 4050 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 4051 4052 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 4053 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 4054 4055 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 4056 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 4057 4058 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 4059 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 4060 4061 case AMDGPU::V_LSHRREV_B32_e32: 4062 case AMDGPU::V_LSHRREV_B32_e64: 4063 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 4064 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 4065 case AMDGPU::V_LSHRREV_B32_e32_vi: 4066 case AMDGPU::V_LSHRREV_B32_e64_vi: 4067 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 4068 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 4069 4070 case AMDGPU::V_ASHRREV_I32_e32: 4071 case AMDGPU::V_ASHRREV_I32_e64: 4072 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 4073 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 4074 case AMDGPU::V_ASHRREV_I32_e32_vi: 4075 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 4076 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 4077 case AMDGPU::V_ASHRREV_I32_e64_vi: 4078 4079 case 
AMDGPU::V_LSHLREV_B32_e32: 4080 case AMDGPU::V_LSHLREV_B32_e64: 4081 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 4082 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 4083 case AMDGPU::V_LSHLREV_B32_e32_vi: 4084 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 4085 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 4086 case AMDGPU::V_LSHLREV_B32_e64_vi: 4087 4088 case AMDGPU::V_LSHLREV_B16_e32: 4089 case AMDGPU::V_LSHLREV_B16_e64: 4090 case AMDGPU::V_LSHLREV_B16_e32_vi: 4091 case AMDGPU::V_LSHLREV_B16_e64_vi: 4092 case AMDGPU::V_LSHLREV_B16_gfx10: 4093 4094 case AMDGPU::V_LSHRREV_B16_e32: 4095 case AMDGPU::V_LSHRREV_B16_e64: 4096 case AMDGPU::V_LSHRREV_B16_e32_vi: 4097 case AMDGPU::V_LSHRREV_B16_e64_vi: 4098 case AMDGPU::V_LSHRREV_B16_gfx10: 4099 4100 case AMDGPU::V_ASHRREV_I16_e32: 4101 case AMDGPU::V_ASHRREV_I16_e64: 4102 case AMDGPU::V_ASHRREV_I16_e32_vi: 4103 case AMDGPU::V_ASHRREV_I16_e64_vi: 4104 case AMDGPU::V_ASHRREV_I16_gfx10: 4105 4106 case AMDGPU::V_LSHLREV_B64_e64: 4107 case AMDGPU::V_LSHLREV_B64_gfx10: 4108 case AMDGPU::V_LSHLREV_B64_vi: 4109 4110 case AMDGPU::V_LSHRREV_B64_e64: 4111 case AMDGPU::V_LSHRREV_B64_gfx10: 4112 case AMDGPU::V_LSHRREV_B64_vi: 4113 4114 case AMDGPU::V_ASHRREV_I64_e64: 4115 case AMDGPU::V_ASHRREV_I64_gfx10: 4116 case AMDGPU::V_ASHRREV_I64_vi: 4117 4118 case AMDGPU::V_PK_LSHLREV_B16: 4119 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 4120 case AMDGPU::V_PK_LSHLREV_B16_vi: 4121 4122 case AMDGPU::V_PK_LSHRREV_B16: 4123 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 4124 case AMDGPU::V_PK_LSHRREV_B16_vi: 4125 case AMDGPU::V_PK_ASHRREV_I16: 4126 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4127 case AMDGPU::V_PK_ASHRREV_I16_vi: 4128 return true; 4129 default: 4130 return false; 4131 } 4132 } 4133 4134 std::optional<StringRef> 4135 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4136 4137 using namespace SIInstrFlags; 4138 const unsigned Opcode = Inst.getOpcode(); 4139 const MCInstrDesc &Desc = MII.get(Opcode); 4140 4141 // lds_direct register is defined so that it can be used 4142 // with 9-bit operands only. Ignore encodings which do not accept these. 
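  // Illustrative examples (mnemonics assumed, and still subject to the GPU
  // checks below): "v_mov_b32 v0, lds_direct" places lds_direct in src0 and
  // is accepted, whereas passing lds_direct as the second source of a VOP2
  // instruction is diagnosed with "lds_direct may be used as src0 only".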
4143 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4144 if ((Desc.TSFlags & Enc) == 0) 4145 return std::nullopt; 4146 4147 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4148 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4149 if (SrcIdx == -1) 4150 break; 4151 const auto &Src = Inst.getOperand(SrcIdx); 4152 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4153 4154 if (isGFX90A() || isGFX11Plus()) 4155 return StringRef("lds_direct is not supported on this GPU"); 4156 4157 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4158 return StringRef("lds_direct cannot be used with this instruction"); 4159 4160 if (SrcName != OpName::src0) 4161 return StringRef("lds_direct may be used as src0 only"); 4162 } 4163 } 4164 4165 return std::nullopt; 4166 } 4167 4168 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4169 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4170 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4171 if (Op.isFlatOffset()) 4172 return Op.getStartLoc(); 4173 } 4174 return getLoc(); 4175 } 4176 4177 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst, 4178 const OperandVector &Operands) { 4179 auto Opcode = Inst.getOpcode(); 4180 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4181 if (OpNum == -1) 4182 return true; 4183 4184 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4185 if ((TSFlags & SIInstrFlags::FLAT)) 4186 return validateFlatOffset(Inst, Operands); 4187 4188 if ((TSFlags & SIInstrFlags::SMRD)) 4189 return validateSMEMOffset(Inst, Operands); 4190 4191 const auto &Op = Inst.getOperand(OpNum); 4192 if (isGFX12Plus() && 4193 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 4194 const unsigned OffsetSize = 24; 4195 if (!isIntN(OffsetSize, Op.getImm())) { 4196 Error(getFlatOffsetLoc(Operands), 4197 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4198 return false; 4199 } 4200 } else { 4201 const unsigned OffsetSize = 16; 4202 if (!isUIntN(OffsetSize, Op.getImm())) { 4203 Error(getFlatOffsetLoc(Operands), 4204 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4205 return false; 4206 } 4207 } 4208 return true; 4209 } 4210 4211 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4212 const OperandVector &Operands) { 4213 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4214 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4215 return true; 4216 4217 auto Opcode = Inst.getOpcode(); 4218 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4219 assert(OpNum != -1); 4220 4221 const auto &Op = Inst.getOperand(OpNum); 4222 if (!hasFlatOffsets() && Op.getImm() != 0) { 4223 Error(getFlatOffsetLoc(Operands), 4224 "flat offset modifier is not supported on this GPU"); 4225 return false; 4226 } 4227 4228 // For pre-GFX12 FLAT instructions the offset must be positive; 4229 // MSB is ignored and forced to zero. 4230 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI()); 4231 bool AllowNegative = 4232 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) || 4233 isGFX12Plus(); 4234 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { 4235 Error(getFlatOffsetLoc(Operands), 4236 Twine("expected a ") + 4237 (AllowNegative ? 
Twine(OffsetSize) + "-bit signed offset" 4238 : Twine(OffsetSize - 1) + "-bit unsigned offset")); 4239 return false; 4240 } 4241 4242 return true; 4243 } 4244 4245 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4246 // Start with second operand because SMEM Offset cannot be dst or src0. 4247 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4248 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4249 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod()) 4250 return Op.getStartLoc(); 4251 } 4252 return getLoc(); 4253 } 4254 4255 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4256 const OperandVector &Operands) { 4257 if (isCI() || isSI()) 4258 return true; 4259 4260 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4261 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4262 return true; 4263 4264 auto Opcode = Inst.getOpcode(); 4265 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4266 if (OpNum == -1) 4267 return true; 4268 4269 const auto &Op = Inst.getOperand(OpNum); 4270 if (!Op.isImm()) 4271 return true; 4272 4273 uint64_t Offset = Op.getImm(); 4274 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4275 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4276 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4277 return true; 4278 4279 Error(getSMEMOffsetLoc(Operands), 4280 isGFX12Plus() ? "expected a 24-bit signed offset" 4281 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" 4282 : "expected a 21-bit signed offset"); 4283 4284 return false; 4285 } 4286 4287 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4288 unsigned Opcode = Inst.getOpcode(); 4289 const MCInstrDesc &Desc = MII.get(Opcode); 4290 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4291 return true; 4292 4293 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4294 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4295 4296 const int OpIndices[] = { Src0Idx, Src1Idx }; 4297 4298 unsigned NumExprs = 0; 4299 unsigned NumLiterals = 0; 4300 uint32_t LiteralValue; 4301 4302 for (int OpIdx : OpIndices) { 4303 if (OpIdx == -1) break; 4304 4305 const MCOperand &MO = Inst.getOperand(OpIdx); 4306 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4307 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4308 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4309 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4310 if (NumLiterals == 0 || LiteralValue != Value) { 4311 LiteralValue = Value; 4312 ++NumLiterals; 4313 } 4314 } else if (MO.isExpr()) { 4315 ++NumExprs; 4316 } 4317 } 4318 } 4319 4320 return NumLiterals + NumExprs <= 1; 4321 } 4322 4323 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4324 const unsigned Opc = Inst.getOpcode(); 4325 if (isPermlane16(Opc)) { 4326 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4327 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4328 4329 if (OpSel & ~3) 4330 return false; 4331 } 4332 4333 uint64_t TSFlags = MII.get(Opc).TSFlags; 4334 4335 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { 4336 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4337 if (OpSelIdx != -1) { 4338 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4339 return false; 4340 } 4341 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4342 if (OpSelHiIdx != -1) { 4343 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4344 return 
false; 4345 } 4346 } 4347 4348 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). 4349 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && 4350 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { 4351 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4352 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4353 if (OpSel & 3) 4354 return false; 4355 } 4356 4357 return true; 4358 } 4359 4360 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4361 const OperandVector &Operands) { 4362 const unsigned Opc = Inst.getOpcode(); 4363 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4364 if (DppCtrlIdx >= 0) { 4365 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4366 4367 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) && 4368 AMDGPU::isDPALU_DPP(MII.get(Opc))) { 4369 // DP ALU DPP is supported for row_newbcast only on GFX9* 4370 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4371 Error(S, "DP ALU dpp only supports row_newbcast"); 4372 return false; 4373 } 4374 } 4375 4376 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8); 4377 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0; 4378 4379 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) { 4380 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 4381 if (Src1Idx >= 0) { 4382 const MCOperand &Src1 = Inst.getOperand(Src1Idx); 4383 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4384 if (Src1.isImm() || 4385 (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) { 4386 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]); 4387 Error(Op.getStartLoc(), "invalid operand for instruction"); 4388 return false; 4389 } 4390 } 4391 } 4392 4393 return true; 4394 } 4395 4396 // Check if VCC register matches wavefront size 4397 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4398 auto FB = getFeatureBits(); 4399 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4400 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4401 } 4402 4403 // One unique literal can be used. 
VOP3 literal is only allowed in GFX10+ 4404 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4405 const OperandVector &Operands) { 4406 unsigned Opcode = Inst.getOpcode(); 4407 const MCInstrDesc &Desc = MII.get(Opcode); 4408 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1; 4409 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4410 !HasMandatoryLiteral && !isVOPD(Opcode)) 4411 return true; 4412 4413 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral); 4414 4415 unsigned NumExprs = 0; 4416 unsigned NumLiterals = 0; 4417 uint32_t LiteralValue; 4418 4419 for (int OpIdx : OpIndices) { 4420 if (OpIdx == -1) 4421 continue; 4422 4423 const MCOperand &MO = Inst.getOperand(OpIdx); 4424 if (!MO.isImm() && !MO.isExpr()) 4425 continue; 4426 if (!isSISrcOperand(Desc, OpIdx)) 4427 continue; 4428 4429 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4430 uint64_t Value = static_cast<uint64_t>(MO.getImm()); 4431 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) && 4432 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8; 4433 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64); 4434 4435 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) { 4436 Error(getLitLoc(Operands), "invalid operand for instruction"); 4437 return false; 4438 } 4439 4440 if (IsFP64 && IsValid32Op) 4441 Value = Hi_32(Value); 4442 4443 if (NumLiterals == 0 || LiteralValue != Value) { 4444 LiteralValue = Value; 4445 ++NumLiterals; 4446 } 4447 } else if (MO.isExpr()) { 4448 ++NumExprs; 4449 } 4450 } 4451 NumLiterals += NumExprs; 4452 4453 if (!NumLiterals) 4454 return true; 4455 4456 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) { 4457 Error(getLitLoc(Operands), "literal operands are not supported"); 4458 return false; 4459 } 4460 4461 if (NumLiterals > 1) { 4462 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed"); 4463 return false; 4464 } 4465 4466 return true; 4467 } 4468 4469 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4470 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4471 const MCRegisterInfo *MRI) { 4472 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4473 if (OpIdx < 0) 4474 return -1; 4475 4476 const MCOperand &Op = Inst.getOperand(OpIdx); 4477 if (!Op.isReg()) 4478 return -1; 4479 4480 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4481 auto Reg = Sub ? Sub : Op.getReg(); 4482 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4483 return AGPR32.contains(Reg) ? 1 : 0; 4484 } 4485 4486 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4487 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4488 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4489 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4490 SIInstrFlags::DS)) == 0) 4491 return true; 4492 4493 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? 
AMDGPU::OpName::data0 4494 : AMDGPU::OpName::vdata; 4495 4496 const MCRegisterInfo *MRI = getMRI(); 4497 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4498 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4499 4500 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4501 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4502 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4503 return false; 4504 } 4505 4506 auto FB = getFeatureBits(); 4507 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4508 if (DataAreg < 0 || DstAreg < 0) 4509 return true; 4510 return DstAreg == DataAreg; 4511 } 4512 4513 return DstAreg < 1 && DataAreg < 1; 4514 } 4515 4516 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4517 auto FB = getFeatureBits(); 4518 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4519 return true; 4520 4521 const MCRegisterInfo *MRI = getMRI(); 4522 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4523 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4524 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4525 const MCOperand &Op = Inst.getOperand(I); 4526 if (!Op.isReg()) 4527 continue; 4528 4529 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4530 if (!Sub) 4531 continue; 4532 4533 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4534 return false; 4535 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4536 return false; 4537 } 4538 4539 return true; 4540 } 4541 4542 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4543 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4544 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4545 if (Op.isBLGP()) 4546 return Op.getStartLoc(); 4547 } 4548 return SMLoc(); 4549 } 4550 4551 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4552 const OperandVector &Operands) { 4553 unsigned Opc = Inst.getOpcode(); 4554 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4555 if (BlgpIdx == -1) 4556 return true; 4557 SMLoc BLGPLoc = getBLGPLoc(Operands); 4558 if (!BLGPLoc.isValid()) 4559 return true; 4560 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:"); 4561 auto FB = getFeatureBits(); 4562 bool UsesNeg = false; 4563 if (FB[AMDGPU::FeatureGFX940Insts]) { 4564 switch (Opc) { 4565 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4566 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4567 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4568 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4569 UsesNeg = true; 4570 } 4571 } 4572 4573 if (IsNeg == UsesNeg) 4574 return true; 4575 4576 Error(BLGPLoc, 4577 UsesNeg ? 
"invalid modifier: blgp is not supported" 4578 : "invalid modifier: neg is not supported"); 4579 4580 return false; 4581 } 4582 4583 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst, 4584 const OperandVector &Operands) { 4585 if (!isGFX11Plus()) 4586 return true; 4587 4588 unsigned Opc = Inst.getOpcode(); 4589 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 && 4590 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 && 4591 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 && 4592 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11) 4593 return true; 4594 4595 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst); 4596 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg()); 4597 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()); 4598 if (Reg == AMDGPU::SGPR_NULL) 4599 return true; 4600 4601 SMLoc RegLoc = getRegLoc(Reg, Operands); 4602 Error(RegLoc, "src0 must be null"); 4603 return false; 4604 } 4605 4606 bool AMDGPUAsmParser::validateDS(const MCInst &Inst, 4607 const OperandVector &Operands) { 4608 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4609 if ((TSFlags & SIInstrFlags::DS) == 0) 4610 return true; 4611 if (TSFlags & SIInstrFlags::GWS) 4612 return validateGWS(Inst, Operands); 4613 // Only validate GDS for non-GWS instructions. 4614 if (hasGDS()) 4615 return true; 4616 int GDSIdx = 4617 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds); 4618 if (GDSIdx < 0) 4619 return true; 4620 unsigned GDS = Inst.getOperand(GDSIdx).getImm(); 4621 if (GDS) { 4622 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands); 4623 Error(S, "gds modifier is not supported on this GPU"); 4624 return false; 4625 } 4626 return true; 4627 } 4628 4629 // gfx90a has an undocumented limitation: 4630 // DS_GWS opcodes must use even aligned registers. 4631 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4632 const OperandVector &Operands) { 4633 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4634 return true; 4635 4636 int Opc = Inst.getOpcode(); 4637 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4638 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4639 return true; 4640 4641 const MCRegisterInfo *MRI = getMRI(); 4642 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4643 int Data0Pos = 4644 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4645 assert(Data0Pos != -1); 4646 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4647 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4648 if (RegIdx & 1) { 4649 SMLoc RegLoc = getRegLoc(Reg, Operands); 4650 Error(RegLoc, "vgpr must be even aligned"); 4651 return false; 4652 } 4653 4654 return true; 4655 } 4656 4657 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4658 const OperandVector &Operands, 4659 const SMLoc &IDLoc) { 4660 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4661 AMDGPU::OpName::cpol); 4662 if (CPolPos == -1) 4663 return true; 4664 4665 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4666 4667 if (isGFX12Plus()) 4668 return validateTHAndScopeBits(Inst, Operands, CPol); 4669 4670 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4671 if (TSFlags & SIInstrFlags::SMRD) { 4672 if (CPol && (isSI() || isCI())) { 4673 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4674 Error(S, "cache policy is not supported for SMRD instructions"); 4675 return false; 4676 } 4677 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4678 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4679 return false; 4680 } 4681 } 4682 4683 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4684 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF | 4685 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4686 SIInstrFlags::FLAT; 4687 if (!(TSFlags & AllowSCCModifier)) { 4688 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4689 StringRef CStr(S.getPointer()); 4690 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4691 Error(S, 4692 "scc modifier is not supported for this instruction on this GPU"); 4693 return false; 4694 } 4695 } 4696 4697 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4698 return true; 4699 4700 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4701 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4702 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4703 : "instruction must use glc"); 4704 return false; 4705 } 4706 } else { 4707 if (CPol & CPol::GLC) { 4708 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4709 StringRef CStr(S.getPointer()); 4710 S = SMLoc::getFromPointer( 4711 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4712 Error(S, isGFX940() ? 
"instruction must not use sc0" 4713 : "instruction must not use glc"); 4714 return false; 4715 } 4716 } 4717 4718 return true; 4719 } 4720 4721 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst, 4722 const OperandVector &Operands, 4723 const unsigned CPol) { 4724 const unsigned TH = CPol & AMDGPU::CPol::TH; 4725 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE; 4726 4727 const unsigned Opcode = Inst.getOpcode(); 4728 const MCInstrDesc &TID = MII.get(Opcode); 4729 4730 auto PrintError = [&](StringRef Msg) { 4731 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4732 Error(S, Msg); 4733 return false; 4734 }; 4735 4736 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) && 4737 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) && 4738 (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))) 4739 return PrintError("instruction must use th:TH_ATOMIC_RETURN"); 4740 4741 if (TH == 0) 4742 return true; 4743 4744 if ((TID.TSFlags & SIInstrFlags::SMRD) && 4745 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) || 4746 (TH == AMDGPU::CPol::TH_NT_HT))) 4747 return PrintError("invalid th value for SMEM instruction"); 4748 4749 if (TH == AMDGPU::CPol::TH_BYPASS) { 4750 if ((Scope != AMDGPU::CPol::SCOPE_SYS && 4751 CPol & AMDGPU::CPol::TH_REAL_BYPASS) || 4752 (Scope == AMDGPU::CPol::SCOPE_SYS && 4753 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS))) 4754 return PrintError("scope and th combination is not valid"); 4755 } 4756 4757 bool IsStore = TID.mayStore(); 4758 bool IsAtomic = 4759 TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet); 4760 4761 if (IsAtomic) { 4762 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC)) 4763 return PrintError("invalid th value for atomic instructions"); 4764 } else if (IsStore) { 4765 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE)) 4766 return PrintError("invalid th value for store instructions"); 4767 } else { 4768 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD)) 4769 return PrintError("invalid th value for load instructions"); 4770 } 4771 4772 return true; 4773 } 4774 4775 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4776 if (!isGFX11Plus()) 4777 return true; 4778 for (auto &Operand : Operands) { 4779 if (!Operand->isReg()) 4780 continue; 4781 unsigned Reg = Operand->getReg(); 4782 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4783 Error(getRegLoc(Reg, Operands), 4784 "execz and vccz are not supported on this GPU"); 4785 return false; 4786 } 4787 } 4788 return true; 4789 } 4790 4791 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, 4792 const OperandVector &Operands) { 4793 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4794 if (Desc.mayStore() && 4795 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 4796 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands); 4797 if (Loc != getInstLoc(Operands)) { 4798 Error(Loc, "TFE modifier has no meaning for store instructions"); 4799 return false; 4800 } 4801 } 4802 4803 return true; 4804 } 4805 4806 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4807 const SMLoc &IDLoc, 4808 const OperandVector &Operands) { 4809 if (auto ErrMsg = validateLdsDirect(Inst)) { 4810 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4811 return false; 4812 } 4813 if (!validateSOPLiteral(Inst)) { 4814 Error(getLitLoc(Operands), 4815 "only one unique literal operand is allowed"); 4816 return false; 4817 } 4818 if (!validateVOPLiteral(Inst, Operands)) { 4819 return false; 4820 } 4821 if (!validateConstantBusLimitations(Inst, Operands)) { 4822 return false; 4823 } 
4824 if (!validateVOPDRegBankConstraints(Inst, Operands)) { 4825 return false; 4826 } 4827 if (!validateIntClampSupported(Inst)) { 4828 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4829 "integer clamping is not supported on this GPU"); 4830 return false; 4831 } 4832 if (!validateOpSel(Inst)) { 4833 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4834 "invalid op_sel operand"); 4835 return false; 4836 } 4837 if (!validateDPP(Inst, Operands)) { 4838 return false; 4839 } 4840 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4841 if (!validateMIMGD16(Inst)) { 4842 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4843 "d16 modifier is not supported on this GPU"); 4844 return false; 4845 } 4846 if (!validateMIMGMSAA(Inst)) { 4847 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4848 "invalid dim; must be MSAA type"); 4849 return false; 4850 } 4851 if (!validateMIMGDataSize(Inst, IDLoc)) { 4852 return false; 4853 } 4854 if (!validateMIMGAddrSize(Inst, IDLoc)) 4855 return false; 4856 if (!validateMIMGAtomicDMask(Inst)) { 4857 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4858 "invalid atomic image dmask"); 4859 return false; 4860 } 4861 if (!validateMIMGGatherDMask(Inst)) { 4862 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4863 "invalid image_gather dmask: only one bit must be set"); 4864 return false; 4865 } 4866 if (!validateMovrels(Inst, Operands)) { 4867 return false; 4868 } 4869 if (!validateOffset(Inst, Operands)) { 4870 return false; 4871 } 4872 if (!validateMAIAccWrite(Inst, Operands)) { 4873 return false; 4874 } 4875 if (!validateMAISrc2(Inst, Operands)) { 4876 return false; 4877 } 4878 if (!validateMFMA(Inst, Operands)) { 4879 return false; 4880 } 4881 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4882 return false; 4883 } 4884 4885 if (!validateAGPRLdSt(Inst)) { 4886 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4887 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4888 : "invalid register class: agpr loads and stores not supported on this GPU" 4889 ); 4890 return false; 4891 } 4892 if (!validateVGPRAlign(Inst)) { 4893 Error(IDLoc, 4894 "invalid register class: vgpr tuples must be 64 bit aligned"); 4895 return false; 4896 } 4897 if (!validateDS(Inst, Operands)) { 4898 return false; 4899 } 4900 4901 if (!validateBLGP(Inst, Operands)) { 4902 return false; 4903 } 4904 4905 if (!validateDivScale(Inst)) { 4906 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4907 return false; 4908 } 4909 if (!validateWaitCnt(Inst, Operands)) { 4910 return false; 4911 } 4912 if (!validateExeczVcczOperands(Operands)) { 4913 return false; 4914 } 4915 if (!validateTFE(Inst, Operands)) { 4916 return false; 4917 } 4918 4919 return true; 4920 } 4921 4922 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4923 const FeatureBitset &FBS, 4924 unsigned VariantID = 0); 4925 4926 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4927 const FeatureBitset &AvailableFeatures, 4928 unsigned VariantID); 4929 4930 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4931 const FeatureBitset &FBS) { 4932 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4933 } 4934 4935 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4936 const FeatureBitset &FBS, 4937 ArrayRef<unsigned> Variants) { 4938 for (auto Variant : Variants) { 4939 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4940 return true; 4941 } 4942 4943 return false; 4944 } 4945 4946 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4947 const SMLoc &IDLoc) { 4948 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits()); 4949 4950 // Check if requested instruction variant is supported. 4951 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4952 return false; 4953 4954 // This instruction is not supported. 4955 // Clear any other pending errors because they are no longer relevant. 4956 getParser().clearPendingErrors(); 4957 4958 // Requested instruction variant is not supported. 4959 // Check if any other variants are supported. 4960 StringRef VariantName = getMatchedVariantName(); 4961 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4962 return Error(IDLoc, 4963 Twine(VariantName, 4964 " variant of this instruction is not supported")); 4965 } 4966 4967 // Check if this instruction may be used with a different wavesize. 4968 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && 4969 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { 4970 4971 FeatureBitset FeaturesWS32 = getFeatureBits(); 4972 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64) 4973 .flip(AMDGPU::FeatureWavefrontSize32); 4974 FeatureBitset AvailableFeaturesWS32 = 4975 ComputeAvailableFeatures(FeaturesWS32); 4976 4977 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants())) 4978 return Error(IDLoc, "instruction requires wavesize=32"); 4979 } 4980 4981 // Finally check if this instruction is supported on any other GPU. 4982 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4983 return Error(IDLoc, "instruction not supported on this GPU"); 4984 } 4985 4986 // Instruction not supported on any GPU. Probably a typo. 
4987 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4988 return Error(IDLoc, "invalid instruction" + Suggestion); 4989 } 4990 4991 static bool isInvalidVOPDY(const OperandVector &Operands, 4992 uint64_t InvalidOprIdx) { 4993 assert(InvalidOprIdx < Operands.size()); 4994 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]); 4995 if (Op.isToken() && InvalidOprIdx > 1) { 4996 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]); 4997 return PrevOp.isToken() && PrevOp.getToken() == "::"; 4998 } 4999 return false; 5000 } 5001 5002 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 5003 OperandVector &Operands, 5004 MCStreamer &Out, 5005 uint64_t &ErrorInfo, 5006 bool MatchingInlineAsm) { 5007 MCInst Inst; 5008 unsigned Result = Match_Success; 5009 for (auto Variant : getMatchedVariants()) { 5010 uint64_t EI; 5011 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 5012 Variant); 5013 // We order match statuses from least to most specific. We use most specific 5014 // status as resulting 5015 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 5016 if ((R == Match_Success) || 5017 (R == Match_PreferE32) || 5018 (R == Match_MissingFeature && Result != Match_PreferE32) || 5019 (R == Match_InvalidOperand && Result != Match_MissingFeature 5020 && Result != Match_PreferE32) || 5021 (R == Match_MnemonicFail && Result != Match_InvalidOperand 5022 && Result != Match_MissingFeature 5023 && Result != Match_PreferE32)) { 5024 Result = R; 5025 ErrorInfo = EI; 5026 } 5027 if (R == Match_Success) 5028 break; 5029 } 5030 5031 if (Result == Match_Success) { 5032 if (!validateInstruction(Inst, IDLoc, Operands)) { 5033 return true; 5034 } 5035 Inst.setLoc(IDLoc); 5036 Out.emitInstruction(Inst, getSTI()); 5037 return false; 5038 } 5039 5040 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5041 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 5042 return true; 5043 } 5044 5045 switch (Result) { 5046 default: break; 5047 case Match_MissingFeature: 5048 // It has been verified that the specified instruction 5049 // mnemonic is valid. A match was found but it requires 5050 // features which are not supported on this GPU. 
5051 return Error(IDLoc, "operands are not valid for this GPU or mode"); 5052 5053 case Match_InvalidOperand: { 5054 SMLoc ErrorLoc = IDLoc; 5055 if (ErrorInfo != ~0ULL) { 5056 if (ErrorInfo >= Operands.size()) { 5057 return Error(IDLoc, "too few operands for instruction"); 5058 } 5059 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 5060 if (ErrorLoc == SMLoc()) 5061 ErrorLoc = IDLoc; 5062 5063 if (isInvalidVOPDY(Operands, ErrorInfo)) 5064 return Error(ErrorLoc, "invalid VOPDY instruction"); 5065 } 5066 return Error(ErrorLoc, "invalid operand for instruction"); 5067 } 5068 5069 case Match_PreferE32: 5070 return Error(IDLoc, "internal error: instruction without _e64 suffix " 5071 "should be encoded as e32"); 5072 case Match_MnemonicFail: 5073 llvm_unreachable("Invalid instructions should have been handled already"); 5074 } 5075 llvm_unreachable("Implement any new match types added!"); 5076 } 5077 5078 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 5079 int64_t Tmp = -1; 5080 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 5081 return true; 5082 } 5083 if (getParser().parseAbsoluteExpression(Tmp)) { 5084 return true; 5085 } 5086 Ret = static_cast<uint32_t>(Tmp); 5087 return false; 5088 } 5089 5090 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 5091 uint32_t &Minor) { 5092 if (ParseAsAbsoluteExpression(Major)) 5093 return TokError("invalid major version"); 5094 5095 if (!trySkipToken(AsmToken::Comma)) 5096 return TokError("minor version number required, comma expected"); 5097 5098 if (ParseAsAbsoluteExpression(Minor)) 5099 return TokError("invalid minor version"); 5100 5101 return false; 5102 } 5103 5104 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 5105 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 5106 return TokError("directive only supported for amdgcn architecture"); 5107 5108 std::string TargetIDDirective; 5109 SMLoc TargetStart = getTok().getLoc(); 5110 if (getParser().parseEscapedString(TargetIDDirective)) 5111 return true; 5112 5113 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 5114 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5115 return getParser().Error(TargetRange.Start, 5116 (Twine(".amdgcn_target directive's target id ") + 5117 Twine(TargetIDDirective) + 5118 Twine(" does not match the specified target id ") + 5119 Twine(getTargetStreamer().getTargetID()->toString())).str()); 5120 5121 return false; 5122 } 5123 5124 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 5125 return Error(Range.Start, "value out of range", Range); 5126 } 5127 5128 bool AMDGPUAsmParser::calculateGPRBlocks( 5129 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 5130 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32, 5131 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR, 5132 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 5133 // TODO(scott.linder): These calculations are duplicated from 5134 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
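  // Rough sketch of the flow (details vary per target): start from the
  // .amdhsa_next_free_vgpr/.amdhsa_next_free_sgpr values, add any extra SGPRs
  // implied by VCC, flat scratch and XNACK use (pre-gfx10 only), range-check
  // against the addressable limits, then round the totals up to the
  // granulated block counts that are encoded in the kernel descriptor.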
5135 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 5136 5137 unsigned NumVGPRs = NextFreeVGPR; 5138 unsigned NumSGPRs = NextFreeSGPR; 5139 5140 if (Version.Major >= 10) 5141 NumSGPRs = 0; 5142 else { 5143 unsigned MaxAddressableNumSGPRs = 5144 IsaInfo::getAddressableNumSGPRs(&getSTI()); 5145 5146 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 5147 NumSGPRs > MaxAddressableNumSGPRs) 5148 return OutOfRangeError(SGPRRange); 5149 5150 NumSGPRs += 5151 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 5152 5153 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 5154 NumSGPRs > MaxAddressableNumSGPRs) 5155 return OutOfRangeError(SGPRRange); 5156 5157 if (Features.test(FeatureSGPRInitBug)) 5158 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 5159 } 5160 5161 VGPRBlocks = 5162 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 5163 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 5164 5165 return false; 5166 } 5167 5168 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 5169 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 5170 return TokError("directive only supported for amdgcn architecture"); 5171 5172 if (!isHsaAbi(getSTI())) 5173 return TokError("directive only supported for amdhsa OS"); 5174 5175 StringRef KernelName; 5176 if (getParser().parseIdentifier(KernelName)) 5177 return true; 5178 5179 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 5180 5181 StringSet<> Seen; 5182 5183 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 5184 5185 SMRange VGPRRange; 5186 uint64_t NextFreeVGPR = 0; 5187 uint64_t AccumOffset = 0; 5188 uint64_t SharedVGPRCount = 0; 5189 uint64_t PreloadLength = 0; 5190 uint64_t PreloadOffset = 0; 5191 SMRange SGPRRange; 5192 uint64_t NextFreeSGPR = 0; 5193 5194 // Count the number of user SGPRs implied from the enabled feature bits. 5195 unsigned ImpliedUserSGPRCount = 0; 5196 5197 // Track if the asm explicitly contains the directive for the user SGPR 5198 // count. 
5199 std::optional<unsigned> ExplicitUserSGPRCount; 5200 bool ReserveVCC = true; 5201 bool ReserveFlatScr = true; 5202 std::optional<bool> EnableWavefrontSize32; 5203 5204 while (true) { 5205 while (trySkipToken(AsmToken::EndOfStatement)); 5206 5207 StringRef ID; 5208 SMRange IDRange = getTok().getLocRange(); 5209 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 5210 return true; 5211 5212 if (ID == ".end_amdhsa_kernel") 5213 break; 5214 5215 if (!Seen.insert(ID).second) 5216 return TokError(".amdhsa_ directives cannot be repeated"); 5217 5218 SMLoc ValStart = getLoc(); 5219 int64_t IVal; 5220 if (getParser().parseAbsoluteExpression(IVal)) 5221 return true; 5222 SMLoc ValEnd = getLoc(); 5223 SMRange ValRange = SMRange(ValStart, ValEnd); 5224 5225 if (IVal < 0) 5226 return OutOfRangeError(ValRange); 5227 5228 uint64_t Val = IVal; 5229 5230 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 5231 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 5232 return OutOfRangeError(RANGE); \ 5233 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 5234 5235 if (ID == ".amdhsa_group_segment_fixed_size") { 5236 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 5237 return OutOfRangeError(ValRange); 5238 KD.group_segment_fixed_size = Val; 5239 } else if (ID == ".amdhsa_private_segment_fixed_size") { 5240 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 5241 return OutOfRangeError(ValRange); 5242 KD.private_segment_fixed_size = Val; 5243 } else if (ID == ".amdhsa_kernarg_size") { 5244 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 5245 return OutOfRangeError(ValRange); 5246 KD.kernarg_size = Val; 5247 } else if (ID == ".amdhsa_user_sgpr_count") { 5248 ExplicitUserSGPRCount = Val; 5249 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 5250 if (hasArchitectedFlatScratch()) 5251 return Error(IDRange.Start, 5252 "directive is not supported with architected flat scratch", 5253 IDRange); 5254 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5255 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 5256 Val, ValRange); 5257 if (Val) 5258 ImpliedUserSGPRCount += 4; 5259 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") { 5260 if (!hasKernargPreload()) 5261 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5262 5263 if (Val > getMaxNumUserSGPRs()) 5264 return OutOfRangeError(ValRange); 5265 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val, 5266 ValRange); 5267 if (Val) { 5268 ImpliedUserSGPRCount += Val; 5269 PreloadLength = Val; 5270 } 5271 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") { 5272 if (!hasKernargPreload()) 5273 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5274 5275 if (Val >= 1024) 5276 return OutOfRangeError(ValRange); 5277 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val, 5278 ValRange); 5279 if (Val) 5280 PreloadOffset = Val; 5281 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 5282 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5283 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 5284 ValRange); 5285 if (Val) 5286 ImpliedUserSGPRCount += 2; 5287 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 5288 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5289 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 5290 ValRange); 5291 if (Val) 5292 ImpliedUserSGPRCount += 2; 5293 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 5294 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5295 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 5296 
Val, ValRange); 5297 if (Val) 5298 ImpliedUserSGPRCount += 2; 5299 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 5300 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5301 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 5302 ValRange); 5303 if (Val) 5304 ImpliedUserSGPRCount += 2; 5305 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 5306 if (hasArchitectedFlatScratch()) 5307 return Error(IDRange.Start, 5308 "directive is not supported with architected flat scratch", 5309 IDRange); 5310 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5311 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 5312 ValRange); 5313 if (Val) 5314 ImpliedUserSGPRCount += 2; 5315 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 5316 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5317 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 5318 Val, ValRange); 5319 if (Val) 5320 ImpliedUserSGPRCount += 1; 5321 } else if (ID == ".amdhsa_wavefront_size32") { 5322 if (IVersion.Major < 10) 5323 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5324 EnableWavefrontSize32 = Val; 5325 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5326 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5327 Val, ValRange); 5328 } else if (ID == ".amdhsa_uses_dynamic_stack") { 5329 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5330 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange); 5331 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5332 if (hasArchitectedFlatScratch()) 5333 return Error(IDRange.Start, 5334 "directive is not supported with architected flat scratch", 5335 IDRange); 5336 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5337 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5338 } else if (ID == ".amdhsa_enable_private_segment") { 5339 if (!hasArchitectedFlatScratch()) 5340 return Error( 5341 IDRange.Start, 5342 "directive is not supported without architected flat scratch", 5343 IDRange); 5344 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5345 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5346 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5347 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5348 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5349 ValRange); 5350 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5351 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5352 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5353 ValRange); 5354 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5355 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5356 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5357 ValRange); 5358 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5359 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5360 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5361 ValRange); 5362 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5363 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5364 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5365 ValRange); 5366 } else if (ID == ".amdhsa_next_free_vgpr") { 5367 VGPRRange = ValRange; 5368 NextFreeVGPR = Val; 5369 } else if (ID == ".amdhsa_next_free_sgpr") { 5370 SGPRRange = ValRange; 5371 NextFreeSGPR = Val; 5372 } else if (ID == ".amdhsa_accum_offset") { 5373 if (!isGFX90A()) 5374 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5375 AccumOffset = Val; 5376 } else if (ID == ".amdhsa_reserve_vcc") { 5377 if (!isUInt<1>(Val)) 5378 return OutOfRangeError(ValRange); 5379 ReserveVCC = Val; 5380 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5381 if (IVersion.Major < 7) 5382 return 
Error(IDRange.Start, "directive requires gfx7+", IDRange); 5383 if (hasArchitectedFlatScratch()) 5384 return Error(IDRange.Start, 5385 "directive is not supported with architected flat scratch", 5386 IDRange); 5387 if (!isUInt<1>(Val)) 5388 return OutOfRangeError(ValRange); 5389 ReserveFlatScr = Val; 5390 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5391 if (IVersion.Major < 8) 5392 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5393 if (!isUInt<1>(Val)) 5394 return OutOfRangeError(ValRange); 5395 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5396 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5397 IDRange); 5398 } else if (ID == ".amdhsa_float_round_mode_32") { 5399 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5400 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5401 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5402 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5403 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5404 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5405 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5406 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5407 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5408 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5409 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5410 ValRange); 5411 } else if (ID == ".amdhsa_dx10_clamp") { 5412 if (IVersion.Major >= 12) 5413 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 5414 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5415 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val, 5416 ValRange); 5417 } else if (ID == ".amdhsa_ieee_mode") { 5418 if (IVersion.Major >= 12) 5419 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 5420 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5421 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val, 5422 ValRange); 5423 } else if (ID == ".amdhsa_fp16_overflow") { 5424 if (IVersion.Major < 9) 5425 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5426 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val, 5427 ValRange); 5428 } else if (ID == ".amdhsa_tg_split") { 5429 if (!isGFX90A()) 5430 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5431 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5432 ValRange); 5433 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5434 if (IVersion.Major < 10) 5435 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5436 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val, 5437 ValRange); 5438 } else if (ID == ".amdhsa_memory_ordered") { 5439 if (IVersion.Major < 10) 5440 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5441 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val, 5442 ValRange); 5443 } else if (ID == ".amdhsa_forward_progress") { 5444 if (IVersion.Major < 10) 5445 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5446 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val, 5447 ValRange); 5448 } else if (ID == ".amdhsa_shared_vgpr_count") { 5449 if (IVersion.Major < 10 || IVersion.Major >= 12) 5450 return Error(IDRange.Start, "directive requires gfx10 or gfx11", 5451 IDRange); 5452 SharedVGPRCount = Val; 5453 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5454 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val, 5455 ValRange); 5456 } else if (ID == 
".amdhsa_exception_fp_ieee_invalid_op") { 5457 PARSE_BITS_ENTRY( 5458 KD.compute_pgm_rsrc2, 5459 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5460 ValRange); 5461 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5462 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5463 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5464 Val, ValRange); 5465 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5466 PARSE_BITS_ENTRY( 5467 KD.compute_pgm_rsrc2, 5468 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5469 ValRange); 5470 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5471 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5472 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5473 Val, ValRange); 5474 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5475 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5476 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5477 Val, ValRange); 5478 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5479 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5480 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5481 Val, ValRange); 5482 } else if (ID == ".amdhsa_exception_int_div_zero") { 5483 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5484 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5485 Val, ValRange); 5486 } else if (ID == ".amdhsa_round_robin_scheduling") { 5487 if (IVersion.Major < 12) 5488 return Error(IDRange.Start, "directive requires gfx12+", IDRange); 5489 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5490 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val, 5491 ValRange); 5492 } else { 5493 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5494 } 5495 5496 #undef PARSE_BITS_ENTRY 5497 } 5498 5499 if (!Seen.contains(".amdhsa_next_free_vgpr")) 5500 return TokError(".amdhsa_next_free_vgpr directive is required"); 5501 5502 if (!Seen.contains(".amdhsa_next_free_sgpr")) 5503 return TokError(".amdhsa_next_free_sgpr directive is required"); 5504 5505 unsigned VGPRBlocks; 5506 unsigned SGPRBlocks; 5507 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5508 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5509 EnableWavefrontSize32, NextFreeVGPR, 5510 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5511 SGPRBlocks)) 5512 return true; 5513 5514 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5515 VGPRBlocks)) 5516 return OutOfRangeError(VGPRRange); 5517 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5518 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5519 5520 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5521 SGPRBlocks)) 5522 return OutOfRangeError(SGPRRange); 5523 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5524 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5525 SGPRBlocks); 5526 5527 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5528 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 5529 "enabled user SGPRs"); 5530 5531 unsigned UserSGPRCount = 5532 ExplicitUserSGPRCount ? 
*ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5533 5534 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5535 return TokError("too many user SGPRs enabled"); 5536 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5537 UserSGPRCount); 5538 5539 if (PreloadLength && KD.kernarg_size && 5540 (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size)) 5541 return TokError("Kernarg preload length + offset is larger than the " 5542 "kernarg segment size"); 5543 5544 if (isGFX90A()) { 5545 if (!Seen.contains(".amdhsa_accum_offset")) 5546 return TokError(".amdhsa_accum_offset directive is required"); 5547 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5548 return TokError("accum_offset should be in range [4..256] in " 5549 "increments of 4"); 5550 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5551 return TokError("accum_offset exceeds total VGPR allocation"); 5552 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5553 (AccumOffset / 4 - 1)); 5554 } 5555 5556 if (IVersion.Major >= 10 && IVersion.Major < 12) { 5557 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS 5558 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { 5559 return TokError("shared_vgpr_count directive not valid on " 5560 "wavefront size 32"); 5561 } 5562 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5563 return TokError("shared_vgpr_count*2 + " 5564 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5565 "exceed 63\n"); 5566 } 5567 } 5568 5569 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5570 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5571 ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion()); 5572 return false; 5573 } 5574 5575 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5576 uint32_t Major; 5577 uint32_t Minor; 5578 5579 if (ParseDirectiveMajorMinor(Major, Minor)) 5580 return true; 5581 5582 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5583 return false; 5584 } 5585 5586 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5587 uint32_t Major; 5588 uint32_t Minor; 5589 uint32_t Stepping; 5590 StringRef VendorName; 5591 StringRef ArchName; 5592 5593 // If this directive has no arguments, then use the ISA version for the 5594 // targeted GPU. 
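  // For illustration, both of the following forms are accepted (the version
  // numbers here are examples only):
  //   .hsa_code_object_isa
  //   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"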
5595 if (isToken(AsmToken::EndOfStatement)) { 5596 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5597 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5598 ISA.Stepping, 5599 "AMD", "AMDGPU"); 5600 return false; 5601 } 5602 5603 if (ParseDirectiveMajorMinor(Major, Minor)) 5604 return true; 5605 5606 if (!trySkipToken(AsmToken::Comma)) 5607 return TokError("stepping version number required, comma expected"); 5608 5609 if (ParseAsAbsoluteExpression(Stepping)) 5610 return TokError("invalid stepping version"); 5611 5612 if (!trySkipToken(AsmToken::Comma)) 5613 return TokError("vendor name required, comma expected"); 5614 5615 if (!parseString(VendorName, "invalid vendor name")) 5616 return true; 5617 5618 if (!trySkipToken(AsmToken::Comma)) 5619 return TokError("arch name required, comma expected"); 5620 5621 if (!parseString(ArchName, "invalid arch name")) 5622 return true; 5623 5624 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5625 VendorName, ArchName); 5626 return false; 5627 } 5628 5629 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5630 amd_kernel_code_t &Header) { 5631 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5632 // assembly for backwards compatibility. 5633 if (ID == "max_scratch_backing_memory_byte_size") { 5634 Parser.eatToEndOfStatement(); 5635 return false; 5636 } 5637 5638 SmallString<40> ErrStr; 5639 raw_svector_ostream Err(ErrStr); 5640 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5641 return TokError(Err.str()); 5642 } 5643 Lex(); 5644 5645 if (ID == "enable_dx10_clamp") { 5646 if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) && 5647 isGFX12Plus()) 5648 return TokError("enable_dx10_clamp=1 is not allowed on GFX12+"); 5649 } 5650 5651 if (ID == "enable_ieee_mode") { 5652 if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) && 5653 isGFX12Plus()) 5654 return TokError("enable_ieee_mode=1 is not allowed on GFX12+"); 5655 } 5656 5657 if (ID == "enable_wavefront_size32") { 5658 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5659 if (!isGFX10Plus()) 5660 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5661 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5662 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5663 } else { 5664 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5665 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5666 } 5667 } 5668 5669 if (ID == "wavefront_size") { 5670 if (Header.wavefront_size == 5) { 5671 if (!isGFX10Plus()) 5672 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5673 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5674 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5675 } else if (Header.wavefront_size == 6) { 5676 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5677 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5678 } 5679 } 5680 5681 if (ID == "enable_wgp_mode") { 5682 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5683 !isGFX10Plus()) 5684 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5685 } 5686 5687 if (ID == "enable_mem_ordered") { 5688 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5689 !isGFX10Plus()) 5690 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5691 } 5692 5693 if (ID == "enable_fwd_progress") { 5694 if 
(G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5695 !isGFX10Plus()) 5696 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5697 } 5698 5699 return false; 5700 } 5701 5702 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5703 amd_kernel_code_t Header; 5704 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5705 5706 while (true) { 5707 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5708 // will set the current token to EndOfStatement. 5709 while(trySkipToken(AsmToken::EndOfStatement)); 5710 5711 StringRef ID; 5712 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5713 return true; 5714 5715 if (ID == ".end_amd_kernel_code_t") 5716 break; 5717 5718 if (ParseAMDKernelCodeTValue(ID, Header)) 5719 return true; 5720 } 5721 5722 getTargetStreamer().EmitAMDKernelCodeT(Header); 5723 5724 return false; 5725 } 5726 5727 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5728 StringRef KernelName; 5729 if (!parseId(KernelName, "expected symbol name")) 5730 return true; 5731 5732 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5733 ELF::STT_AMDGPU_HSA_KERNEL); 5734 5735 KernelScope.initialize(getContext()); 5736 return false; 5737 } 5738 5739 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5740 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5741 return Error(getLoc(), 5742 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5743 "architectures"); 5744 } 5745 5746 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5747 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5748 return Error(getParser().getTok().getLoc(), "target id must match options"); 5749 5750 getTargetStreamer().EmitISAVersion(); 5751 Lex(); 5752 5753 return false; 5754 } 5755 5756 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5757 assert(isHsaAbi(getSTI())); 5758 5759 std::string HSAMetadataString; 5760 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin, 5761 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString)) 5762 return true; 5763 5764 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5765 return Error(getLoc(), "invalid HSA metadata"); 5766 5767 return false; 5768 } 5769 5770 /// Common code to parse out a block of text (typically YAML) between start and 5771 /// end directives. 5772 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5773 const char *AssemblerDirectiveEnd, 5774 std::string &CollectString) { 5775 5776 raw_string_ostream CollectStream(CollectString); 5777 5778 getLexer().setSkipSpace(false); 5779 5780 bool FoundEnd = false; 5781 while (!isToken(AsmToken::Eof)) { 5782 while (isToken(AsmToken::Space)) { 5783 CollectStream << getTokenStr(); 5784 Lex(); 5785 } 5786 5787 if (trySkipId(AssemblerDirectiveEnd)) { 5788 FoundEnd = true; 5789 break; 5790 } 5791 5792 CollectStream << Parser.parseStringToEndOfStatement() 5793 << getContext().getAsmInfo()->getSeparatorString(); 5794 5795 Parser.eatToEndOfStatement(); 5796 } 5797 5798 getLexer().setSkipSpace(true); 5799 5800 if (isToken(AsmToken::Eof) && !FoundEnd) { 5801 return TokError(Twine("expected directive ") + 5802 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5803 } 5804 5805 CollectStream.flush(); 5806 return false; 5807 } 5808 5809 /// Parse the assembler directive for new MsgPack-format PAL metadata. 
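/// The metadata is collected as raw text between the begin and end directives
/// by ParseToEndDirective() and handed to the PAL metadata helper; a document
/// that fails to parse is reported as "invalid PAL metadata".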
bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  std::string String;
  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
    return true;

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");
  return false;
}

/// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (!trySkipToken(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    if (!trySkipToken(AsmToken::Comma))
      break;
  }
  return false;
}

/// ParseDirectiveAMDGPULDS
///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (getParser().parseComma())
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());

  int64_t Size;
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Alignment = 4;
  if (trySkipToken(AsmToken::Comma)) {
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
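    // The check below therefore rejects alignments of 2^31 or more; e.g. an
    // (illustrative) ".amdgpu_lds sym, 512, 0x80000000" passes the
    // power-of-two check above but is rejected as too large.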
5890 if (Alignment >= 1u << 31) 5891 return Error(AlignLoc, "alignment is too large"); 5892 } 5893 5894 if (parseEOL()) 5895 return true; 5896 5897 Symbol->redefineIfPossible(); 5898 if (!Symbol->isUndefined()) 5899 return Error(NameLoc, "invalid symbol redefinition"); 5900 5901 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5902 return false; 5903 } 5904 5905 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5906 StringRef IDVal = DirectiveID.getString(); 5907 5908 if (isHsaAbi(getSTI())) { 5909 if (IDVal == ".amdhsa_kernel") 5910 return ParseDirectiveAMDHSAKernel(); 5911 5912 // TODO: Restructure/combine with PAL metadata directive. 5913 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5914 return ParseDirectiveHSAMetadata(); 5915 } else { 5916 if (IDVal == ".hsa_code_object_version") 5917 return ParseDirectiveHSACodeObjectVersion(); 5918 5919 if (IDVal == ".hsa_code_object_isa") 5920 return ParseDirectiveHSACodeObjectISA(); 5921 5922 if (IDVal == ".amd_kernel_code_t") 5923 return ParseDirectiveAMDKernelCodeT(); 5924 5925 if (IDVal == ".amdgpu_hsa_kernel") 5926 return ParseDirectiveAMDGPUHsaKernel(); 5927 5928 if (IDVal == ".amd_amdgpu_isa") 5929 return ParseDirectiveISAVersion(); 5930 5931 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) { 5932 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) + 5933 Twine(" directive is " 5934 "not available on non-amdhsa OSes")) 5935 .str()); 5936 } 5937 } 5938 5939 if (IDVal == ".amdgcn_target") 5940 return ParseDirectiveAMDGCNTarget(); 5941 5942 if (IDVal == ".amdgpu_lds") 5943 return ParseDirectiveAMDGPULDS(); 5944 5945 if (IDVal == PALMD::AssemblerDirectiveBegin) 5946 return ParseDirectivePALMetadataBegin(); 5947 5948 if (IDVal == PALMD::AssemblerDirective) 5949 return ParseDirectivePALMetadata(); 5950 5951 return true; 5952 } 5953 5954 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5955 unsigned RegNo) { 5956 5957 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5958 return isGFX9Plus(); 5959 5960 // GFX10+ has 2 more SGPRs 104 and 105. 5961 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5962 return hasSGPR104_SGPR105(); 5963 5964 switch (RegNo) { 5965 case AMDGPU::SRC_SHARED_BASE_LO: 5966 case AMDGPU::SRC_SHARED_BASE: 5967 case AMDGPU::SRC_SHARED_LIMIT_LO: 5968 case AMDGPU::SRC_SHARED_LIMIT: 5969 case AMDGPU::SRC_PRIVATE_BASE_LO: 5970 case AMDGPU::SRC_PRIVATE_BASE: 5971 case AMDGPU::SRC_PRIVATE_LIMIT_LO: 5972 case AMDGPU::SRC_PRIVATE_LIMIT: 5973 return isGFX9Plus(); 5974 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5975 return isGFX9Plus() && !isGFX11Plus(); 5976 case AMDGPU::TBA: 5977 case AMDGPU::TBA_LO: 5978 case AMDGPU::TBA_HI: 5979 case AMDGPU::TMA: 5980 case AMDGPU::TMA_LO: 5981 case AMDGPU::TMA_HI: 5982 return !isGFX9Plus(); 5983 case AMDGPU::XNACK_MASK: 5984 case AMDGPU::XNACK_MASK_LO: 5985 case AMDGPU::XNACK_MASK_HI: 5986 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5987 case AMDGPU::SGPR_NULL: 5988 return isGFX10Plus(); 5989 default: 5990 break; 5991 } 5992 5993 if (isCI()) 5994 return true; 5995 5996 if (isSI() || isGFX10Plus()) { 5997 // No flat_scr on SI. 5998 // On GFX10Plus flat scratch is not a valid register operand and can only be 5999 // accessed with s_setreg/s_getreg. 
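    // For example (illustrative), on GFX10+ the low half is read with
    // "s_getreg_b32 s0, hwreg(HW_REG_FLAT_SCR_LO)" rather than by naming
    // flat_scratch_lo directly, which is rejected below.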
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
    return hasSGPR102_SGPR103();

  return true;
}

ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
                                          StringRef Mnemonic,
                                          OperandMode Mode) {
  ParseStatus Res = parseVOPD(Operands);
  if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
    return Res;

  // Try to parse with a custom parser
  Res = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by a custom parser, so we shouldn't continue on to the generic parsing.
  if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
    return Res;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      Res = parseReg(Operands);
      if (Res.isNoMatch())
        Error(Loc, "expected a register");
      if (!Res.isSuccess())
        return ParseStatus::Failure;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket"))
        return ParseStatus::Failure;
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return ParseStatus::Success;
  }

  return parseRegOrImm(Operands);
}

StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.ends_with("_e64_dpp")) {
    setForcedDPP(true);
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 8);
  } else if (Name.ends_with("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.ends_with("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.ends_with("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.ends_with("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}

static void applyMnemonicAliases(StringRef &Mnemonic,
                                 const FeatureBitset &Features,
                                 unsigned VariantID);

bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);

  // If the target architecture uses MnemonicAlias, call it here to parse
  // operands correctly.
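  // Note that any encoding suffix has already been stripped by
  // parseMnemonicSuffix() above; e.g. "v_add_f32_e64" arrives here as
  // "v_add_f32" with the 64-bit encoding recorded as forced.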
6109 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 6110 6111 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 6112 6113 bool IsMIMG = Name.starts_with("image_"); 6114 6115 while (!trySkipToken(AsmToken::EndOfStatement)) { 6116 OperandMode Mode = OperandMode_Default; 6117 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 6118 Mode = OperandMode_NSA; 6119 ParseStatus Res = parseOperand(Operands, Name, Mode); 6120 6121 if (!Res.isSuccess()) { 6122 checkUnsupportedInstruction(Name, NameLoc); 6123 if (!Parser.hasPendingError()) { 6124 // FIXME: use real operand location rather than the current location. 6125 StringRef Msg = Res.isFailure() ? "failed parsing operand." 6126 : "not a valid operand."; 6127 Error(getLoc(), Msg); 6128 } 6129 while (!trySkipToken(AsmToken::EndOfStatement)) { 6130 lex(); 6131 } 6132 return true; 6133 } 6134 6135 // Eat the comma or space if there is one. 6136 trySkipToken(AsmToken::Comma); 6137 } 6138 6139 return false; 6140 } 6141 6142 //===----------------------------------------------------------------------===// 6143 // Utility functions 6144 //===----------------------------------------------------------------------===// 6145 6146 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name, 6147 OperandVector &Operands) { 6148 SMLoc S = getLoc(); 6149 if (!trySkipId(Name)) 6150 return ParseStatus::NoMatch; 6151 6152 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S)); 6153 return ParseStatus::Success; 6154 } 6155 6156 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, 6157 int64_t &IntVal) { 6158 6159 if (!trySkipId(Prefix, AsmToken::Colon)) 6160 return ParseStatus::NoMatch; 6161 6162 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure; 6163 } 6164 6165 ParseStatus AMDGPUAsmParser::parseIntWithPrefix( 6166 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6167 std::function<bool(int64_t &)> ConvertResult) { 6168 SMLoc S = getLoc(); 6169 int64_t Value = 0; 6170 6171 ParseStatus Res = parseIntWithPrefix(Prefix, Value); 6172 if (!Res.isSuccess()) 6173 return Res; 6174 6175 if (ConvertResult && !ConvertResult(Value)) { 6176 Error(S, "invalid " + StringRef(Prefix) + " value."); 6177 } 6178 6179 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 6180 return ParseStatus::Success; 6181 } 6182 6183 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix( 6184 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6185 bool (*ConvertResult)(int64_t &)) { 6186 SMLoc S = getLoc(); 6187 if (!trySkipId(Prefix, AsmToken::Colon)) 6188 return ParseStatus::NoMatch; 6189 6190 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 6191 return ParseStatus::Failure; 6192 6193 unsigned Val = 0; 6194 const unsigned MaxSize = 4; 6195 6196 // FIXME: How to verify the number of elements matches the number of src 6197 // operands? 
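  // The accepted form is a bracketed list of 0/1 values, e.g. "neg:[0,1,1]",
  // which is packed below into a bitmask with bit I set iff element I is 1.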
6198 for (int I = 0; ; ++I) { 6199 int64_t Op; 6200 SMLoc Loc = getLoc(); 6201 if (!parseExpr(Op)) 6202 return ParseStatus::Failure; 6203 6204 if (Op != 0 && Op != 1) 6205 return Error(Loc, "invalid " + StringRef(Prefix) + " value."); 6206 6207 Val |= (Op << I); 6208 6209 if (trySkipToken(AsmToken::RBrac)) 6210 break; 6211 6212 if (I + 1 == MaxSize) 6213 return Error(getLoc(), "expected a closing square bracket"); 6214 6215 if (!skipToken(AsmToken::Comma, "expected a comma")) 6216 return ParseStatus::Failure; 6217 } 6218 6219 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 6220 return ParseStatus::Success; 6221 } 6222 6223 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name, 6224 OperandVector &Operands, 6225 AMDGPUOperand::ImmTy ImmTy) { 6226 int64_t Bit; 6227 SMLoc S = getLoc(); 6228 6229 if (trySkipId(Name)) { 6230 Bit = 1; 6231 } else if (trySkipId("no", Name)) { 6232 Bit = 0; 6233 } else { 6234 return ParseStatus::NoMatch; 6235 } 6236 6237 if (Name == "r128" && !hasMIMG_R128()) 6238 return Error(S, "r128 modifier is not supported on this GPU"); 6239 if (Name == "a16" && !hasA16()) 6240 return Error(S, "a16 modifier is not supported on this GPU"); 6241 6242 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 6243 ImmTy = AMDGPUOperand::ImmTyR128A16; 6244 6245 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 6246 return ParseStatus::Success; 6247 } 6248 6249 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, 6250 bool &Disabling) const { 6251 Disabling = Id.consume_front("no"); 6252 6253 if (isGFX940() && !Mnemo.starts_with("s_")) { 6254 return StringSwitch<unsigned>(Id) 6255 .Case("nt", AMDGPU::CPol::NT) 6256 .Case("sc0", AMDGPU::CPol::SC0) 6257 .Case("sc1", AMDGPU::CPol::SC1) 6258 .Default(0); 6259 } 6260 6261 return StringSwitch<unsigned>(Id) 6262 .Case("dlc", AMDGPU::CPol::DLC) 6263 .Case("glc", AMDGPU::CPol::GLC) 6264 .Case("scc", AMDGPU::CPol::SCC) 6265 .Case("slc", AMDGPU::CPol::SLC) 6266 .Default(0); 6267 } 6268 6269 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 6270 if (isGFX12Plus()) { 6271 SMLoc StringLoc = getLoc(); 6272 6273 int64_t CPolVal = 0; 6274 ParseStatus ResTH = ParseStatus::NoMatch; 6275 ParseStatus ResScope = ParseStatus::NoMatch; 6276 6277 for (;;) { 6278 if (ResTH.isNoMatch()) { 6279 int64_t TH; 6280 ResTH = parseTH(Operands, TH); 6281 if (ResTH.isFailure()) 6282 return ResTH; 6283 if (ResTH.isSuccess()) { 6284 CPolVal |= TH; 6285 continue; 6286 } 6287 } 6288 6289 if (ResScope.isNoMatch()) { 6290 int64_t Scope; 6291 ResScope = parseScope(Operands, Scope); 6292 if (ResScope.isFailure()) 6293 return ResScope; 6294 if (ResScope.isSuccess()) { 6295 CPolVal |= Scope; 6296 continue; 6297 } 6298 } 6299 6300 break; 6301 } 6302 6303 if (ResTH.isNoMatch() && ResScope.isNoMatch()) 6304 return ParseStatus::NoMatch; 6305 6306 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc, 6307 AMDGPUOperand::ImmTyCPol)); 6308 return ParseStatus::Success; 6309 } 6310 6311 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 6312 SMLoc OpLoc = getLoc(); 6313 unsigned Enabled = 0, Seen = 0; 6314 for (;;) { 6315 SMLoc S = getLoc(); 6316 bool Disabling; 6317 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling); 6318 if (!CPol) 6319 break; 6320 6321 lex(); 6322 6323 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) 6324 return Error(S, "dlc modifier is not supported on this GPU"); 6325 6326 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) 6327 return Error(S, "scc modifier is not supported 
on this GPU"); 6328 6329 if (Seen & CPol) 6330 return Error(S, "duplicate cache policy modifier"); 6331 6332 if (!Disabling) 6333 Enabled |= CPol; 6334 6335 Seen |= CPol; 6336 } 6337 6338 if (!Seen) 6339 return ParseStatus::NoMatch; 6340 6341 Operands.push_back( 6342 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol)); 6343 return ParseStatus::Success; 6344 } 6345 6346 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands, 6347 int64_t &Scope) { 6348 Scope = AMDGPU::CPol::SCOPE_CU; // default; 6349 6350 StringRef Value; 6351 SMLoc StringLoc; 6352 ParseStatus Res; 6353 6354 Res = parseStringWithPrefix("scope", Value, StringLoc); 6355 if (!Res.isSuccess()) 6356 return Res; 6357 6358 Scope = StringSwitch<int64_t>(Value) 6359 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU) 6360 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE) 6361 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV) 6362 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS) 6363 .Default(0xffffffff); 6364 6365 if (Scope == 0xffffffff) 6366 return Error(StringLoc, "invalid scope value"); 6367 6368 return ParseStatus::Success; 6369 } 6370 6371 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { 6372 TH = AMDGPU::CPol::TH_RT; // default 6373 6374 StringRef Value; 6375 SMLoc StringLoc; 6376 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc); 6377 if (!Res.isSuccess()) 6378 return Res; 6379 6380 if (Value == "TH_DEFAULT") 6381 TH = AMDGPU::CPol::TH_RT; 6382 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" || 6383 Value == "TH_LOAD_NT_WB") { 6384 return Error(StringLoc, "invalid th value"); 6385 } else if (Value.starts_with("TH_ATOMIC_")) { 6386 Value = Value.drop_front(10); 6387 TH = AMDGPU::CPol::TH_TYPE_ATOMIC; 6388 } else if (Value.starts_with("TH_LOAD_")) { 6389 Value = Value.drop_front(8); 6390 TH = AMDGPU::CPol::TH_TYPE_LOAD; 6391 } else if (Value.starts_with("TH_STORE_")) { 6392 Value = Value.drop_front(9); 6393 TH = AMDGPU::CPol::TH_TYPE_STORE; 6394 } else { 6395 return Error(StringLoc, "invalid th value"); 6396 } 6397 6398 if (Value == "BYPASS") 6399 TH |= AMDGPU::CPol::TH_REAL_BYPASS; 6400 6401 if (TH != 0) { 6402 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC) 6403 TH |= StringSwitch<int64_t>(Value) 6404 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 6405 .Case("RT", AMDGPU::CPol::TH_RT) 6406 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 6407 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT) 6408 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT | 6409 AMDGPU::CPol::TH_ATOMIC_RETURN) 6410 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE) 6411 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE | 6412 AMDGPU::CPol::TH_ATOMIC_NT) 6413 .Default(0xffffffff); 6414 else 6415 TH |= StringSwitch<int64_t>(Value) 6416 .Case("RT", AMDGPU::CPol::TH_RT) 6417 .Case("NT", AMDGPU::CPol::TH_NT) 6418 .Case("HT", AMDGPU::CPol::TH_HT) 6419 .Case("LU", AMDGPU::CPol::TH_LU) 6420 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB) 6421 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT) 6422 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT) 6423 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT) 6424 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB) 6425 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS) 6426 .Default(0xffffffff); 6427 } 6428 6429 if (TH == 0xffffffff) 6430 return Error(StringLoc, "invalid th value"); 6431 6432 return ParseStatus::Success; 6433 } 6434 6435 static void addOptionalImmOperand( 6436 MCInst& Inst, const OperandVector& Operands, 6437 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 6438 AMDGPUOperand::ImmTy ImmT, 6439 int64_t Default = 0) { 6440 
auto i = OptionalIdx.find(ImmT); 6441 if (i != OptionalIdx.end()) { 6442 unsigned Idx = i->second; 6443 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 6444 } else { 6445 Inst.addOperand(MCOperand::createImm(Default)); 6446 } 6447 } 6448 6449 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 6450 StringRef &Value, 6451 SMLoc &StringLoc) { 6452 if (!trySkipId(Prefix, AsmToken::Colon)) 6453 return ParseStatus::NoMatch; 6454 6455 StringLoc = getLoc(); 6456 return parseId(Value, "expected an identifier") ? ParseStatus::Success 6457 : ParseStatus::Failure; 6458 } 6459 6460 //===----------------------------------------------------------------------===// 6461 // MTBUF format 6462 //===----------------------------------------------------------------------===// 6463 6464 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6465 int64_t MaxVal, 6466 int64_t &Fmt) { 6467 int64_t Val; 6468 SMLoc Loc = getLoc(); 6469 6470 auto Res = parseIntWithPrefix(Pref, Val); 6471 if (Res.isFailure()) 6472 return false; 6473 if (Res.isNoMatch()) 6474 return true; 6475 6476 if (Val < 0 || Val > MaxVal) { 6477 Error(Loc, Twine("out of range ", StringRef(Pref))); 6478 return false; 6479 } 6480 6481 Fmt = Val; 6482 return true; 6483 } 6484 6485 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6486 // values to live in a joint format operand in the MCInst encoding. 6487 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6488 using namespace llvm::AMDGPU::MTBUFFormat; 6489 6490 int64_t Dfmt = DFMT_UNDEF; 6491 int64_t Nfmt = NFMT_UNDEF; 6492 6493 // dfmt and nfmt can appear in either order, and each is optional. 6494 for (int I = 0; I < 2; ++I) { 6495 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6496 return ParseStatus::Failure; 6497 6498 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) 6499 return ParseStatus::Failure; 6500 6501 // Skip optional comma between dfmt/nfmt 6502 // but guard against 2 commas following each other. 6503 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6504 !peekToken().is(AsmToken::Comma)) { 6505 trySkipToken(AsmToken::Comma); 6506 } 6507 } 6508 6509 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6510 return ParseStatus::NoMatch; 6511 6512 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6513 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6514 6515 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6516 return ParseStatus::Success; 6517 } 6518 6519 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6520 using namespace llvm::AMDGPU::MTBUFFormat; 6521 6522 int64_t Fmt = UFMT_UNDEF; 6523 6524 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6525 return ParseStatus::Failure; 6526 6527 if (Fmt == UFMT_UNDEF) 6528 return ParseStatus::NoMatch; 6529 6530 Format = Fmt; 6531 return ParseStatus::Success; 6532 } 6533 6534 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6535 int64_t &Nfmt, 6536 StringRef FormatStr, 6537 SMLoc Loc) { 6538 using namespace llvm::AMDGPU::MTBUFFormat; 6539 int64_t Format; 6540 6541 Format = getDfmt(FormatStr); 6542 if (Format != DFMT_UNDEF) { 6543 Dfmt = Format; 6544 return true; 6545 } 6546 6547 Format = getNfmt(FormatStr, getSTI()); 6548 if (Format != NFMT_UNDEF) { 6549 Nfmt = Format; 6550 return true; 6551 } 6552 6553 Error(Loc, "unsupported format"); 6554 return false; 6555 } 6556 6557 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6558 SMLoc FormatLoc, 6559 int64_t &Format) { 6560 using namespace llvm::AMDGPU::MTBUFFormat; 6561 6562 int64_t Dfmt = DFMT_UNDEF; 6563 int64_t Nfmt = NFMT_UNDEF; 6564 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6565 return ParseStatus::Failure; 6566 6567 if (trySkipToken(AsmToken::Comma)) { 6568 StringRef Str; 6569 SMLoc Loc = getLoc(); 6570 if (!parseId(Str, "expected a format string") || 6571 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) 6572 return ParseStatus::Failure; 6573 if (Dfmt == DFMT_UNDEF) 6574 return Error(Loc, "duplicate numeric format"); 6575 if (Nfmt == NFMT_UNDEF) 6576 return Error(Loc, "duplicate data format"); 6577 } 6578 6579 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6580 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6581 6582 if (isGFX10Plus()) { 6583 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6584 if (Ufmt == UFMT_UNDEF) 6585 return Error(FormatLoc, "unsupported format"); 6586 Format = Ufmt; 6587 } else { 6588 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6589 } 6590 6591 return ParseStatus::Success; 6592 } 6593 6594 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6595 SMLoc Loc, 6596 int64_t &Format) { 6597 using namespace llvm::AMDGPU::MTBUFFormat; 6598 6599 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6600 if (Id == UFMT_UNDEF) 6601 return ParseStatus::NoMatch; 6602 6603 if (!isGFX10Plus()) 6604 return Error(Loc, "unified format is not supported on this GPU"); 6605 6606 Format = Id; 6607 return ParseStatus::Success; 6608 } 6609 6610 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6611 using namespace llvm::AMDGPU::MTBUFFormat; 6612 SMLoc Loc = getLoc(); 6613 6614 if (!parseExpr(Format)) 6615 return ParseStatus::Failure; 6616 if (!isValidFormatEncoding(Format, getSTI())) 6617 return Error(Loc, "out of range format"); 6618 6619 return ParseStatus::Success; 6620 } 6621 6622 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6623 using namespace llvm::AMDGPU::MTBUFFormat; 6624 6625 if (!trySkipId("format", AsmToken::Colon)) 6626 return ParseStatus::NoMatch; 6627 6628 if (trySkipToken(AsmToken::LBrac)) { 6629 StringRef FormatStr; 6630 SMLoc Loc = getLoc(); 6631 if (!parseId(FormatStr, "expected a format string")) 6632 return ParseStatus::Failure; 6633 6634 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6635 if (Res.isNoMatch()) 6636 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6637 if (!Res.isSuccess()) 6638 return Res; 6639 6640 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6641 return ParseStatus::Failure; 6642 6643 return ParseStatus::Success; 6644 } 6645 6646 return parseNumericFormat(Format); 6647 } 6648 6649 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6650 using namespace llvm::AMDGPU::MTBUFFormat; 6651 6652 int64_t Format = getDefaultFormatEncoding(getSTI()); 6653 ParseStatus Res; 6654 SMLoc Loc = getLoc(); 6655 6656 // Parse legacy format syntax. 6657 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6658 if (Res.isFailure()) 6659 return Res; 6660 6661 bool FormatFound = Res.isSuccess(); 6662 6663 Operands.push_back( 6664 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6665 6666 if (FormatFound) 6667 trySkipToken(AsmToken::Comma); 6668 6669 if (isToken(AsmToken::EndOfStatement)) { 6670 // We are expecting an soffset operand, 6671 // but let matcher handle the error. 6672 return ParseStatus::Success; 6673 } 6674 6675 // Parse soffset. 
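  // soffset may be a register or an immediate; when no format was given before
  // it, a trailing symbolic or numeric format is still accepted below.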
6676 Res = parseRegOrImm(Operands); 6677 if (!Res.isSuccess()) 6678 return Res; 6679 6680 trySkipToken(AsmToken::Comma); 6681 6682 if (!FormatFound) { 6683 Res = parseSymbolicOrNumericFormat(Format); 6684 if (Res.isFailure()) 6685 return Res; 6686 if (Res.isSuccess()) { 6687 auto Size = Operands.size(); 6688 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6689 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6690 Op.setImm(Format); 6691 } 6692 return ParseStatus::Success; 6693 } 6694 6695 if (isId("format") && peekToken().is(AsmToken::Colon)) 6696 return Error(getLoc(), "duplicate format"); 6697 return ParseStatus::Success; 6698 } 6699 6700 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) { 6701 ParseStatus Res = 6702 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); 6703 if (Res.isNoMatch()) { 6704 Res = parseIntWithPrefix("inst_offset", Operands, 6705 AMDGPUOperand::ImmTyInstOffset); 6706 } 6707 return Res; 6708 } 6709 6710 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) { 6711 ParseStatus Res = 6712 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16); 6713 if (Res.isNoMatch()) 6714 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16); 6715 return Res; 6716 } 6717 6718 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) { 6719 ParseStatus Res = 6720 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP); 6721 if (Res.isNoMatch()) { 6722 Res = 6723 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP); 6724 } 6725 return Res; 6726 } 6727 6728 //===----------------------------------------------------------------------===// 6729 // Exp 6730 //===----------------------------------------------------------------------===// 6731 6732 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6733 OptionalImmIndexMap OptionalIdx; 6734 6735 unsigned OperandIdx[4]; 6736 unsigned EnMask = 0; 6737 int SrcIdx = 0; 6738 6739 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6740 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6741 6742 // Add the register arguments 6743 if (Op.isReg()) { 6744 assert(SrcIdx < 4); 6745 OperandIdx[SrcIdx] = Inst.size(); 6746 Op.addRegOperands(Inst, 1); 6747 ++SrcIdx; 6748 continue; 6749 } 6750 6751 if (Op.isOff()) { 6752 assert(SrcIdx < 4); 6753 OperandIdx[SrcIdx] = Inst.size(); 6754 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6755 ++SrcIdx; 6756 continue; 6757 } 6758 6759 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6760 Op.addImmOperands(Inst, 1); 6761 continue; 6762 } 6763 6764 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6765 continue; 6766 6767 // Handle optional arguments 6768 OptionalIdx[Op.getImmTy()] = i; 6769 } 6770 6771 assert(SrcIdx == 4); 6772 6773 bool Compr = false; 6774 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6775 Compr = true; 6776 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6777 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6778 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6779 } 6780 6781 for (auto i = 0; i < SrcIdx; ++i) { 6782 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6783 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6784 } 6785 } 6786 6787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6788 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6789 6790 Inst.addOperand(MCOperand::createImm(EnMask)); 6791 } 6792 6793 //===----------------------------------------------------------------------===// 6794 // s_waitcnt 6795 //===----------------------------------------------------------------------===// 6796 6797 static bool 6798 encodeCnt( 6799 const AMDGPU::IsaVersion ISA, 6800 int64_t &IntVal, 6801 int64_t CntVal, 6802 bool Saturate, 6803 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6804 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6805 { 6806 bool Failed = false; 6807 6808 IntVal = encode(ISA, IntVal, CntVal); 6809 if (CntVal != decode(ISA, IntVal)) { 6810 if (Saturate) { 6811 IntVal = encode(ISA, IntVal, -1); 6812 } else { 6813 Failed = true; 6814 } 6815 } 6816 return Failed; 6817 } 6818 6819 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6820 6821 SMLoc CntLoc = getLoc(); 6822 StringRef CntName = getTokenStr(); 6823 6824 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6825 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6826 return false; 6827 6828 int64_t CntVal; 6829 SMLoc ValLoc = getLoc(); 6830 if (!parseExpr(CntVal)) 6831 return false; 6832 6833 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6834 6835 bool Failed = true; 6836 bool Sat = CntName.ends_with("_sat"); 6837 6838 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6839 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6840 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6841 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6842 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6843 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6844 } else { 6845 Error(CntLoc, "invalid counter name " + CntName); 6846 return false; 6847 } 6848 6849 if (Failed) { 6850 Error(ValLoc, "too large value for " + CntName); 6851 return false; 6852 } 6853 6854 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6855 return false; 6856 6857 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6858 if (isToken(AsmToken::EndOfStatement)) { 6859 Error(getLoc(), "expected a counter name"); 6860 return false; 6861 } 6862 } 6863 6864 return true; 6865 } 6866 6867 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) { 6868 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6869 int64_t Waitcnt = getWaitcntBitMask(ISA); 6870 SMLoc S = getLoc(); 6871 6872 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6873 while (!isToken(AsmToken::EndOfStatement)) { 6874 if (!parseCnt(Waitcnt)) 6875 return ParseStatus::Failure; 6876 } 6877 } else { 6878 if (!parseExpr(Waitcnt)) 6879 return ParseStatus::Failure; 6880 } 6881 6882 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6883 return ParseStatus::Success; 6884 } 6885 6886 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6887 SMLoc FieldLoc = getLoc(); 6888 StringRef FieldName = getTokenStr(); 6889 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6890 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6891 return false; 6892 6893 SMLoc ValueLoc = getLoc(); 6894 StringRef ValueName = getTokenStr(); 6895 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6896 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6897 return false; 6898 6899 unsigned Shift; 6900 if (FieldName == "instid0") { 6901 Shift = 0; 6902 } else if (FieldName == "instskip") { 6903 Shift = 4; 6904 } else if (FieldName == "instid1") { 6905 Shift = 7; 6906 } else { 6907 Error(FieldLoc, "invalid field name " + FieldName); 6908 return false; 6909 } 6910 6911 int Value; 6912 if (Shift == 4) { 6913 // Parse values for instskip. 6914 Value = StringSwitch<int>(ValueName) 6915 .Case("SAME", 0) 6916 .Case("NEXT", 1) 6917 .Case("SKIP_1", 2) 6918 .Case("SKIP_2", 3) 6919 .Case("SKIP_3", 4) 6920 .Case("SKIP_4", 5) 6921 .Default(-1); 6922 } else { 6923 // Parse values for instid0 and instid1. 6924 Value = StringSwitch<int>(ValueName) 6925 .Case("NO_DEP", 0) 6926 .Case("VALU_DEP_1", 1) 6927 .Case("VALU_DEP_2", 2) 6928 .Case("VALU_DEP_3", 3) 6929 .Case("VALU_DEP_4", 4) 6930 .Case("TRANS32_DEP_1", 5) 6931 .Case("TRANS32_DEP_2", 6) 6932 .Case("TRANS32_DEP_3", 7) 6933 .Case("FMA_ACCUM_CYCLE_1", 8) 6934 .Case("SALU_CYCLE_1", 9) 6935 .Case("SALU_CYCLE_2", 10) 6936 .Case("SALU_CYCLE_3", 11) 6937 .Default(-1); 6938 } 6939 if (Value < 0) { 6940 Error(ValueLoc, "invalid value name " + ValueName); 6941 return false; 6942 } 6943 6944 Delay |= Value << Shift; 6945 return true; 6946 } 6947 6948 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) { 6949 int64_t Delay = 0; 6950 SMLoc S = getLoc(); 6951 6952 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6953 do { 6954 if (!parseDelay(Delay)) 6955 return ParseStatus::Failure; 6956 } while (trySkipToken(AsmToken::Pipe)); 6957 } else { 6958 if (!parseExpr(Delay)) 6959 return ParseStatus::Failure; 6960 } 6961 6962 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6963 return ParseStatus::Success; 6964 } 6965 6966 bool 6967 AMDGPUOperand::isSWaitCnt() const { 6968 return isImm(); 6969 } 6970 6971 bool AMDGPUOperand::isSDelayALU() const { return isImm(); } 6972 6973 //===----------------------------------------------------------------------===// 6974 // DepCtr 6975 //===----------------------------------------------------------------------===// 6976 6977 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6978 StringRef DepCtrName) { 6979 switch (ErrorId) { 6980 case OPR_ID_UNKNOWN: 6981 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6982 return; 6983 case OPR_ID_UNSUPPORTED: 6984 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6985 return; 6986 case OPR_ID_DUPLICATE: 6987 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6988 return; 6989 case OPR_VAL_INVALID: 6990 Error(Loc, Twine("invalid value for ", DepCtrName)); 6991 return; 6992 default: 6993 assert(false); 6994 } 6995 } 6996 6997 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6998 6999 using namespace llvm::AMDGPU::DepCtr; 7000 7001 SMLoc DepCtrLoc = getLoc(); 7002 StringRef DepCtrName = getTokenStr(); 7003 7004 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 7005 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 7006 return false; 7007 7008 int64_t ExprVal; 7009 if (!parseExpr(ExprVal)) 7010 return false; 7011 7012 unsigned PrevOprMask = UsedOprMask; 7013 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 7014 7015 if (CntVal < 0) { 7016 depCtrError(DepCtrLoc, CntVal, DepCtrName); 7017 return false; 7018 } 7019 7020 if (!skipToken(AsmToken::RParen, "expected 
a closing parenthesis")) 7021 return false; 7022 7023 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 7024 if (isToken(AsmToken::EndOfStatement)) { 7025 Error(getLoc(), "expected a counter name"); 7026 return false; 7027 } 7028 } 7029 7030 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 7031 DepCtr = (DepCtr & ~CntValMask) | CntVal; 7032 return true; 7033 } 7034 7035 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) { 7036 using namespace llvm::AMDGPU::DepCtr; 7037 7038 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 7039 SMLoc Loc = getLoc(); 7040 7041 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 7042 unsigned UsedOprMask = 0; 7043 while (!isToken(AsmToken::EndOfStatement)) { 7044 if (!parseDepCtr(DepCtr, UsedOprMask)) 7045 return ParseStatus::Failure; 7046 } 7047 } else { 7048 if (!parseExpr(DepCtr)) 7049 return ParseStatus::Failure; 7050 } 7051 7052 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 7053 return ParseStatus::Success; 7054 } 7055 7056 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 7057 7058 //===----------------------------------------------------------------------===// 7059 // hwreg 7060 //===----------------------------------------------------------------------===// 7061 7062 bool 7063 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 7064 OperandInfoTy &Offset, 7065 OperandInfoTy &Width) { 7066 using namespace llvm::AMDGPU::Hwreg; 7067 7068 // The register may be specified by name or using a numeric code 7069 HwReg.Loc = getLoc(); 7070 if (isToken(AsmToken::Identifier) && 7071 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7072 HwReg.IsSymbolic = true; 7073 lex(); // skip register name 7074 } else if (!parseExpr(HwReg.Id, "a register name")) { 7075 return false; 7076 } 7077 7078 if (trySkipToken(AsmToken::RParen)) 7079 return true; 7080 7081 // parse optional params 7082 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 7083 return false; 7084 7085 Offset.Loc = getLoc(); 7086 if (!parseExpr(Offset.Id)) 7087 return false; 7088 7089 if (!skipToken(AsmToken::Comma, "expected a comma")) 7090 return false; 7091 7092 Width.Loc = getLoc(); 7093 return parseExpr(Width.Id) && 7094 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7095 } 7096 7097 bool 7098 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 7099 const OperandInfoTy &Offset, 7100 const OperandInfoTy &Width) { 7101 7102 using namespace llvm::AMDGPU::Hwreg; 7103 7104 if (HwReg.IsSymbolic) { 7105 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 7106 Error(HwReg.Loc, 7107 "specified hardware register is not supported on this GPU"); 7108 return false; 7109 } 7110 } else { 7111 if (!isValidHwreg(HwReg.Id)) { 7112 Error(HwReg.Loc, 7113 "invalid code of hardware register: only 6-bit values are legal"); 7114 return false; 7115 } 7116 } 7117 if (!isValidHwregOffset(Offset.Id)) { 7118 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 7119 return false; 7120 } 7121 if (!isValidHwregWidth(Width.Id)) { 7122 Error(Width.Loc, 7123 "invalid bitfield width: only values from 1 to 32 are legal"); 7124 return false; 7125 } 7126 return true; 7127 } 7128 7129 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 7130 using namespace llvm::AMDGPU::Hwreg; 7131 7132 int64_t ImmVal = 0; 7133 SMLoc Loc = getLoc(); 7134 7135 if (trySkipId("hwreg", AsmToken::LParen)) { 7136 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 7137 OperandInfoTy 
Offset(OFFSET_DEFAULT_); 7138 OperandInfoTy Width(WIDTH_DEFAULT_); 7139 if (parseHwregBody(HwReg, Offset, Width) && 7140 validateHwreg(HwReg, Offset, Width)) { 7141 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 7142 } else { 7143 return ParseStatus::Failure; 7144 } 7145 } else if (parseExpr(ImmVal, "a hwreg macro")) { 7146 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 7147 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7148 } else { 7149 return ParseStatus::Failure; 7150 } 7151 7152 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 7153 return ParseStatus::Success; 7154 } 7155 7156 bool AMDGPUOperand::isHwreg() const { 7157 return isImmTy(ImmTyHwreg); 7158 } 7159 7160 //===----------------------------------------------------------------------===// 7161 // sendmsg 7162 //===----------------------------------------------------------------------===// 7163 7164 bool 7165 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 7166 OperandInfoTy &Op, 7167 OperandInfoTy &Stream) { 7168 using namespace llvm::AMDGPU::SendMsg; 7169 7170 Msg.Loc = getLoc(); 7171 if (isToken(AsmToken::Identifier) && 7172 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7173 Msg.IsSymbolic = true; 7174 lex(); // skip message name 7175 } else if (!parseExpr(Msg.Id, "a message name")) { 7176 return false; 7177 } 7178 7179 if (trySkipToken(AsmToken::Comma)) { 7180 Op.IsDefined = true; 7181 Op.Loc = getLoc(); 7182 if (isToken(AsmToken::Identifier) && 7183 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 7184 lex(); // skip operation name 7185 } else if (!parseExpr(Op.Id, "an operation name")) { 7186 return false; 7187 } 7188 7189 if (trySkipToken(AsmToken::Comma)) { 7190 Stream.IsDefined = true; 7191 Stream.Loc = getLoc(); 7192 if (!parseExpr(Stream.Id)) 7193 return false; 7194 } 7195 } 7196 7197 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7198 } 7199 7200 bool 7201 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 7202 const OperandInfoTy &Op, 7203 const OperandInfoTy &Stream) { 7204 using namespace llvm::AMDGPU::SendMsg; 7205 7206 // Validation strictness depends on whether message is specified 7207 // in a symbolic or in a numeric form. In the latter case 7208 // only encoding possibility is checked. 
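// For example, a symbolic form such as sendmsg(MSG_GS_DONE, GS_OP_NOP) is
// validated against the rules for that particular message, while a plain
// numeric form such as sendmsg(3) only has to fit into the encoding.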
7209 bool Strict = Msg.IsSymbolic; 7210 7211 if (Strict) { 7212 if (Msg.Id == OPR_ID_UNSUPPORTED) { 7213 Error(Msg.Loc, "specified message id is not supported on this GPU"); 7214 return false; 7215 } 7216 } else { 7217 if (!isValidMsgId(Msg.Id, getSTI())) { 7218 Error(Msg.Loc, "invalid message id"); 7219 return false; 7220 } 7221 } 7222 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 7223 if (Op.IsDefined) { 7224 Error(Op.Loc, "message does not support operations"); 7225 } else { 7226 Error(Msg.Loc, "missing message operation"); 7227 } 7228 return false; 7229 } 7230 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 7231 Error(Op.Loc, "invalid operation id"); 7232 return false; 7233 } 7234 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 7235 Stream.IsDefined) { 7236 Error(Stream.Loc, "message operation does not support streams"); 7237 return false; 7238 } 7239 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 7240 Error(Stream.Loc, "invalid message stream id"); 7241 return false; 7242 } 7243 return true; 7244 } 7245 7246 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) { 7247 using namespace llvm::AMDGPU::SendMsg; 7248 7249 int64_t ImmVal = 0; 7250 SMLoc Loc = getLoc(); 7251 7252 if (trySkipId("sendmsg", AsmToken::LParen)) { 7253 OperandInfoTy Msg(OPR_ID_UNKNOWN); 7254 OperandInfoTy Op(OP_NONE_); 7255 OperandInfoTy Stream(STREAM_ID_NONE_); 7256 if (parseSendMsgBody(Msg, Op, Stream) && 7257 validateSendMsg(Msg, Op, Stream)) { 7258 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 7259 } else { 7260 return ParseStatus::Failure; 7261 } 7262 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 7263 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 7264 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7265 } else { 7266 return ParseStatus::Failure; 7267 } 7268 7269 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 7270 return ParseStatus::Success; 7271 } 7272 7273 bool AMDGPUOperand::isSendMsg() const { 7274 return isImmTy(ImmTySendMsg); 7275 } 7276 7277 //===----------------------------------------------------------------------===// 7278 // v_interp 7279 //===----------------------------------------------------------------------===// 7280 7281 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 7282 StringRef Str; 7283 SMLoc S = getLoc(); 7284 7285 if (!parseId(Str)) 7286 return ParseStatus::NoMatch; 7287 7288 int Slot = StringSwitch<int>(Str) 7289 .Case("p10", 0) 7290 .Case("p20", 1) 7291 .Case("p0", 2) 7292 .Default(-1); 7293 7294 if (Slot == -1) 7295 return Error(S, "invalid interpolation slot"); 7296 7297 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 7298 AMDGPUOperand::ImmTyInterpSlot)); 7299 return ParseStatus::Success; 7300 } 7301 7302 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 7303 StringRef Str; 7304 SMLoc S = getLoc(); 7305 7306 if (!parseId(Str)) 7307 return ParseStatus::NoMatch; 7308 7309 if (!Str.starts_with("attr")) 7310 return Error(S, "invalid interpolation attribute"); 7311 7312 StringRef Chan = Str.take_back(2); 7313 int AttrChan = StringSwitch<int>(Chan) 7314 .Case(".x", 0) 7315 .Case(".y", 1) 7316 .Case(".z", 2) 7317 .Case(".w", 3) 7318 .Default(-1); 7319 if (AttrChan == -1) 7320 return Error(S, "invalid or missing interpolation attribute channel"); 7321 7322 Str = Str.drop_back(2).drop_front(4); 7323 7324 uint8_t Attr; 7325 if (Str.getAsInteger(10, Attr)) 7326 return Error(S, "invalid or 
missing interpolation attribute number"); 7327 7328 if (Attr > 32) 7329 return Error(S, "out of bounds interpolation attribute number"); 7330 7331 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 7332 7333 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 7334 AMDGPUOperand::ImmTyInterpAttr)); 7335 Operands.push_back(AMDGPUOperand::CreateImm( 7336 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan)); 7337 return ParseStatus::Success; 7338 } 7339 7340 //===----------------------------------------------------------------------===// 7341 // exp 7342 //===----------------------------------------------------------------------===// 7343 7344 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 7345 using namespace llvm::AMDGPU::Exp; 7346 7347 StringRef Str; 7348 SMLoc S = getLoc(); 7349 7350 if (!parseId(Str)) 7351 return ParseStatus::NoMatch; 7352 7353 unsigned Id = getTgtId(Str); 7354 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) 7355 return Error(S, (Id == ET_INVALID) 7356 ? "invalid exp target" 7357 : "exp target is not supported on this GPU"); 7358 7359 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 7360 AMDGPUOperand::ImmTyExpTgt)); 7361 return ParseStatus::Success; 7362 } 7363 7364 //===----------------------------------------------------------------------===// 7365 // parser helpers 7366 //===----------------------------------------------------------------------===// 7367 7368 bool 7369 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 7370 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 7371 } 7372 7373 bool 7374 AMDGPUAsmParser::isId(const StringRef Id) const { 7375 return isId(getToken(), Id); 7376 } 7377 7378 bool 7379 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 7380 return getTokenKind() == Kind; 7381 } 7382 7383 StringRef AMDGPUAsmParser::getId() const { 7384 return isToken(AsmToken::Identifier) ? 
getTokenStr() : StringRef(); 7385 } 7386 7387 bool 7388 AMDGPUAsmParser::trySkipId(const StringRef Id) { 7389 if (isId(Id)) { 7390 lex(); 7391 return true; 7392 } 7393 return false; 7394 } 7395 7396 bool 7397 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7398 if (isToken(AsmToken::Identifier)) { 7399 StringRef Tok = getTokenStr(); 7400 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) { 7401 lex(); 7402 return true; 7403 } 7404 } 7405 return false; 7406 } 7407 7408 bool 7409 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7410 if (isId(Id) && peekToken().is(Kind)) { 7411 lex(); 7412 lex(); 7413 return true; 7414 } 7415 return false; 7416 } 7417 7418 bool 7419 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7420 if (isToken(Kind)) { 7421 lex(); 7422 return true; 7423 } 7424 return false; 7425 } 7426 7427 bool 7428 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7429 const StringRef ErrMsg) { 7430 if (!trySkipToken(Kind)) { 7431 Error(getLoc(), ErrMsg); 7432 return false; 7433 } 7434 return true; 7435 } 7436 7437 bool 7438 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7439 SMLoc S = getLoc(); 7440 7441 const MCExpr *Expr; 7442 if (Parser.parseExpression(Expr)) 7443 return false; 7444 7445 if (Expr->evaluateAsAbsolute(Imm)) 7446 return true; 7447 7448 if (Expected.empty()) { 7449 Error(S, "expected absolute expression"); 7450 } else { 7451 Error(S, Twine("expected ", Expected) + 7452 Twine(" or an absolute expression")); 7453 } 7454 return false; 7455 } 7456 7457 bool 7458 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7459 SMLoc S = getLoc(); 7460 7461 const MCExpr *Expr; 7462 if (Parser.parseExpression(Expr)) 7463 return false; 7464 7465 int64_t IntVal; 7466 if (Expr->evaluateAsAbsolute(IntVal)) { 7467 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7468 } else { 7469 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7470 } 7471 return true; 7472 } 7473 7474 bool 7475 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7476 if (isToken(AsmToken::String)) { 7477 Val = getToken().getStringContents(); 7478 lex(); 7479 return true; 7480 } else { 7481 Error(getLoc(), ErrMsg); 7482 return false; 7483 } 7484 } 7485 7486 bool 7487 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7488 if (isToken(AsmToken::Identifier)) { 7489 Val = getTokenStr(); 7490 lex(); 7491 return true; 7492 } else { 7493 if (!ErrMsg.empty()) 7494 Error(getLoc(), ErrMsg); 7495 return false; 7496 } 7497 } 7498 7499 AsmToken 7500 AMDGPUAsmParser::getToken() const { 7501 return Parser.getTok(); 7502 } 7503 7504 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 7505 return isToken(AsmToken::EndOfStatement) 7506 ? 
getToken() 7507 : getLexer().peekTok(ShouldSkipSpace); 7508 } 7509 7510 void 7511 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7512 auto TokCount = getLexer().peekTokens(Tokens); 7513 7514 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7515 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7516 } 7517 7518 AsmToken::TokenKind 7519 AMDGPUAsmParser::getTokenKind() const { 7520 return getLexer().getKind(); 7521 } 7522 7523 SMLoc 7524 AMDGPUAsmParser::getLoc() const { 7525 return getToken().getLoc(); 7526 } 7527 7528 StringRef 7529 AMDGPUAsmParser::getTokenStr() const { 7530 return getToken().getString(); 7531 } 7532 7533 void 7534 AMDGPUAsmParser::lex() { 7535 Parser.Lex(); 7536 } 7537 7538 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const { 7539 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7540 } 7541 7542 SMLoc 7543 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7544 const OperandVector &Operands) const { 7545 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7546 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7547 if (Test(Op)) 7548 return Op.getStartLoc(); 7549 } 7550 return getInstLoc(Operands); 7551 } 7552 7553 SMLoc 7554 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7555 const OperandVector &Operands) const { 7556 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7557 return getOperandLoc(Test, Operands); 7558 } 7559 7560 SMLoc 7561 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7562 const OperandVector &Operands) const { 7563 auto Test = [=](const AMDGPUOperand& Op) { 7564 return Op.isRegKind() && Op.getReg() == Reg; 7565 }; 7566 return getOperandLoc(Test, Operands); 7567 } 7568 7569 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands, 7570 bool SearchMandatoryLiterals) const { 7571 auto Test = [](const AMDGPUOperand& Op) { 7572 return Op.IsImmKindLiteral() || Op.isExpr(); 7573 }; 7574 SMLoc Loc = getOperandLoc(Test, Operands); 7575 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands)) 7576 Loc = getMandatoryLitLoc(Operands); 7577 return Loc; 7578 } 7579 7580 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const { 7581 auto Test = [](const AMDGPUOperand &Op) { 7582 return Op.IsImmKindMandatoryLiteral(); 7583 }; 7584 return getOperandLoc(Test, Operands); 7585 } 7586 7587 SMLoc 7588 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7589 auto Test = [](const AMDGPUOperand& Op) { 7590 return Op.isImmKindConst(); 7591 }; 7592 return getOperandLoc(Test, Operands); 7593 } 7594 7595 //===----------------------------------------------------------------------===// 7596 // swizzle 7597 //===----------------------------------------------------------------------===// 7598 7599 LLVM_READNONE 7600 static unsigned 7601 encodeBitmaskPerm(const unsigned AndMask, 7602 const unsigned OrMask, 7603 const unsigned XorMask) { 7604 using namespace llvm::AMDGPU::Swizzle; 7605 7606 return BITMASK_PERM_ENC | 7607 (AndMask << BITMASK_AND_SHIFT) | 7608 (OrMask << BITMASK_OR_SHIFT) | 7609 (XorMask << BITMASK_XOR_SHIFT); 7610 } 7611 7612 bool 7613 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7614 const unsigned MinVal, 7615 const unsigned MaxVal, 7616 const StringRef ErrMsg, 7617 SMLoc &Loc) { 7618 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7619 return false; 7620 } 7621 Loc = getLoc(); 7622 if (!parseExpr(Op)) { 7623 return false; 7624 } 7625 if (Op < MinVal || Op > MaxVal) { 7626 Error(Loc, ErrMsg); 7627 return false; 7628 } 
7629 7630 return true; 7631 } 7632 7633 bool 7634 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7635 const unsigned MinVal, 7636 const unsigned MaxVal, 7637 const StringRef ErrMsg) { 7638 SMLoc Loc; 7639 for (unsigned i = 0; i < OpNum; ++i) { 7640 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7641 return false; 7642 } 7643 7644 return true; 7645 } 7646 7647 bool 7648 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7649 using namespace llvm::AMDGPU::Swizzle; 7650 7651 int64_t Lane[LANE_NUM]; 7652 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7653 "expected a 2-bit lane id")) { 7654 Imm = QUAD_PERM_ENC; 7655 for (unsigned I = 0; I < LANE_NUM; ++I) { 7656 Imm |= Lane[I] << (LANE_SHIFT * I); 7657 } 7658 return true; 7659 } 7660 return false; 7661 } 7662 7663 bool 7664 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7665 using namespace llvm::AMDGPU::Swizzle; 7666 7667 SMLoc Loc; 7668 int64_t GroupSize; 7669 int64_t LaneIdx; 7670 7671 if (!parseSwizzleOperand(GroupSize, 7672 2, 32, 7673 "group size must be in the interval [2,32]", 7674 Loc)) { 7675 return false; 7676 } 7677 if (!isPowerOf2_64(GroupSize)) { 7678 Error(Loc, "group size must be a power of two"); 7679 return false; 7680 } 7681 if (parseSwizzleOperand(LaneIdx, 7682 0, GroupSize - 1, 7683 "lane id must be in the interval [0,group size - 1]", 7684 Loc)) { 7685 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7686 return true; 7687 } 7688 return false; 7689 } 7690 7691 bool 7692 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7693 using namespace llvm::AMDGPU::Swizzle; 7694 7695 SMLoc Loc; 7696 int64_t GroupSize; 7697 7698 if (!parseSwizzleOperand(GroupSize, 7699 2, 32, 7700 "group size must be in the interval [2,32]", 7701 Loc)) { 7702 return false; 7703 } 7704 if (!isPowerOf2_64(GroupSize)) { 7705 Error(Loc, "group size must be a power of two"); 7706 return false; 7707 } 7708 7709 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7710 return true; 7711 } 7712 7713 bool 7714 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7715 using namespace llvm::AMDGPU::Swizzle; 7716 7717 SMLoc Loc; 7718 int64_t GroupSize; 7719 7720 if (!parseSwizzleOperand(GroupSize, 7721 1, 16, 7722 "group size must be in the interval [1,16]", 7723 Loc)) { 7724 return false; 7725 } 7726 if (!isPowerOf2_64(GroupSize)) { 7727 Error(Loc, "group size must be a power of two"); 7728 return false; 7729 } 7730 7731 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7732 return true; 7733 } 7734 7735 bool 7736 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7737 using namespace llvm::AMDGPU::Swizzle; 7738 7739 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7740 return false; 7741 } 7742 7743 StringRef Ctl; 7744 SMLoc StrLoc = getLoc(); 7745 if (!parseString(Ctl)) { 7746 return false; 7747 } 7748 if (Ctl.size() != BITMASK_WIDTH) { 7749 Error(StrLoc, "expected a 5-character mask"); 7750 return false; 7751 } 7752 7753 unsigned AndMask = 0; 7754 unsigned OrMask = 0; 7755 unsigned XorMask = 0; 7756 7757 for (size_t i = 0; i < Ctl.size(); ++i) { 7758 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7759 switch(Ctl[i]) { 7760 default: 7761 Error(StrLoc, "invalid mask"); 7762 return false; 7763 case '0': 7764 break; 7765 case '1': 7766 OrMask |= Mask; 7767 break; 7768 case 'p': 7769 AndMask |= Mask; 7770 break; 7771 case 'i': 7772 AndMask |= Mask; 7773 XorMask |= Mask; 7774 break; 7775 } 7776 } 7777 7778 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7779 return true; 
7780 } 7781 7782 bool 7783 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7784 7785 SMLoc OffsetLoc = getLoc(); 7786 7787 if (!parseExpr(Imm, "a swizzle macro")) { 7788 return false; 7789 } 7790 if (!isUInt<16>(Imm)) { 7791 Error(OffsetLoc, "expected a 16-bit offset"); 7792 return false; 7793 } 7794 return true; 7795 } 7796 7797 bool 7798 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7799 using namespace llvm::AMDGPU::Swizzle; 7800 7801 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7802 7803 SMLoc ModeLoc = getLoc(); 7804 bool Ok = false; 7805 7806 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7807 Ok = parseSwizzleQuadPerm(Imm); 7808 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7809 Ok = parseSwizzleBitmaskPerm(Imm); 7810 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7811 Ok = parseSwizzleBroadcast(Imm); 7812 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7813 Ok = parseSwizzleSwap(Imm); 7814 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7815 Ok = parseSwizzleReverse(Imm); 7816 } else { 7817 Error(ModeLoc, "expected a swizzle mode"); 7818 } 7819 7820 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7821 } 7822 7823 return false; 7824 } 7825 7826 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) { 7827 SMLoc S = getLoc(); 7828 int64_t Imm = 0; 7829 7830 if (trySkipId("offset")) { 7831 7832 bool Ok = false; 7833 if (skipToken(AsmToken::Colon, "expected a colon")) { 7834 if (trySkipId("swizzle")) { 7835 Ok = parseSwizzleMacro(Imm); 7836 } else { 7837 Ok = parseSwizzleOffset(Imm); 7838 } 7839 } 7840 7841 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7842 7843 return Ok ? ParseStatus::Success : ParseStatus::Failure; 7844 } 7845 return ParseStatus::NoMatch; 7846 } 7847 7848 bool 7849 AMDGPUOperand::isSwizzle() const { 7850 return isImmTy(ImmTySwizzle); 7851 } 7852 7853 //===----------------------------------------------------------------------===// 7854 // VGPR Index Mode 7855 //===----------------------------------------------------------------------===// 7856 7857 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7858 7859 using namespace llvm::AMDGPU::VGPRIndexMode; 7860 7861 if (trySkipToken(AsmToken::RParen)) { 7862 return OFF; 7863 } 7864 7865 int64_t Imm = 0; 7866 7867 while (true) { 7868 unsigned Mode = 0; 7869 SMLoc S = getLoc(); 7870 7871 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7872 if (trySkipId(IdSymbolic[ModeId])) { 7873 Mode = 1 << ModeId; 7874 break; 7875 } 7876 } 7877 7878 if (Mode == 0) { 7879 Error(S, (Imm == 0)? 
7880 "expected a VGPR index mode or a closing parenthesis" : 7881 "expected a VGPR index mode"); 7882 return UNDEF; 7883 } 7884 7885 if (Imm & Mode) { 7886 Error(S, "duplicate VGPR index mode"); 7887 return UNDEF; 7888 } 7889 Imm |= Mode; 7890 7891 if (trySkipToken(AsmToken::RParen)) 7892 break; 7893 if (!skipToken(AsmToken::Comma, 7894 "expected a comma or a closing parenthesis")) 7895 return UNDEF; 7896 } 7897 7898 return Imm; 7899 } 7900 7901 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7902 7903 using namespace llvm::AMDGPU::VGPRIndexMode; 7904 7905 int64_t Imm = 0; 7906 SMLoc S = getLoc(); 7907 7908 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7909 Imm = parseGPRIdxMacro(); 7910 if (Imm == UNDEF) 7911 return ParseStatus::Failure; 7912 } else { 7913 if (getParser().parseAbsoluteExpression(Imm)) 7914 return ParseStatus::Failure; 7915 if (Imm < 0 || !isUInt<4>(Imm)) 7916 return Error(S, "invalid immediate: only 4-bit values are legal"); 7917 } 7918 7919 Operands.push_back( 7920 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7921 return ParseStatus::Success; 7922 } 7923 7924 bool AMDGPUOperand::isGPRIdxMode() const { 7925 return isImmTy(ImmTyGprIdxMode); 7926 } 7927 7928 //===----------------------------------------------------------------------===// 7929 // sopp branch targets 7930 //===----------------------------------------------------------------------===// 7931 7932 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) { 7933 7934 // Make sure we are not parsing something 7935 // that looks like a label or an expression but is not. 7936 // This will improve error messages. 7937 if (isRegister() || isModifier()) 7938 return ParseStatus::NoMatch; 7939 7940 if (!parseExpr(Operands)) 7941 return ParseStatus::Failure; 7942 7943 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7944 assert(Opr.isImm() || Opr.isExpr()); 7945 SMLoc Loc = Opr.getStartLoc(); 7946 7947 // Currently we do not support arbitrary expressions as branch targets. 7948 // Only labels and absolute expressions are accepted. 
7949 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7950 Error(Loc, "expected an absolute expression or a label"); 7951 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7952 Error(Loc, "expected a 16-bit signed jump offset"); 7953 } 7954 7955 return ParseStatus::Success; 7956 } 7957 7958 //===----------------------------------------------------------------------===// 7959 // Boolean holding registers 7960 //===----------------------------------------------------------------------===// 7961 7962 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7963 return parseReg(Operands); 7964 } 7965 7966 //===----------------------------------------------------------------------===// 7967 // mubuf 7968 //===----------------------------------------------------------------------===// 7969 7970 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7971 const OperandVector &Operands, 7972 bool IsAtomic) { 7973 OptionalImmIndexMap OptionalIdx; 7974 unsigned FirstOperandIdx = 1; 7975 bool IsAtomicReturn = false; 7976 7977 if (IsAtomic) { 7978 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7979 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7980 if (!Op.isCPol()) 7981 continue; 7982 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7983 break; 7984 } 7985 7986 if (!IsAtomicReturn) { 7987 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7988 if (NewOpc != -1) 7989 Inst.setOpcode(NewOpc); 7990 } 7991 7992 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7993 SIInstrFlags::IsAtomicRet; 7994 } 7995 7996 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7997 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7998 7999 // Add the register arguments 8000 if (Op.isReg()) { 8001 Op.addRegOperands(Inst, 1); 8002 // Insert a tied src for atomic return dst. 8003 // This cannot be postponed as subsequent calls to 8004 // addImmOperands rely on correct number of MC operands. 8005 if (IsAtomicReturn && i == FirstOperandIdx) 8006 Op.addRegOperands(Inst, 1); 8007 continue; 8008 } 8009 8010 // Handle the case where soffset is an immediate 8011 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 8012 Op.addImmOperands(Inst, 1); 8013 continue; 8014 } 8015 8016 // Handle tokens like 'offen' which are sometimes hard-coded into the 8017 // asm string. There are no MCInst operands for these. 8018 if (Op.isToken()) { 8019 continue; 8020 } 8021 assert(Op.isImm()); 8022 8023 // Handle optional arguments 8024 OptionalIdx[Op.getImmTy()] = i; 8025 } 8026 8027 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 8028 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 8029 } 8030 8031 //===----------------------------------------------------------------------===// 8032 // smrd 8033 //===----------------------------------------------------------------------===// 8034 8035 bool AMDGPUOperand::isSMRDOffset8() const { 8036 return isImmLiteral() && isUInt<8>(getImm()); 8037 } 8038 8039 bool AMDGPUOperand::isSMEMOffset() const { 8040 // Offset range is checked later by validator. 8041 return isImmLiteral(); 8042 } 8043 8044 bool AMDGPUOperand::isSMRDLiteralOffset() const { 8045 // 32-bit literals are only supported on CI and we only want to use them 8046 // when the offset is > 8-bits. 
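// Accept only literals that do not fit into 8 bits but still fit into 32 bits.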
8047 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 8048 } 8049 8050 //===----------------------------------------------------------------------===// 8051 // vop3 8052 //===----------------------------------------------------------------------===// 8053 8054 static bool ConvertOmodMul(int64_t &Mul) { 8055 if (Mul != 1 && Mul != 2 && Mul != 4) 8056 return false; 8057 8058 Mul >>= 1; 8059 return true; 8060 } 8061 8062 static bool ConvertOmodDiv(int64_t &Div) { 8063 if (Div == 1) { 8064 Div = 0; 8065 return true; 8066 } 8067 8068 if (Div == 2) { 8069 Div = 3; 8070 return true; 8071 } 8072 8073 return false; 8074 } 8075 8076 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 8077 // This is intentional and ensures compatibility with sp3. 8078 // See bug 35397 for details. 8079 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) { 8080 if (BoundCtrl == 0 || BoundCtrl == 1) { 8081 if (!isGFX11Plus()) 8082 BoundCtrl = 1; 8083 return true; 8084 } 8085 return false; 8086 } 8087 8088 void AMDGPUAsmParser::onBeginOfFile() { 8089 if (!getParser().getStreamer().getTargetStreamer() || 8090 getSTI().getTargetTriple().getArch() == Triple::r600) 8091 return; 8092 8093 if (!getTargetStreamer().getTargetID()) 8094 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(), 8095 // TODO: Should try to check code object version from directive??? 8096 AMDGPU::getAmdhsaCodeObjectVersion()); 8097 8098 if (isHsaAbi(getSTI())) 8099 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 8100 } 8101 8102 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) { 8103 StringRef Name = getTokenStr(); 8104 if (Name == "mul") { 8105 return parseIntWithPrefix("mul", Operands, 8106 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8107 } 8108 8109 if (Name == "div") { 8110 return parseIntWithPrefix("div", Operands, 8111 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8112 } 8113 8114 return ParseStatus::NoMatch; 8115 } 8116 8117 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to 8118 // the number of src operands present, then copies that bit into src0_modifiers. 8119 void cvtVOP3DstOpSelOnly(MCInst &Inst) { 8120 int Opc = Inst.getOpcode(); 8121 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8122 if (OpSelIdx == -1) 8123 return; 8124 8125 int SrcNum; 8126 const int Ops[] = { AMDGPU::OpName::src0, 8127 AMDGPU::OpName::src1, 8128 AMDGPU::OpName::src2 }; 8129 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]); 8130 ++SrcNum) 8131 ; 8132 assert(SrcNum > 0); 8133 8134 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8135 8136 if ((OpSel & (1 << SrcNum)) != 0) { 8137 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8138 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8139 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8140 } 8141 } 8142 8143 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, 8144 const OperandVector &Operands) { 8145 cvtVOP3P(Inst, Operands); 8146 cvtVOP3DstOpSelOnly(Inst); 8147 } 8148 8149 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 8150 OptionalImmIndexMap &OptionalIdx) { 8151 cvtVOP3P(Inst, Operands, OptionalIdx); 8152 cvtVOP3DstOpSelOnly(Inst); 8153 } 8154 8155 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8156 return 8157 // 1. This operand is input modifiers 8158 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8159 // 2. 
This is not last operand 8160 && Desc.NumOperands > (OpNum + 1) 8161 // 3. Next operand is register class 8162 && Desc.operands()[OpNum + 1].RegClass != -1 8163 // 4. Next register is not tied to any other operand 8164 && Desc.getOperandConstraint(OpNum + 1, 8165 MCOI::OperandConstraint::TIED_TO) == -1; 8166 } 8167 8168 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8169 { 8170 OptionalImmIndexMap OptionalIdx; 8171 unsigned Opc = Inst.getOpcode(); 8172 8173 unsigned I = 1; 8174 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8175 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8176 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8177 } 8178 8179 for (unsigned E = Operands.size(); I != E; ++I) { 8180 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8181 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8182 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8183 } else if (Op.isInterpSlot() || Op.isInterpAttr() || 8184 Op.isInterpAttrChan()) { 8185 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8186 } else if (Op.isImmModifier()) { 8187 OptionalIdx[Op.getImmTy()] = I; 8188 } else { 8189 llvm_unreachable("unhandled operand type"); 8190 } 8191 } 8192 8193 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high)) 8194 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8195 AMDGPUOperand::ImmTyHigh); 8196 8197 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8198 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8199 AMDGPUOperand::ImmTyClampSI); 8200 8201 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8202 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8203 AMDGPUOperand::ImmTyOModSI); 8204 } 8205 8206 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8207 { 8208 OptionalImmIndexMap OptionalIdx; 8209 unsigned Opc = Inst.getOpcode(); 8210 8211 unsigned I = 1; 8212 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8213 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8214 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8215 } 8216 8217 for (unsigned E = Operands.size(); I != E; ++I) { 8218 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8219 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8220 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8221 } else if (Op.isImmModifier()) { 8222 OptionalIdx[Op.getImmTy()] = I; 8223 } else { 8224 llvm_unreachable("unhandled operand type"); 8225 } 8226 } 8227 8228 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8229 8230 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8231 if (OpSelIdx != -1) 8232 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8233 8234 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8235 8236 if (OpSelIdx == -1) 8237 return; 8238 8239 const int Ops[] = { AMDGPU::OpName::src0, 8240 AMDGPU::OpName::src1, 8241 AMDGPU::OpName::src2 }; 8242 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8243 AMDGPU::OpName::src1_modifiers, 8244 AMDGPU::OpName::src2_modifiers }; 8245 8246 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8247 8248 for (int J = 0; J < 3; ++J) { 8249 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8250 if (OpIdx == -1) 8251 break; 8252 8253 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8254 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8255 8256 if ((OpSel & (1 << J)) != 0) 8257 ModVal |= SISrcMods::OP_SEL_0; 8258 if (ModOps[J] == 
AMDGPU::OpName::src0_modifiers && 8259 (OpSel & (1 << 3)) != 0) 8260 ModVal |= SISrcMods::DST_OP_SEL; 8261 8262 Inst.getOperand(ModIdx).setImm(ModVal); 8263 } 8264 } 8265 8266 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8267 OptionalImmIndexMap &OptionalIdx) { 8268 unsigned Opc = Inst.getOpcode(); 8269 8270 unsigned I = 1; 8271 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8272 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8273 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8274 } 8275 8276 for (unsigned E = Operands.size(); I != E; ++I) { 8277 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8278 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8279 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8280 } else if (Op.isImmModifier()) { 8281 OptionalIdx[Op.getImmTy()] = I; 8282 } else if (Op.isRegOrImm()) { 8283 Op.addRegOrImmOperands(Inst, 1); 8284 } else { 8285 llvm_unreachable("unhandled operand type"); 8286 } 8287 } 8288 8289 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8290 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8291 AMDGPUOperand::ImmTyClampSI); 8292 8293 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8294 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8295 AMDGPUOperand::ImmTyOModSI); 8296 8297 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8298 // it has src2 register operand that is tied to dst operand 8299 // we don't allow modifiers for this operand in assembler so src2_modifiers 8300 // should be 0. 8301 if (isMAC(Opc)) { 8302 auto it = Inst.begin(); 8303 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8304 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8305 ++it; 8306 // Copy the operand to ensure it's not invalidated when Inst grows. 8307 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8308 } 8309 } 8310 8311 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8312 OptionalImmIndexMap OptionalIdx; 8313 cvtVOP3(Inst, Operands, OptionalIdx); 8314 } 8315 8316 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8317 OptionalImmIndexMap &OptIdx) { 8318 const int Opc = Inst.getOpcode(); 8319 const MCInstrDesc &Desc = MII.get(Opc); 8320 8321 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8322 8323 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || 8324 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) { 8325 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods 8326 Inst.addOperand(Inst.getOperand(0)); 8327 } 8328 8329 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) { 8330 assert(!IsPacked); 8331 Inst.addOperand(Inst.getOperand(0)); 8332 } 8333 8334 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8335 // instruction, and then figure out where to actually put the modifiers 8336 8337 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8338 if (OpSelIdx != -1) { 8339 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8340 } 8341 8342 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8343 if (OpSelHiIdx != -1) { 8344 int DefaultVal = IsPacked ? 
-1 : 0; 8345 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8346 DefaultVal); 8347 } 8348 8349 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8350 if (NegLoIdx != -1) { 8351 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8352 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8353 } 8354 8355 const int Ops[] = { AMDGPU::OpName::src0, 8356 AMDGPU::OpName::src1, 8357 AMDGPU::OpName::src2 }; 8358 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8359 AMDGPU::OpName::src1_modifiers, 8360 AMDGPU::OpName::src2_modifiers }; 8361 8362 unsigned OpSel = 0; 8363 unsigned OpSelHi = 0; 8364 unsigned NegLo = 0; 8365 unsigned NegHi = 0; 8366 8367 if (OpSelIdx != -1) 8368 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8369 8370 if (OpSelHiIdx != -1) 8371 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8372 8373 if (NegLoIdx != -1) { 8374 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8375 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8376 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8377 } 8378 8379 for (int J = 0; J < 3; ++J) { 8380 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8381 if (OpIdx == -1) 8382 break; 8383 8384 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8385 8386 if (ModIdx == -1) 8387 continue; 8388 8389 uint32_t ModVal = 0; 8390 8391 if ((OpSel & (1 << J)) != 0) 8392 ModVal |= SISrcMods::OP_SEL_0; 8393 8394 if ((OpSelHi & (1 << J)) != 0) 8395 ModVal |= SISrcMods::OP_SEL_1; 8396 8397 if ((NegLo & (1 << J)) != 0) 8398 ModVal |= SISrcMods::NEG; 8399 8400 if ((NegHi & (1 << J)) != 0) 8401 ModVal |= SISrcMods::NEG_HI; 8402 8403 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8404 } 8405 } 8406 8407 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8408 OptionalImmIndexMap OptIdx; 8409 cvtVOP3(Inst, Operands, OptIdx); 8410 cvtVOP3P(Inst, Operands, OptIdx); 8411 } 8412 8413 //===----------------------------------------------------------------------===// 8414 // VOPD 8415 //===----------------------------------------------------------------------===// 8416 8417 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { 8418 if (!hasVOPD(getSTI())) 8419 return ParseStatus::NoMatch; 8420 8421 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { 8422 SMLoc S = getLoc(); 8423 lex(); 8424 lex(); 8425 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); 8426 SMLoc OpYLoc = getLoc(); 8427 StringRef OpYName; 8428 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) { 8429 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc)); 8430 return ParseStatus::Success; 8431 } 8432 return Error(OpYLoc, "expected a VOPDY instruction after ::"); 8433 } 8434 return ParseStatus::NoMatch; 8435 } 8436 8437 // Create VOPD MCInst operands using parsed assembler operands. 
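// A VOPD instruction is written as two components joined by "::", e.g.
// "v_dual_mov_b32 v0, v2 :: v_dual_add_f32 v1, v3, v4"; operands parsed for
// both components are reassembled into a single MCInst here.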
8438 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { 8439 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer 8440 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); 8441 if (Op.isReg()) { 8442 Op.addRegOperands(Inst, 1); 8443 return; 8444 } 8445 if (Op.isImm()) { 8446 Op.addImmOperands(Inst, 1); 8447 return; 8448 } 8449 llvm_unreachable("Unhandled operand type in cvtVOPD"); 8450 }; 8451 8452 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII); 8453 8454 // MCInst operands are ordered as follows: 8455 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 8456 8457 for (auto CompIdx : VOPD::COMPONENTS) { 8458 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands()); 8459 } 8460 8461 for (auto CompIdx : VOPD::COMPONENTS) { 8462 const auto &CInfo = InstInfo[CompIdx]; 8463 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum(); 8464 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx) 8465 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx)); 8466 if (CInfo.hasSrc2Acc()) 8467 addOp(CInfo.getIndexOfDstInParsedOperands()); 8468 } 8469 } 8470 8471 //===----------------------------------------------------------------------===// 8472 // dpp 8473 //===----------------------------------------------------------------------===// 8474 8475 bool AMDGPUOperand::isDPP8() const { 8476 return isImmTy(ImmTyDPP8); 8477 } 8478 8479 bool AMDGPUOperand::isDPPCtrl() const { 8480 using namespace AMDGPU::DPP; 8481 8482 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8483 if (result) { 8484 int64_t Imm = getImm(); 8485 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8486 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8487 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8488 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8489 (Imm == DppCtrl::WAVE_SHL1) || 8490 (Imm == DppCtrl::WAVE_ROL1) || 8491 (Imm == DppCtrl::WAVE_SHR1) || 8492 (Imm == DppCtrl::WAVE_ROR1) || 8493 (Imm == DppCtrl::ROW_MIRROR) || 8494 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8495 (Imm == DppCtrl::BCAST15) || 8496 (Imm == DppCtrl::BCAST31) || 8497 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8498 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8499 } 8500 return false; 8501 } 8502 8503 //===----------------------------------------------------------------------===// 8504 // mAI 8505 //===----------------------------------------------------------------------===// 8506 8507 bool AMDGPUOperand::isBLGP() const { 8508 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8509 } 8510 8511 bool AMDGPUOperand::isCBSZ() const { 8512 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8513 } 8514 8515 bool AMDGPUOperand::isABID() const { 8516 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8517 } 8518 8519 bool AMDGPUOperand::isS16Imm() const { 8520 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8521 } 8522 8523 bool AMDGPUOperand::isU16Imm() const { 8524 return isImmLiteral() && isUInt<16>(getImm()); 8525 } 8526 8527 //===----------------------------------------------------------------------===// 8528 // dim 8529 //===----------------------------------------------------------------------===// 8530 8531 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8532 // We want to allow "dim:1D" etc., 8533 // 
but the initial 1 is tokenized as an integer. 8534 std::string Token; 8535 if (isToken(AsmToken::Integer)) { 8536 SMLoc Loc = getToken().getEndLoc(); 8537 Token = std::string(getTokenStr()); 8538 lex(); 8539 if (getLoc() != Loc) 8540 return false; 8541 } 8542 8543 StringRef Suffix; 8544 if (!parseId(Suffix)) 8545 return false; 8546 Token += Suffix; 8547 8548 StringRef DimId = Token; 8549 if (DimId.starts_with("SQ_RSRC_IMG_")) 8550 DimId = DimId.drop_front(12); 8551 8552 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8553 if (!DimInfo) 8554 return false; 8555 8556 Encoding = DimInfo->Encoding; 8557 return true; 8558 } 8559 8560 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8561 if (!isGFX10Plus()) 8562 return ParseStatus::NoMatch; 8563 8564 SMLoc S = getLoc(); 8565 8566 if (!trySkipId("dim", AsmToken::Colon)) 8567 return ParseStatus::NoMatch; 8568 8569 unsigned Encoding; 8570 SMLoc Loc = getLoc(); 8571 if (!parseDimId(Encoding)) 8572 return Error(Loc, "invalid dim value"); 8573 8574 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8575 AMDGPUOperand::ImmTyDim)); 8576 return ParseStatus::Success; 8577 } 8578 8579 //===----------------------------------------------------------------------===// 8580 // dpp 8581 //===----------------------------------------------------------------------===// 8582 8583 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8584 SMLoc S = getLoc(); 8585 8586 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8587 return ParseStatus::NoMatch; 8588 8589 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8590 8591 int64_t Sels[8]; 8592 8593 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8594 return ParseStatus::Failure; 8595 8596 for (size_t i = 0; i < 8; ++i) { 8597 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8598 return ParseStatus::Failure; 8599 8600 SMLoc Loc = getLoc(); 8601 if (getParser().parseAbsoluteExpression(Sels[i])) 8602 return ParseStatus::Failure; 8603 if (0 > Sels[i] || 7 < Sels[i]) 8604 return Error(Loc, "expected a 3-bit value"); 8605 } 8606 8607 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8608 return ParseStatus::Failure; 8609 8610 unsigned DPP8 = 0; 8611 for (size_t i = 0; i < 8; ++i) 8612 DPP8 |= (Sels[i] << (i * 3)); 8613 8614 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8615 return ParseStatus::Success; 8616 } 8617 8618 bool 8619 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8620 const OperandVector &Operands) { 8621 if (Ctrl == "row_newbcast") 8622 return isGFX90A(); 8623 8624 if (Ctrl == "row_share" || 8625 Ctrl == "row_xmask") 8626 return isGFX10Plus(); 8627 8628 if (Ctrl == "wave_shl" || 8629 Ctrl == "wave_shr" || 8630 Ctrl == "wave_rol" || 8631 Ctrl == "wave_ror" || 8632 Ctrl == "row_bcast") 8633 return isVI() || isGFX9(); 8634 8635 return Ctrl == "row_mirror" || 8636 Ctrl == "row_half_mirror" || 8637 Ctrl == "quad_perm" || 8638 Ctrl == "row_shl" || 8639 Ctrl == "row_shr" || 8640 Ctrl == "row_ror"; 8641 } 8642 8643 int64_t 8644 AMDGPUAsmParser::parseDPPCtrlPerm() { 8645 // quad_perm:[%d,%d,%d,%d] 8646 8647 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8648 return -1; 8649 8650 int64_t Val = 0; 8651 for (int i = 0; i < 4; ++i) { 8652 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8653 return -1; 8654 8655 int64_t Temp; 8656 SMLoc Loc = getLoc(); 8657 if (getParser().parseAbsoluteExpression(Temp)) 8658 return -1; 8659 if 
(Temp < 0 || Temp > 3) { 8660 Error(Loc, "expected a 2-bit value"); 8661 return -1; 8662 } 8663 8664 Val += (Temp << i * 2); 8665 } 8666 8667 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8668 return -1; 8669 8670 return Val; 8671 } 8672 8673 int64_t 8674 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8675 using namespace AMDGPU::DPP; 8676 8677 // sel:%d 8678 8679 int64_t Val; 8680 SMLoc Loc = getLoc(); 8681 8682 if (getParser().parseAbsoluteExpression(Val)) 8683 return -1; 8684 8685 struct DppCtrlCheck { 8686 int64_t Ctrl; 8687 int Lo; 8688 int Hi; 8689 }; 8690 8691 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8692 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8693 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8694 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8695 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8696 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8697 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8698 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8699 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8700 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8701 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8702 .Default({-1, 0, 0}); 8703 8704 bool Valid; 8705 if (Check.Ctrl == -1) { 8706 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8707 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8708 } else { 8709 Valid = Check.Lo <= Val && Val <= Check.Hi; 8710 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 8711 } 8712 8713 if (!Valid) { 8714 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8715 return -1; 8716 } 8717 8718 return Val; 8719 } 8720 8721 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8722 using namespace AMDGPU::DPP; 8723 8724 if (!isToken(AsmToken::Identifier) || 8725 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8726 return ParseStatus::NoMatch; 8727 8728 SMLoc S = getLoc(); 8729 int64_t Val = -1; 8730 StringRef Ctrl; 8731 8732 parseId(Ctrl); 8733 8734 if (Ctrl == "row_mirror") { 8735 Val = DppCtrl::ROW_MIRROR; 8736 } else if (Ctrl == "row_half_mirror") { 8737 Val = DppCtrl::ROW_HALF_MIRROR; 8738 } else { 8739 if (skipToken(AsmToken::Colon, "expected a colon")) { 8740 if (Ctrl == "quad_perm") { 8741 Val = parseDPPCtrlPerm(); 8742 } else { 8743 Val = parseDPPCtrlSel(Ctrl); 8744 } 8745 } 8746 } 8747 8748 if (Val == -1) 8749 return ParseStatus::Failure; 8750 8751 Operands.push_back( 8752 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8753 return ParseStatus::Success; 8754 } 8755 8756 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 8757 bool IsDPP8) { 8758 OptionalImmIndexMap OptionalIdx; 8759 unsigned Opc = Inst.getOpcode(); 8760 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8761 8762 // MAC instructions are special because they have 'old' 8763 // operand which is not tied to dst (but assumed to be). 8764 // They also have dummy unused src2_modifiers. 
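// Both are synthesized below: 'old' receives a copy of the dst register and
// the dummy src2_modifiers operand receives 0.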
8765 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old); 8766 int Src2ModIdx = 8767 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); 8768 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 && 8769 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1; 8770 8771 unsigned I = 1; 8772 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8773 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8774 } 8775 8776 int Fi = 0; 8777 for (unsigned E = Operands.size(); I != E; ++I) { 8778 8779 if (IsMAC) { 8780 int NumOperands = Inst.getNumOperands(); 8781 if (OldIdx == NumOperands) { 8782 // Handle old operand 8783 constexpr int DST_IDX = 0; 8784 Inst.addOperand(Inst.getOperand(DST_IDX)); 8785 } else if (Src2ModIdx == NumOperands) { 8786 // Add unused dummy src2_modifiers 8787 Inst.addOperand(MCOperand::createImm(0)); 8788 } 8789 } 8790 8791 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8792 MCOI::TIED_TO); 8793 if (TiedTo != -1) { 8794 assert((unsigned)TiedTo < Inst.getNumOperands()); 8795 // handle tied old or src2 for MAC instructions 8796 Inst.addOperand(Inst.getOperand(TiedTo)); 8797 } 8798 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8799 // Add the register arguments 8800 if (IsDPP8 && Op.isDppFI()) { 8801 Fi = Op.getImm(); 8802 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8803 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8804 } else if (Op.isReg()) { 8805 Op.addRegOperands(Inst, 1); 8806 } else if (Op.isImm() && 8807 Desc.operands()[Inst.getNumOperands()].RegClass != -1) { 8808 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8809 Op.addImmOperands(Inst, 1); 8810 } else if (Op.isImm()) { 8811 OptionalIdx[Op.getImmTy()] = I; 8812 } else { 8813 llvm_unreachable("unhandled operand type"); 8814 } 8815 } 8816 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8817 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8818 8819 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8820 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8821 8822 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8823 cvtVOP3P(Inst, Operands, OptionalIdx); 8824 else if (Desc.TSFlags & SIInstrFlags::VOP3) 8825 cvtVOP3OpSel(Inst, Operands, OptionalIdx); 8826 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { 8827 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8828 } 8829 8830 if (IsDPP8) { 8831 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8832 using namespace llvm::AMDGPU::DPP; 8833 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8834 } else { 8835 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8836 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8837 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8838 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8839 8840 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) 8841 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8842 AMDGPUOperand::ImmTyDppFI); 8843 } 8844 } 8845 8846 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8847 OptionalImmIndexMap OptionalIdx; 8848 8849 unsigned I = 1; 8850 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8851 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8852 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8853 } 8854 8855 int Fi = 0; 8856 for (unsigned E = Operands.size(); I != E; ++I) { 8857 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8858 MCOI::TIED_TO); 8859 if (TiedTo != -1) { 8860 assert((unsigned)TiedTo < Inst.getNumOperands()); 8861 // handle tied old or src2 for MAC instructions 8862 Inst.addOperand(Inst.getOperand(TiedTo)); 8863 } 8864 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8865 // Add the register arguments 8866 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8867 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8868 // Skip it. 8869 continue; 8870 } 8871 8872 if (IsDPP8) { 8873 if (Op.isDPP8()) { 8874 Op.addImmOperands(Inst, 1); 8875 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8876 Op.addRegWithFPInputModsOperands(Inst, 2); 8877 } else if (Op.isDppFI()) { 8878 Fi = Op.getImm(); 8879 } else if (Op.isReg()) { 8880 Op.addRegOperands(Inst, 1); 8881 } else { 8882 llvm_unreachable("Invalid operand type"); 8883 } 8884 } else { 8885 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8886 Op.addRegWithFPInputModsOperands(Inst, 2); 8887 } else if (Op.isReg()) { 8888 Op.addRegOperands(Inst, 1); 8889 } else if (Op.isDPPCtrl()) { 8890 Op.addImmOperands(Inst, 1); 8891 } else if (Op.isImm()) { 8892 // Handle optional arguments 8893 OptionalIdx[Op.getImmTy()] = I; 8894 } else { 8895 llvm_unreachable("Invalid operand type"); 8896 } 8897 } 8898 } 8899 8900 if (IsDPP8) { 8901 using namespace llvm::AMDGPU::DPP; 8902 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8903 } else { 8904 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8905 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8906 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8907 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) { 8908 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8909 AMDGPUOperand::ImmTyDppFI); 8910 } 8911 } 8912 } 8913 8914 //===----------------------------------------------------------------------===// 8915 // sdwa 8916 //===----------------------------------------------------------------------===// 8917 8918 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, 8919 StringRef Prefix, 8920 AMDGPUOperand::ImmTy Type) { 8921 using namespace llvm::AMDGPU::SDWA; 8922 8923 SMLoc S = getLoc(); 8924 StringRef Value; 8925 8926 SMLoc StringLoc; 8927 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc); 8928 if (!Res.isSuccess()) 8929 return Res; 8930 8931 int64_t Int; 8932 Int = StringSwitch<int64_t>(Value) 8933 .Case("BYTE_0", SdwaSel::BYTE_0) 8934 .Case("BYTE_1", SdwaSel::BYTE_1) 8935 .Case("BYTE_2", SdwaSel::BYTE_2) 8936 .Case("BYTE_3", SdwaSel::BYTE_3) 8937 .Case("WORD_0", SdwaSel::WORD_0) 8938 .Case("WORD_1", SdwaSel::WORD_1) 8939 .Case("DWORD", SdwaSel::DWORD) 8940 .Default(0xffffffff); 8941 8942 if (Int == 0xffffffff) 8943 return Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8944 8945 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8946 return ParseStatus::Success; 8947 } 8948 8949 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8950 using namespace llvm::AMDGPU::SDWA; 8951 8952 SMLoc S = getLoc(); 8953 StringRef Value; 8954 8955 SMLoc StringLoc; 8956 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8957 if (!Res.isSuccess()) 8958 return Res; 8959 8960 int64_t Int; 8961 Int = StringSwitch<int64_t>(Value) 8962 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8963 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8964 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8965 .Default(0xffffffff); 8966 8967 if (Int == 0xffffffff) 8968 return Error(StringLoc, "invalid dst_unused value"); 8969 8970 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused)); 8971 return ParseStatus::Success; 8972 } 8973 8974 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8975 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8976 } 8977 8978 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8979 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8980 } 8981 8982 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8983 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8984 } 8985 8986 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8987 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8988 } 8989 8990 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8991 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8992 } 8993 8994 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8995 uint64_t BasicInstType, 8996 bool SkipDstVcc, 8997 bool SkipSrcVcc) { 8998 using namespace llvm::AMDGPU::SDWA; 8999 9000 OptionalImmIndexMap OptionalIdx; 9001 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 9002 bool SkippedVcc = false; 9003 9004 
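// Add the def (dst) registers first, then convert the source operands,
// skipping the implicit VCC operands where required.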
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClampSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstUnused,
                              DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);

      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

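    // VOPC: only clamp (when present) and the source selects are appended;
    // there is no dst_sel or dst_unused operand to add here.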
    case SIInstrFlags::VOPC:
      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}:
  // the src2 register operand is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
                                                unsigned MCK) {
  switch (MCK) {
  case MCK_addr64:
    return parseTokenOp("addr64", Operands);
  case MCK_done:
    return parseTokenOp("done", Operands);
  case MCK_idxen:
    return parseTokenOp("idxen", Operands);
  case MCK_lds:
    return parseTokenOp("lds", Operands);
  case MCK_offen:
    return parseTokenOp("offen", Operands);
  case MCK_off:
    return parseTokenOp("off", Operands);
  case MCK_row_95_en:
    return parseTokenOp("row_en", Operands);
  case MCK_gds:
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  case MCK_tfe:
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  }
  return tryCustomParseOperand(Operands, MCK);
}

// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks whether we were given an immediate operand
  // but expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_tfe:
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be accepted
    // where 64-bit operands are expected. The following code enables it for
    // SReg_64 operands used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

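// Parses the optional immediate that may follow an endpgm mnemonic; it
// defaults to 0 when omitted and must fit in 16 bits.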
ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm))
    return Error(S, "expected a 16-bit value");

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

bool AMDGPUOperand::isWaitVAVDst() const {
  return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
}

bool AMDGPUOperand::isWaitVMVSrc() const {
  return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}

//===----------------------------------------------------------------------===//
// Split Barrier
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }