//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;
    bool Lit = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
  };

  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report location of mandatory operands only for VOPD,
  // when both OpX and OpY have a KImm and there are no other literals.
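  //
  // Illustrative example (editorial note; the specific opcodes are only
  // examples): in "v_add_f32 v0, 0x3e22f983, v1" the 32-bit constant is a
  // regular literal (ImmKindTyLiteral) the user could rewrite, whereas the K
  // operand of "v_madmk_f32 v0, v1, 0x41000000, v2" is a mandatory literal
  // (ImmKindTyMandatoryLiteral) encoded as a fixed part of the instruction.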
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyMandatoryLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }


  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcTB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }

  bool isVSrcTB16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16B16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }

  bool isVSrcTF16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16F16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;
  bool isWaitVAVDst() const;
  bool isWaitVMVSrc() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return std::bind(P, *this);
  }

  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    }
    // clang-format on
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
                         unsigned RegWidth, SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
                           OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);

  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);

  ParseStatus
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     std::function<bool(int64_t &)> ConvertResult = nullptr);

  ParseStatus parseOperandArrayWithPrefix(
      const char *Prefix, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);

  ParseStatus
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
  ParseStatus parseCPol(OperandVector &Operands);
  ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
  ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
  ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
                                    SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
                       bool HasLit = false);
  ParseStatus parseReg(OperandVector &Operands);
  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
                            bool HasLit = false);
  ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                           bool AllowImm = true);
  ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                            bool AllowImm = true);
  ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
  ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
  ParseStatus parseVReg32OrOff(OperandVector &Operands);
  ParseStatus tryParseIndexKey(OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy);
  ParseStatus parseIndexKey8bit(OperandVector &Operands);
  ParseStatus parseIndexKey16bit(OperandVector &Operands);

  ParseStatus parseDfmtNfmt(int64_t &Format);
  ParseStatus parseUfmt(int64_t &Format);
  ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
                                       int64_t &Format);
  ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
                                         int64_t &Format);
  ParseStatus parseFORMAT(OperandVector &Operands);
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  ParseStatus parseNumericFormat(int64_t &Format);
  ParseStatus parseFlatOffset(OperandVector &Operands);
  ParseStatus parseR128A16(OperandVector &Operands);
  ParseStatus parseBLGP(OperandVector &Operands);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  ParseStatus parseSWaitCnt(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  ParseStatus parseDepCtr(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  ParseStatus parseSDelayALU(OperandVector &Operands);

  ParseStatus parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands,
                  bool SearchMandatoryLiterals = false) const;
  SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;
  SMLoc getInstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateVOPDRegBankConstraints(const MCInst &Inst,
                                      const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateNeg(const MCInst &Inst, int OpName);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateDS(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
                              const unsigned CPol);
  bool validateExeczVcczOperands(const OperandVector &Operands);
  bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
  std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  StringRef getId() const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
string"); 1759 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1760 1761 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1762 AsmToken::TokenKind getTokenKind() const; 1763 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1764 bool parseExpr(OperandVector &Operands); 1765 StringRef getTokenStr() const; 1766 AsmToken peekToken(bool ShouldSkipSpace = true); 1767 AsmToken getToken() const; 1768 SMLoc getLoc() const; 1769 void lex(); 1770 1771 public: 1772 void onBeginOfFile() override; 1773 1774 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK); 1775 1776 ParseStatus parseExpTgt(OperandVector &Operands); 1777 ParseStatus parseSendMsg(OperandVector &Operands); 1778 ParseStatus parseInterpSlot(OperandVector &Operands); 1779 ParseStatus parseInterpAttr(OperandVector &Operands); 1780 ParseStatus parseSOPPBrTarget(OperandVector &Operands); 1781 ParseStatus parseBoolReg(OperandVector &Operands); 1782 1783 bool parseSwizzleOperand(int64_t &Op, 1784 const unsigned MinVal, 1785 const unsigned MaxVal, 1786 const StringRef ErrMsg, 1787 SMLoc &Loc); 1788 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1789 const unsigned MinVal, 1790 const unsigned MaxVal, 1791 const StringRef ErrMsg); 1792 ParseStatus parseSwizzle(OperandVector &Operands); 1793 bool parseSwizzleOffset(int64_t &Imm); 1794 bool parseSwizzleMacro(int64_t &Imm); 1795 bool parseSwizzleQuadPerm(int64_t &Imm); 1796 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1797 bool parseSwizzleBroadcast(int64_t &Imm); 1798 bool parseSwizzleSwap(int64_t &Imm); 1799 bool parseSwizzleReverse(int64_t &Imm); 1800 1801 ParseStatus parseGPRIdxMode(OperandVector &Operands); 1802 int64_t parseGPRIdxMacro(); 1803 1804 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1805 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1806 1807 ParseStatus parseOModSI(OperandVector &Operands); 1808 1809 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1810 OptionalImmIndexMap &OptionalIdx); 1811 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1812 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1813 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1814 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands); 1815 1816 void cvtVOPD(MCInst &Inst, const OperandVector &Operands); 1817 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 1818 OptionalImmIndexMap &OptionalIdx); 1819 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1820 OptionalImmIndexMap &OptionalIdx); 1821 1822 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1823 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1824 1825 bool parseDimId(unsigned &Encoding); 1826 ParseStatus parseDim(OperandVector &Operands); 1827 bool convertDppBoundCtrl(int64_t &BoundCtrl); 1828 ParseStatus parseDPP8(OperandVector &Operands); 1829 ParseStatus parseDPPCtrl(OperandVector &Operands); 1830 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1831 int64_t parseDPPCtrlSel(StringRef Ctrl); 1832 int64_t parseDPPCtrlPerm(); 1833 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1834 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1835 cvtDPP(Inst, Operands, true); 1836 } 1837 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1838 bool IsDPP8 = false); 1839 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 
1840 cvtVOP3DPP(Inst, Operands, true); 1841 } 1842 1843 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix, 1844 AMDGPUOperand::ImmTy Type); 1845 ParseStatus parseSDWADstUnused(OperandVector &Operands); 1846 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1847 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1848 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1849 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1850 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1851 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1852 uint64_t BasicInstType, 1853 bool SkipDstVcc = false, 1854 bool SkipSrcVcc = false); 1855 1856 ParseStatus parseEndpgm(OperandVector &Operands); 1857 1858 ParseStatus parseVOPD(OperandVector &Operands); 1859 }; 1860 1861 } // end anonymous namespace 1862 1863 // May be called with integer type with equivalent bitwidth. 1864 static const fltSemantics *getFltSemantics(unsigned Size) { 1865 switch (Size) { 1866 case 4: 1867 return &APFloat::IEEEsingle(); 1868 case 8: 1869 return &APFloat::IEEEdouble(); 1870 case 2: 1871 return &APFloat::IEEEhalf(); 1872 default: 1873 llvm_unreachable("unsupported fp type"); 1874 } 1875 } 1876 1877 static const fltSemantics *getFltSemantics(MVT VT) { 1878 return getFltSemantics(VT.getSizeInBits() / 8); 1879 } 1880 1881 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1882 switch (OperandType) { 1883 case AMDGPU::OPERAND_REG_IMM_INT32: 1884 case AMDGPU::OPERAND_REG_IMM_FP32: 1885 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1886 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1887 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1888 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1889 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1890 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1891 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1892 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1893 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1894 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1895 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1896 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1897 case AMDGPU::OPERAND_KIMM32: 1898 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 1899 return &APFloat::IEEEsingle(); 1900 case AMDGPU::OPERAND_REG_IMM_INT64: 1901 case AMDGPU::OPERAND_REG_IMM_FP64: 1902 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1903 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1904 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1905 return &APFloat::IEEEdouble(); 1906 case AMDGPU::OPERAND_REG_IMM_INT16: 1907 case AMDGPU::OPERAND_REG_IMM_FP16: 1908 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1909 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1910 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1911 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1912 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1913 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1914 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1915 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1916 case AMDGPU::OPERAND_KIMM16: 1917 return &APFloat::IEEEhalf(); 1918 default: 1919 llvm_unreachable("unsupported fp type"); 1920 } 1921 } 1922 1923 //===----------------------------------------------------------------------===// 1924 // Operand 1925 //===----------------------------------------------------------------------===// 1926 1927 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1928 bool Lost; 1929 1930 // Convert literal to single precision 1931 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1932 APFloat::rmNearestTiesToEven, 1933 &Lost); 1934 
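// Illustrative examples (not exhaustive): converting the f64 literal 0.1 to
// f16 is merely inexact and is still accepted by the check below, whereas
// 1.0e10 overflows the f16 range and is rejected.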
// We allow precision lost but not overflow or underflow 1935 if (Status != APFloat::opOK && 1936 Lost && 1937 ((Status & APFloat::opOverflow) != 0 || 1938 (Status & APFloat::opUnderflow) != 0)) { 1939 return false; 1940 } 1941 1942 return true; 1943 } 1944 1945 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1946 return isUIntN(Size, Val) || isIntN(Size, Val); 1947 } 1948 1949 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1950 if (VT.getScalarType() == MVT::i16) { 1951 // FP immediate values are broken. 1952 return isInlinableIntLiteral(Val); 1953 } 1954 1955 // f16/v2f16 operands work correctly for all values. 1956 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1957 } 1958 1959 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1960 1961 // This is a hack to enable named inline values like 1962 // shared_base with both 32-bit and 64-bit operands. 1963 // Note that these values are defined as 1964 // 32-bit operands only. 1965 if (isInlineValue()) { 1966 return true; 1967 } 1968 1969 if (!isImmTy(ImmTyNone)) { 1970 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1971 return false; 1972 } 1973 // TODO: We should avoid using host float here. It would be better to 1974 // check the float bit values which is what a few other places do. 1975 // We've had bot failures before due to weird NaN support on mips hosts. 1976 1977 APInt Literal(64, Imm.Val); 1978 1979 if (Imm.IsFPImm) { // We got fp literal token 1980 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1981 return AMDGPU::isInlinableLiteral64(Imm.Val, 1982 AsmParser->hasInv2PiInlineImm()); 1983 } 1984 1985 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1986 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1987 return false; 1988 1989 if (type.getScalarSizeInBits() == 16) { 1990 return isInlineableLiteralOp16( 1991 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1992 type, AsmParser->hasInv2PiInlineImm()); 1993 } 1994 1995 // Check if single precision literal is inlinable 1996 return AMDGPU::isInlinableLiteral32( 1997 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1998 AsmParser->hasInv2PiInlineImm()); 1999 } 2000 2001 // We got int literal token. 2002 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 2003 return AMDGPU::isInlinableLiteral64(Imm.Val, 2004 AsmParser->hasInv2PiInlineImm()); 2005 } 2006 2007 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 2008 return false; 2009 } 2010 2011 if (type.getScalarSizeInBits() == 16) { 2012 return isInlineableLiteralOp16( 2013 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 2014 type, AsmParser->hasInv2PiInlineImm()); 2015 } 2016 2017 return AMDGPU::isInlinableLiteral32( 2018 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 2019 AsmParser->hasInv2PiInlineImm()); 2020 } 2021 2022 bool AMDGPUOperand::isLiteralImm(MVT type) const { 2023 // Check that this immediate can be added as literal 2024 if (!isImmTy(ImmTyNone)) { 2025 return false; 2026 } 2027 2028 if (!Imm.IsFPImm) { 2029 // We got int literal token. 2030 2031 if (type == MVT::f64 && hasFPModifiers()) { 2032 // Cannot apply fp modifiers to int literals preserving the same semantics 2033 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 2034 // disable these cases. 
2035 return false;
2036 }
2037
2038 unsigned Size = type.getSizeInBits();
2039 if (Size == 64)
2040 Size = 32;
2041
2042 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2043 // types.
2044 return isSafeTruncation(Imm.Val, Size);
2045 }
2046
2047 // We got fp literal token
2048 if (type == MVT::f64) { // Expected 64-bit fp operand
2049 // We would set the low 32 bits of the literal to zeroes, but we accept such literals
2050 return true;
2051 }
2052
2053 if (type == MVT::i64) { // Expected 64-bit int operand
2054 // We don't allow fp literals in 64-bit integer instructions. It is
2055 // unclear how we should encode them.
2056 return false;
2057 }
2058
2059 // We allow fp literals with f16x2 operands assuming that the specified
2060 // literal goes into the lower half and the upper half is zero. We also
2061 // require that the literal may be losslessly converted to f16.
2062 //
2063 // For i16x2 operands, we assume that the specified literal is encoded as a
2064 // single-precision float. This is pretty odd, but it matches SP3 and what
2065 // happens in hardware.
2066 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2067 : (type == MVT::v2i16) ? MVT::f32
2068 : (type == MVT::v2f32) ? MVT::f32
2069 : type;
2070
2071 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2072 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2073 }
2074
2075 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2076 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2077 }
2078
2079 bool AMDGPUOperand::isVRegWithInputMods() const {
2080 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2081 // GFX90A allows DPP on 64-bit operands.
2082 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2083 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2084 }
2085
2086 template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2087 return isRegClass(IsFake16 ?
AMDGPU::VGPR_32_Lo128RegClassID 2088 : AMDGPU::VGPR_16_Lo128RegClassID); 2089 } 2090 2091 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2092 if (AsmParser->isVI()) 2093 return isVReg32(); 2094 else if (AsmParser->isGFX9Plus()) 2095 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2096 else 2097 return false; 2098 } 2099 2100 bool AMDGPUOperand::isSDWAFP16Operand() const { 2101 return isSDWAOperand(MVT::f16); 2102 } 2103 2104 bool AMDGPUOperand::isSDWAFP32Operand() const { 2105 return isSDWAOperand(MVT::f32); 2106 } 2107 2108 bool AMDGPUOperand::isSDWAInt16Operand() const { 2109 return isSDWAOperand(MVT::i16); 2110 } 2111 2112 bool AMDGPUOperand::isSDWAInt32Operand() const { 2113 return isSDWAOperand(MVT::i32); 2114 } 2115 2116 bool AMDGPUOperand::isBoolReg() const { 2117 auto FB = AsmParser->getFeatureBits(); 2118 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2119 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2120 } 2121 2122 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2123 { 2124 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2125 assert(Size == 2 || Size == 4 || Size == 8); 2126 2127 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2128 2129 if (Imm.Mods.Abs) { 2130 Val &= ~FpSignMask; 2131 } 2132 if (Imm.Mods.Neg) { 2133 Val ^= FpSignMask; 2134 } 2135 2136 return Val; 2137 } 2138 2139 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2140 if (isExpr()) { 2141 Inst.addOperand(MCOperand::createExpr(Expr)); 2142 return; 2143 } 2144 2145 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2146 Inst.getNumOperands())) { 2147 addLiteralImmOperand(Inst, Imm.Val, 2148 ApplyModifiers & 2149 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2150 } else { 2151 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2152 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2153 setImmKindNone(); 2154 } 2155 } 2156 2157 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2158 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2159 auto OpNum = Inst.getNumOperands(); 2160 // Check that this operand accepts literals 2161 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2162 2163 if (ApplyModifiers) { 2164 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2165 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 2166 Val = applyInputFPModifiers(Val, Size); 2167 } 2168 2169 APInt Literal(64, Val); 2170 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType; 2171 2172 if (Imm.IsFPImm) { // We got fp literal token 2173 switch (OpTy) { 2174 case AMDGPU::OPERAND_REG_IMM_INT64: 2175 case AMDGPU::OPERAND_REG_IMM_FP64: 2176 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2177 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2178 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2179 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2180 AsmParser->hasInv2PiInlineImm())) { 2181 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2182 setImmKindConst(); 2183 return; 2184 } 2185 2186 // Non-inlineable 2187 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2188 // For fp operands we check if low 32 bits are zeros 2189 if (Literal.getLoBits(32) != 0) { 2190 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2191 "Can't encode literal as exact 64-bit floating-point operand. 
" 2192 "Low 32-bits will be set to zero"); 2193 Val &= 0xffffffff00000000u; 2194 } 2195 2196 Inst.addOperand(MCOperand::createImm(Val)); 2197 setImmKindLiteral(); 2198 return; 2199 } 2200 2201 // We don't allow fp literals in 64-bit integer instructions. It is 2202 // unclear how we should encode them. This case should be checked earlier 2203 // in predicate methods (isLiteralImm()) 2204 llvm_unreachable("fp literal in 64-bit integer instruction."); 2205 2206 case AMDGPU::OPERAND_REG_IMM_INT32: 2207 case AMDGPU::OPERAND_REG_IMM_FP32: 2208 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2209 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2210 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2211 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2212 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2213 case AMDGPU::OPERAND_REG_IMM_INT16: 2214 case AMDGPU::OPERAND_REG_IMM_FP16: 2215 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2216 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2217 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2218 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2219 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2220 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2221 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2222 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2223 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2224 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2225 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2226 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2227 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2228 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2229 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2230 case AMDGPU::OPERAND_KIMM32: 2231 case AMDGPU::OPERAND_KIMM16: 2232 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { 2233 bool lost; 2234 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2235 // Convert literal to single precision 2236 FPLiteral.convert(*getOpFltSemantics(OpTy), 2237 APFloat::rmNearestTiesToEven, &lost); 2238 // We allow precision lost but not overflow or underflow. This should be 2239 // checked earlier in isLiteralImm() 2240 2241 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2242 Inst.addOperand(MCOperand::createImm(ImmVal)); 2243 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) { 2244 setImmKindMandatoryLiteral(); 2245 } else { 2246 setImmKindLiteral(); 2247 } 2248 return; 2249 } 2250 default: 2251 llvm_unreachable("invalid operand size"); 2252 } 2253 2254 return; 2255 } 2256 2257 // We got int literal token. 2258 // Only sign extend inline immediates. 
2259 switch (OpTy) { 2260 case AMDGPU::OPERAND_REG_IMM_INT32: 2261 case AMDGPU::OPERAND_REG_IMM_FP32: 2262 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2263 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2264 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2265 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2266 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2267 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2268 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2269 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2270 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2271 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2272 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2273 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 2274 if (isSafeTruncation(Val, 32) && 2275 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2276 AsmParser->hasInv2PiInlineImm())) { 2277 Inst.addOperand(MCOperand::createImm(Val)); 2278 setImmKindConst(); 2279 return; 2280 } 2281 2282 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2283 setImmKindLiteral(); 2284 return; 2285 2286 case AMDGPU::OPERAND_REG_IMM_INT64: 2287 case AMDGPU::OPERAND_REG_IMM_FP64: 2288 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2289 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2290 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2291 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2292 Inst.addOperand(MCOperand::createImm(Val)); 2293 setImmKindConst(); 2294 return; 2295 } 2296 2297 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32 2298 : Lo_32(Val); 2299 2300 Inst.addOperand(MCOperand::createImm(Val)); 2301 setImmKindLiteral(); 2302 return; 2303 2304 case AMDGPU::OPERAND_REG_IMM_INT16: 2305 case AMDGPU::OPERAND_REG_IMM_FP16: 2306 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2307 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2308 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2309 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2310 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2311 if (isSafeTruncation(Val, 16) && 2312 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2313 AsmParser->hasInv2PiInlineImm())) { 2314 Inst.addOperand(MCOperand::createImm(Val)); 2315 setImmKindConst(); 2316 return; 2317 } 2318 2319 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2320 setImmKindLiteral(); 2321 return; 2322 2323 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2324 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2325 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2326 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2327 assert(isSafeTruncation(Val, 16)); 2328 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2329 AsmParser->hasInv2PiInlineImm())); 2330 2331 Inst.addOperand(MCOperand::createImm(Val)); 2332 return; 2333 } 2334 case AMDGPU::OPERAND_KIMM32: 2335 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2336 setImmKindMandatoryLiteral(); 2337 return; 2338 case AMDGPU::OPERAND_KIMM16: 2339 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2340 setImmKindMandatoryLiteral(); 2341 return; 2342 default: 2343 llvm_unreachable("invalid operand size"); 2344 } 2345 } 2346 2347 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2348 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2349 } 2350 2351 bool AMDGPUOperand::isInlineValue() const { 2352 return isRegKind() && ::isInlineValue(getReg()); 2353 } 2354 2355 //===----------------------------------------------------------------------===// 2356 // AsmParser 2357 //===----------------------------------------------------------------------===// 2358 
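// The helper below maps a register kind and a width in bits to a register
// class. For example (illustrative): getRegClass(IS_VGPR, 64) yields
// AMDGPU::VReg_64RegClassID and getRegClass(IS_SGPR, 96) yields
// AMDGPU::SGPR_96RegClassID; widths with no matching class yield -1.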
2359 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2360 if (Is == IS_VGPR) { 2361 switch (RegWidth) { 2362 default: return -1; 2363 case 32: 2364 return AMDGPU::VGPR_32RegClassID; 2365 case 64: 2366 return AMDGPU::VReg_64RegClassID; 2367 case 96: 2368 return AMDGPU::VReg_96RegClassID; 2369 case 128: 2370 return AMDGPU::VReg_128RegClassID; 2371 case 160: 2372 return AMDGPU::VReg_160RegClassID; 2373 case 192: 2374 return AMDGPU::VReg_192RegClassID; 2375 case 224: 2376 return AMDGPU::VReg_224RegClassID; 2377 case 256: 2378 return AMDGPU::VReg_256RegClassID; 2379 case 288: 2380 return AMDGPU::VReg_288RegClassID; 2381 case 320: 2382 return AMDGPU::VReg_320RegClassID; 2383 case 352: 2384 return AMDGPU::VReg_352RegClassID; 2385 case 384: 2386 return AMDGPU::VReg_384RegClassID; 2387 case 512: 2388 return AMDGPU::VReg_512RegClassID; 2389 case 1024: 2390 return AMDGPU::VReg_1024RegClassID; 2391 } 2392 } else if (Is == IS_TTMP) { 2393 switch (RegWidth) { 2394 default: return -1; 2395 case 32: 2396 return AMDGPU::TTMP_32RegClassID; 2397 case 64: 2398 return AMDGPU::TTMP_64RegClassID; 2399 case 128: 2400 return AMDGPU::TTMP_128RegClassID; 2401 case 256: 2402 return AMDGPU::TTMP_256RegClassID; 2403 case 512: 2404 return AMDGPU::TTMP_512RegClassID; 2405 } 2406 } else if (Is == IS_SGPR) { 2407 switch (RegWidth) { 2408 default: return -1; 2409 case 32: 2410 return AMDGPU::SGPR_32RegClassID; 2411 case 64: 2412 return AMDGPU::SGPR_64RegClassID; 2413 case 96: 2414 return AMDGPU::SGPR_96RegClassID; 2415 case 128: 2416 return AMDGPU::SGPR_128RegClassID; 2417 case 160: 2418 return AMDGPU::SGPR_160RegClassID; 2419 case 192: 2420 return AMDGPU::SGPR_192RegClassID; 2421 case 224: 2422 return AMDGPU::SGPR_224RegClassID; 2423 case 256: 2424 return AMDGPU::SGPR_256RegClassID; 2425 case 288: 2426 return AMDGPU::SGPR_288RegClassID; 2427 case 320: 2428 return AMDGPU::SGPR_320RegClassID; 2429 case 352: 2430 return AMDGPU::SGPR_352RegClassID; 2431 case 384: 2432 return AMDGPU::SGPR_384RegClassID; 2433 case 512: 2434 return AMDGPU::SGPR_512RegClassID; 2435 } 2436 } else if (Is == IS_AGPR) { 2437 switch (RegWidth) { 2438 default: return -1; 2439 case 32: 2440 return AMDGPU::AGPR_32RegClassID; 2441 case 64: 2442 return AMDGPU::AReg_64RegClassID; 2443 case 96: 2444 return AMDGPU::AReg_96RegClassID; 2445 case 128: 2446 return AMDGPU::AReg_128RegClassID; 2447 case 160: 2448 return AMDGPU::AReg_160RegClassID; 2449 case 192: 2450 return AMDGPU::AReg_192RegClassID; 2451 case 224: 2452 return AMDGPU::AReg_224RegClassID; 2453 case 256: 2454 return AMDGPU::AReg_256RegClassID; 2455 case 288: 2456 return AMDGPU::AReg_288RegClassID; 2457 case 320: 2458 return AMDGPU::AReg_320RegClassID; 2459 case 352: 2460 return AMDGPU::AReg_352RegClassID; 2461 case 384: 2462 return AMDGPU::AReg_384RegClassID; 2463 case 512: 2464 return AMDGPU::AReg_512RegClassID; 2465 case 1024: 2466 return AMDGPU::AReg_1024RegClassID; 2467 } 2468 } 2469 return -1; 2470 } 2471 2472 static unsigned getSpecialRegForName(StringRef RegName) { 2473 return StringSwitch<unsigned>(RegName) 2474 .Case("exec", AMDGPU::EXEC) 2475 .Case("vcc", AMDGPU::VCC) 2476 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2477 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2478 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2479 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2480 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2481 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2482 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2483 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 
2484 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2485 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2486 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2487 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2488 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2489 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2490 .Case("m0", AMDGPU::M0) 2491 .Case("vccz", AMDGPU::SRC_VCCZ) 2492 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2493 .Case("execz", AMDGPU::SRC_EXECZ) 2494 .Case("src_execz", AMDGPU::SRC_EXECZ) 2495 .Case("scc", AMDGPU::SRC_SCC) 2496 .Case("src_scc", AMDGPU::SRC_SCC) 2497 .Case("tba", AMDGPU::TBA) 2498 .Case("tma", AMDGPU::TMA) 2499 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2500 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2501 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2502 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2503 .Case("vcc_lo", AMDGPU::VCC_LO) 2504 .Case("vcc_hi", AMDGPU::VCC_HI) 2505 .Case("exec_lo", AMDGPU::EXEC_LO) 2506 .Case("exec_hi", AMDGPU::EXEC_HI) 2507 .Case("tma_lo", AMDGPU::TMA_LO) 2508 .Case("tma_hi", AMDGPU::TMA_HI) 2509 .Case("tba_lo", AMDGPU::TBA_LO) 2510 .Case("tba_hi", AMDGPU::TBA_HI) 2511 .Case("pc", AMDGPU::PC_REG) 2512 .Case("null", AMDGPU::SGPR_NULL) 2513 .Default(AMDGPU::NoRegister); 2514 } 2515 2516 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, 2517 SMLoc &EndLoc, bool RestoreOnFailure) { 2518 auto R = parseRegister(); 2519 if (!R) return true; 2520 assert(R->isReg()); 2521 RegNo = R->getReg(); 2522 StartLoc = R->getStartLoc(); 2523 EndLoc = R->getEndLoc(); 2524 return false; 2525 } 2526 2527 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, 2528 SMLoc &EndLoc) { 2529 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2530 } 2531 2532 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, 2533 SMLoc &EndLoc) { 2534 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2535 bool PendingErrors = getParser().hasPendingError(); 2536 getParser().clearPendingErrors(); 2537 if (PendingErrors) 2538 return ParseStatus::Failure; 2539 if (Result) 2540 return ParseStatus::NoMatch; 2541 return ParseStatus::Success; 2542 } 2543 2544 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2545 RegisterKind RegKind, unsigned Reg1, 2546 SMLoc Loc) { 2547 switch (RegKind) { 2548 case IS_SPECIAL: 2549 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2550 Reg = AMDGPU::EXEC; 2551 RegWidth = 64; 2552 return true; 2553 } 2554 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2555 Reg = AMDGPU::FLAT_SCR; 2556 RegWidth = 64; 2557 return true; 2558 } 2559 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2560 Reg = AMDGPU::XNACK_MASK; 2561 RegWidth = 64; 2562 return true; 2563 } 2564 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2565 Reg = AMDGPU::VCC; 2566 RegWidth = 64; 2567 return true; 2568 } 2569 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2570 Reg = AMDGPU::TBA; 2571 RegWidth = 64; 2572 return true; 2573 } 2574 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2575 Reg = AMDGPU::TMA; 2576 RegWidth = 64; 2577 return true; 2578 } 2579 Error(Loc, "register does not fit in the list"); 2580 return false; 2581 case IS_VGPR: 2582 case IS_SGPR: 2583 case IS_AGPR: 2584 case IS_TTMP: 2585 if (Reg1 != Reg + RegWidth / 32) { 2586 Error(Loc, "registers in a list must have consecutive indices"); 2587 return false; 2588 } 2589 RegWidth 
+= 32; 2590 return true; 2591 default: 2592 llvm_unreachable("unexpected register kind"); 2593 } 2594 } 2595 2596 struct RegInfo { 2597 StringLiteral Name; 2598 RegisterKind Kind; 2599 }; 2600 2601 static constexpr RegInfo RegularRegisters[] = { 2602 {{"v"}, IS_VGPR}, 2603 {{"s"}, IS_SGPR}, 2604 {{"ttmp"}, IS_TTMP}, 2605 {{"acc"}, IS_AGPR}, 2606 {{"a"}, IS_AGPR}, 2607 }; 2608 2609 static bool isRegularReg(RegisterKind Kind) { 2610 return Kind == IS_VGPR || 2611 Kind == IS_SGPR || 2612 Kind == IS_TTMP || 2613 Kind == IS_AGPR; 2614 } 2615 2616 static const RegInfo* getRegularRegInfo(StringRef Str) { 2617 for (const RegInfo &Reg : RegularRegisters) 2618 if (Str.starts_with(Reg.Name)) 2619 return &Reg; 2620 return nullptr; 2621 } 2622 2623 static bool getRegNum(StringRef Str, unsigned& Num) { 2624 return !Str.getAsInteger(10, Num); 2625 } 2626 2627 bool 2628 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2629 const AsmToken &NextToken) const { 2630 2631 // A list of consecutive registers: [s0,s1,s2,s3] 2632 if (Token.is(AsmToken::LBrac)) 2633 return true; 2634 2635 if (!Token.is(AsmToken::Identifier)) 2636 return false; 2637 2638 // A single register like s0 or a range of registers like s[0:1] 2639 2640 StringRef Str = Token.getString(); 2641 const RegInfo *Reg = getRegularRegInfo(Str); 2642 if (Reg) { 2643 StringRef RegName = Reg->Name; 2644 StringRef RegSuffix = Str.substr(RegName.size()); 2645 if (!RegSuffix.empty()) { 2646 RegSuffix.consume_back(".l"); 2647 RegSuffix.consume_back(".h"); 2648 unsigned Num; 2649 // A single register with an index: rXX 2650 if (getRegNum(RegSuffix, Num)) 2651 return true; 2652 } else { 2653 // A range of registers: r[XX:YY]. 2654 if (NextToken.is(AsmToken::LBrac)) 2655 return true; 2656 } 2657 } 2658 2659 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2660 } 2661 2662 bool 2663 AMDGPUAsmParser::isRegister() 2664 { 2665 return isRegister(getToken(), peekToken()); 2666 } 2667 2668 unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum, 2669 unsigned SubReg, unsigned RegWidth, 2670 SMLoc Loc) { 2671 assert(isRegularReg(RegKind)); 2672 2673 unsigned AlignSize = 1; 2674 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2675 // SGPR and TTMP registers must be aligned. 2676 // Max required alignment is 4 dwords. 2677 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u); 2678 } 2679 2680 if (RegNum % AlignSize != 0) { 2681 Error(Loc, "invalid register alignment"); 2682 return AMDGPU::NoRegister; 2683 } 2684 2685 unsigned RegIdx = RegNum / AlignSize; 2686 int RCID = getRegClass(RegKind, RegWidth); 2687 if (RCID == -1) { 2688 Error(Loc, "invalid or unsupported register size"); 2689 return AMDGPU::NoRegister; 2690 } 2691 2692 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2693 const MCRegisterClass RC = TRI->getRegClass(RCID); 2694 if (RegIdx >= RC.getNumRegs()) { 2695 Error(Loc, "register index is out of range"); 2696 return AMDGPU::NoRegister; 2697 } 2698 2699 unsigned Reg = RC.getRegister(RegIdx); 2700 2701 if (SubReg) { 2702 Reg = TRI->getSubReg(Reg, SubReg); 2703 2704 // Currently all regular registers have their .l and .h subregisters, so 2705 // we should never need to generate an error here. 
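// For example, "v5.l" refers to the lo16 subregister of v5 and "v5.h" to its
// hi16 subregister.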
2706 assert(Reg && "Invalid subregister!"); 2707 } 2708 2709 return Reg; 2710 } 2711 2712 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2713 int64_t RegLo, RegHi; 2714 if (!skipToken(AsmToken::LBrac, "missing register index")) 2715 return false; 2716 2717 SMLoc FirstIdxLoc = getLoc(); 2718 SMLoc SecondIdxLoc; 2719 2720 if (!parseExpr(RegLo)) 2721 return false; 2722 2723 if (trySkipToken(AsmToken::Colon)) { 2724 SecondIdxLoc = getLoc(); 2725 if (!parseExpr(RegHi)) 2726 return false; 2727 } else { 2728 RegHi = RegLo; 2729 } 2730 2731 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2732 return false; 2733 2734 if (!isUInt<32>(RegLo)) { 2735 Error(FirstIdxLoc, "invalid register index"); 2736 return false; 2737 } 2738 2739 if (!isUInt<32>(RegHi)) { 2740 Error(SecondIdxLoc, "invalid register index"); 2741 return false; 2742 } 2743 2744 if (RegLo > RegHi) { 2745 Error(FirstIdxLoc, "first register index should not exceed second index"); 2746 return false; 2747 } 2748 2749 Num = static_cast<unsigned>(RegLo); 2750 RegWidth = 32 * ((RegHi - RegLo) + 1); 2751 return true; 2752 } 2753 2754 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2755 unsigned &RegNum, unsigned &RegWidth, 2756 SmallVectorImpl<AsmToken> &Tokens) { 2757 assert(isToken(AsmToken::Identifier)); 2758 unsigned Reg = getSpecialRegForName(getTokenStr()); 2759 if (Reg) { 2760 RegNum = 0; 2761 RegWidth = 32; 2762 RegKind = IS_SPECIAL; 2763 Tokens.push_back(getToken()); 2764 lex(); // skip register name 2765 } 2766 return Reg; 2767 } 2768 2769 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2770 unsigned &RegNum, unsigned &RegWidth, 2771 SmallVectorImpl<AsmToken> &Tokens) { 2772 assert(isToken(AsmToken::Identifier)); 2773 StringRef RegName = getTokenStr(); 2774 auto Loc = getLoc(); 2775 2776 const RegInfo *RI = getRegularRegInfo(RegName); 2777 if (!RI) { 2778 Error(Loc, "invalid register name"); 2779 return AMDGPU::NoRegister; 2780 } 2781 2782 Tokens.push_back(getToken()); 2783 lex(); // skip register name 2784 2785 RegKind = RI->Kind; 2786 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2787 unsigned SubReg = NoSubRegister; 2788 if (!RegSuffix.empty()) { 2789 // We don't know the opcode till we are done parsing, so we don't know if 2790 // registers should be 16 or 32 bit. It is therefore mandatory to put .l or 2791 // .h to correctly specify 16 bit registers. We also can't determine class 2792 // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16. 2793 if (RegSuffix.consume_back(".l")) 2794 SubReg = AMDGPU::lo16; 2795 else if (RegSuffix.consume_back(".h")) 2796 SubReg = AMDGPU::hi16; 2797 2798 // Single 32-bit register: vXX. 2799 if (!getRegNum(RegSuffix, RegNum)) { 2800 Error(Loc, "invalid register index"); 2801 return AMDGPU::NoRegister; 2802 } 2803 RegWidth = 32; 2804 } else { 2805 // Range of registers: v[XX:YY]. ":YY" is optional. 
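// For example, "v[4:7]" denotes four consecutive VGPRs starting at v4
// (RegWidth = 128), and "v[4]" is equivalent to plain "v4".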
2806 if (!ParseRegRange(RegNum, RegWidth)) 2807 return AMDGPU::NoRegister; 2808 } 2809 2810 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc); 2811 } 2812 2813 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2814 unsigned &RegWidth, 2815 SmallVectorImpl<AsmToken> &Tokens) { 2816 unsigned Reg = AMDGPU::NoRegister; 2817 auto ListLoc = getLoc(); 2818 2819 if (!skipToken(AsmToken::LBrac, 2820 "expected a register or a list of registers")) { 2821 return AMDGPU::NoRegister; 2822 } 2823 2824 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2825 2826 auto Loc = getLoc(); 2827 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2828 return AMDGPU::NoRegister; 2829 if (RegWidth != 32) { 2830 Error(Loc, "expected a single 32-bit register"); 2831 return AMDGPU::NoRegister; 2832 } 2833 2834 for (; trySkipToken(AsmToken::Comma); ) { 2835 RegisterKind NextRegKind; 2836 unsigned NextReg, NextRegNum, NextRegWidth; 2837 Loc = getLoc(); 2838 2839 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2840 NextRegNum, NextRegWidth, 2841 Tokens)) { 2842 return AMDGPU::NoRegister; 2843 } 2844 if (NextRegWidth != 32) { 2845 Error(Loc, "expected a single 32-bit register"); 2846 return AMDGPU::NoRegister; 2847 } 2848 if (NextRegKind != RegKind) { 2849 Error(Loc, "registers in a list must be of the same kind"); 2850 return AMDGPU::NoRegister; 2851 } 2852 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2853 return AMDGPU::NoRegister; 2854 } 2855 2856 if (!skipToken(AsmToken::RBrac, 2857 "expected a comma or a closing square bracket")) { 2858 return AMDGPU::NoRegister; 2859 } 2860 2861 if (isRegularReg(RegKind)) 2862 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc); 2863 2864 return Reg; 2865 } 2866 2867 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2868 unsigned &RegNum, unsigned &RegWidth, 2869 SmallVectorImpl<AsmToken> &Tokens) { 2870 auto Loc = getLoc(); 2871 Reg = AMDGPU::NoRegister; 2872 2873 if (isToken(AsmToken::Identifier)) { 2874 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2875 if (Reg == AMDGPU::NoRegister) 2876 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2877 } else { 2878 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2879 } 2880 2881 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2882 if (Reg == AMDGPU::NoRegister) { 2883 assert(Parser.hasPendingError()); 2884 return false; 2885 } 2886 2887 if (!subtargetHasRegister(*TRI, Reg)) { 2888 if (Reg == AMDGPU::SGPR_NULL) { 2889 Error(Loc, "'null' operand is not supported on this GPU"); 2890 } else { 2891 Error(Loc, "register not available on this GPU"); 2892 } 2893 return false; 2894 } 2895 2896 return true; 2897 } 2898 2899 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2900 unsigned &RegNum, unsigned &RegWidth, 2901 bool RestoreOnFailure /*=false*/) { 2902 Reg = AMDGPU::NoRegister; 2903 2904 SmallVector<AsmToken, 1> Tokens; 2905 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2906 if (RestoreOnFailure) { 2907 while (!Tokens.empty()) { 2908 getLexer().UnLex(Tokens.pop_back_val()); 2909 } 2910 } 2911 return true; 2912 } 2913 return false; 2914 } 2915 2916 std::optional<StringRef> 2917 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2918 switch (RegKind) { 2919 case IS_VGPR: 2920 return StringRef(".amdgcn.next_free_vgpr"); 2921 case IS_SGPR: 2922 return StringRef(".amdgcn.next_free_sgpr"); 2923 default: 2924 return std::nullopt; 
2925 } 2926 } 2927 2928 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2929 auto SymbolName = getGprCountSymbolName(RegKind); 2930 assert(SymbolName && "initializing invalid register kind"); 2931 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2932 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2933 } 2934 2935 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2936 unsigned DwordRegIndex, 2937 unsigned RegWidth) { 2938 // Symbols are only defined for GCN targets 2939 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2940 return true; 2941 2942 auto SymbolName = getGprCountSymbolName(RegKind); 2943 if (!SymbolName) 2944 return true; 2945 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2946 2947 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2948 int64_t OldCount; 2949 2950 if (!Sym->isVariable()) 2951 return !Error(getLoc(), 2952 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2953 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2954 return !Error( 2955 getLoc(), 2956 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2957 2958 if (OldCount <= NewMax) 2959 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2960 2961 return true; 2962 } 2963 2964 std::unique_ptr<AMDGPUOperand> 2965 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2966 const auto &Tok = getToken(); 2967 SMLoc StartLoc = Tok.getLoc(); 2968 SMLoc EndLoc = Tok.getEndLoc(); 2969 RegisterKind RegKind; 2970 unsigned Reg, RegNum, RegWidth; 2971 2972 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2973 return nullptr; 2974 } 2975 if (isHsaAbi(getSTI())) { 2976 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2977 return nullptr; 2978 } else 2979 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2980 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2981 } 2982 2983 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands, 2984 bool HasSP3AbsModifier, bool HasLit) { 2985 // TODO: add syntactic sugar for 1/(2*PI) 2986 2987 if (isRegister()) 2988 return ParseStatus::NoMatch; 2989 assert(!isModifier()); 2990 2991 if (!HasLit) { 2992 HasLit = trySkipId("lit"); 2993 if (HasLit) { 2994 if (!skipToken(AsmToken::LParen, "expected left paren after lit")) 2995 return ParseStatus::Failure; 2996 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit); 2997 if (S.isSuccess() && 2998 !skipToken(AsmToken::RParen, "expected closing parentheses")) 2999 return ParseStatus::Failure; 3000 return S; 3001 } 3002 } 3003 3004 const auto& Tok = getToken(); 3005 const auto& NextTok = peekToken(); 3006 bool IsReal = Tok.is(AsmToken::Real); 3007 SMLoc S = getLoc(); 3008 bool Negate = false; 3009 3010 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 3011 lex(); 3012 IsReal = true; 3013 Negate = true; 3014 } 3015 3016 AMDGPUOperand::Modifiers Mods; 3017 Mods.Lit = HasLit; 3018 3019 if (IsReal) { 3020 // Floating-point expressions are not supported. 3021 // Can only allow floating-point literals with an 3022 // optional sign. 
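// For example, "1.0" and "-0.5" are accepted here, while expressions such as
// "1.0+2.0" are not.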
3023 3024 StringRef Num = getTokenStr(); 3025 lex(); 3026 3027 APFloat RealVal(APFloat::IEEEdouble()); 3028 auto roundMode = APFloat::rmNearestTiesToEven; 3029 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) 3030 return ParseStatus::Failure; 3031 if (Negate) 3032 RealVal.changeSign(); 3033 3034 Operands.push_back( 3035 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 3036 AMDGPUOperand::ImmTyNone, true)); 3037 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3038 Op.setModifiers(Mods); 3039 3040 return ParseStatus::Success; 3041 3042 } else { 3043 int64_t IntVal; 3044 const MCExpr *Expr; 3045 SMLoc S = getLoc(); 3046 3047 if (HasSP3AbsModifier) { 3048 // This is a workaround for handling expressions 3049 // as arguments of SP3 'abs' modifier, for example: 3050 // |1.0| 3051 // |-1| 3052 // |1+x| 3053 // This syntax is not compatible with syntax of standard 3054 // MC expressions (due to the trailing '|'). 3055 SMLoc EndLoc; 3056 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 3057 return ParseStatus::Failure; 3058 } else { 3059 if (Parser.parseExpression(Expr)) 3060 return ParseStatus::Failure; 3061 } 3062 3063 if (Expr->evaluateAsAbsolute(IntVal)) { 3064 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 3065 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3066 Op.setModifiers(Mods); 3067 } else { 3068 if (HasLit) 3069 return ParseStatus::NoMatch; 3070 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3071 } 3072 3073 return ParseStatus::Success; 3074 } 3075 3076 return ParseStatus::NoMatch; 3077 } 3078 3079 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) { 3080 if (!isRegister()) 3081 return ParseStatus::NoMatch; 3082 3083 if (auto R = parseRegister()) { 3084 assert(R->isReg()); 3085 Operands.push_back(std::move(R)); 3086 return ParseStatus::Success; 3087 } 3088 return ParseStatus::Failure; 3089 } 3090 3091 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, 3092 bool HasSP3AbsMod, bool HasLit) { 3093 ParseStatus Res = parseReg(Operands); 3094 if (!Res.isNoMatch()) 3095 return Res; 3096 if (isModifier()) 3097 return ParseStatus::NoMatch; 3098 return parseImm(Operands, HasSP3AbsMod, HasLit); 3099 } 3100 3101 bool 3102 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3103 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3104 const auto &str = Token.getString(); 3105 return str == "abs" || str == "neg" || str == "sext"; 3106 } 3107 return false; 3108 } 3109 3110 bool 3111 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3112 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3113 } 3114 3115 bool 3116 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3117 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3118 } 3119 3120 bool 3121 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3122 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3123 } 3124 3125 // Check if this is an operand modifier or an opcode modifier 3126 // which may look like an expression but it is not. We should 3127 // avoid parsing these modifiers as expressions. Currently 3128 // recognized sequences are: 3129 // |...| 3130 // abs(...) 3131 // neg(...) 3132 // sext(...) 
// -reg
3134 // -|...|
3135 // -abs(...)
3136 // name:...
3137 //
3138 bool
3139 AMDGPUAsmParser::isModifier() {
3140
3141 AsmToken Tok = getToken();
3142 AsmToken NextToken[2];
3143 peekTokens(NextToken);
3144
3145 return isOperandModifier(Tok, NextToken[0]) ||
3146 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3147 isOpcodeModifierWithVal(Tok, NextToken[0]);
3148 }
3149
3150 // Check if the current token is an SP3 'neg' modifier.
3151 // Currently this modifier is allowed in the following context:
3152 //
3153 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3154 // 2. Before an 'abs' modifier: -abs(...)
3155 // 3. Before an SP3 'abs' modifier: -|...|
3156 //
3157 // In all other cases "-" is handled as a part
3158 // of an expression that follows the sign.
3159 //
3160 // Note: When "-" is followed by an integer literal,
3161 // this is interpreted as integer negation rather
3162 // than a floating-point NEG modifier applied to N.
3163 // Besides being counter-intuitive, such use of a floating-point
3164 // NEG modifier would have resulted in a different meaning
3165 // of integer literals used with VOP1/2/C and VOP3,
3166 // for example:
3167 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3168 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3169 // Negative fp literals with a preceding "-" are
3170 // handled likewise for uniformity.
3171 //
3172 bool
3173 AMDGPUAsmParser::parseSP3NegModifier() {
3174
3175 AsmToken NextToken[2];
3176 peekTokens(NextToken);
3177
3178 if (isToken(AsmToken::Minus) &&
3179 (isRegister(NextToken[0], NextToken[1]) ||
3180 NextToken[0].is(AsmToken::Pipe) ||
3181 isId(NextToken[0], "abs"))) {
3182 lex();
3183 return true;
3184 }
3185
3186 return false;
3187 }
3188
3189 ParseStatus
3190 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3191 bool AllowImm) {
3192 bool Neg, SP3Neg;
3193 bool Abs, SP3Abs;
3194 bool Lit;
3195 SMLoc Loc;
3196
3197 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3198 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3199 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3200
3201 SP3Neg = parseSP3NegModifier();
3202
3203 Loc = getLoc();
3204 Neg = trySkipId("neg");
3205 if (Neg && SP3Neg)
3206 return Error(Loc, "expected register or immediate");
3207 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3208 return ParseStatus::Failure;
3209
3210 Abs = trySkipId("abs");
3211 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3212 return ParseStatus::Failure;
3213
3214 Lit = trySkipId("lit");
3215 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3216 return ParseStatus::Failure;
3217
3218 Loc = getLoc();
3219 SP3Abs = trySkipToken(AsmToken::Pipe);
3220 if (Abs && SP3Abs)
3221 return Error(Loc, "expected register or immediate");
3222
3223 ParseStatus Res;
3224 if (AllowImm) {
3225 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3226 } else {
3227 Res = parseReg(Operands);
3228 }
3229 if (!Res.isSuccess())
3230 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ?
ParseStatus::Failure : Res; 3231 3232 if (Lit && !Operands.back()->isImm()) 3233 Error(Loc, "expected immediate with lit modifier"); 3234 3235 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3236 return ParseStatus::Failure; 3237 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3238 return ParseStatus::Failure; 3239 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3240 return ParseStatus::Failure; 3241 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3242 return ParseStatus::Failure; 3243 3244 AMDGPUOperand::Modifiers Mods; 3245 Mods.Abs = Abs || SP3Abs; 3246 Mods.Neg = Neg || SP3Neg; 3247 Mods.Lit = Lit; 3248 3249 if (Mods.hasFPModifiers() || Lit) { 3250 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3251 if (Op.isExpr()) 3252 return Error(Op.getStartLoc(), "expected an absolute expression"); 3253 Op.setModifiers(Mods); 3254 } 3255 return ParseStatus::Success; 3256 } 3257 3258 ParseStatus 3259 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3260 bool AllowImm) { 3261 bool Sext = trySkipId("sext"); 3262 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3263 return ParseStatus::Failure; 3264 3265 ParseStatus Res; 3266 if (AllowImm) { 3267 Res = parseRegOrImm(Operands); 3268 } else { 3269 Res = parseReg(Operands); 3270 } 3271 if (!Res.isSuccess()) 3272 return Sext ? ParseStatus::Failure : Res; 3273 3274 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3275 return ParseStatus::Failure; 3276 3277 AMDGPUOperand::Modifiers Mods; 3278 Mods.Sext = Sext; 3279 3280 if (Mods.hasIntModifiers()) { 3281 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3282 if (Op.isExpr()) 3283 return Error(Op.getStartLoc(), "expected an absolute expression"); 3284 Op.setModifiers(Mods); 3285 } 3286 3287 return ParseStatus::Success; 3288 } 3289 3290 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3291 return parseRegOrImmWithFPInputMods(Operands, false); 3292 } 3293 3294 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3295 return parseRegOrImmWithIntInputMods(Operands, false); 3296 } 3297 3298 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3299 auto Loc = getLoc(); 3300 if (trySkipId("off")) { 3301 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3302 AMDGPUOperand::ImmTyOff, false)); 3303 return ParseStatus::Success; 3304 } 3305 3306 if (!isRegister()) 3307 return ParseStatus::NoMatch; 3308 3309 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3310 if (Reg) { 3311 Operands.push_back(std::move(Reg)); 3312 return ParseStatus::Success; 3313 } 3314 3315 return ParseStatus::Failure; 3316 } 3317 3318 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3319 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3320 3321 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3322 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3323 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3324 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3325 return Match_InvalidOperand; 3326 3327 if ((TSFlags & SIInstrFlags::VOP3) && 3328 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3329 getForcedEncodingSize() != 64) 3330 return Match_PreferE32; 3331 3332 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3333 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3334 // 
v_mac_f32/16 allow only dst_sel == DWORD;
3335 auto OpNum =
3336 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3337 const auto &Op = Inst.getOperand(OpNum);
3338 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3339 return Match_InvalidOperand;
3340 }
3341 }
3342
3343 return Match_Success;
3344 }
3345
3346 static ArrayRef<unsigned> getAllVariants() {
3347 static const unsigned Variants[] = {
3348 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3349 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3350 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3351 };
3352
3353 return ArrayRef(Variants);
3354 }
3355
3356 // What asm variants we should check
3357 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3358 if (isForcedDPP() && isForcedVOP3()) {
3359 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3360 return ArrayRef(Variants);
3361 }
3362 if (getForcedEncodingSize() == 32) {
3363 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3364 return ArrayRef(Variants);
3365 }
3366
3367 if (isForcedVOP3()) {
3368 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3369 return ArrayRef(Variants);
3370 }
3371
3372 if (isForcedSDWA()) {
3373 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3374 AMDGPUAsmVariants::SDWA9};
3375 return ArrayRef(Variants);
3376 }
3377
3378 if (isForcedDPP()) {
3379 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3380 return ArrayRef(Variants);
3381 }
3382
3383 return getAllVariants();
3384 }
3385
3386 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3387 if (isForcedDPP() && isForcedVOP3())
3388 return "e64_dpp";
3389
3390 if (getForcedEncodingSize() == 32)
3391 return "e32";
3392
3393 if (isForcedVOP3())
3394 return "e64";
3395
3396 if (isForcedSDWA())
3397 return "sdwa";
3398
3399 if (isForcedDPP())
3400 return "dpp";
3401
3402 return "";
3403 }
3404
3405 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3406 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3407 for (MCPhysReg Reg : Desc.implicit_uses()) {
3408 switch (Reg) {
3409 case AMDGPU::FLAT_SCR:
3410 case AMDGPU::VCC:
3411 case AMDGPU::VCC_LO:
3412 case AMDGPU::VCC_HI:
3413 case AMDGPU::M0:
3414 return Reg;
3415 default:
3416 break;
3417 }
3418 }
3419 return AMDGPU::NoRegister;
3420 }
3421
3422 // NB: This code is correct only when used to check constant
3423 // bus limitations because GFX7 supports no f16 inline constants.
3424 // Note that there are no cases when a GFX7 opcode violates
3425 // constant bus limitations due to the use of an f16 constant.
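// For reference (illustrative values): a 32-bit FP operand can inline
// constants such as 0.5, -4.0, 64 and -16, whereas values like 0.3 or 100
// are not inlinable and must be encoded as literals.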
3426 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3427 unsigned OpIdx) const { 3428 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3429 3430 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) || 3431 AMDGPU::isKImmOperand(Desc, OpIdx)) { 3432 return false; 3433 } 3434 3435 const MCOperand &MO = Inst.getOperand(OpIdx); 3436 3437 int64_t Val = MO.getImm(); 3438 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3439 3440 switch (OpSize) { // expected operand size 3441 case 8: 3442 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3443 case 4: 3444 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3445 case 2: { 3446 const unsigned OperandType = Desc.operands()[OpIdx].OperandType; 3447 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3448 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3449 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3450 return AMDGPU::isInlinableIntLiteral(Val); 3451 3452 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3453 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3454 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3455 return AMDGPU::isInlinableLiteralV2I16(Val); 3456 3457 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3458 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3459 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3460 return AMDGPU::isInlinableLiteralV2F16(Val); 3461 3462 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3463 } 3464 default: 3465 llvm_unreachable("invalid operand size"); 3466 } 3467 } 3468 3469 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3470 if (!isGFX10Plus()) 3471 return 1; 3472 3473 switch (Opcode) { 3474 // 64-bit shift instructions can use only one scalar value input 3475 case AMDGPU::V_LSHLREV_B64_e64: 3476 case AMDGPU::V_LSHLREV_B64_gfx10: 3477 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3478 case AMDGPU::V_LSHLREV_B64_e32_gfx12: 3479 case AMDGPU::V_LSHLREV_B64_e64_gfx12: 3480 case AMDGPU::V_LSHRREV_B64_e64: 3481 case AMDGPU::V_LSHRREV_B64_gfx10: 3482 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3483 case AMDGPU::V_LSHRREV_B64_e64_gfx12: 3484 case AMDGPU::V_ASHRREV_I64_e64: 3485 case AMDGPU::V_ASHRREV_I64_gfx10: 3486 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3487 case AMDGPU::V_ASHRREV_I64_e64_gfx12: 3488 case AMDGPU::V_LSHL_B64_e64: 3489 case AMDGPU::V_LSHR_B64_e64: 3490 case AMDGPU::V_ASHR_I64_e64: 3491 return 1; 3492 default: 3493 return 2; 3494 } 3495 } 3496 3497 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6; 3498 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>; 3499 3500 // Get regular operand indices in the same order as specified 3501 // in the instruction (but append mandatory literals to the end). 3502 static OperandIndices getSrcOperandIndices(unsigned Opcode, 3503 bool AddMandatoryLiterals = false) { 3504 3505 int16_t ImmIdx = 3506 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1; 3507 3508 if (isVOPD(Opcode)) { 3509 int16_t ImmDeferredIdx = 3510 AddMandatoryLiterals ? 
getNamedOperandIdx(Opcode, OpName::immDeferred) 3511 : -1; 3512 3513 return {getNamedOperandIdx(Opcode, OpName::src0X), 3514 getNamedOperandIdx(Opcode, OpName::vsrc1X), 3515 getNamedOperandIdx(Opcode, OpName::src0Y), 3516 getNamedOperandIdx(Opcode, OpName::vsrc1Y), 3517 ImmDeferredIdx, 3518 ImmIdx}; 3519 } 3520 3521 return {getNamedOperandIdx(Opcode, OpName::src0), 3522 getNamedOperandIdx(Opcode, OpName::src1), 3523 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx}; 3524 } 3525 3526 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3527 const MCOperand &MO = Inst.getOperand(OpIdx); 3528 if (MO.isImm()) { 3529 return !isInlineConstant(Inst, OpIdx); 3530 } else if (MO.isReg()) { 3531 auto Reg = MO.getReg(); 3532 if (!Reg) { 3533 return false; 3534 } 3535 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3536 auto PReg = mc2PseudoReg(Reg); 3537 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3538 } else { 3539 return true; 3540 } 3541 } 3542 3543 bool AMDGPUAsmParser::validateConstantBusLimitations( 3544 const MCInst &Inst, const OperandVector &Operands) { 3545 const unsigned Opcode = Inst.getOpcode(); 3546 const MCInstrDesc &Desc = MII.get(Opcode); 3547 unsigned LastSGPR = AMDGPU::NoRegister; 3548 unsigned ConstantBusUseCount = 0; 3549 unsigned NumLiterals = 0; 3550 unsigned LiteralSize; 3551 3552 if (!(Desc.TSFlags & 3553 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3554 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) && 3555 !isVOPD(Opcode)) 3556 return true; 3557 3558 // Check special imm operands (used by madmk, etc.) 3559 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) { 3560 ++NumLiterals; 3561 LiteralSize = 4; 3562 } 3563 3564 SmallDenseSet<unsigned> SGPRsUsed; 3565 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3566 if (SGPRUsed != AMDGPU::NoRegister) { 3567 SGPRsUsed.insert(SGPRUsed); 3568 ++ConstantBusUseCount; 3569 } 3570 3571 OperandIndices OpIndices = getSrcOperandIndices(Opcode); 3572 3573 for (int OpIdx : OpIndices) { 3574 if (OpIdx == -1) 3575 continue; 3576 3577 const MCOperand &MO = Inst.getOperand(OpIdx); 3578 if (usesConstantBus(Inst, OpIdx)) { 3579 if (MO.isReg()) { 3580 LastSGPR = mc2PseudoReg(MO.getReg()); 3581 // Pairs of registers with a partial intersection like these 3582 // s0, s[0:1] 3583 // flat_scratch_lo, flat_scratch 3584 // flat_scratch_lo, flat_scratch_hi 3585 // are theoretically valid but they are disabled anyway. 3586 // Note that this code mimics SIInstrInfo::verifyInstruction 3587 if (SGPRsUsed.insert(LastSGPR).second) { 3588 ++ConstantBusUseCount; 3589 } 3590 } else { // Expression or a literal 3591 3592 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3593 continue; // special operand like VINTERP attr_chan 3594 3595 // An instruction may use only one literal. 3596 // This has been validated in a previous step. 3597 // See validateVOPLiteral. 3598 // This literal may be used as more than one operand. 3599 // If all these operands are of the same size, 3600 // this literal counts as one scalar value. 3601 // Otherwise it counts as 2 scalar values. 3602 // See "GFX10 Shader Programming", section 3.6.2.3.
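// For example (illustrative): if the same literal feeds one 32-bit and one
// 64-bit source operand, it is counted below as two scalar values; if all
// of its uses are 32-bit, it is counted once.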
3603 3604 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3605 if (Size < 4) 3606 Size = 4; 3607 3608 if (NumLiterals == 0) { 3609 NumLiterals = 1; 3610 LiteralSize = Size; 3611 } else if (LiteralSize != Size) { 3612 NumLiterals = 2; 3613 } 3614 } 3615 } 3616 } 3617 ConstantBusUseCount += NumLiterals; 3618 3619 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3620 return true; 3621 3622 SMLoc LitLoc = getLitLoc(Operands); 3623 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3624 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; 3625 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3626 return false; 3627 } 3628 3629 bool AMDGPUAsmParser::validateVOPDRegBankConstraints( 3630 const MCInst &Inst, const OperandVector &Operands) { 3631 3632 const unsigned Opcode = Inst.getOpcode(); 3633 if (!isVOPD(Opcode)) 3634 return true; 3635 3636 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3637 3638 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) { 3639 const MCOperand &Opr = Inst.getOperand(OperandIdx); 3640 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI)) 3641 ? Opr.getReg() 3642 : MCRegister::NoRegister; 3643 }; 3644 3645 // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. 3646 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; 3647 3648 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); 3649 auto InvalidCompOprIdx = 3650 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc); 3651 if (!InvalidCompOprIdx) 3652 return true; 3653 3654 auto CompOprIdx = *InvalidCompOprIdx; 3655 auto ParsedIdx = 3656 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), 3657 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); 3658 assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); 3659 3660 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); 3661 if (CompOprIdx == VOPD::Component::DST) { 3662 Error(Loc, "one dst register must be even and the other odd"); 3663 } else { 3664 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; 3665 Error(Loc, Twine("src") + Twine(CompSrcIdx) + 3666 " operands must use different VGPR banks"); 3667 } 3668 3669 return false; 3670 } 3671 3672 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3673 3674 const unsigned Opc = Inst.getOpcode(); 3675 const MCInstrDesc &Desc = MII.get(Opc); 3676 3677 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3678 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3679 assert(ClampIdx != -1); 3680 return Inst.getOperand(ClampIdx).getImm() == 0; 3681 } 3682 3683 return true; 3684 } 3685 3686 constexpr uint64_t MIMGFlags = 3687 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE; 3688 3689 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, 3690 const SMLoc &IDLoc) { 3691 3692 const unsigned Opc = Inst.getOpcode(); 3693 const MCInstrDesc &Desc = MII.get(Opc); 3694 3695 if ((Desc.TSFlags & MIMGFlags) == 0) 3696 return true; 3697 3698 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3699 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3700 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3701 3702 assert(VDataIdx != -1); 3703 3704 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray 3705 return true; 3706 3707 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3708 unsigned TFESize = (TFEIdx != 
-1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3709 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3710 if (DMask == 0) 3711 DMask = 1; 3712 3713 bool IsPackedD16 = false; 3714 unsigned DataSize = 3715 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask); 3716 if (hasPackedD16()) { 3717 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3718 IsPackedD16 = D16Idx >= 0; 3719 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm()) 3720 DataSize = (DataSize + 1) / 2; 3721 } 3722 3723 if ((VDataSize / 4) == DataSize + TFESize) 3724 return true; 3725 3726 StringRef Modifiers; 3727 if (isGFX90A()) 3728 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask"; 3729 else 3730 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe"; 3731 3732 Error(IDLoc, Twine("image data size does not match ") + Modifiers); 3733 return false; 3734 } 3735 3736 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, 3737 const SMLoc &IDLoc) { 3738 const unsigned Opc = Inst.getOpcode(); 3739 const MCInstrDesc &Desc = MII.get(Opc); 3740 3741 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus()) 3742 return true; 3743 3744 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3745 3746 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3747 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3748 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3749 int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc 3750 : AMDGPU::OpName::rsrc; 3751 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName); 3752 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3753 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3754 3755 assert(VAddr0Idx != -1); 3756 assert(SrsrcIdx != -1); 3757 assert(SrsrcIdx > VAddr0Idx); 3758 3759 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3760 if (BaseOpcode->BVH) { 3761 if (IsA16 == BaseOpcode->A16) 3762 return true; 3763 Error(IDLoc, "image address size does not match a16"); 3764 return false; 3765 } 3766 3767 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3768 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3769 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3770 unsigned ActualAddrSize = 3771 IsNSA ? SrsrcIdx - VAddr0Idx 3772 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3773 3774 unsigned ExpectedAddrSize = 3775 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3776 3777 if (IsNSA) { 3778 if (hasPartialNSAEncoding() && 3779 ExpectedAddrSize > 3780 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) { 3781 int VAddrLastIdx = SrsrcIdx - 1; 3782 unsigned VAddrLastSize = 3783 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4; 3784 3785 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; 3786 } 3787 } else { 3788 if (ExpectedAddrSize > 12) 3789 ExpectedAddrSize = 16; 3790 3791 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3792 // This provides backward compatibility for assembly created 3793 // before 160b/192b/224b types were directly supported. 
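// For example (illustrative): an image instruction whose dim and a16
// settings require only 6 address VGPRs may still be written with an
// 8-register tuple such as v[0:7]; the check below accepts that case.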
3794 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3795 return true; 3796 } 3797 3798 if (ActualAddrSize == ExpectedAddrSize) 3799 return true; 3800 3801 Error(IDLoc, "image address size does not match dim and a16"); 3802 return false; 3803 } 3804 3805 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3806 3807 const unsigned Opc = Inst.getOpcode(); 3808 const MCInstrDesc &Desc = MII.get(Opc); 3809 3810 if ((Desc.TSFlags & MIMGFlags) == 0) 3811 return true; 3812 if (!Desc.mayLoad() || !Desc.mayStore()) 3813 return true; // Not atomic 3814 3815 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3816 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3817 3818 // This is an incomplete check because image_atomic_cmpswap 3819 // may only use 0x3 and 0xf while other atomic operations 3820 // may use 0x1 and 0x3. However these limitations are 3821 // verified when we check that dmask matches dst size. 3822 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3823 } 3824 3825 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3826 3827 const unsigned Opc = Inst.getOpcode(); 3828 const MCInstrDesc &Desc = MII.get(Opc); 3829 3830 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3831 return true; 3832 3833 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3834 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3835 3836 // GATHER4 instructions use dmask in a different fashion compared to 3837 // other MIMG instructions. The only useful DMASK values are 3838 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3839 // (red,red,red,red) etc.) The ISA document doesn't mention 3840 // this. 3841 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3842 } 3843 3844 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3845 const unsigned Opc = Inst.getOpcode(); 3846 const MCInstrDesc &Desc = MII.get(Opc); 3847 3848 if ((Desc.TSFlags & MIMGFlags) == 0) 3849 return true; 3850 3851 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3852 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3853 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3854 3855 if (!BaseOpcode->MSAA) 3856 return true; 3857 3858 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3859 assert(DimIdx != -1); 3860 3861 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3862 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3863 3864 return DimInfo->MSAA; 3865 } 3866 3867 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3868 { 3869 switch (Opcode) { 3870 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3871 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3872 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3873 return true; 3874 default: 3875 return false; 3876 } 3877 } 3878 3879 // movrels* opcodes should only allow VGPRS as src0. 3880 // This is specified in .td description for vop1/vop3, 3881 // but sdwa is handled differently. See isSDWAOperand. 
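// Illustrative example of what this rejects (assumed syntax):
//   v_movrels_b32_sdwa v0, s1    ; error: source operand must be a VGPR
//   v_movrels_b32_sdwa v0, v1    ; accepted by this check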
3882 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3883 const OperandVector &Operands) { 3884 3885 const unsigned Opc = Inst.getOpcode(); 3886 const MCInstrDesc &Desc = MII.get(Opc); 3887 3888 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3889 return true; 3890 3891 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3892 assert(Src0Idx != -1); 3893 3894 SMLoc ErrLoc; 3895 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3896 if (Src0.isReg()) { 3897 auto Reg = mc2PseudoReg(Src0.getReg()); 3898 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3899 if (!isSGPR(Reg, TRI)) 3900 return true; 3901 ErrLoc = getRegLoc(Reg, Operands); 3902 } else { 3903 ErrLoc = getConstLoc(Operands); 3904 } 3905 3906 Error(ErrLoc, "source operand must be a VGPR"); 3907 return false; 3908 } 3909 3910 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3911 const OperandVector &Operands) { 3912 3913 const unsigned Opc = Inst.getOpcode(); 3914 3915 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3916 return true; 3917 3918 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3919 assert(Src0Idx != -1); 3920 3921 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3922 if (!Src0.isReg()) 3923 return true; 3924 3925 auto Reg = mc2PseudoReg(Src0.getReg()); 3926 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3927 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3928 Error(getRegLoc(Reg, Operands), 3929 "source operand must be either a VGPR or an inline constant"); 3930 return false; 3931 } 3932 3933 return true; 3934 } 3935 3936 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst, 3937 const OperandVector &Operands) { 3938 unsigned Opcode = Inst.getOpcode(); 3939 const MCInstrDesc &Desc = MII.get(Opcode); 3940 3941 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) || 3942 !getFeatureBits()[FeatureMFMAInlineLiteralBug]) 3943 return true; 3944 3945 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2); 3946 if (Src2Idx == -1) 3947 return true; 3948 3949 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) { 3950 Error(getConstLoc(Operands), 3951 "inline constants are not allowed for this operand"); 3952 return false; 3953 } 3954 3955 return true; 3956 } 3957 3958 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3959 const OperandVector &Operands) { 3960 const unsigned Opc = Inst.getOpcode(); 3961 const MCInstrDesc &Desc = MII.get(Opc); 3962 3963 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3964 return true; 3965 3966 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3967 if (Src2Idx == -1) 3968 return true; 3969 3970 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3971 if (!Src2.isReg()) 3972 return true; 3973 3974 MCRegister Src2Reg = Src2.getReg(); 3975 MCRegister DstReg = Inst.getOperand(0).getReg(); 3976 if (Src2Reg == DstReg) 3977 return true; 3978 3979 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3980 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128) 3981 return true; 3982 3983 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3984 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3985 "source 2 operand must not partially overlap with dst"); 3986 return false; 3987 } 3988 3989 return true; 3990 } 3991 3992 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3993 switch (Inst.getOpcode()) { 3994 default: 3995 return true; 3996 case V_DIV_SCALE_F32_gfx6_gfx7: 3997 case V_DIV_SCALE_F32_vi: 3998 case 
V_DIV_SCALE_F32_gfx10: 3999 case V_DIV_SCALE_F64_gfx6_gfx7: 4000 case V_DIV_SCALE_F64_vi: 4001 case V_DIV_SCALE_F64_gfx10: 4002 break; 4003 } 4004 4005 // TODO: Check that src0 = src1 or src2. 4006 4007 for (auto Name : {AMDGPU::OpName::src0_modifiers, 4008 AMDGPU::OpName::src1_modifiers, 4009 AMDGPU::OpName::src2_modifiers}) { 4010 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 4011 .getImm() & 4012 SISrcMods::ABS) { 4013 return false; 4014 } 4015 } 4016 4017 return true; 4018 } 4019 4020 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 4021 4022 const unsigned Opc = Inst.getOpcode(); 4023 const MCInstrDesc &Desc = MII.get(Opc); 4024 4025 if ((Desc.TSFlags & MIMGFlags) == 0) 4026 return true; 4027 4028 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 4029 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 4030 if (isCI() || isSI()) 4031 return false; 4032 } 4033 4034 return true; 4035 } 4036 4037 static bool IsRevOpcode(const unsigned Opcode) 4038 { 4039 switch (Opcode) { 4040 case AMDGPU::V_SUBREV_F32_e32: 4041 case AMDGPU::V_SUBREV_F32_e64: 4042 case AMDGPU::V_SUBREV_F32_e32_gfx10: 4043 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 4044 case AMDGPU::V_SUBREV_F32_e32_vi: 4045 case AMDGPU::V_SUBREV_F32_e64_gfx10: 4046 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 4047 case AMDGPU::V_SUBREV_F32_e64_vi: 4048 4049 case AMDGPU::V_SUBREV_CO_U32_e32: 4050 case AMDGPU::V_SUBREV_CO_U32_e64: 4051 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 4052 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 4053 4054 case AMDGPU::V_SUBBREV_U32_e32: 4055 case AMDGPU::V_SUBBREV_U32_e64: 4056 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 4057 case AMDGPU::V_SUBBREV_U32_e32_vi: 4058 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 4059 case AMDGPU::V_SUBBREV_U32_e64_vi: 4060 4061 case AMDGPU::V_SUBREV_U32_e32: 4062 case AMDGPU::V_SUBREV_U32_e64: 4063 case AMDGPU::V_SUBREV_U32_e32_gfx9: 4064 case AMDGPU::V_SUBREV_U32_e32_vi: 4065 case AMDGPU::V_SUBREV_U32_e64_gfx9: 4066 case AMDGPU::V_SUBREV_U32_e64_vi: 4067 4068 case AMDGPU::V_SUBREV_F16_e32: 4069 case AMDGPU::V_SUBREV_F16_e64: 4070 case AMDGPU::V_SUBREV_F16_e32_gfx10: 4071 case AMDGPU::V_SUBREV_F16_e32_vi: 4072 case AMDGPU::V_SUBREV_F16_e64_gfx10: 4073 case AMDGPU::V_SUBREV_F16_e64_vi: 4074 4075 case AMDGPU::V_SUBREV_U16_e32: 4076 case AMDGPU::V_SUBREV_U16_e64: 4077 case AMDGPU::V_SUBREV_U16_e32_vi: 4078 case AMDGPU::V_SUBREV_U16_e64_vi: 4079 4080 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 4081 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 4082 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 4083 4084 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 4085 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 4086 4087 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 4088 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 4089 4090 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 4091 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 4092 4093 case AMDGPU::V_LSHRREV_B32_e32: 4094 case AMDGPU::V_LSHRREV_B32_e64: 4095 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 4096 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 4097 case AMDGPU::V_LSHRREV_B32_e32_vi: 4098 case AMDGPU::V_LSHRREV_B32_e64_vi: 4099 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 4100 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 4101 4102 case AMDGPU::V_ASHRREV_I32_e32: 4103 case AMDGPU::V_ASHRREV_I32_e64: 4104 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 4105 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 4106 case AMDGPU::V_ASHRREV_I32_e32_vi: 4107 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 4108 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 4109 case AMDGPU::V_ASHRREV_I32_e64_vi: 4110 4111 case
AMDGPU::V_LSHLREV_B32_e32: 4112 case AMDGPU::V_LSHLREV_B32_e64: 4113 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 4114 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 4115 case AMDGPU::V_LSHLREV_B32_e32_vi: 4116 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 4117 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 4118 case AMDGPU::V_LSHLREV_B32_e64_vi: 4119 4120 case AMDGPU::V_LSHLREV_B16_e32: 4121 case AMDGPU::V_LSHLREV_B16_e64: 4122 case AMDGPU::V_LSHLREV_B16_e32_vi: 4123 case AMDGPU::V_LSHLREV_B16_e64_vi: 4124 case AMDGPU::V_LSHLREV_B16_gfx10: 4125 4126 case AMDGPU::V_LSHRREV_B16_e32: 4127 case AMDGPU::V_LSHRREV_B16_e64: 4128 case AMDGPU::V_LSHRREV_B16_e32_vi: 4129 case AMDGPU::V_LSHRREV_B16_e64_vi: 4130 case AMDGPU::V_LSHRREV_B16_gfx10: 4131 4132 case AMDGPU::V_ASHRREV_I16_e32: 4133 case AMDGPU::V_ASHRREV_I16_e64: 4134 case AMDGPU::V_ASHRREV_I16_e32_vi: 4135 case AMDGPU::V_ASHRREV_I16_e64_vi: 4136 case AMDGPU::V_ASHRREV_I16_gfx10: 4137 4138 case AMDGPU::V_LSHLREV_B64_e64: 4139 case AMDGPU::V_LSHLREV_B64_gfx10: 4140 case AMDGPU::V_LSHLREV_B64_vi: 4141 4142 case AMDGPU::V_LSHRREV_B64_e64: 4143 case AMDGPU::V_LSHRREV_B64_gfx10: 4144 case AMDGPU::V_LSHRREV_B64_vi: 4145 4146 case AMDGPU::V_ASHRREV_I64_e64: 4147 case AMDGPU::V_ASHRREV_I64_gfx10: 4148 case AMDGPU::V_ASHRREV_I64_vi: 4149 4150 case AMDGPU::V_PK_LSHLREV_B16: 4151 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 4152 case AMDGPU::V_PK_LSHLREV_B16_vi: 4153 4154 case AMDGPU::V_PK_LSHRREV_B16: 4155 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 4156 case AMDGPU::V_PK_LSHRREV_B16_vi: 4157 case AMDGPU::V_PK_ASHRREV_I16: 4158 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4159 case AMDGPU::V_PK_ASHRREV_I16_vi: 4160 return true; 4161 default: 4162 return false; 4163 } 4164 } 4165 4166 std::optional<StringRef> 4167 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4168 4169 using namespace SIInstrFlags; 4170 const unsigned Opcode = Inst.getOpcode(); 4171 const MCInstrDesc &Desc = MII.get(Opcode); 4172 4173 // lds_direct register is defined so that it can be used 4174 // with 9-bit operands only. Ignore encodings which do not accept these. 
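// For example (illustrative, assumed syntax): "v_mov_b32 v0, lds_direct"
// may be accepted on targets that still support lds_direct, while using
// lds_direct as src1 or src2, in an SDWA form, or with a *rev opcode is
// rejected below.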
4175 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4176 if ((Desc.TSFlags & Enc) == 0) 4177 return std::nullopt; 4178 4179 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4180 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4181 if (SrcIdx == -1) 4182 break; 4183 const auto &Src = Inst.getOperand(SrcIdx); 4184 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4185 4186 if (isGFX90A() || isGFX11Plus()) 4187 return StringRef("lds_direct is not supported on this GPU"); 4188 4189 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4190 return StringRef("lds_direct cannot be used with this instruction"); 4191 4192 if (SrcName != OpName::src0) 4193 return StringRef("lds_direct may be used as src0 only"); 4194 } 4195 } 4196 4197 return std::nullopt; 4198 } 4199 4200 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4201 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4202 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4203 if (Op.isFlatOffset()) 4204 return Op.getStartLoc(); 4205 } 4206 return getLoc(); 4207 } 4208 4209 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst, 4210 const OperandVector &Operands) { 4211 auto Opcode = Inst.getOpcode(); 4212 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4213 if (OpNum == -1) 4214 return true; 4215 4216 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4217 if ((TSFlags & SIInstrFlags::FLAT)) 4218 return validateFlatOffset(Inst, Operands); 4219 4220 if ((TSFlags & SIInstrFlags::SMRD)) 4221 return validateSMEMOffset(Inst, Operands); 4222 4223 const auto &Op = Inst.getOperand(OpNum); 4224 if (isGFX12Plus() && 4225 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 4226 const unsigned OffsetSize = 24; 4227 if (!isIntN(OffsetSize, Op.getImm())) { 4228 Error(getFlatOffsetLoc(Operands), 4229 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4230 return false; 4231 } 4232 } else { 4233 const unsigned OffsetSize = 16; 4234 if (!isUIntN(OffsetSize, Op.getImm())) { 4235 Error(getFlatOffsetLoc(Operands), 4236 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4237 return false; 4238 } 4239 } 4240 return true; 4241 } 4242 4243 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4244 const OperandVector &Operands) { 4245 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4246 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4247 return true; 4248 4249 auto Opcode = Inst.getOpcode(); 4250 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4251 assert(OpNum != -1); 4252 4253 const auto &Op = Inst.getOperand(OpNum); 4254 if (!hasFlatOffsets() && Op.getImm() != 0) { 4255 Error(getFlatOffsetLoc(Operands), 4256 "flat offset modifier is not supported on this GPU"); 4257 return false; 4258 } 4259 4260 // For pre-GFX12 FLAT instructions the offset must be positive; 4261 // MSB is ignored and forced to zero. 4262 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI()); 4263 bool AllowNegative = 4264 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) || 4265 isGFX12Plus(); 4266 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { 4267 Error(getFlatOffsetLoc(Operands), 4268 Twine("expected a ") + 4269 (AllowNegative ? 
Twine(OffsetSize) + "-bit signed offset" 4270 : Twine(OffsetSize - 1) + "-bit unsigned offset")); 4271 return false; 4272 } 4273 4274 return true; 4275 } 4276 4277 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4278 // Start with second operand because SMEM Offset cannot be dst or src0. 4279 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4280 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4281 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod()) 4282 return Op.getStartLoc(); 4283 } 4284 return getLoc(); 4285 } 4286 4287 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4288 const OperandVector &Operands) { 4289 if (isCI() || isSI()) 4290 return true; 4291 4292 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4293 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4294 return true; 4295 4296 auto Opcode = Inst.getOpcode(); 4297 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4298 if (OpNum == -1) 4299 return true; 4300 4301 const auto &Op = Inst.getOperand(OpNum); 4302 if (!Op.isImm()) 4303 return true; 4304 4305 uint64_t Offset = Op.getImm(); 4306 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4307 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4308 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4309 return true; 4310 4311 Error(getSMEMOffsetLoc(Operands), 4312 isGFX12Plus() ? "expected a 24-bit signed offset" 4313 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" 4314 : "expected a 21-bit signed offset"); 4315 4316 return false; 4317 } 4318 4319 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4320 unsigned Opcode = Inst.getOpcode(); 4321 const MCInstrDesc &Desc = MII.get(Opcode); 4322 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4323 return true; 4324 4325 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4326 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4327 4328 const int OpIndices[] = { Src0Idx, Src1Idx }; 4329 4330 unsigned NumExprs = 0; 4331 unsigned NumLiterals = 0; 4332 uint32_t LiteralValue; 4333 4334 for (int OpIdx : OpIndices) { 4335 if (OpIdx == -1) break; 4336 4337 const MCOperand &MO = Inst.getOperand(OpIdx); 4338 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4339 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4340 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4341 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4342 if (NumLiterals == 0 || LiteralValue != Value) { 4343 LiteralValue = Value; 4344 ++NumLiterals; 4345 } 4346 } else if (MO.isExpr()) { 4347 ++NumExprs; 4348 } 4349 } 4350 } 4351 4352 return NumLiterals + NumExprs <= 1; 4353 } 4354 4355 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4356 const unsigned Opc = Inst.getOpcode(); 4357 if (isPermlane16(Opc)) { 4358 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4359 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4360 4361 if (OpSel & ~3) 4362 return false; 4363 } 4364 4365 uint64_t TSFlags = MII.get(Opc).TSFlags; 4366 4367 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { 4368 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4369 if (OpSelIdx != -1) { 4370 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4371 return false; 4372 } 4373 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4374 if (OpSelHiIdx != -1) { 4375 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4376 return 
false; 4377 } 4378 } 4379 4380 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). 4381 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && 4382 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { 4383 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4384 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4385 if (OpSel & 3) 4386 return false; 4387 } 4388 4389 return true; 4390 } 4391 4392 bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) { 4393 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi); 4394 4395 const unsigned Opc = Inst.getOpcode(); 4396 uint64_t TSFlags = MII.get(Opc).TSFlags; 4397 4398 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2) 4399 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1) 4400 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1) 4401 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand. 4402 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) && 4403 !(TSFlags & SIInstrFlags::IsSWMMAC)) 4404 return true; 4405 4406 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName); 4407 if (NegIdx == -1) 4408 return true; 4409 4410 unsigned Neg = Inst.getOperand(NegIdx).getImm(); 4411 4412 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed 4413 // on some src operands but not allowed on other. 4414 // It is convenient that such instructions don't have src_modifiers operand 4415 // for src operands that don't allow neg because they also don't allow opsel. 4416 4417 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers, 4418 AMDGPU::OpName::src1_modifiers, 4419 AMDGPU::OpName::src2_modifiers}; 4420 4421 for (unsigned i = 0; i < 3; ++i) { 4422 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) { 4423 if (Neg & (1 << i)) 4424 return false; 4425 } 4426 } 4427 4428 return true; 4429 } 4430 4431 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4432 const OperandVector &Operands) { 4433 const unsigned Opc = Inst.getOpcode(); 4434 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4435 if (DppCtrlIdx >= 0) { 4436 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4437 4438 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) && 4439 AMDGPU::isDPALU_DPP(MII.get(Opc))) { 4440 // DP ALU DPP is supported for row_newbcast only on GFX9* 4441 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4442 Error(S, "DP ALU dpp only supports row_newbcast"); 4443 return false; 4444 } 4445 } 4446 4447 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8); 4448 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0; 4449 4450 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) { 4451 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 4452 if (Src1Idx >= 0) { 4453 const MCOperand &Src1 = Inst.getOperand(Src1Idx); 4454 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4455 if (Src1.isImm() || 4456 (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) { 4457 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]); 4458 Error(Op.getStartLoc(), "invalid operand for instruction"); 4459 return false; 4460 } 4461 } 4462 } 4463 4464 return true; 4465 } 4466 4467 // Check if VCC register matches wavefront size 4468 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4469 auto FB = getFeatureBits(); 4470 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4471 
(FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4472 } 4473 4474 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4475 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4476 const OperandVector &Operands) { 4477 unsigned Opcode = Inst.getOpcode(); 4478 const MCInstrDesc &Desc = MII.get(Opcode); 4479 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1; 4480 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4481 !HasMandatoryLiteral && !isVOPD(Opcode)) 4482 return true; 4483 4484 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral); 4485 4486 unsigned NumExprs = 0; 4487 unsigned NumLiterals = 0; 4488 uint32_t LiteralValue; 4489 4490 for (int OpIdx : OpIndices) { 4491 if (OpIdx == -1) 4492 continue; 4493 4494 const MCOperand &MO = Inst.getOperand(OpIdx); 4495 if (!MO.isImm() && !MO.isExpr()) 4496 continue; 4497 if (!isSISrcOperand(Desc, OpIdx)) 4498 continue; 4499 4500 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4501 uint64_t Value = static_cast<uint64_t>(MO.getImm()); 4502 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) && 4503 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8; 4504 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64); 4505 4506 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) { 4507 Error(getLitLoc(Operands), "invalid operand for instruction"); 4508 return false; 4509 } 4510 4511 if (IsFP64 && IsValid32Op) 4512 Value = Hi_32(Value); 4513 4514 if (NumLiterals == 0 || LiteralValue != Value) { 4515 LiteralValue = Value; 4516 ++NumLiterals; 4517 } 4518 } else if (MO.isExpr()) { 4519 ++NumExprs; 4520 } 4521 } 4522 NumLiterals += NumExprs; 4523 4524 if (!NumLiterals) 4525 return true; 4526 4527 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) { 4528 Error(getLitLoc(Operands), "literal operands are not supported"); 4529 return false; 4530 } 4531 4532 if (NumLiterals > 1) { 4533 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed"); 4534 return false; 4535 } 4536 4537 return true; 4538 } 4539 4540 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4541 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4542 const MCRegisterInfo *MRI) { 4543 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4544 if (OpIdx < 0) 4545 return -1; 4546 4547 const MCOperand &Op = Inst.getOperand(OpIdx); 4548 if (!Op.isReg()) 4549 return -1; 4550 4551 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4552 auto Reg = Sub ? Sub : Op.getReg(); 4553 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4554 return AGPR32.contains(Reg) ? 1 : 0; 4555 } 4556 4557 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4558 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4559 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4560 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4561 SIInstrFlags::DS)) == 0) 4562 return true; 4563 4564 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? 
AMDGPU::OpName::data0 4565 : AMDGPU::OpName::vdata; 4566 4567 const MCRegisterInfo *MRI = getMRI(); 4568 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4569 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4570 4571 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4572 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4573 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4574 return false; 4575 } 4576 4577 auto FB = getFeatureBits(); 4578 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4579 if (DataAreg < 0 || DstAreg < 0) 4580 return true; 4581 return DstAreg == DataAreg; 4582 } 4583 4584 return DstAreg < 1 && DataAreg < 1; 4585 } 4586 4587 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4588 auto FB = getFeatureBits(); 4589 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4590 return true; 4591 4592 const MCRegisterInfo *MRI = getMRI(); 4593 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4594 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4595 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4596 const MCOperand &Op = Inst.getOperand(I); 4597 if (!Op.isReg()) 4598 continue; 4599 4600 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4601 if (!Sub) 4602 continue; 4603 4604 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4605 return false; 4606 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4607 return false; 4608 } 4609 4610 return true; 4611 } 4612 4613 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4614 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4615 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4616 if (Op.isBLGP()) 4617 return Op.getStartLoc(); 4618 } 4619 return SMLoc(); 4620 } 4621 4622 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4623 const OperandVector &Operands) { 4624 unsigned Opc = Inst.getOpcode(); 4625 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4626 if (BlgpIdx == -1) 4627 return true; 4628 SMLoc BLGPLoc = getBLGPLoc(Operands); 4629 if (!BLGPLoc.isValid()) 4630 return true; 4631 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:"); 4632 auto FB = getFeatureBits(); 4633 bool UsesNeg = false; 4634 if (FB[AMDGPU::FeatureGFX940Insts]) { 4635 switch (Opc) { 4636 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4637 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4638 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4639 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4640 UsesNeg = true; 4641 } 4642 } 4643 4644 if (IsNeg == UsesNeg) 4645 return true; 4646 4647 Error(BLGPLoc, 4648 UsesNeg ? 
"invalid modifier: blgp is not supported" 4649 : "invalid modifier: neg is not supported"); 4650 4651 return false; 4652 } 4653 4654 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst, 4655 const OperandVector &Operands) { 4656 if (!isGFX11Plus()) 4657 return true; 4658 4659 unsigned Opc = Inst.getOpcode(); 4660 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 && 4661 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 && 4662 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 && 4663 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11) 4664 return true; 4665 4666 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst); 4667 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg()); 4668 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()); 4669 if (Reg == AMDGPU::SGPR_NULL) 4670 return true; 4671 4672 SMLoc RegLoc = getRegLoc(Reg, Operands); 4673 Error(RegLoc, "src0 must be null"); 4674 return false; 4675 } 4676 4677 bool AMDGPUAsmParser::validateDS(const MCInst &Inst, 4678 const OperandVector &Operands) { 4679 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4680 if ((TSFlags & SIInstrFlags::DS) == 0) 4681 return true; 4682 if (TSFlags & SIInstrFlags::GWS) 4683 return validateGWS(Inst, Operands); 4684 // Only validate GDS for non-GWS instructions. 4685 if (hasGDS()) 4686 return true; 4687 int GDSIdx = 4688 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds); 4689 if (GDSIdx < 0) 4690 return true; 4691 unsigned GDS = Inst.getOperand(GDSIdx).getImm(); 4692 if (GDS) { 4693 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands); 4694 Error(S, "gds modifier is not supported on this GPU"); 4695 return false; 4696 } 4697 return true; 4698 } 4699 4700 // gfx90a has an undocumented limitation: 4701 // DS_GWS opcodes must use even aligned registers. 4702 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4703 const OperandVector &Operands) { 4704 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4705 return true; 4706 4707 int Opc = Inst.getOpcode(); 4708 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4709 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4710 return true; 4711 4712 const MCRegisterInfo *MRI = getMRI(); 4713 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4714 int Data0Pos = 4715 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4716 assert(Data0Pos != -1); 4717 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4718 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4719 if (RegIdx & 1) { 4720 SMLoc RegLoc = getRegLoc(Reg, Operands); 4721 Error(RegLoc, "vgpr must be even aligned"); 4722 return false; 4723 } 4724 4725 return true; 4726 } 4727 4728 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4729 const OperandVector &Operands, 4730 const SMLoc &IDLoc) { 4731 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4732 AMDGPU::OpName::cpol); 4733 if (CPolPos == -1) 4734 return true; 4735 4736 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4737 4738 if (isGFX12Plus()) 4739 return validateTHAndScopeBits(Inst, Operands, CPol); 4740 4741 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4742 if (TSFlags & SIInstrFlags::SMRD) { 4743 if (CPol && (isSI() || isCI())) { 4744 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4745 Error(S, "cache policy is not supported for SMRD instructions"); 4746 return false; 4747 } 4748 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4749 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4750 return false; 4751 } 4752 } 4753 4754 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4755 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF | 4756 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4757 SIInstrFlags::FLAT; 4758 if (!(TSFlags & AllowSCCModifier)) { 4759 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4760 StringRef CStr(S.getPointer()); 4761 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4762 Error(S, 4763 "scc modifier is not supported for this instruction on this GPU"); 4764 return false; 4765 } 4766 } 4767 4768 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4769 return true; 4770 4771 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4772 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4773 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4774 : "instruction must use glc"); 4775 return false; 4776 } 4777 } else { 4778 if (CPol & CPol::GLC) { 4779 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4780 StringRef CStr(S.getPointer()); 4781 S = SMLoc::getFromPointer( 4782 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4783 Error(S, isGFX940() ? 
"instruction must not use sc0" 4784 : "instruction must not use glc"); 4785 return false; 4786 } 4787 } 4788 4789 return true; 4790 } 4791 4792 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst, 4793 const OperandVector &Operands, 4794 const unsigned CPol) { 4795 const unsigned TH = CPol & AMDGPU::CPol::TH; 4796 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE; 4797 4798 const unsigned Opcode = Inst.getOpcode(); 4799 const MCInstrDesc &TID = MII.get(Opcode); 4800 4801 auto PrintError = [&](StringRef Msg) { 4802 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4803 Error(S, Msg); 4804 return false; 4805 }; 4806 4807 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) && 4808 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) && 4809 (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))) 4810 return PrintError("instruction must use th:TH_ATOMIC_RETURN"); 4811 4812 if (TH == 0) 4813 return true; 4814 4815 if ((TID.TSFlags & SIInstrFlags::SMRD) && 4816 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) || 4817 (TH == AMDGPU::CPol::TH_NT_HT))) 4818 return PrintError("invalid th value for SMEM instruction"); 4819 4820 if (TH == AMDGPU::CPol::TH_BYPASS) { 4821 if ((Scope != AMDGPU::CPol::SCOPE_SYS && 4822 CPol & AMDGPU::CPol::TH_REAL_BYPASS) || 4823 (Scope == AMDGPU::CPol::SCOPE_SYS && 4824 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS))) 4825 return PrintError("scope and th combination is not valid"); 4826 } 4827 4828 bool IsStore = TID.mayStore(); 4829 bool IsAtomic = 4830 TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet); 4831 4832 if (IsAtomic) { 4833 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC)) 4834 return PrintError("invalid th value for atomic instructions"); 4835 } else if (IsStore) { 4836 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE)) 4837 return PrintError("invalid th value for store instructions"); 4838 } else { 4839 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD)) 4840 return PrintError("invalid th value for load instructions"); 4841 } 4842 4843 return true; 4844 } 4845 4846 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4847 if (!isGFX11Plus()) 4848 return true; 4849 for (auto &Operand : Operands) { 4850 if (!Operand->isReg()) 4851 continue; 4852 unsigned Reg = Operand->getReg(); 4853 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4854 Error(getRegLoc(Reg, Operands), 4855 "execz and vccz are not supported on this GPU"); 4856 return false; 4857 } 4858 } 4859 return true; 4860 } 4861 4862 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, 4863 const OperandVector &Operands) { 4864 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4865 if (Desc.mayStore() && 4866 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 4867 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands); 4868 if (Loc != getInstLoc(Operands)) { 4869 Error(Loc, "TFE modifier has no meaning for store instructions"); 4870 return false; 4871 } 4872 } 4873 4874 return true; 4875 } 4876 4877 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4878 const SMLoc &IDLoc, 4879 const OperandVector &Operands) { 4880 if (auto ErrMsg = validateLdsDirect(Inst)) { 4881 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4882 return false; 4883 } 4884 if (!validateSOPLiteral(Inst)) { 4885 Error(getLitLoc(Operands), 4886 "only one unique literal operand is allowed"); 4887 return false; 4888 } 4889 if (!validateVOPLiteral(Inst, Operands)) { 4890 return false; 4891 } 4892 if (!validateConstantBusLimitations(Inst, Operands)) { 4893 return false; 4894 } 
4895 if (!validateVOPDRegBankConstraints(Inst, Operands)) { 4896 return false; 4897 } 4898 if (!validateIntClampSupported(Inst)) { 4899 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4900 "integer clamping is not supported on this GPU"); 4901 return false; 4902 } 4903 if (!validateOpSel(Inst)) { 4904 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4905 "invalid op_sel operand"); 4906 return false; 4907 } 4908 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) { 4909 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands), 4910 "invalid neg_lo operand"); 4911 return false; 4912 } 4913 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) { 4914 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands), 4915 "invalid neg_hi operand"); 4916 return false; 4917 } 4918 if (!validateDPP(Inst, Operands)) { 4919 return false; 4920 } 4921 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4922 if (!validateMIMGD16(Inst)) { 4923 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4924 "d16 modifier is not supported on this GPU"); 4925 return false; 4926 } 4927 if (!validateMIMGMSAA(Inst)) { 4928 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4929 "invalid dim; must be MSAA type"); 4930 return false; 4931 } 4932 if (!validateMIMGDataSize(Inst, IDLoc)) { 4933 return false; 4934 } 4935 if (!validateMIMGAddrSize(Inst, IDLoc)) 4936 return false; 4937 if (!validateMIMGAtomicDMask(Inst)) { 4938 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4939 "invalid atomic image dmask"); 4940 return false; 4941 } 4942 if (!validateMIMGGatherDMask(Inst)) { 4943 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4944 "invalid image_gather dmask: only one bit must be set"); 4945 return false; 4946 } 4947 if (!validateMovrels(Inst, Operands)) { 4948 return false; 4949 } 4950 if (!validateOffset(Inst, Operands)) { 4951 return false; 4952 } 4953 if (!validateMAIAccWrite(Inst, Operands)) { 4954 return false; 4955 } 4956 if (!validateMAISrc2(Inst, Operands)) { 4957 return false; 4958 } 4959 if (!validateMFMA(Inst, Operands)) { 4960 return false; 4961 } 4962 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4963 return false; 4964 } 4965 4966 if (!validateAGPRLdSt(Inst)) { 4967 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4968 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4969 : "invalid register class: agpr loads and stores not supported on this GPU" 4970 ); 4971 return false; 4972 } 4973 if (!validateVGPRAlign(Inst)) { 4974 Error(IDLoc, 4975 "invalid register class: vgpr tuples must be 64 bit aligned"); 4976 return false; 4977 } 4978 if (!validateDS(Inst, Operands)) { 4979 return false; 4980 } 4981 4982 if (!validateBLGP(Inst, Operands)) { 4983 return false; 4984 } 4985 4986 if (!validateDivScale(Inst)) { 4987 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4988 return false; 4989 } 4990 if (!validateWaitCnt(Inst, Operands)) { 4991 return false; 4992 } 4993 if (!validateExeczVcczOperands(Operands)) { 4994 return false; 4995 } 4996 if (!validateTFE(Inst, Operands)) { 4997 return false; 4998 } 4999 5000 return true; 5001 } 5002 5003 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 5004 const FeatureBitset &FBS, 5005 unsigned VariantID = 0); 5006 5007 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 5008 const FeatureBitset &AvailableFeatures, 5009 unsigned VariantID); 5010 5011 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 5012 const FeatureBitset &FBS) { 5013 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 5014 } 5015 5016 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 5017 const FeatureBitset &FBS, 5018 ArrayRef<unsigned> Variants) { 5019 for (auto Variant : Variants) { 5020 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 5021 return true; 5022 } 5023 5024 return false; 5025 } 5026 5027 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 5028 const SMLoc &IDLoc) { 5029 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits()); 5030 5031 // Check if requested instruction variant is supported. 5032 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 5033 return false; 5034 5035 // This instruction is not supported. 5036 // Clear any other pending errors because they are no longer relevant. 5037 getParser().clearPendingErrors(); 5038 5039 // Requested instruction variant is not supported. 5040 // Check if any other variants are supported. 5041 StringRef VariantName = getMatchedVariantName(); 5042 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 5043 return Error(IDLoc, 5044 Twine(VariantName, 5045 " variant of this instruction is not supported")); 5046 } 5047 5048 // Check if this instruction may be used with a different wavesize. 5049 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && 5050 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { 5051 5052 FeatureBitset FeaturesWS32 = getFeatureBits(); 5053 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64) 5054 .flip(AMDGPU::FeatureWavefrontSize32); 5055 FeatureBitset AvailableFeaturesWS32 = 5056 ComputeAvailableFeatures(FeaturesWS32); 5057 5058 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants())) 5059 return Error(IDLoc, "instruction requires wavesize=32"); 5060 } 5061 5062 // Finally check if this instruction is supported on any other GPU. 5063 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 5064 return Error(IDLoc, "instruction not supported on this GPU"); 5065 } 5066 5067 // Instruction not supported on any GPU. Probably a typo. 
5068 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 5069 return Error(IDLoc, "invalid instruction" + Suggestion); 5070 } 5071 5072 static bool isInvalidVOPDY(const OperandVector &Operands, 5073 uint64_t InvalidOprIdx) { 5074 assert(InvalidOprIdx < Operands.size()); 5075 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]); 5076 if (Op.isToken() && InvalidOprIdx > 1) { 5077 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]); 5078 return PrevOp.isToken() && PrevOp.getToken() == "::"; 5079 } 5080 return false; 5081 } 5082 5083 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 5084 OperandVector &Operands, 5085 MCStreamer &Out, 5086 uint64_t &ErrorInfo, 5087 bool MatchingInlineAsm) { 5088 MCInst Inst; 5089 unsigned Result = Match_Success; 5090 for (auto Variant : getMatchedVariants()) { 5091 uint64_t EI; 5092 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 5093 Variant); 5094 // We order match statuses from least to most specific. We use most specific 5095 // status as resulting 5096 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 5097 if ((R == Match_Success) || 5098 (R == Match_PreferE32) || 5099 (R == Match_MissingFeature && Result != Match_PreferE32) || 5100 (R == Match_InvalidOperand && Result != Match_MissingFeature 5101 && Result != Match_PreferE32) || 5102 (R == Match_MnemonicFail && Result != Match_InvalidOperand 5103 && Result != Match_MissingFeature 5104 && Result != Match_PreferE32)) { 5105 Result = R; 5106 ErrorInfo = EI; 5107 } 5108 if (R == Match_Success) 5109 break; 5110 } 5111 5112 if (Result == Match_Success) { 5113 if (!validateInstruction(Inst, IDLoc, Operands)) { 5114 return true; 5115 } 5116 Inst.setLoc(IDLoc); 5117 Out.emitInstruction(Inst, getSTI()); 5118 return false; 5119 } 5120 5121 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5122 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 5123 return true; 5124 } 5125 5126 switch (Result) { 5127 default: break; 5128 case Match_MissingFeature: 5129 // It has been verified that the specified instruction 5130 // mnemonic is valid. A match was found but it requires 5131 // features which are not supported on this GPU. 
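// (For instance, illustrative only: an encoding that exists only in wave32
// mode, or an operand combination introduced on a newer generation.)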
5132 return Error(IDLoc, "operands are not valid for this GPU or mode"); 5133 5134 case Match_InvalidOperand: { 5135 SMLoc ErrorLoc = IDLoc; 5136 if (ErrorInfo != ~0ULL) { 5137 if (ErrorInfo >= Operands.size()) { 5138 return Error(IDLoc, "too few operands for instruction"); 5139 } 5140 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 5141 if (ErrorLoc == SMLoc()) 5142 ErrorLoc = IDLoc; 5143 5144 if (isInvalidVOPDY(Operands, ErrorInfo)) 5145 return Error(ErrorLoc, "invalid VOPDY instruction"); 5146 } 5147 return Error(ErrorLoc, "invalid operand for instruction"); 5148 } 5149 5150 case Match_PreferE32: 5151 return Error(IDLoc, "internal error: instruction without _e64 suffix " 5152 "should be encoded as e32"); 5153 case Match_MnemonicFail: 5154 llvm_unreachable("Invalid instructions should have been handled already"); 5155 } 5156 llvm_unreachable("Implement any new match types added!"); 5157 } 5158 5159 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 5160 int64_t Tmp = -1; 5161 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 5162 return true; 5163 } 5164 if (getParser().parseAbsoluteExpression(Tmp)) { 5165 return true; 5166 } 5167 Ret = static_cast<uint32_t>(Tmp); 5168 return false; 5169 } 5170 5171 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 5172 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 5173 return TokError("directive only supported for amdgcn architecture"); 5174 5175 std::string TargetIDDirective; 5176 SMLoc TargetStart = getTok().getLoc(); 5177 if (getParser().parseEscapedString(TargetIDDirective)) 5178 return true; 5179 5180 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 5181 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5182 return getParser().Error(TargetRange.Start, 5183 (Twine(".amdgcn_target directive's target id ") + 5184 Twine(TargetIDDirective) + 5185 Twine(" does not match the specified target id ") + 5186 Twine(getTargetStreamer().getTargetID()->toString())).str()); 5187 5188 return false; 5189 } 5190 5191 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 5192 return Error(Range.Start, "value out of range", Range); 5193 } 5194 5195 bool AMDGPUAsmParser::calculateGPRBlocks( 5196 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 5197 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32, 5198 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR, 5199 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 5200 // TODO(scott.linder): These calculations are duplicated from 5201 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
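// In rough terms (a sketch, the IsaInfo helpers below are authoritative):
// the SGPR count is padded with the extra SGPRs implied by VCC, flat
// scratch and XNACK use, both counts are rounded up to the target's
// allocation granularity, and the results are converted to the encoded
// block counts stored in the kernel descriptor.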
5202 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 5203 5204 unsigned NumVGPRs = NextFreeVGPR; 5205 unsigned NumSGPRs = NextFreeSGPR; 5206 5207 if (Version.Major >= 10) 5208 NumSGPRs = 0; 5209 else { 5210 unsigned MaxAddressableNumSGPRs = 5211 IsaInfo::getAddressableNumSGPRs(&getSTI()); 5212 5213 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 5214 NumSGPRs > MaxAddressableNumSGPRs) 5215 return OutOfRangeError(SGPRRange); 5216 5217 NumSGPRs += 5218 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 5219 5220 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 5221 NumSGPRs > MaxAddressableNumSGPRs) 5222 return OutOfRangeError(SGPRRange); 5223 5224 if (Features.test(FeatureSGPRInitBug)) 5225 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 5226 } 5227 5228 VGPRBlocks = 5229 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 5230 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 5231 5232 return false; 5233 } 5234 5235 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 5236 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 5237 return TokError("directive only supported for amdgcn architecture"); 5238 5239 if (!isHsaAbi(getSTI())) 5240 return TokError("directive only supported for amdhsa OS"); 5241 5242 StringRef KernelName; 5243 if (getParser().parseIdentifier(KernelName)) 5244 return true; 5245 5246 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 5247 5248 StringSet<> Seen; 5249 5250 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 5251 5252 SMRange VGPRRange; 5253 uint64_t NextFreeVGPR = 0; 5254 uint64_t AccumOffset = 0; 5255 uint64_t SharedVGPRCount = 0; 5256 uint64_t PreloadLength = 0; 5257 uint64_t PreloadOffset = 0; 5258 SMRange SGPRRange; 5259 uint64_t NextFreeSGPR = 0; 5260 5261 // Count the number of user SGPRs implied from the enabled feature bits. 5262 unsigned ImpliedUserSGPRCount = 0; 5263 5264 // Track if the asm explicitly contains the directive for the user SGPR 5265 // count. 
5266 std::optional<unsigned> ExplicitUserSGPRCount; 5267 bool ReserveVCC = true; 5268 bool ReserveFlatScr = true; 5269 std::optional<bool> EnableWavefrontSize32; 5270 5271 while (true) { 5272 while (trySkipToken(AsmToken::EndOfStatement)); 5273 5274 StringRef ID; 5275 SMRange IDRange = getTok().getLocRange(); 5276 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 5277 return true; 5278 5279 if (ID == ".end_amdhsa_kernel") 5280 break; 5281 5282 if (!Seen.insert(ID).second) 5283 return TokError(".amdhsa_ directives cannot be repeated"); 5284 5285 SMLoc ValStart = getLoc(); 5286 int64_t IVal; 5287 if (getParser().parseAbsoluteExpression(IVal)) 5288 return true; 5289 SMLoc ValEnd = getLoc(); 5290 SMRange ValRange = SMRange(ValStart, ValEnd); 5291 5292 if (IVal < 0) 5293 return OutOfRangeError(ValRange); 5294 5295 uint64_t Val = IVal; 5296 5297 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 5298 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 5299 return OutOfRangeError(RANGE); \ 5300 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 5301 5302 if (ID == ".amdhsa_group_segment_fixed_size") { 5303 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 5304 return OutOfRangeError(ValRange); 5305 KD.group_segment_fixed_size = Val; 5306 } else if (ID == ".amdhsa_private_segment_fixed_size") { 5307 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 5308 return OutOfRangeError(ValRange); 5309 KD.private_segment_fixed_size = Val; 5310 } else if (ID == ".amdhsa_kernarg_size") { 5311 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 5312 return OutOfRangeError(ValRange); 5313 KD.kernarg_size = Val; 5314 } else if (ID == ".amdhsa_user_sgpr_count") { 5315 ExplicitUserSGPRCount = Val; 5316 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 5317 if (hasArchitectedFlatScratch()) 5318 return Error(IDRange.Start, 5319 "directive is not supported with architected flat scratch", 5320 IDRange); 5321 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5322 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 5323 Val, ValRange); 5324 if (Val) 5325 ImpliedUserSGPRCount += 4; 5326 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") { 5327 if (!hasKernargPreload()) 5328 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5329 5330 if (Val > getMaxNumUserSGPRs()) 5331 return OutOfRangeError(ValRange); 5332 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val, 5333 ValRange); 5334 if (Val) { 5335 ImpliedUserSGPRCount += Val; 5336 PreloadLength = Val; 5337 } 5338 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") { 5339 if (!hasKernargPreload()) 5340 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5341 5342 if (Val >= 1024) 5343 return OutOfRangeError(ValRange); 5344 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val, 5345 ValRange); 5346 if (Val) 5347 PreloadOffset = Val; 5348 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 5349 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5350 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 5351 ValRange); 5352 if (Val) 5353 ImpliedUserSGPRCount += 2; 5354 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 5355 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5356 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 5357 ValRange); 5358 if (Val) 5359 ImpliedUserSGPRCount += 2; 5360 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 5361 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5362 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 5363 
Val, ValRange); 5364 if (Val) 5365 ImpliedUserSGPRCount += 2; 5366 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 5367 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5368 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 5369 ValRange); 5370 if (Val) 5371 ImpliedUserSGPRCount += 2; 5372 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 5373 if (hasArchitectedFlatScratch()) 5374 return Error(IDRange.Start, 5375 "directive is not supported with architected flat scratch", 5376 IDRange); 5377 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5378 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 5379 ValRange); 5380 if (Val) 5381 ImpliedUserSGPRCount += 2; 5382 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 5383 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5384 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 5385 Val, ValRange); 5386 if (Val) 5387 ImpliedUserSGPRCount += 1; 5388 } else if (ID == ".amdhsa_wavefront_size32") { 5389 if (IVersion.Major < 10) 5390 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5391 EnableWavefrontSize32 = Val; 5392 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5393 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5394 Val, ValRange); 5395 } else if (ID == ".amdhsa_uses_dynamic_stack") { 5396 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5397 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange); 5398 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5399 if (hasArchitectedFlatScratch()) 5400 return Error(IDRange.Start, 5401 "directive is not supported with architected flat scratch", 5402 IDRange); 5403 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5404 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5405 } else if (ID == ".amdhsa_enable_private_segment") { 5406 if (!hasArchitectedFlatScratch()) 5407 return Error( 5408 IDRange.Start, 5409 "directive is not supported without architected flat scratch", 5410 IDRange); 5411 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5412 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5413 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5414 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5415 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5416 ValRange); 5417 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5418 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5419 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5420 ValRange); 5421 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5422 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5423 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5424 ValRange); 5425 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5426 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5427 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5428 ValRange); 5429 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5430 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5431 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5432 ValRange); 5433 } else if (ID == ".amdhsa_next_free_vgpr") { 5434 VGPRRange = ValRange; 5435 NextFreeVGPR = Val; 5436 } else if (ID == ".amdhsa_next_free_sgpr") { 5437 SGPRRange = ValRange; 5438 NextFreeSGPR = Val; 5439 } else if (ID == ".amdhsa_accum_offset") { 5440 if (!isGFX90A()) 5441 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5442 AccumOffset = Val; 5443 } else if (ID == ".amdhsa_reserve_vcc") { 5444 if (!isUInt<1>(Val)) 5445 return OutOfRangeError(ValRange); 5446 ReserveVCC = Val; 5447 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5448 if (IVersion.Major < 7) 5449 return 
Error(IDRange.Start, "directive requires gfx7+", IDRange); 5450 if (hasArchitectedFlatScratch()) 5451 return Error(IDRange.Start, 5452 "directive is not supported with architected flat scratch", 5453 IDRange); 5454 if (!isUInt<1>(Val)) 5455 return OutOfRangeError(ValRange); 5456 ReserveFlatScr = Val; 5457 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5458 if (IVersion.Major < 8) 5459 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5460 if (!isUInt<1>(Val)) 5461 return OutOfRangeError(ValRange); 5462 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5463 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5464 IDRange); 5465 } else if (ID == ".amdhsa_float_round_mode_32") { 5466 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5467 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5468 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5469 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5470 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5471 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5472 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5473 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5474 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5475 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5476 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5477 ValRange); 5478 } else if (ID == ".amdhsa_dx10_clamp") { 5479 if (IVersion.Major >= 12) 5480 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 5481 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5482 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val, 5483 ValRange); 5484 } else if (ID == ".amdhsa_ieee_mode") { 5485 if (IVersion.Major >= 12) 5486 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 5487 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5488 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val, 5489 ValRange); 5490 } else if (ID == ".amdhsa_fp16_overflow") { 5491 if (IVersion.Major < 9) 5492 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5493 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val, 5494 ValRange); 5495 } else if (ID == ".amdhsa_tg_split") { 5496 if (!isGFX90A()) 5497 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5498 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5499 ValRange); 5500 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5501 if (IVersion.Major < 10) 5502 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5503 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val, 5504 ValRange); 5505 } else if (ID == ".amdhsa_memory_ordered") { 5506 if (IVersion.Major < 10) 5507 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5508 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val, 5509 ValRange); 5510 } else if (ID == ".amdhsa_forward_progress") { 5511 if (IVersion.Major < 10) 5512 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5513 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val, 5514 ValRange); 5515 } else if (ID == ".amdhsa_shared_vgpr_count") { 5516 if (IVersion.Major < 10 || IVersion.Major >= 12) 5517 return Error(IDRange.Start, "directive requires gfx10 or gfx11", 5518 IDRange); 5519 SharedVGPRCount = Val; 5520 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5521 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val, 5522 ValRange); 5523 } else if (ID == 
".amdhsa_exception_fp_ieee_invalid_op") { 5524 PARSE_BITS_ENTRY( 5525 KD.compute_pgm_rsrc2, 5526 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5527 ValRange); 5528 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5529 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5530 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5531 Val, ValRange); 5532 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5533 PARSE_BITS_ENTRY( 5534 KD.compute_pgm_rsrc2, 5535 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5536 ValRange); 5537 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5538 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5539 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5540 Val, ValRange); 5541 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5542 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5543 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5544 Val, ValRange); 5545 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5546 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5547 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5548 Val, ValRange); 5549 } else if (ID == ".amdhsa_exception_int_div_zero") { 5550 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5551 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5552 Val, ValRange); 5553 } else if (ID == ".amdhsa_round_robin_scheduling") { 5554 if (IVersion.Major < 12) 5555 return Error(IDRange.Start, "directive requires gfx12+", IDRange); 5556 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5557 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val, 5558 ValRange); 5559 } else { 5560 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5561 } 5562 5563 #undef PARSE_BITS_ENTRY 5564 } 5565 5566 if (!Seen.contains(".amdhsa_next_free_vgpr")) 5567 return TokError(".amdhsa_next_free_vgpr directive is required"); 5568 5569 if (!Seen.contains(".amdhsa_next_free_sgpr")) 5570 return TokError(".amdhsa_next_free_sgpr directive is required"); 5571 5572 unsigned VGPRBlocks; 5573 unsigned SGPRBlocks; 5574 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5575 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5576 EnableWavefrontSize32, NextFreeVGPR, 5577 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5578 SGPRBlocks)) 5579 return true; 5580 5581 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5582 VGPRBlocks)) 5583 return OutOfRangeError(VGPRRange); 5584 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5585 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5586 5587 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5588 SGPRBlocks)) 5589 return OutOfRangeError(SGPRRange); 5590 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5591 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5592 SGPRBlocks); 5593 5594 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5595 return TokError(".amdhsa_user_sgpr_count smaller than implied by " 5596 "enabled user SGPRs"); 5597 5598 unsigned UserSGPRCount = 5599 ExplicitUserSGPRCount ?
*ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5600 5601 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5602 return TokError("too many user SGPRs enabled"); 5603 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5604 UserSGPRCount); 5605 5606 if (PreloadLength && KD.kernarg_size && 5607 (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size)) 5608 return TokError("Kernarg preload length + offset is larger than the " 5609 "kernarg segment size"); 5610 5611 if (isGFX90A()) { 5612 if (!Seen.contains(".amdhsa_accum_offset")) 5613 return TokError(".amdhsa_accum_offset directive is required"); 5614 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5615 return TokError("accum_offset should be in range [4..256] in " 5616 "increments of 4"); 5617 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5618 return TokError("accum_offset exceeds total VGPR allocation"); 5619 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5620 (AccumOffset / 4 - 1)); 5621 } 5622 5623 if (IVersion.Major >= 10 && IVersion.Major < 12) { 5624 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS 5625 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { 5626 return TokError("shared_vgpr_count directive not valid on " 5627 "wavefront size 32"); 5628 } 5629 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5630 return TokError("shared_vgpr_count*2 + " 5631 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5632 "exceed 63\n"); 5633 } 5634 } 5635 5636 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD, 5637 NextFreeVGPR, NextFreeSGPR, 5638 ReserveVCC, ReserveFlatScr); 5639 return false; 5640 } 5641 5642 bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() { 5643 uint32_t Version; 5644 if (ParseAsAbsoluteExpression(Version)) 5645 return true; 5646 5647 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version); 5648 return false; 5649 } 5650 5651 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5652 amd_kernel_code_t &Header) { 5653 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5654 // assembly for backwards compatibility. 
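// For example, a line such as "max_scratch_backing_memory_byte_size = 16384"
// (value illustrative) inside an .amd_kernel_code_t block is consumed and
// silently ignored.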
5655 if (ID == "max_scratch_backing_memory_byte_size") { 5656 Parser.eatToEndOfStatement(); 5657 return false; 5658 } 5659 5660 SmallString<40> ErrStr; 5661 raw_svector_ostream Err(ErrStr); 5662 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5663 return TokError(Err.str()); 5664 } 5665 Lex(); 5666 5667 if (ID == "enable_dx10_clamp") { 5668 if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) && 5669 isGFX12Plus()) 5670 return TokError("enable_dx10_clamp=1 is not allowed on GFX12+"); 5671 } 5672 5673 if (ID == "enable_ieee_mode") { 5674 if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) && 5675 isGFX12Plus()) 5676 return TokError("enable_ieee_mode=1 is not allowed on GFX12+"); 5677 } 5678 5679 if (ID == "enable_wavefront_size32") { 5680 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5681 if (!isGFX10Plus()) 5682 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5683 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5684 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5685 } else { 5686 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5687 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5688 } 5689 } 5690 5691 if (ID == "wavefront_size") { 5692 if (Header.wavefront_size == 5) { 5693 if (!isGFX10Plus()) 5694 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5695 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5696 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5697 } else if (Header.wavefront_size == 6) { 5698 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5699 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5700 } 5701 } 5702 5703 if (ID == "enable_wgp_mode") { 5704 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5705 !isGFX10Plus()) 5706 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5707 } 5708 5709 if (ID == "enable_mem_ordered") { 5710 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5711 !isGFX10Plus()) 5712 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5713 } 5714 5715 if (ID == "enable_fwd_progress") { 5716 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5717 !isGFX10Plus()) 5718 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5719 } 5720 5721 return false; 5722 } 5723 5724 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5725 amd_kernel_code_t Header; 5726 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5727 5728 while (true) { 5729 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5730 // will set the current token to EndOfStatement. 
5731 while(trySkipToken(AsmToken::EndOfStatement)); 5732 5733 StringRef ID; 5734 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5735 return true; 5736 5737 if (ID == ".end_amd_kernel_code_t") 5738 break; 5739 5740 if (ParseAMDKernelCodeTValue(ID, Header)) 5741 return true; 5742 } 5743 5744 getTargetStreamer().EmitAMDKernelCodeT(Header); 5745 5746 return false; 5747 } 5748 5749 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5750 StringRef KernelName; 5751 if (!parseId(KernelName, "expected symbol name")) 5752 return true; 5753 5754 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5755 ELF::STT_AMDGPU_HSA_KERNEL); 5756 5757 KernelScope.initialize(getContext()); 5758 return false; 5759 } 5760 5761 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5762 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5763 return Error(getLoc(), 5764 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5765 "architectures"); 5766 } 5767 5768 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5769 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5770 return Error(getParser().getTok().getLoc(), "target id must match options"); 5771 5772 getTargetStreamer().EmitISAVersion(); 5773 Lex(); 5774 5775 return false; 5776 } 5777 5778 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5779 assert(isHsaAbi(getSTI())); 5780 5781 std::string HSAMetadataString; 5782 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin, 5783 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString)) 5784 return true; 5785 5786 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5787 return Error(getLoc(), "invalid HSA metadata"); 5788 5789 return false; 5790 } 5791 5792 /// Common code to parse out a block of text (typically YAML) between start and 5793 /// end directives. 5794 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5795 const char *AssemblerDirectiveEnd, 5796 std::string &CollectString) { 5797 5798 raw_string_ostream CollectStream(CollectString); 5799 5800 getLexer().setSkipSpace(false); 5801 5802 bool FoundEnd = false; 5803 while (!isToken(AsmToken::Eof)) { 5804 while (isToken(AsmToken::Space)) { 5805 CollectStream << getTokenStr(); 5806 Lex(); 5807 } 5808 5809 if (trySkipId(AssemblerDirectiveEnd)) { 5810 FoundEnd = true; 5811 break; 5812 } 5813 5814 CollectStream << Parser.parseStringToEndOfStatement() 5815 << getContext().getAsmInfo()->getSeparatorString(); 5816 5817 Parser.eatToEndOfStatement(); 5818 } 5819 5820 getLexer().setSkipSpace(true); 5821 5822 if (isToken(AsmToken::Eof) && !FoundEnd) { 5823 return TokError(Twine("expected directive ") + 5824 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5825 } 5826 5827 CollectStream.flush(); 5828 return false; 5829 } 5830 5831 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5832 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5833 std::string String; 5834 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5835 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5836 return true; 5837 5838 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5839 if (!PALMetadata->setFromString(String)) 5840 return Error(getLoc(), "invalid PAL metadata"); 5841 return false; 5842 } 5843 5844 /// Parse the assembler directive for old linear-format PAL metadata. 
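/// The directive (PALMD::AssemblerDirective) takes a comma-separated list of
/// register/value pairs, e.g. (register offsets and values illustrative only):
///   .amd_amdgpu_pal_metadata 0x2c0a, 0x0, 0x2c0b, 0x42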
5845 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5846 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5847 return Error(getLoc(), 5848 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5849 "not available on non-amdpal OSes")).str()); 5850 } 5851 5852 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5853 PALMetadata->setLegacy(); 5854 for (;;) { 5855 uint32_t Key, Value; 5856 if (ParseAsAbsoluteExpression(Key)) { 5857 return TokError(Twine("invalid value in ") + 5858 Twine(PALMD::AssemblerDirective)); 5859 } 5860 if (!trySkipToken(AsmToken::Comma)) { 5861 return TokError(Twine("expected an even number of values in ") + 5862 Twine(PALMD::AssemblerDirective)); 5863 } 5864 if (ParseAsAbsoluteExpression(Value)) { 5865 return TokError(Twine("invalid value in ") + 5866 Twine(PALMD::AssemblerDirective)); 5867 } 5868 PALMetadata->setRegister(Key, Value); 5869 if (!trySkipToken(AsmToken::Comma)) 5870 break; 5871 } 5872 return false; 5873 } 5874 5875 /// ParseDirectiveAMDGPULDS 5876 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5877 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5878 if (getParser().checkForValidSection()) 5879 return true; 5880 5881 StringRef Name; 5882 SMLoc NameLoc = getLoc(); 5883 if (getParser().parseIdentifier(Name)) 5884 return TokError("expected identifier in directive"); 5885 5886 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5887 if (getParser().parseComma()) 5888 return true; 5889 5890 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5891 5892 int64_t Size; 5893 SMLoc SizeLoc = getLoc(); 5894 if (getParser().parseAbsoluteExpression(Size)) 5895 return true; 5896 if (Size < 0) 5897 return Error(SizeLoc, "size must be non-negative"); 5898 if (Size > LocalMemorySize) 5899 return Error(SizeLoc, "size is too large"); 5900 5901 int64_t Alignment = 4; 5902 if (trySkipToken(AsmToken::Comma)) { 5903 SMLoc AlignLoc = getLoc(); 5904 if (getParser().parseAbsoluteExpression(Alignment)) 5905 return true; 5906 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5907 return Error(AlignLoc, "alignment must be a power of two"); 5908 5909 // Alignment larger than the size of LDS is possible in theory, as long 5910 // as the linker manages to place the symbol at address 0, but we do want 5911 // to make sure the alignment fits nicely into a 32-bit integer. 5912 if (Alignment >= 1u << 31) 5913 return Error(AlignLoc, "alignment is too large"); 5914 } 5915 5916 if (parseEOL()) 5917 return true; 5918 5919 Symbol->redefineIfPossible(); 5920 if (!Symbol->isUndefined()) 5921 return Error(NameLoc, "invalid symbol redefinition"); 5922 5923 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5924 return false; 5925 } 5926 5927 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5928 StringRef IDVal = DirectiveID.getString(); 5929 5930 if (isHsaAbi(getSTI())) { 5931 if (IDVal == ".amdhsa_kernel") 5932 return ParseDirectiveAMDHSAKernel(); 5933 5934 if (IDVal == ".amdhsa_code_object_version") 5935 return ParseDirectiveAMDHSACodeObjectVersion(); 5936 5937 // TODO: Restructure/combine with PAL metadata directive.
5938 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5939 return ParseDirectiveHSAMetadata(); 5940 } else { 5941 if (IDVal == ".amd_kernel_code_t") 5942 return ParseDirectiveAMDKernelCodeT(); 5943 5944 if (IDVal == ".amdgpu_hsa_kernel") 5945 return ParseDirectiveAMDGPUHsaKernel(); 5946 5947 if (IDVal == ".amd_amdgpu_isa") 5948 return ParseDirectiveISAVersion(); 5949 5950 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) { 5951 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) + 5952 Twine(" directive is " 5953 "not available on non-amdhsa OSes")) 5954 .str()); 5955 } 5956 } 5957 5958 if (IDVal == ".amdgcn_target") 5959 return ParseDirectiveAMDGCNTarget(); 5960 5961 if (IDVal == ".amdgpu_lds") 5962 return ParseDirectiveAMDGPULDS(); 5963 5964 if (IDVal == PALMD::AssemblerDirectiveBegin) 5965 return ParseDirectivePALMetadataBegin(); 5966 5967 if (IDVal == PALMD::AssemblerDirective) 5968 return ParseDirectivePALMetadata(); 5969 5970 return true; 5971 } 5972 5973 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5974 unsigned RegNo) { 5975 5976 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5977 return isGFX9Plus(); 5978 5979 // GFX10+ has 2 more SGPRs 104 and 105. 5980 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5981 return hasSGPR104_SGPR105(); 5982 5983 switch (RegNo) { 5984 case AMDGPU::SRC_SHARED_BASE_LO: 5985 case AMDGPU::SRC_SHARED_BASE: 5986 case AMDGPU::SRC_SHARED_LIMIT_LO: 5987 case AMDGPU::SRC_SHARED_LIMIT: 5988 case AMDGPU::SRC_PRIVATE_BASE_LO: 5989 case AMDGPU::SRC_PRIVATE_BASE: 5990 case AMDGPU::SRC_PRIVATE_LIMIT_LO: 5991 case AMDGPU::SRC_PRIVATE_LIMIT: 5992 return isGFX9Plus(); 5993 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5994 return isGFX9Plus() && !isGFX11Plus(); 5995 case AMDGPU::TBA: 5996 case AMDGPU::TBA_LO: 5997 case AMDGPU::TBA_HI: 5998 case AMDGPU::TMA: 5999 case AMDGPU::TMA_LO: 6000 case AMDGPU::TMA_HI: 6001 return !isGFX9Plus(); 6002 case AMDGPU::XNACK_MASK: 6003 case AMDGPU::XNACK_MASK_LO: 6004 case AMDGPU::XNACK_MASK_HI: 6005 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 6006 case AMDGPU::SGPR_NULL: 6007 return isGFX10Plus(); 6008 default: 6009 break; 6010 } 6011 6012 if (isCI()) 6013 return true; 6014 6015 if (isSI() || isGFX10Plus()) { 6016 // No flat_scr on SI. 6017 // On GFX10Plus flat scratch is not a valid register operand and can only be 6018 // accessed with s_setreg/s_getreg. 6019 switch (RegNo) { 6020 case AMDGPU::FLAT_SCR: 6021 case AMDGPU::FLAT_SCR_LO: 6022 case AMDGPU::FLAT_SCR_HI: 6023 return false; 6024 default: 6025 return true; 6026 } 6027 } 6028 6029 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 6030 // SI/CI have. 6031 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 6032 return hasSGPR102_SGPR103(); 6033 6034 return true; 6035 } 6036 6037 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands, 6038 StringRef Mnemonic, 6039 OperandMode Mode) { 6040 ParseStatus Res = parseVOPD(Operands); 6041 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement)) 6042 return Res; 6043 6044 // Try to parse with a custom parser 6045 Res = MatchOperandParserImpl(Operands, Mnemonic); 6046 6047 // If we successfully parsed the operand or if there as an error parsing, 6048 // we are done. 6049 // 6050 // If we are parsing after we reach EndOfStatement then this means we 6051 // are appending default values to the Operands list. 
This is only done 6052 // by custom parser, so we shouldn't continue on to the generic parsing. 6053 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement)) 6054 return Res; 6055 6056 SMLoc RBraceLoc; 6057 SMLoc LBraceLoc = getLoc(); 6058 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 6059 unsigned Prefix = Operands.size(); 6060 6061 for (;;) { 6062 auto Loc = getLoc(); 6063 Res = parseReg(Operands); 6064 if (Res.isNoMatch()) 6065 Error(Loc, "expected a register"); 6066 if (!Res.isSuccess()) 6067 return ParseStatus::Failure; 6068 6069 RBraceLoc = getLoc(); 6070 if (trySkipToken(AsmToken::RBrac)) 6071 break; 6072 6073 if (!skipToken(AsmToken::Comma, 6074 "expected a comma or a closing square bracket")) 6075 return ParseStatus::Failure; 6076 } 6077 6078 if (Operands.size() - Prefix > 1) { 6079 Operands.insert(Operands.begin() + Prefix, 6080 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 6081 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 6082 } 6083 6084 return ParseStatus::Success; 6085 } 6086 6087 return parseRegOrImm(Operands); 6088 } 6089 6090 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 6091 // Clear any forced encodings from the previous instruction. 6092 setForcedEncodingSize(0); 6093 setForcedDPP(false); 6094 setForcedSDWA(false); 6095 6096 if (Name.ends_with("_e64_dpp")) { 6097 setForcedDPP(true); 6098 setForcedEncodingSize(64); 6099 return Name.substr(0, Name.size() - 8); 6100 } else if (Name.ends_with("_e64")) { 6101 setForcedEncodingSize(64); 6102 return Name.substr(0, Name.size() - 4); 6103 } else if (Name.ends_with("_e32")) { 6104 setForcedEncodingSize(32); 6105 return Name.substr(0, Name.size() - 4); 6106 } else if (Name.ends_with("_dpp")) { 6107 setForcedDPP(true); 6108 return Name.substr(0, Name.size() - 4); 6109 } else if (Name.ends_with("_sdwa")) { 6110 setForcedSDWA(true); 6111 return Name.substr(0, Name.size() - 5); 6112 } 6113 return Name; 6114 } 6115 6116 static void applyMnemonicAliases(StringRef &Mnemonic, 6117 const FeatureBitset &Features, 6118 unsigned VariantID); 6119 6120 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 6121 StringRef Name, 6122 SMLoc NameLoc, OperandVector &Operands) { 6123 // Add the instruction mnemonic 6124 Name = parseMnemonicSuffix(Name); 6125 6126 // If the target architecture uses MnemonicAlias, call it here to parse 6127 // operands correctly. 6128 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 6129 6130 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 6131 6132 bool IsMIMG = Name.starts_with("image_"); 6133 6134 while (!trySkipToken(AsmToken::EndOfStatement)) { 6135 OperandMode Mode = OperandMode_Default; 6136 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 6137 Mode = OperandMode_NSA; 6138 ParseStatus Res = parseOperand(Operands, Name, Mode); 6139 6140 if (!Res.isSuccess()) { 6141 checkUnsupportedInstruction(Name, NameLoc); 6142 if (!Parser.hasPendingError()) { 6143 // FIXME: use real operand location rather than the current location. 6144 StringRef Msg = Res.isFailure() ? "failed parsing operand." 6145 : "not a valid operand."; 6146 Error(getLoc(), Msg); 6147 } 6148 while (!trySkipToken(AsmToken::EndOfStatement)) { 6149 lex(); 6150 } 6151 return true; 6152 } 6153 6154 // Eat the comma or space if there is one. 
6155 trySkipToken(AsmToken::Comma); 6156 } 6157 6158 return false; 6159 } 6160 6161 //===----------------------------------------------------------------------===// 6162 // Utility functions 6163 //===----------------------------------------------------------------------===// 6164 6165 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name, 6166 OperandVector &Operands) { 6167 SMLoc S = getLoc(); 6168 if (!trySkipId(Name)) 6169 return ParseStatus::NoMatch; 6170 6171 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S)); 6172 return ParseStatus::Success; 6173 } 6174 6175 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, 6176 int64_t &IntVal) { 6177 6178 if (!trySkipId(Prefix, AsmToken::Colon)) 6179 return ParseStatus::NoMatch; 6180 6181 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure; 6182 } 6183 6184 ParseStatus AMDGPUAsmParser::parseIntWithPrefix( 6185 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6186 std::function<bool(int64_t &)> ConvertResult) { 6187 SMLoc S = getLoc(); 6188 int64_t Value = 0; 6189 6190 ParseStatus Res = parseIntWithPrefix(Prefix, Value); 6191 if (!Res.isSuccess()) 6192 return Res; 6193 6194 if (ConvertResult && !ConvertResult(Value)) { 6195 Error(S, "invalid " + StringRef(Prefix) + " value."); 6196 } 6197 6198 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 6199 return ParseStatus::Success; 6200 } 6201 6202 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix( 6203 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6204 bool (*ConvertResult)(int64_t &)) { 6205 SMLoc S = getLoc(); 6206 if (!trySkipId(Prefix, AsmToken::Colon)) 6207 return ParseStatus::NoMatch; 6208 6209 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 6210 return ParseStatus::Failure; 6211 6212 unsigned Val = 0; 6213 const unsigned MaxSize = 4; 6214 6215 // FIXME: How to verify the number of elements matches the number of src 6216 // operands? 
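// For example (values illustrative): neg:[1,1,0] or op_sel:[0,0,1,1]; each
// element must be 0 or 1, and at most four elements are accepted.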
6217 for (int I = 0; ; ++I) { 6218 int64_t Op; 6219 SMLoc Loc = getLoc(); 6220 if (!parseExpr(Op)) 6221 return ParseStatus::Failure; 6222 6223 if (Op != 0 && Op != 1) 6224 return Error(Loc, "invalid " + StringRef(Prefix) + " value."); 6225 6226 Val |= (Op << I); 6227 6228 if (trySkipToken(AsmToken::RBrac)) 6229 break; 6230 6231 if (I + 1 == MaxSize) 6232 return Error(getLoc(), "expected a closing square bracket"); 6233 6234 if (!skipToken(AsmToken::Comma, "expected a comma")) 6235 return ParseStatus::Failure; 6236 } 6237 6238 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 6239 return ParseStatus::Success; 6240 } 6241 6242 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name, 6243 OperandVector &Operands, 6244 AMDGPUOperand::ImmTy ImmTy) { 6245 int64_t Bit; 6246 SMLoc S = getLoc(); 6247 6248 if (trySkipId(Name)) { 6249 Bit = 1; 6250 } else if (trySkipId("no", Name)) { 6251 Bit = 0; 6252 } else { 6253 return ParseStatus::NoMatch; 6254 } 6255 6256 if (Name == "r128" && !hasMIMG_R128()) 6257 return Error(S, "r128 modifier is not supported on this GPU"); 6258 if (Name == "a16" && !hasA16()) 6259 return Error(S, "a16 modifier is not supported on this GPU"); 6260 6261 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 6262 ImmTy = AMDGPUOperand::ImmTyR128A16; 6263 6264 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 6265 return ParseStatus::Success; 6266 } 6267 6268 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, 6269 bool &Disabling) const { 6270 Disabling = Id.consume_front("no"); 6271 6272 if (isGFX940() && !Mnemo.starts_with("s_")) { 6273 return StringSwitch<unsigned>(Id) 6274 .Case("nt", AMDGPU::CPol::NT) 6275 .Case("sc0", AMDGPU::CPol::SC0) 6276 .Case("sc1", AMDGPU::CPol::SC1) 6277 .Default(0); 6278 } 6279 6280 return StringSwitch<unsigned>(Id) 6281 .Case("dlc", AMDGPU::CPol::DLC) 6282 .Case("glc", AMDGPU::CPol::GLC) 6283 .Case("scc", AMDGPU::CPol::SCC) 6284 .Case("slc", AMDGPU::CPol::SLC) 6285 .Default(0); 6286 } 6287 6288 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 6289 if (isGFX12Plus()) { 6290 SMLoc StringLoc = getLoc(); 6291 6292 int64_t CPolVal = 0; 6293 ParseStatus ResTH = ParseStatus::NoMatch; 6294 ParseStatus ResScope = ParseStatus::NoMatch; 6295 6296 for (;;) { 6297 if (ResTH.isNoMatch()) { 6298 int64_t TH; 6299 ResTH = parseTH(Operands, TH); 6300 if (ResTH.isFailure()) 6301 return ResTH; 6302 if (ResTH.isSuccess()) { 6303 CPolVal |= TH; 6304 continue; 6305 } 6306 } 6307 6308 if (ResScope.isNoMatch()) { 6309 int64_t Scope; 6310 ResScope = parseScope(Operands, Scope); 6311 if (ResScope.isFailure()) 6312 return ResScope; 6313 if (ResScope.isSuccess()) { 6314 CPolVal |= Scope; 6315 continue; 6316 } 6317 } 6318 6319 break; 6320 } 6321 6322 if (ResTH.isNoMatch() && ResScope.isNoMatch()) 6323 return ParseStatus::NoMatch; 6324 6325 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc, 6326 AMDGPUOperand::ImmTyCPol)); 6327 return ParseStatus::Success; 6328 } 6329 6330 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 6331 SMLoc OpLoc = getLoc(); 6332 unsigned Enabled = 0, Seen = 0; 6333 for (;;) { 6334 SMLoc S = getLoc(); 6335 bool Disabling; 6336 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling); 6337 if (!CPol) 6338 break; 6339 6340 lex(); 6341 6342 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) 6343 return Error(S, "dlc modifier is not supported on this GPU"); 6344 6345 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) 6346 return Error(S, "scc modifier is not supported 
on this GPU"); 6347 6348 if (Seen & CPol) 6349 return Error(S, "duplicate cache policy modifier"); 6350 6351 if (!Disabling) 6352 Enabled |= CPol; 6353 6354 Seen |= CPol; 6355 } 6356 6357 if (!Seen) 6358 return ParseStatus::NoMatch; 6359 6360 Operands.push_back( 6361 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol)); 6362 return ParseStatus::Success; 6363 } 6364 6365 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands, 6366 int64_t &Scope) { 6367 Scope = AMDGPU::CPol::SCOPE_CU; // default; 6368 6369 StringRef Value; 6370 SMLoc StringLoc; 6371 ParseStatus Res; 6372 6373 Res = parseStringWithPrefix("scope", Value, StringLoc); 6374 if (!Res.isSuccess()) 6375 return Res; 6376 6377 Scope = StringSwitch<int64_t>(Value) 6378 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU) 6379 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE) 6380 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV) 6381 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS) 6382 .Default(0xffffffff); 6383 6384 if (Scope == 0xffffffff) 6385 return Error(StringLoc, "invalid scope value"); 6386 6387 return ParseStatus::Success; 6388 } 6389 6390 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { 6391 TH = AMDGPU::CPol::TH_RT; // default 6392 6393 StringRef Value; 6394 SMLoc StringLoc; 6395 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc); 6396 if (!Res.isSuccess()) 6397 return Res; 6398 6399 if (Value == "TH_DEFAULT") 6400 TH = AMDGPU::CPol::TH_RT; 6401 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" || 6402 Value == "TH_LOAD_NT_WB") { 6403 return Error(StringLoc, "invalid th value"); 6404 } else if (Value.starts_with("TH_ATOMIC_")) { 6405 Value = Value.drop_front(10); 6406 TH = AMDGPU::CPol::TH_TYPE_ATOMIC; 6407 } else if (Value.starts_with("TH_LOAD_")) { 6408 Value = Value.drop_front(8); 6409 TH = AMDGPU::CPol::TH_TYPE_LOAD; 6410 } else if (Value.starts_with("TH_STORE_")) { 6411 Value = Value.drop_front(9); 6412 TH = AMDGPU::CPol::TH_TYPE_STORE; 6413 } else { 6414 return Error(StringLoc, "invalid th value"); 6415 } 6416 6417 if (Value == "BYPASS") 6418 TH |= AMDGPU::CPol::TH_REAL_BYPASS; 6419 6420 if (TH != 0) { 6421 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC) 6422 TH |= StringSwitch<int64_t>(Value) 6423 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 6424 .Case("RT", AMDGPU::CPol::TH_RT) 6425 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 6426 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT) 6427 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT | 6428 AMDGPU::CPol::TH_ATOMIC_RETURN) 6429 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE) 6430 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE | 6431 AMDGPU::CPol::TH_ATOMIC_NT) 6432 .Default(0xffffffff); 6433 else 6434 TH |= StringSwitch<int64_t>(Value) 6435 .Case("RT", AMDGPU::CPol::TH_RT) 6436 .Case("NT", AMDGPU::CPol::TH_NT) 6437 .Case("HT", AMDGPU::CPol::TH_HT) 6438 .Case("LU", AMDGPU::CPol::TH_LU) 6439 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB) 6440 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT) 6441 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT) 6442 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT) 6443 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB) 6444 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS) 6445 .Default(0xffffffff); 6446 } 6447 6448 if (TH == 0xffffffff) 6449 return Error(StringLoc, "invalid th value"); 6450 6451 return ParseStatus::Success; 6452 } 6453 6454 static void addOptionalImmOperand( 6455 MCInst& Inst, const OperandVector& Operands, 6456 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 6457 AMDGPUOperand::ImmTy ImmT, 6458 int64_t Default = 0) { 6459 
auto i = OptionalIdx.find(ImmT); 6460 if (i != OptionalIdx.end()) { 6461 unsigned Idx = i->second; 6462 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 6463 } else { 6464 Inst.addOperand(MCOperand::createImm(Default)); 6465 } 6466 } 6467 6468 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 6469 StringRef &Value, 6470 SMLoc &StringLoc) { 6471 if (!trySkipId(Prefix, AsmToken::Colon)) 6472 return ParseStatus::NoMatch; 6473 6474 StringLoc = getLoc(); 6475 return parseId(Value, "expected an identifier") ? ParseStatus::Success 6476 : ParseStatus::Failure; 6477 } 6478 6479 //===----------------------------------------------------------------------===// 6480 // MTBUF format 6481 //===----------------------------------------------------------------------===// 6482 6483 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6484 int64_t MaxVal, 6485 int64_t &Fmt) { 6486 int64_t Val; 6487 SMLoc Loc = getLoc(); 6488 6489 auto Res = parseIntWithPrefix(Pref, Val); 6490 if (Res.isFailure()) 6491 return false; 6492 if (Res.isNoMatch()) 6493 return true; 6494 6495 if (Val < 0 || Val > MaxVal) { 6496 Error(Loc, Twine("out of range ", StringRef(Pref))); 6497 return false; 6498 } 6499 6500 Fmt = Val; 6501 return true; 6502 } 6503 6504 ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands, 6505 AMDGPUOperand::ImmTy ImmTy) { 6506 const char *Pref = "index_key"; 6507 int64_t ImmVal = 0; 6508 SMLoc Loc = getLoc(); 6509 auto Res = parseIntWithPrefix(Pref, ImmVal); 6510 if (!Res.isSuccess()) 6511 return Res; 6512 6513 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1)) 6514 return Error(Loc, Twine("out of range ", StringRef(Pref))); 6515 6516 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3)) 6517 return Error(Loc, Twine("out of range ", StringRef(Pref))); 6518 6519 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy)); 6520 return ParseStatus::Success; 6521 } 6522 6523 ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) { 6524 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit); 6525 } 6526 6527 ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) { 6528 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit); 6529 } 6530 6531 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6532 // values to live in a joint format operand in the MCInst encoding. 6533 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6534 using namespace llvm::AMDGPU::MTBUFFormat; 6535 6536 int64_t Dfmt = DFMT_UNDEF; 6537 int64_t Nfmt = NFMT_UNDEF; 6538 6539 // dfmt and nfmt can appear in either order, and each is optional. 6540 for (int I = 0; I < 2; ++I) { 6541 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6542 return ParseStatus::Failure; 6543 6544 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) 6545 return ParseStatus::Failure; 6546 6547 // Skip optional comma between dfmt/nfmt 6548 // but guard against 2 commas following each other. 6549 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6550 !peekToken().is(AsmToken::Comma)) { 6551 trySkipToken(AsmToken::Comma); 6552 } 6553 } 6554 6555 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6556 return ParseStatus::NoMatch; 6557 6558 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6559 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6560 6561 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6562 return ParseStatus::Success; 6563 } 6564 6565 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6566 using namespace llvm::AMDGPU::MTBUFFormat; 6567 6568 int64_t Fmt = UFMT_UNDEF; 6569 6570 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6571 return ParseStatus::Failure; 6572 6573 if (Fmt == UFMT_UNDEF) 6574 return ParseStatus::NoMatch; 6575 6576 Format = Fmt; 6577 return ParseStatus::Success; 6578 } 6579 6580 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6581 int64_t &Nfmt, 6582 StringRef FormatStr, 6583 SMLoc Loc) { 6584 using namespace llvm::AMDGPU::MTBUFFormat; 6585 int64_t Format; 6586 6587 Format = getDfmt(FormatStr); 6588 if (Format != DFMT_UNDEF) { 6589 Dfmt = Format; 6590 return true; 6591 } 6592 6593 Format = getNfmt(FormatStr, getSTI()); 6594 if (Format != NFMT_UNDEF) { 6595 Nfmt = Format; 6596 return true; 6597 } 6598 6599 Error(Loc, "unsupported format"); 6600 return false; 6601 } 6602 6603 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6604 SMLoc FormatLoc, 6605 int64_t &Format) { 6606 using namespace llvm::AMDGPU::MTBUFFormat; 6607 6608 int64_t Dfmt = DFMT_UNDEF; 6609 int64_t Nfmt = NFMT_UNDEF; 6610 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6611 return ParseStatus::Failure; 6612 6613 if (trySkipToken(AsmToken::Comma)) { 6614 StringRef Str; 6615 SMLoc Loc = getLoc(); 6616 if (!parseId(Str, "expected a format string") || 6617 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) 6618 return ParseStatus::Failure; 6619 if (Dfmt == DFMT_UNDEF) 6620 return Error(Loc, "duplicate numeric format"); 6621 if (Nfmt == NFMT_UNDEF) 6622 return Error(Loc, "duplicate data format"); 6623 } 6624 6625 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6626 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6627 6628 if (isGFX10Plus()) { 6629 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6630 if (Ufmt == UFMT_UNDEF) 6631 return Error(FormatLoc, "unsupported format"); 6632 Format = Ufmt; 6633 } else { 6634 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6635 } 6636 6637 return ParseStatus::Success; 6638 } 6639 6640 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6641 SMLoc Loc, 6642 int64_t &Format) { 6643 using namespace llvm::AMDGPU::MTBUFFormat; 6644 6645 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6646 if (Id == UFMT_UNDEF) 6647 return ParseStatus::NoMatch; 6648 6649 if (!isGFX10Plus()) 6650 return Error(Loc, "unified format is not supported on this GPU"); 6651 6652 Format = Id; 6653 return ParseStatus::Success; 6654 } 6655 6656 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6657 using namespace llvm::AMDGPU::MTBUFFormat; 6658 SMLoc Loc = getLoc(); 6659 6660 if (!parseExpr(Format)) 6661 return ParseStatus::Failure; 6662 if (!isValidFormatEncoding(Format, getSTI())) 6663 return Error(Loc, "out of range format"); 6664 6665 return ParseStatus::Success; 6666 } 6667 6668 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6669 using namespace llvm::AMDGPU::MTBUFFormat; 6670 6671 if (!trySkipId("format", AsmToken::Colon)) 6672 return ParseStatus::NoMatch; 6673 6674 if (trySkipToken(AsmToken::LBrac)) { 6675 StringRef FormatStr; 6676 SMLoc Loc = getLoc(); 6677 if (!parseId(FormatStr, "expected a format string")) 6678 return ParseStatus::Failure; 6679 6680 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6681 if (Res.isNoMatch()) 6682 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6683 if (!Res.isSuccess()) 6684 return Res; 6685 6686 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6687 return ParseStatus::Failure; 6688 6689 return ParseStatus::Success; 6690 } 6691 6692 return parseNumericFormat(Format); 6693 } 6694 6695 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6696 using namespace llvm::AMDGPU::MTBUFFormat; 6697 6698 int64_t Format = getDefaultFormatEncoding(getSTI()); 6699 ParseStatus Res; 6700 SMLoc Loc = getLoc(); 6701 6702 // Parse legacy format syntax. 6703 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6704 if (Res.isFailure()) 6705 return Res; 6706 6707 bool FormatFound = Res.isSuccess(); 6708 6709 Operands.push_back( 6710 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6711 6712 if (FormatFound) 6713 trySkipToken(AsmToken::Comma); 6714 6715 if (isToken(AsmToken::EndOfStatement)) { 6716 // We are expecting an soffset operand, 6717 // but let matcher handle the error. 6718 return ParseStatus::Success; 6719 } 6720 6721 // Parse soffset. 
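// The soffset may be either a register or an immediate (e.g. s4 or 0), so use
// the generic register-or-immediate parser.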
6722 Res = parseRegOrImm(Operands); 6723 if (!Res.isSuccess()) 6724 return Res; 6725 6726 trySkipToken(AsmToken::Comma); 6727 6728 if (!FormatFound) { 6729 Res = parseSymbolicOrNumericFormat(Format); 6730 if (Res.isFailure()) 6731 return Res; 6732 if (Res.isSuccess()) { 6733 auto Size = Operands.size(); 6734 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6735 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6736 Op.setImm(Format); 6737 } 6738 return ParseStatus::Success; 6739 } 6740 6741 if (isId("format") && peekToken().is(AsmToken::Colon)) 6742 return Error(getLoc(), "duplicate format"); 6743 return ParseStatus::Success; 6744 } 6745 6746 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) { 6747 ParseStatus Res = 6748 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); 6749 if (Res.isNoMatch()) { 6750 Res = parseIntWithPrefix("inst_offset", Operands, 6751 AMDGPUOperand::ImmTyInstOffset); 6752 } 6753 return Res; 6754 } 6755 6756 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) { 6757 ParseStatus Res = 6758 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16); 6759 if (Res.isNoMatch()) 6760 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16); 6761 return Res; 6762 } 6763 6764 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) { 6765 ParseStatus Res = 6766 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP); 6767 if (Res.isNoMatch()) { 6768 Res = 6769 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP); 6770 } 6771 return Res; 6772 } 6773 6774 //===----------------------------------------------------------------------===// 6775 // Exp 6776 //===----------------------------------------------------------------------===// 6777 6778 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6779 OptionalImmIndexMap OptionalIdx; 6780 6781 unsigned OperandIdx[4]; 6782 unsigned EnMask = 0; 6783 int SrcIdx = 0; 6784 6785 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6786 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6787 6788 // Add the register arguments 6789 if (Op.isReg()) { 6790 assert(SrcIdx < 4); 6791 OperandIdx[SrcIdx] = Inst.size(); 6792 Op.addRegOperands(Inst, 1); 6793 ++SrcIdx; 6794 continue; 6795 } 6796 6797 if (Op.isOff()) { 6798 assert(SrcIdx < 4); 6799 OperandIdx[SrcIdx] = Inst.size(); 6800 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6801 ++SrcIdx; 6802 continue; 6803 } 6804 6805 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6806 Op.addImmOperands(Inst, 1); 6807 continue; 6808 } 6809 6810 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6811 continue; 6812 6813 // Handle optional arguments 6814 OptionalIdx[Op.getImmTy()] = i; 6815 } 6816 6817 assert(SrcIdx == 4); 6818 6819 bool Compr = false; 6820 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6821 Compr = true; 6822 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6823 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6824 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6825 } 6826 6827 for (auto i = 0; i < SrcIdx; ++i) { 6828 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6829 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6830 } 6831 } 6832 6833 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6834 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6835 6836 Inst.addOperand(MCOperand::createImm(EnMask)); 6837 } 6838 6839 //===----------------------------------------------------------------------===// 6840 // s_waitcnt 6841 //===----------------------------------------------------------------------===// 6842 6843 static bool 6844 encodeCnt( 6845 const AMDGPU::IsaVersion ISA, 6846 int64_t &IntVal, 6847 int64_t CntVal, 6848 bool Saturate, 6849 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6850 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6851 { 6852 bool Failed = false; 6853 6854 IntVal = encode(ISA, IntVal, CntVal); 6855 if (CntVal != decode(ISA, IntVal)) { 6856 if (Saturate) { 6857 IntVal = encode(ISA, IntVal, -1); 6858 } else { 6859 Failed = true; 6860 } 6861 } 6862 return Failed; 6863 } 6864 6865 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6866 6867 SMLoc CntLoc = getLoc(); 6868 StringRef CntName = getTokenStr(); 6869 6870 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6871 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6872 return false; 6873 6874 int64_t CntVal; 6875 SMLoc ValLoc = getLoc(); 6876 if (!parseExpr(CntVal)) 6877 return false; 6878 6879 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6880 6881 bool Failed = true; 6882 bool Sat = CntName.ends_with("_sat"); 6883 6884 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6885 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6886 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6887 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6888 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6889 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6890 } else { 6891 Error(CntLoc, "invalid counter name " + CntName); 6892 return false; 6893 } 6894 6895 if (Failed) { 6896 Error(ValLoc, "too large value for " + CntName); 6897 return false; 6898 } 6899 6900 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6901 return false; 6902 6903 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6904 if (isToken(AsmToken::EndOfStatement)) { 6905 Error(getLoc(), "expected a counter name"); 6906 return false; 6907 } 6908 } 6909 6910 return true; 6911 } 6912 6913 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) { 6914 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6915 int64_t Waitcnt = getWaitcntBitMask(ISA); 6916 SMLoc S = getLoc(); 6917 6918 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6919 while (!isToken(AsmToken::EndOfStatement)) { 6920 if (!parseCnt(Waitcnt)) 6921 return ParseStatus::Failure; 6922 } 6923 } else { 6924 if (!parseExpr(Waitcnt)) 6925 return ParseStatus::Failure; 6926 } 6927 6928 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6929 return ParseStatus::Success; 6930 } 6931 6932 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6933 SMLoc FieldLoc = getLoc(); 6934 StringRef FieldName = getTokenStr(); 6935 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6936 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6937 return false; 6938 6939 SMLoc ValueLoc = getLoc(); 6940 StringRef ValueName = getTokenStr(); 6941 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6942 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6943 return false; 6944 6945 unsigned Shift; 6946 if (FieldName == "instid0") { 6947 Shift = 0; 6948 } else if (FieldName == "instskip") { 6949 Shift = 4; 6950 } else if (FieldName == "instid1") { 6951 Shift = 7; 6952 } else { 6953 Error(FieldLoc, "invalid field name " + FieldName); 6954 return false; 6955 } 6956 6957 int Value; 6958 if (Shift == 4) { 6959 // Parse values for instskip. 6960 Value = StringSwitch<int>(ValueName) 6961 .Case("SAME", 0) 6962 .Case("NEXT", 1) 6963 .Case("SKIP_1", 2) 6964 .Case("SKIP_2", 3) 6965 .Case("SKIP_3", 4) 6966 .Case("SKIP_4", 5) 6967 .Default(-1); 6968 } else { 6969 // Parse values for instid0 and instid1. 6970 Value = StringSwitch<int>(ValueName) 6971 .Case("NO_DEP", 0) 6972 .Case("VALU_DEP_1", 1) 6973 .Case("VALU_DEP_2", 2) 6974 .Case("VALU_DEP_3", 3) 6975 .Case("VALU_DEP_4", 4) 6976 .Case("TRANS32_DEP_1", 5) 6977 .Case("TRANS32_DEP_2", 6) 6978 .Case("TRANS32_DEP_3", 7) 6979 .Case("FMA_ACCUM_CYCLE_1", 8) 6980 .Case("SALU_CYCLE_1", 9) 6981 .Case("SALU_CYCLE_2", 10) 6982 .Case("SALU_CYCLE_3", 11) 6983 .Default(-1); 6984 } 6985 if (Value < 0) { 6986 Error(ValueLoc, "invalid value name " + ValueName); 6987 return false; 6988 } 6989 6990 Delay |= Value << Shift; 6991 return true; 6992 } 6993 6994 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) { 6995 int64_t Delay = 0; 6996 SMLoc S = getLoc(); 6997 6998 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6999 do { 7000 if (!parseDelay(Delay)) 7001 return ParseStatus::Failure; 7002 } while (trySkipToken(AsmToken::Pipe)); 7003 } else { 7004 if (!parseExpr(Delay)) 7005 return ParseStatus::Failure; 7006 } 7007 7008 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 7009 return ParseStatus::Success; 7010 } 7011 7012 bool 7013 AMDGPUOperand::isSWaitCnt() const { 7014 return isImm(); 7015 } 7016 7017 bool AMDGPUOperand::isSDelayALU() const { return isImm(); } 7018 7019 //===----------------------------------------------------------------------===// 7020 // DepCtr 7021 //===----------------------------------------------------------------------===// 7022 7023 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 7024 StringRef DepCtrName) { 7025 switch (ErrorId) { 7026 case OPR_ID_UNKNOWN: 7027 Error(Loc, Twine("invalid counter name ", DepCtrName)); 7028 return; 7029 case OPR_ID_UNSUPPORTED: 7030 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 7031 return; 7032 case OPR_ID_DUPLICATE: 7033 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 7034 return; 7035 case OPR_VAL_INVALID: 7036 Error(Loc, Twine("invalid value for ", DepCtrName)); 7037 return; 7038 default: 7039 assert(false); 7040 } 7041 } 7042 7043 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 7044 7045 using namespace llvm::AMDGPU::DepCtr; 7046 7047 SMLoc DepCtrLoc = getLoc(); 7048 StringRef DepCtrName = getTokenStr(); 7049 7050 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 7051 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 7052 return false; 7053 7054 int64_t ExprVal; 7055 if (!parseExpr(ExprVal)) 7056 return false; 7057 7058 unsigned PrevOprMask = UsedOprMask; 7059 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 7060 7061 if (CntVal < 0) { 7062 depCtrError(DepCtrLoc, CntVal, DepCtrName); 7063 return false; 7064 } 7065 7066 if (!skipToken(AsmToken::RParen, "expected 
a closing parenthesis")) 7067 return false; 7068 7069 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 7070 if (isToken(AsmToken::EndOfStatement)) { 7071 Error(getLoc(), "expected a counter name"); 7072 return false; 7073 } 7074 } 7075 7076 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 7077 DepCtr = (DepCtr & ~CntValMask) | CntVal; 7078 return true; 7079 } 7080 7081 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) { 7082 using namespace llvm::AMDGPU::DepCtr; 7083 7084 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 7085 SMLoc Loc = getLoc(); 7086 7087 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 7088 unsigned UsedOprMask = 0; 7089 while (!isToken(AsmToken::EndOfStatement)) { 7090 if (!parseDepCtr(DepCtr, UsedOprMask)) 7091 return ParseStatus::Failure; 7092 } 7093 } else { 7094 if (!parseExpr(DepCtr)) 7095 return ParseStatus::Failure; 7096 } 7097 7098 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 7099 return ParseStatus::Success; 7100 } 7101 7102 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 7103 7104 //===----------------------------------------------------------------------===// 7105 // hwreg 7106 //===----------------------------------------------------------------------===// 7107 7108 bool 7109 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 7110 OperandInfoTy &Offset, 7111 OperandInfoTy &Width) { 7112 using namespace llvm::AMDGPU::Hwreg; 7113 7114 // The register may be specified by name or using a numeric code 7115 HwReg.Loc = getLoc(); 7116 if (isToken(AsmToken::Identifier) && 7117 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7118 HwReg.IsSymbolic = true; 7119 lex(); // skip register name 7120 } else if (!parseExpr(HwReg.Id, "a register name")) { 7121 return false; 7122 } 7123 7124 if (trySkipToken(AsmToken::RParen)) 7125 return true; 7126 7127 // parse optional params 7128 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 7129 return false; 7130 7131 Offset.Loc = getLoc(); 7132 if (!parseExpr(Offset.Id)) 7133 return false; 7134 7135 if (!skipToken(AsmToken::Comma, "expected a comma")) 7136 return false; 7137 7138 Width.Loc = getLoc(); 7139 return parseExpr(Width.Id) && 7140 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7141 } 7142 7143 bool 7144 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 7145 const OperandInfoTy &Offset, 7146 const OperandInfoTy &Width) { 7147 7148 using namespace llvm::AMDGPU::Hwreg; 7149 7150 if (HwReg.IsSymbolic) { 7151 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 7152 Error(HwReg.Loc, 7153 "specified hardware register is not supported on this GPU"); 7154 return false; 7155 } 7156 } else { 7157 if (!isValidHwreg(HwReg.Id)) { 7158 Error(HwReg.Loc, 7159 "invalid code of hardware register: only 6-bit values are legal"); 7160 return false; 7161 } 7162 } 7163 if (!isValidHwregOffset(Offset.Id)) { 7164 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 7165 return false; 7166 } 7167 if (!isValidHwregWidth(Width.Id)) { 7168 Error(Width.Loc, 7169 "invalid bitfield width: only values from 1 to 32 are legal"); 7170 return false; 7171 } 7172 return true; 7173 } 7174 7175 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 7176 using namespace llvm::AMDGPU::Hwreg; 7177 7178 int64_t ImmVal = 0; 7179 SMLoc Loc = getLoc(); 7180 7181 if (trySkipId("hwreg", AsmToken::LParen)) { 7182 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 7183 OperandInfoTy 
Offset(OFFSET_DEFAULT_); 7184 OperandInfoTy Width(WIDTH_DEFAULT_); 7185 if (parseHwregBody(HwReg, Offset, Width) && 7186 validateHwreg(HwReg, Offset, Width)) { 7187 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 7188 } else { 7189 return ParseStatus::Failure; 7190 } 7191 } else if (parseExpr(ImmVal, "a hwreg macro")) { 7192 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 7193 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7194 } else { 7195 return ParseStatus::Failure; 7196 } 7197 7198 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 7199 return ParseStatus::Success; 7200 } 7201 7202 bool AMDGPUOperand::isHwreg() const { 7203 return isImmTy(ImmTyHwreg); 7204 } 7205 7206 //===----------------------------------------------------------------------===// 7207 // sendmsg 7208 //===----------------------------------------------------------------------===// 7209 7210 bool 7211 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 7212 OperandInfoTy &Op, 7213 OperandInfoTy &Stream) { 7214 using namespace llvm::AMDGPU::SendMsg; 7215 7216 Msg.Loc = getLoc(); 7217 if (isToken(AsmToken::Identifier) && 7218 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7219 Msg.IsSymbolic = true; 7220 lex(); // skip message name 7221 } else if (!parseExpr(Msg.Id, "a message name")) { 7222 return false; 7223 } 7224 7225 if (trySkipToken(AsmToken::Comma)) { 7226 Op.IsDefined = true; 7227 Op.Loc = getLoc(); 7228 if (isToken(AsmToken::Identifier) && 7229 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 7230 lex(); // skip operation name 7231 } else if (!parseExpr(Op.Id, "an operation name")) { 7232 return false; 7233 } 7234 7235 if (trySkipToken(AsmToken::Comma)) { 7236 Stream.IsDefined = true; 7237 Stream.Loc = getLoc(); 7238 if (!parseExpr(Stream.Id)) 7239 return false; 7240 } 7241 } 7242 7243 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7244 } 7245 7246 bool 7247 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 7248 const OperandInfoTy &Op, 7249 const OperandInfoTy &Stream) { 7250 using namespace llvm::AMDGPU::SendMsg; 7251 7252 // Validation strictness depends on whether message is specified 7253 // in a symbolic or in a numeric form. In the latter case 7254 // only encoding possibility is checked. 
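// Illustrative examples of the two forms (message/operation names are as
// defined for the subtarget; exact availability varies by GPU):
//   s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)  // symbolic form, strict checks
//   s_sendmsg sendmsg(3)                       // numeric form, encoding check only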
7255 bool Strict = Msg.IsSymbolic; 7256 7257 if (Strict) { 7258 if (Msg.Id == OPR_ID_UNSUPPORTED) { 7259 Error(Msg.Loc, "specified message id is not supported on this GPU"); 7260 return false; 7261 } 7262 } else { 7263 if (!isValidMsgId(Msg.Id, getSTI())) { 7264 Error(Msg.Loc, "invalid message id"); 7265 return false; 7266 } 7267 } 7268 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 7269 if (Op.IsDefined) { 7270 Error(Op.Loc, "message does not support operations"); 7271 } else { 7272 Error(Msg.Loc, "missing message operation"); 7273 } 7274 return false; 7275 } 7276 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 7277 Error(Op.Loc, "invalid operation id"); 7278 return false; 7279 } 7280 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 7281 Stream.IsDefined) { 7282 Error(Stream.Loc, "message operation does not support streams"); 7283 return false; 7284 } 7285 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 7286 Error(Stream.Loc, "invalid message stream id"); 7287 return false; 7288 } 7289 return true; 7290 } 7291 7292 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) { 7293 using namespace llvm::AMDGPU::SendMsg; 7294 7295 int64_t ImmVal = 0; 7296 SMLoc Loc = getLoc(); 7297 7298 if (trySkipId("sendmsg", AsmToken::LParen)) { 7299 OperandInfoTy Msg(OPR_ID_UNKNOWN); 7300 OperandInfoTy Op(OP_NONE_); 7301 OperandInfoTy Stream(STREAM_ID_NONE_); 7302 if (parseSendMsgBody(Msg, Op, Stream) && 7303 validateSendMsg(Msg, Op, Stream)) { 7304 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 7305 } else { 7306 return ParseStatus::Failure; 7307 } 7308 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 7309 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 7310 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7311 } else { 7312 return ParseStatus::Failure; 7313 } 7314 7315 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 7316 return ParseStatus::Success; 7317 } 7318 7319 bool AMDGPUOperand::isSendMsg() const { 7320 return isImmTy(ImmTySendMsg); 7321 } 7322 7323 //===----------------------------------------------------------------------===// 7324 // v_interp 7325 //===----------------------------------------------------------------------===// 7326 7327 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 7328 StringRef Str; 7329 SMLoc S = getLoc(); 7330 7331 if (!parseId(Str)) 7332 return ParseStatus::NoMatch; 7333 7334 int Slot = StringSwitch<int>(Str) 7335 .Case("p10", 0) 7336 .Case("p20", 1) 7337 .Case("p0", 2) 7338 .Default(-1); 7339 7340 if (Slot == -1) 7341 return Error(S, "invalid interpolation slot"); 7342 7343 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 7344 AMDGPUOperand::ImmTyInterpSlot)); 7345 return ParseStatus::Success; 7346 } 7347 7348 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 7349 StringRef Str; 7350 SMLoc S = getLoc(); 7351 7352 if (!parseId(Str)) 7353 return ParseStatus::NoMatch; 7354 7355 if (!Str.starts_with("attr")) 7356 return Error(S, "invalid interpolation attribute"); 7357 7358 StringRef Chan = Str.take_back(2); 7359 int AttrChan = StringSwitch<int>(Chan) 7360 .Case(".x", 0) 7361 .Case(".y", 1) 7362 .Case(".z", 2) 7363 .Case(".w", 3) 7364 .Default(-1); 7365 if (AttrChan == -1) 7366 return Error(S, "invalid or missing interpolation attribute channel"); 7367 7368 Str = Str.drop_back(2).drop_front(4); 7369 7370 uint8_t Attr; 7371 if (Str.getAsInteger(10, Attr)) 7372 return Error(S, "invalid or 
missing interpolation attribute number"); 7373 7374 if (Attr > 32) 7375 return Error(S, "out of bounds interpolation attribute number"); 7376 7377 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 7378 7379 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 7380 AMDGPUOperand::ImmTyInterpAttr)); 7381 Operands.push_back(AMDGPUOperand::CreateImm( 7382 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan)); 7383 return ParseStatus::Success; 7384 } 7385 7386 //===----------------------------------------------------------------------===// 7387 // exp 7388 //===----------------------------------------------------------------------===// 7389 7390 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 7391 using namespace llvm::AMDGPU::Exp; 7392 7393 StringRef Str; 7394 SMLoc S = getLoc(); 7395 7396 if (!parseId(Str)) 7397 return ParseStatus::NoMatch; 7398 7399 unsigned Id = getTgtId(Str); 7400 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) 7401 return Error(S, (Id == ET_INVALID) 7402 ? "invalid exp target" 7403 : "exp target is not supported on this GPU"); 7404 7405 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 7406 AMDGPUOperand::ImmTyExpTgt)); 7407 return ParseStatus::Success; 7408 } 7409 7410 //===----------------------------------------------------------------------===// 7411 // parser helpers 7412 //===----------------------------------------------------------------------===// 7413 7414 bool 7415 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 7416 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 7417 } 7418 7419 bool 7420 AMDGPUAsmParser::isId(const StringRef Id) const { 7421 return isId(getToken(), Id); 7422 } 7423 7424 bool 7425 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 7426 return getTokenKind() == Kind; 7427 } 7428 7429 StringRef AMDGPUAsmParser::getId() const { 7430 return isToken(AsmToken::Identifier) ? 
getTokenStr() : StringRef(); 7431 } 7432 7433 bool 7434 AMDGPUAsmParser::trySkipId(const StringRef Id) { 7435 if (isId(Id)) { 7436 lex(); 7437 return true; 7438 } 7439 return false; 7440 } 7441 7442 bool 7443 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7444 if (isToken(AsmToken::Identifier)) { 7445 StringRef Tok = getTokenStr(); 7446 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) { 7447 lex(); 7448 return true; 7449 } 7450 } 7451 return false; 7452 } 7453 7454 bool 7455 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7456 if (isId(Id) && peekToken().is(Kind)) { 7457 lex(); 7458 lex(); 7459 return true; 7460 } 7461 return false; 7462 } 7463 7464 bool 7465 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7466 if (isToken(Kind)) { 7467 lex(); 7468 return true; 7469 } 7470 return false; 7471 } 7472 7473 bool 7474 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7475 const StringRef ErrMsg) { 7476 if (!trySkipToken(Kind)) { 7477 Error(getLoc(), ErrMsg); 7478 return false; 7479 } 7480 return true; 7481 } 7482 7483 bool 7484 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7485 SMLoc S = getLoc(); 7486 7487 const MCExpr *Expr; 7488 if (Parser.parseExpression(Expr)) 7489 return false; 7490 7491 if (Expr->evaluateAsAbsolute(Imm)) 7492 return true; 7493 7494 if (Expected.empty()) { 7495 Error(S, "expected absolute expression"); 7496 } else { 7497 Error(S, Twine("expected ", Expected) + 7498 Twine(" or an absolute expression")); 7499 } 7500 return false; 7501 } 7502 7503 bool 7504 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7505 SMLoc S = getLoc(); 7506 7507 const MCExpr *Expr; 7508 if (Parser.parseExpression(Expr)) 7509 return false; 7510 7511 int64_t IntVal; 7512 if (Expr->evaluateAsAbsolute(IntVal)) { 7513 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7514 } else { 7515 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7516 } 7517 return true; 7518 } 7519 7520 bool 7521 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7522 if (isToken(AsmToken::String)) { 7523 Val = getToken().getStringContents(); 7524 lex(); 7525 return true; 7526 } else { 7527 Error(getLoc(), ErrMsg); 7528 return false; 7529 } 7530 } 7531 7532 bool 7533 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7534 if (isToken(AsmToken::Identifier)) { 7535 Val = getTokenStr(); 7536 lex(); 7537 return true; 7538 } else { 7539 if (!ErrMsg.empty()) 7540 Error(getLoc(), ErrMsg); 7541 return false; 7542 } 7543 } 7544 7545 AsmToken 7546 AMDGPUAsmParser::getToken() const { 7547 return Parser.getTok(); 7548 } 7549 7550 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 7551 return isToken(AsmToken::EndOfStatement) 7552 ? 
getToken() 7553 : getLexer().peekTok(ShouldSkipSpace); 7554 } 7555 7556 void 7557 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7558 auto TokCount = getLexer().peekTokens(Tokens); 7559 7560 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7561 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7562 } 7563 7564 AsmToken::TokenKind 7565 AMDGPUAsmParser::getTokenKind() const { 7566 return getLexer().getKind(); 7567 } 7568 7569 SMLoc 7570 AMDGPUAsmParser::getLoc() const { 7571 return getToken().getLoc(); 7572 } 7573 7574 StringRef 7575 AMDGPUAsmParser::getTokenStr() const { 7576 return getToken().getString(); 7577 } 7578 7579 void 7580 AMDGPUAsmParser::lex() { 7581 Parser.Lex(); 7582 } 7583 7584 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const { 7585 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7586 } 7587 7588 SMLoc 7589 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7590 const OperandVector &Operands) const { 7591 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7592 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7593 if (Test(Op)) 7594 return Op.getStartLoc(); 7595 } 7596 return getInstLoc(Operands); 7597 } 7598 7599 SMLoc 7600 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7601 const OperandVector &Operands) const { 7602 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7603 return getOperandLoc(Test, Operands); 7604 } 7605 7606 SMLoc 7607 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7608 const OperandVector &Operands) const { 7609 auto Test = [=](const AMDGPUOperand& Op) { 7610 return Op.isRegKind() && Op.getReg() == Reg; 7611 }; 7612 return getOperandLoc(Test, Operands); 7613 } 7614 7615 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands, 7616 bool SearchMandatoryLiterals) const { 7617 auto Test = [](const AMDGPUOperand& Op) { 7618 return Op.IsImmKindLiteral() || Op.isExpr(); 7619 }; 7620 SMLoc Loc = getOperandLoc(Test, Operands); 7621 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands)) 7622 Loc = getMandatoryLitLoc(Operands); 7623 return Loc; 7624 } 7625 7626 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const { 7627 auto Test = [](const AMDGPUOperand &Op) { 7628 return Op.IsImmKindMandatoryLiteral(); 7629 }; 7630 return getOperandLoc(Test, Operands); 7631 } 7632 7633 SMLoc 7634 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7635 auto Test = [](const AMDGPUOperand& Op) { 7636 return Op.isImmKindConst(); 7637 }; 7638 return getOperandLoc(Test, Operands); 7639 } 7640 7641 //===----------------------------------------------------------------------===// 7642 // swizzle 7643 //===----------------------------------------------------------------------===// 7644 7645 LLVM_READNONE 7646 static unsigned 7647 encodeBitmaskPerm(const unsigned AndMask, 7648 const unsigned OrMask, 7649 const unsigned XorMask) { 7650 using namespace llvm::AMDGPU::Swizzle; 7651 7652 return BITMASK_PERM_ENC | 7653 (AndMask << BITMASK_AND_SHIFT) | 7654 (OrMask << BITMASK_OR_SHIFT) | 7655 (XorMask << BITMASK_XOR_SHIFT); 7656 } 7657 7658 bool 7659 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7660 const unsigned MinVal, 7661 const unsigned MaxVal, 7662 const StringRef ErrMsg, 7663 SMLoc &Loc) { 7664 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7665 return false; 7666 } 7667 Loc = getLoc(); 7668 if (!parseExpr(Op)) { 7669 return false; 7670 } 7671 if (Op < MinVal || Op > MaxVal) { 7672 Error(Loc, ErrMsg); 7673 return false; 7674 } 
7675 7676 return true; 7677 } 7678 7679 bool 7680 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7681 const unsigned MinVal, 7682 const unsigned MaxVal, 7683 const StringRef ErrMsg) { 7684 SMLoc Loc; 7685 for (unsigned i = 0; i < OpNum; ++i) { 7686 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7687 return false; 7688 } 7689 7690 return true; 7691 } 7692 7693 bool 7694 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7695 using namespace llvm::AMDGPU::Swizzle; 7696 7697 int64_t Lane[LANE_NUM]; 7698 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7699 "expected a 2-bit lane id")) { 7700 Imm = QUAD_PERM_ENC; 7701 for (unsigned I = 0; I < LANE_NUM; ++I) { 7702 Imm |= Lane[I] << (LANE_SHIFT * I); 7703 } 7704 return true; 7705 } 7706 return false; 7707 } 7708 7709 bool 7710 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7711 using namespace llvm::AMDGPU::Swizzle; 7712 7713 SMLoc Loc; 7714 int64_t GroupSize; 7715 int64_t LaneIdx; 7716 7717 if (!parseSwizzleOperand(GroupSize, 7718 2, 32, 7719 "group size must be in the interval [2,32]", 7720 Loc)) { 7721 return false; 7722 } 7723 if (!isPowerOf2_64(GroupSize)) { 7724 Error(Loc, "group size must be a power of two"); 7725 return false; 7726 } 7727 if (parseSwizzleOperand(LaneIdx, 7728 0, GroupSize - 1, 7729 "lane id must be in the interval [0,group size - 1]", 7730 Loc)) { 7731 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7732 return true; 7733 } 7734 return false; 7735 } 7736 7737 bool 7738 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7739 using namespace llvm::AMDGPU::Swizzle; 7740 7741 SMLoc Loc; 7742 int64_t GroupSize; 7743 7744 if (!parseSwizzleOperand(GroupSize, 7745 2, 32, 7746 "group size must be in the interval [2,32]", 7747 Loc)) { 7748 return false; 7749 } 7750 if (!isPowerOf2_64(GroupSize)) { 7751 Error(Loc, "group size must be a power of two"); 7752 return false; 7753 } 7754 7755 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7756 return true; 7757 } 7758 7759 bool 7760 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7761 using namespace llvm::AMDGPU::Swizzle; 7762 7763 SMLoc Loc; 7764 int64_t GroupSize; 7765 7766 if (!parseSwizzleOperand(GroupSize, 7767 1, 16, 7768 "group size must be in the interval [1,16]", 7769 Loc)) { 7770 return false; 7771 } 7772 if (!isPowerOf2_64(GroupSize)) { 7773 Error(Loc, "group size must be a power of two"); 7774 return false; 7775 } 7776 7777 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7778 return true; 7779 } 7780 7781 bool 7782 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7783 using namespace llvm::AMDGPU::Swizzle; 7784 7785 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7786 return false; 7787 } 7788 7789 StringRef Ctl; 7790 SMLoc StrLoc = getLoc(); 7791 if (!parseString(Ctl)) { 7792 return false; 7793 } 7794 if (Ctl.size() != BITMASK_WIDTH) { 7795 Error(StrLoc, "expected a 5-character mask"); 7796 return false; 7797 } 7798 7799 unsigned AndMask = 0; 7800 unsigned OrMask = 0; 7801 unsigned XorMask = 0; 7802 7803 for (size_t i = 0; i < Ctl.size(); ++i) { 7804 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7805 switch(Ctl[i]) { 7806 default: 7807 Error(StrLoc, "invalid mask"); 7808 return false; 7809 case '0': 7810 break; 7811 case '1': 7812 OrMask |= Mask; 7813 break; 7814 case 'p': 7815 AndMask |= Mask; 7816 break; 7817 case 'i': 7818 AndMask |= Mask; 7819 XorMask |= Mask; 7820 break; 7821 } 7822 } 7823 7824 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7825 return true; 
7826 } 7827 7828 bool 7829 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7830 7831 SMLoc OffsetLoc = getLoc(); 7832 7833 if (!parseExpr(Imm, "a swizzle macro")) { 7834 return false; 7835 } 7836 if (!isUInt<16>(Imm)) { 7837 Error(OffsetLoc, "expected a 16-bit offset"); 7838 return false; 7839 } 7840 return true; 7841 } 7842 7843 bool 7844 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7845 using namespace llvm::AMDGPU::Swizzle; 7846 7847 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7848 7849 SMLoc ModeLoc = getLoc(); 7850 bool Ok = false; 7851 7852 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7853 Ok = parseSwizzleQuadPerm(Imm); 7854 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7855 Ok = parseSwizzleBitmaskPerm(Imm); 7856 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7857 Ok = parseSwizzleBroadcast(Imm); 7858 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7859 Ok = parseSwizzleSwap(Imm); 7860 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7861 Ok = parseSwizzleReverse(Imm); 7862 } else { 7863 Error(ModeLoc, "expected a swizzle mode"); 7864 } 7865 7866 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7867 } 7868 7869 return false; 7870 } 7871 7872 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) { 7873 SMLoc S = getLoc(); 7874 int64_t Imm = 0; 7875 7876 if (trySkipId("offset")) { 7877 7878 bool Ok = false; 7879 if (skipToken(AsmToken::Colon, "expected a colon")) { 7880 if (trySkipId("swizzle")) { 7881 Ok = parseSwizzleMacro(Imm); 7882 } else { 7883 Ok = parseSwizzleOffset(Imm); 7884 } 7885 } 7886 7887 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7888 7889 return Ok ? ParseStatus::Success : ParseStatus::Failure; 7890 } 7891 return ParseStatus::NoMatch; 7892 } 7893 7894 bool 7895 AMDGPUOperand::isSwizzle() const { 7896 return isImmTy(ImmTySwizzle); 7897 } 7898 7899 //===----------------------------------------------------------------------===// 7900 // VGPR Index Mode 7901 //===----------------------------------------------------------------------===// 7902 7903 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7904 7905 using namespace llvm::AMDGPU::VGPRIndexMode; 7906 7907 if (trySkipToken(AsmToken::RParen)) { 7908 return OFF; 7909 } 7910 7911 int64_t Imm = 0; 7912 7913 while (true) { 7914 unsigned Mode = 0; 7915 SMLoc S = getLoc(); 7916 7917 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7918 if (trySkipId(IdSymbolic[ModeId])) { 7919 Mode = 1 << ModeId; 7920 break; 7921 } 7922 } 7923 7924 if (Mode == 0) { 7925 Error(S, (Imm == 0)? 
7926 "expected a VGPR index mode or a closing parenthesis" : 7927 "expected a VGPR index mode"); 7928 return UNDEF; 7929 } 7930 7931 if (Imm & Mode) { 7932 Error(S, "duplicate VGPR index mode"); 7933 return UNDEF; 7934 } 7935 Imm |= Mode; 7936 7937 if (trySkipToken(AsmToken::RParen)) 7938 break; 7939 if (!skipToken(AsmToken::Comma, 7940 "expected a comma or a closing parenthesis")) 7941 return UNDEF; 7942 } 7943 7944 return Imm; 7945 } 7946 7947 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7948 7949 using namespace llvm::AMDGPU::VGPRIndexMode; 7950 7951 int64_t Imm = 0; 7952 SMLoc S = getLoc(); 7953 7954 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7955 Imm = parseGPRIdxMacro(); 7956 if (Imm == UNDEF) 7957 return ParseStatus::Failure; 7958 } else { 7959 if (getParser().parseAbsoluteExpression(Imm)) 7960 return ParseStatus::Failure; 7961 if (Imm < 0 || !isUInt<4>(Imm)) 7962 return Error(S, "invalid immediate: only 4-bit values are legal"); 7963 } 7964 7965 Operands.push_back( 7966 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7967 return ParseStatus::Success; 7968 } 7969 7970 bool AMDGPUOperand::isGPRIdxMode() const { 7971 return isImmTy(ImmTyGprIdxMode); 7972 } 7973 7974 //===----------------------------------------------------------------------===// 7975 // sopp branch targets 7976 //===----------------------------------------------------------------------===// 7977 7978 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) { 7979 7980 // Make sure we are not parsing something 7981 // that looks like a label or an expression but is not. 7982 // This will improve error messages. 7983 if (isRegister() || isModifier()) 7984 return ParseStatus::NoMatch; 7985 7986 if (!parseExpr(Operands)) 7987 return ParseStatus::Failure; 7988 7989 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7990 assert(Opr.isImm() || Opr.isExpr()); 7991 SMLoc Loc = Opr.getStartLoc(); 7992 7993 // Currently we do not support arbitrary expressions as branch targets. 7994 // Only labels and absolute expressions are accepted. 
7995 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7996 Error(Loc, "expected an absolute expression or a label"); 7997 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7998 Error(Loc, "expected a 16-bit signed jump offset"); 7999 } 8000 8001 return ParseStatus::Success; 8002 } 8003 8004 //===----------------------------------------------------------------------===// 8005 // Boolean holding registers 8006 //===----------------------------------------------------------------------===// 8007 8008 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 8009 return parseReg(Operands); 8010 } 8011 8012 //===----------------------------------------------------------------------===// 8013 // mubuf 8014 //===----------------------------------------------------------------------===// 8015 8016 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 8017 const OperandVector &Operands, 8018 bool IsAtomic) { 8019 OptionalImmIndexMap OptionalIdx; 8020 unsigned FirstOperandIdx = 1; 8021 bool IsAtomicReturn = false; 8022 8023 if (IsAtomic) { 8024 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 8025 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8026 if (!Op.isCPol()) 8027 continue; 8028 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 8029 break; 8030 } 8031 8032 if (!IsAtomicReturn) { 8033 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 8034 if (NewOpc != -1) 8035 Inst.setOpcode(NewOpc); 8036 } 8037 8038 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 8039 SIInstrFlags::IsAtomicRet; 8040 } 8041 8042 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 8043 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8044 8045 // Add the register arguments 8046 if (Op.isReg()) { 8047 Op.addRegOperands(Inst, 1); 8048 // Insert a tied src for atomic return dst. 8049 // This cannot be postponed as subsequent calls to 8050 // addImmOperands rely on correct number of MC operands. 8051 if (IsAtomicReturn && i == FirstOperandIdx) 8052 Op.addRegOperands(Inst, 1); 8053 continue; 8054 } 8055 8056 // Handle the case where soffset is an immediate 8057 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 8058 Op.addImmOperands(Inst, 1); 8059 continue; 8060 } 8061 8062 // Handle tokens like 'offen' which are sometimes hard-coded into the 8063 // asm string. There are no MCInst operands for these. 8064 if (Op.isToken()) { 8065 continue; 8066 } 8067 assert(Op.isImm()); 8068 8069 // Handle optional arguments 8070 OptionalIdx[Op.getImmTy()] = i; 8071 } 8072 8073 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 8074 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 8075 } 8076 8077 //===----------------------------------------------------------------------===// 8078 // smrd 8079 //===----------------------------------------------------------------------===// 8080 8081 bool AMDGPUOperand::isSMRDOffset8() const { 8082 return isImmLiteral() && isUInt<8>(getImm()); 8083 } 8084 8085 bool AMDGPUOperand::isSMEMOffset() const { 8086 // Offset range is checked later by validator. 8087 return isImmLiteral(); 8088 } 8089 8090 bool AMDGPUOperand::isSMRDLiteralOffset() const { 8091 // 32-bit literals are only supported on CI and we only want to use them 8092 // when the offset is > 8-bits. 
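// For example, an offset of 0x100 does not fit into 8 bits and therefore has
// to be encoded as a 32-bit literal.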
8093 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 8094 } 8095 8096 //===----------------------------------------------------------------------===// 8097 // vop3 8098 //===----------------------------------------------------------------------===// 8099 8100 static bool ConvertOmodMul(int64_t &Mul) { 8101 if (Mul != 1 && Mul != 2 && Mul != 4) 8102 return false; 8103 8104 Mul >>= 1; 8105 return true; 8106 } 8107 8108 static bool ConvertOmodDiv(int64_t &Div) { 8109 if (Div == 1) { 8110 Div = 0; 8111 return true; 8112 } 8113 8114 if (Div == 2) { 8115 Div = 3; 8116 return true; 8117 } 8118 8119 return false; 8120 } 8121 8122 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 8123 // This is intentional and ensures compatibility with sp3. 8124 // See bug 35397 for details. 8125 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) { 8126 if (BoundCtrl == 0 || BoundCtrl == 1) { 8127 if (!isGFX11Plus()) 8128 BoundCtrl = 1; 8129 return true; 8130 } 8131 return false; 8132 } 8133 8134 void AMDGPUAsmParser::onBeginOfFile() { 8135 if (!getParser().getStreamer().getTargetStreamer() || 8136 getSTI().getTargetTriple().getArch() == Triple::r600) 8137 return; 8138 8139 if (!getTargetStreamer().getTargetID()) 8140 getTargetStreamer().initializeTargetID(getSTI(), 8141 getSTI().getFeatureString()); 8142 8143 if (isHsaAbi(getSTI())) 8144 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 8145 } 8146 8147 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) { 8148 StringRef Name = getTokenStr(); 8149 if (Name == "mul") { 8150 return parseIntWithPrefix("mul", Operands, 8151 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8152 } 8153 8154 if (Name == "div") { 8155 return parseIntWithPrefix("div", Operands, 8156 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8157 } 8158 8159 return ParseStatus::NoMatch; 8160 } 8161 8162 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to 8163 // the number of src operands present, then copies that bit into src0_modifiers. 8164 void cvtVOP3DstOpSelOnly(MCInst &Inst) { 8165 int Opc = Inst.getOpcode(); 8166 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8167 if (OpSelIdx == -1) 8168 return; 8169 8170 int SrcNum; 8171 const int Ops[] = { AMDGPU::OpName::src0, 8172 AMDGPU::OpName::src1, 8173 AMDGPU::OpName::src2 }; 8174 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]); 8175 ++SrcNum) 8176 ; 8177 assert(SrcNum > 0); 8178 8179 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8180 8181 if ((OpSel & (1 << SrcNum)) != 0) { 8182 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8183 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8184 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8185 } 8186 } 8187 8188 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, 8189 const OperandVector &Operands) { 8190 cvtVOP3P(Inst, Operands); 8191 cvtVOP3DstOpSelOnly(Inst); 8192 } 8193 8194 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 8195 OptionalImmIndexMap &OptionalIdx) { 8196 cvtVOP3P(Inst, Operands, OptionalIdx); 8197 cvtVOP3DstOpSelOnly(Inst); 8198 } 8199 8200 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8201 return 8202 // 1. This operand is input modifiers 8203 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8204 // 2. This is not last operand 8205 && Desc.NumOperands > (OpNum + 1) 8206 // 3. 
Next operand is register class 8207 && Desc.operands()[OpNum + 1].RegClass != -1 8208 // 4. Next register is not tied to any other operand 8209 && Desc.getOperandConstraint(OpNum + 1, 8210 MCOI::OperandConstraint::TIED_TO) == -1; 8211 } 8212 8213 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8214 { 8215 OptionalImmIndexMap OptionalIdx; 8216 unsigned Opc = Inst.getOpcode(); 8217 8218 unsigned I = 1; 8219 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8220 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8221 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8222 } 8223 8224 for (unsigned E = Operands.size(); I != E; ++I) { 8225 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8226 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8227 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8228 } else if (Op.isInterpSlot() || Op.isInterpAttr() || 8229 Op.isInterpAttrChan()) { 8230 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8231 } else if (Op.isImmModifier()) { 8232 OptionalIdx[Op.getImmTy()] = I; 8233 } else { 8234 llvm_unreachable("unhandled operand type"); 8235 } 8236 } 8237 8238 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high)) 8239 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8240 AMDGPUOperand::ImmTyHigh); 8241 8242 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8243 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8244 AMDGPUOperand::ImmTyClampSI); 8245 8246 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8247 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8248 AMDGPUOperand::ImmTyOModSI); 8249 } 8250 8251 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8252 { 8253 OptionalImmIndexMap OptionalIdx; 8254 unsigned Opc = Inst.getOpcode(); 8255 8256 unsigned I = 1; 8257 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8258 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8259 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8260 } 8261 8262 for (unsigned E = Operands.size(); I != E; ++I) { 8263 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8264 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8265 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8266 } else if (Op.isImmModifier()) { 8267 OptionalIdx[Op.getImmTy()] = I; 8268 } else { 8269 llvm_unreachable("unhandled operand type"); 8270 } 8271 } 8272 8273 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8274 8275 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8276 if (OpSelIdx != -1) 8277 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8278 8279 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8280 8281 if (OpSelIdx == -1) 8282 return; 8283 8284 const int Ops[] = { AMDGPU::OpName::src0, 8285 AMDGPU::OpName::src1, 8286 AMDGPU::OpName::src2 }; 8287 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8288 AMDGPU::OpName::src1_modifiers, 8289 AMDGPU::OpName::src2_modifiers }; 8290 8291 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8292 8293 for (int J = 0; J < 3; ++J) { 8294 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8295 if (OpIdx == -1) 8296 break; 8297 8298 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8299 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8300 8301 if ((OpSel & (1 << J)) != 0) 8302 ModVal |= SISrcMods::OP_SEL_0; 8303 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8304 (OpSel & (1 << 3)) != 0) 8305 ModVal |= 
SISrcMods::DST_OP_SEL; 8306 8307 Inst.getOperand(ModIdx).setImm(ModVal); 8308 } 8309 } 8310 8311 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8312 OptionalImmIndexMap &OptionalIdx) { 8313 unsigned Opc = Inst.getOpcode(); 8314 8315 unsigned I = 1; 8316 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8317 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8318 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8319 } 8320 8321 for (unsigned E = Operands.size(); I != E; ++I) { 8322 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8323 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8324 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8325 } else if (Op.isImmModifier()) { 8326 OptionalIdx[Op.getImmTy()] = I; 8327 } else if (Op.isRegOrImm()) { 8328 Op.addRegOrImmOperands(Inst, 1); 8329 } else { 8330 llvm_unreachable("unhandled operand type"); 8331 } 8332 } 8333 8334 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8335 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8336 AMDGPUOperand::ImmTyClampSI); 8337 8338 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8339 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8340 AMDGPUOperand::ImmTyOModSI); 8341 8342 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8343 // it has src2 register operand that is tied to dst operand 8344 // we don't allow modifiers for this operand in assembler so src2_modifiers 8345 // should be 0. 8346 if (isMAC(Opc)) { 8347 auto it = Inst.begin(); 8348 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8349 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8350 ++it; 8351 // Copy the operand to ensure it's not invalidated when Inst grows. 8352 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8353 } 8354 } 8355 8356 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8357 OptionalImmIndexMap OptionalIdx; 8358 cvtVOP3(Inst, Operands, OptionalIdx); 8359 } 8360 8361 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8362 OptionalImmIndexMap &OptIdx) { 8363 const int Opc = Inst.getOpcode(); 8364 const MCInstrDesc &Desc = MII.get(Opc); 8365 8366 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8367 8368 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || 8369 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || 8370 Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 || 8371 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) { 8372 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods 8373 Inst.addOperand(Inst.getOperand(0)); 8374 } 8375 8376 // Adding vdst_in operand is already covered for these DPP instructions in 8377 // cvtVOP3DPP. 8378 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) && 8379 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 || 8380 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 || 8381 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 || 8382 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) { 8383 assert(!IsPacked); 8384 Inst.addOperand(Inst.getOperand(0)); 8385 } 8386 8387 // FIXME: This is messy. 
Parse the modifiers as if it was a normal VOP3 8388 // instruction, and then figure out where to actually put the modifiers 8389 8390 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8391 if (OpSelIdx != -1) { 8392 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8393 } 8394 8395 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8396 if (OpSelHiIdx != -1) { 8397 int DefaultVal = IsPacked ? -1 : 0; 8398 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8399 DefaultVal); 8400 } 8401 8402 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8403 if (NegLoIdx != -1) 8404 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8405 8406 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8407 if (NegHiIdx != -1) 8408 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8409 8410 const int Ops[] = { AMDGPU::OpName::src0, 8411 AMDGPU::OpName::src1, 8412 AMDGPU::OpName::src2 }; 8413 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8414 AMDGPU::OpName::src1_modifiers, 8415 AMDGPU::OpName::src2_modifiers }; 8416 8417 unsigned OpSel = 0; 8418 unsigned OpSelHi = 0; 8419 unsigned NegLo = 0; 8420 unsigned NegHi = 0; 8421 8422 if (OpSelIdx != -1) 8423 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8424 8425 if (OpSelHiIdx != -1) 8426 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8427 8428 if (NegLoIdx != -1) 8429 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8430 8431 if (NegHiIdx != -1) 8432 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8433 8434 for (int J = 0; J < 3; ++J) { 8435 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8436 if (OpIdx == -1) 8437 break; 8438 8439 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8440 8441 if (ModIdx == -1) 8442 continue; 8443 8444 uint32_t ModVal = 0; 8445 8446 if ((OpSel & (1 << J)) != 0) 8447 ModVal |= SISrcMods::OP_SEL_0; 8448 8449 if ((OpSelHi & (1 << J)) != 0) 8450 ModVal |= SISrcMods::OP_SEL_1; 8451 8452 if ((NegLo & (1 << J)) != 0) 8453 ModVal |= SISrcMods::NEG; 8454 8455 if ((NegHi & (1 << J)) != 0) 8456 ModVal |= SISrcMods::NEG_HI; 8457 8458 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8459 } 8460 } 8461 8462 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8463 OptionalImmIndexMap OptIdx; 8464 cvtVOP3(Inst, Operands, OptIdx); 8465 cvtVOP3P(Inst, Operands, OptIdx); 8466 } 8467 8468 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, 8469 unsigned i, unsigned Opc, unsigned OpName) { 8470 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1) 8471 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2); 8472 else 8473 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1); 8474 } 8475 8476 void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) { 8477 unsigned Opc = Inst.getOpcode(); 8478 8479 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); 8480 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers); 8481 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers); 8482 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef 8483 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2 8484 8485 OptionalImmIndexMap OptIdx; 8486 for (unsigned i = 5; i < Operands.size(); ++i) { 8487 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8488 OptIdx[Op.getImmTy()] = i; 8489 } 
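// Append the optional modifiers in the order expected by the MCInst layout;
// modifiers that were not written in the source fall back to their defaults.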
8490 8491 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit)) 8492 addOptionalImmOperand(Inst, Operands, OptIdx, 8493 AMDGPUOperand::ImmTyIndexKey8bit); 8494 8495 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit)) 8496 addOptionalImmOperand(Inst, Operands, OptIdx, 8497 AMDGPUOperand::ImmTyIndexKey16bit); 8498 8499 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8500 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI); 8501 8502 cvtVOP3P(Inst, Operands, OptIdx); 8503 } 8504 8505 //===----------------------------------------------------------------------===// 8506 // VOPD 8507 //===----------------------------------------------------------------------===// 8508 8509 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { 8510 if (!hasVOPD(getSTI())) 8511 return ParseStatus::NoMatch; 8512 8513 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { 8514 SMLoc S = getLoc(); 8515 lex(); 8516 lex(); 8517 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); 8518 SMLoc OpYLoc = getLoc(); 8519 StringRef OpYName; 8520 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) { 8521 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc)); 8522 return ParseStatus::Success; 8523 } 8524 return Error(OpYLoc, "expected a VOPDY instruction after ::"); 8525 } 8526 return ParseStatus::NoMatch; 8527 } 8528 8529 // Create VOPD MCInst operands using parsed assembler operands. 8530 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { 8531 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer 8532 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); 8533 if (Op.isReg()) { 8534 Op.addRegOperands(Inst, 1); 8535 return; 8536 } 8537 if (Op.isImm()) { 8538 Op.addImmOperands(Inst, 1); 8539 return; 8540 } 8541 llvm_unreachable("Unhandled operand type in cvtVOPD"); 8542 }; 8543 8544 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII); 8545 8546 // MCInst operands are ordered as follows: 8547 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 8548 8549 for (auto CompIdx : VOPD::COMPONENTS) { 8550 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands()); 8551 } 8552 8553 for (auto CompIdx : VOPD::COMPONENTS) { 8554 const auto &CInfo = InstInfo[CompIdx]; 8555 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum(); 8556 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx) 8557 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx)); 8558 if (CInfo.hasSrc2Acc()) 8559 addOp(CInfo.getIndexOfDstInParsedOperands()); 8560 } 8561 } 8562 8563 //===----------------------------------------------------------------------===// 8564 // dpp 8565 //===----------------------------------------------------------------------===// 8566 8567 bool AMDGPUOperand::isDPP8() const { 8568 return isImmTy(ImmTyDPP8); 8569 } 8570 8571 bool AMDGPUOperand::isDPPCtrl() const { 8572 using namespace AMDGPU::DPP; 8573 8574 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8575 if (result) { 8576 int64_t Imm = getImm(); 8577 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8578 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8579 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8580 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8581 (Imm == DppCtrl::WAVE_SHL1) || 8582 (Imm == DppCtrl::WAVE_ROL1) || 8583 
(Imm == DppCtrl::WAVE_SHR1) || 8584 (Imm == DppCtrl::WAVE_ROR1) || 8585 (Imm == DppCtrl::ROW_MIRROR) || 8586 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8587 (Imm == DppCtrl::BCAST15) || 8588 (Imm == DppCtrl::BCAST31) || 8589 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8590 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8591 } 8592 return false; 8593 } 8594 8595 //===----------------------------------------------------------------------===// 8596 // mAI 8597 //===----------------------------------------------------------------------===// 8598 8599 bool AMDGPUOperand::isBLGP() const { 8600 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8601 } 8602 8603 bool AMDGPUOperand::isCBSZ() const { 8604 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8605 } 8606 8607 bool AMDGPUOperand::isABID() const { 8608 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8609 } 8610 8611 bool AMDGPUOperand::isS16Imm() const { 8612 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8613 } 8614 8615 bool AMDGPUOperand::isU16Imm() const { 8616 return isImmLiteral() && isUInt<16>(getImm()); 8617 } 8618 8619 //===----------------------------------------------------------------------===// 8620 // dim 8621 //===----------------------------------------------------------------------===// 8622 8623 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8624 // We want to allow "dim:1D" etc., 8625 // but the initial 1 is tokenized as an integer. 8626 std::string Token; 8627 if (isToken(AsmToken::Integer)) { 8628 SMLoc Loc = getToken().getEndLoc(); 8629 Token = std::string(getTokenStr()); 8630 lex(); 8631 if (getLoc() != Loc) 8632 return false; 8633 } 8634 8635 StringRef Suffix; 8636 if (!parseId(Suffix)) 8637 return false; 8638 Token += Suffix; 8639 8640 StringRef DimId = Token; 8641 if (DimId.starts_with("SQ_RSRC_IMG_")) 8642 DimId = DimId.drop_front(12); 8643 8644 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8645 if (!DimInfo) 8646 return false; 8647 8648 Encoding = DimInfo->Encoding; 8649 return true; 8650 } 8651 8652 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8653 if (!isGFX10Plus()) 8654 return ParseStatus::NoMatch; 8655 8656 SMLoc S = getLoc(); 8657 8658 if (!trySkipId("dim", AsmToken::Colon)) 8659 return ParseStatus::NoMatch; 8660 8661 unsigned Encoding; 8662 SMLoc Loc = getLoc(); 8663 if (!parseDimId(Encoding)) 8664 return Error(Loc, "invalid dim value"); 8665 8666 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8667 AMDGPUOperand::ImmTyDim)); 8668 return ParseStatus::Success; 8669 } 8670 8671 //===----------------------------------------------------------------------===// 8672 // dpp 8673 //===----------------------------------------------------------------------===// 8674 8675 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8676 SMLoc S = getLoc(); 8677 8678 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8679 return ParseStatus::NoMatch; 8680 8681 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8682 8683 int64_t Sels[8]; 8684 8685 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8686 return ParseStatus::Failure; 8687 8688 for (size_t i = 0; i < 8; ++i) { 8689 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8690 return ParseStatus::Failure; 8691 8692 SMLoc Loc = getLoc(); 8693 if (getParser().parseAbsoluteExpression(Sels[i])) 8694 return ParseStatus::Failure; 8695 if (0 > Sels[i] 
|| 7 < Sels[i]) 8696 return Error(Loc, "expected a 3-bit value"); 8697 } 8698 8699 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8700 return ParseStatus::Failure; 8701 8702 unsigned DPP8 = 0; 8703 for (size_t i = 0; i < 8; ++i) 8704 DPP8 |= (Sels[i] << (i * 3)); 8705 8706 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8707 return ParseStatus::Success; 8708 } 8709 8710 bool 8711 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8712 const OperandVector &Operands) { 8713 if (Ctrl == "row_newbcast") 8714 return isGFX90A(); 8715 8716 if (Ctrl == "row_share" || 8717 Ctrl == "row_xmask") 8718 return isGFX10Plus(); 8719 8720 if (Ctrl == "wave_shl" || 8721 Ctrl == "wave_shr" || 8722 Ctrl == "wave_rol" || 8723 Ctrl == "wave_ror" || 8724 Ctrl == "row_bcast") 8725 return isVI() || isGFX9(); 8726 8727 return Ctrl == "row_mirror" || 8728 Ctrl == "row_half_mirror" || 8729 Ctrl == "quad_perm" || 8730 Ctrl == "row_shl" || 8731 Ctrl == "row_shr" || 8732 Ctrl == "row_ror"; 8733 } 8734 8735 int64_t 8736 AMDGPUAsmParser::parseDPPCtrlPerm() { 8737 // quad_perm:[%d,%d,%d,%d] 8738 8739 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8740 return -1; 8741 8742 int64_t Val = 0; 8743 for (int i = 0; i < 4; ++i) { 8744 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8745 return -1; 8746 8747 int64_t Temp; 8748 SMLoc Loc = getLoc(); 8749 if (getParser().parseAbsoluteExpression(Temp)) 8750 return -1; 8751 if (Temp < 0 || Temp > 3) { 8752 Error(Loc, "expected a 2-bit value"); 8753 return -1; 8754 } 8755 8756 Val += (Temp << i * 2); 8757 } 8758 8759 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8760 return -1; 8761 8762 return Val; 8763 } 8764 8765 int64_t 8766 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8767 using namespace AMDGPU::DPP; 8768 8769 // sel:%d 8770 8771 int64_t Val; 8772 SMLoc Loc = getLoc(); 8773 8774 if (getParser().parseAbsoluteExpression(Val)) 8775 return -1; 8776 8777 struct DppCtrlCheck { 8778 int64_t Ctrl; 8779 int Lo; 8780 int Hi; 8781 }; 8782 8783 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8784 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8785 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8786 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8787 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8788 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8789 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8790 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8791 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8792 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8793 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8794 .Default({-1, 0, 0}); 8795 8796 bool Valid; 8797 if (Check.Ctrl == -1) { 8798 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8799 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8800 } else { 8801 Valid = Check.Lo <= Val && Val <= Check.Hi; 8802 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8803 } 8804 8805 if (!Valid) { 8806 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8807 return -1; 8808 } 8809 8810 return Val; 8811 } 8812 8813 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8814 using namespace AMDGPU::DPP; 8815 8816 if (!isToken(AsmToken::Identifier) || 8817 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8818 return ParseStatus::NoMatch; 8819 8820 SMLoc S = getLoc(); 8821 int64_t Val = -1; 8822 StringRef Ctrl; 8823 8824 parseId(Ctrl); 8825 8826 if (Ctrl == "row_mirror") { 8827 Val = DppCtrl::ROW_MIRROR; 8828 } else if (Ctrl == "row_half_mirror") { 8829 Val = DppCtrl::ROW_HALF_MIRROR; 8830 } else { 8831 if (skipToken(AsmToken::Colon, "expected a colon")) { 8832 if (Ctrl == "quad_perm") { 8833 Val = parseDPPCtrlPerm(); 8834 } else { 8835 Val = parseDPPCtrlSel(Ctrl); 8836 } 8837 } 8838 } 8839 8840 if (Val == -1) 8841 return ParseStatus::Failure; 8842 8843 Operands.push_back( 8844 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8845 return ParseStatus::Success; 8846 } 8847 8848 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 8849 bool IsDPP8) { 8850 OptionalImmIndexMap OptionalIdx; 8851 unsigned Opc = Inst.getOpcode(); 8852 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8853 8854 // MAC instructions are special because they have 'old' 8855 // operand which is not tied to dst (but assumed to be). 8856 // They also have dummy unused src2_modifiers. 8857 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old); 8858 int Src2ModIdx = 8859 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); 8860 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 && 8861 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1; 8862 8863 unsigned I = 1; 8864 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8865 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8866 } 8867 8868 int Fi = 0; 8869 for (unsigned E = Operands.size(); I != E; ++I) { 8870 8871 if (IsMAC) { 8872 int NumOperands = Inst.getNumOperands(); 8873 if (OldIdx == NumOperands) { 8874 // Handle old operand 8875 constexpr int DST_IDX = 0; 8876 Inst.addOperand(Inst.getOperand(DST_IDX)); 8877 } else if (Src2ModIdx == NumOperands) { 8878 // Add unused dummy src2_modifiers 8879 Inst.addOperand(MCOperand::createImm(0)); 8880 } 8881 } 8882 8883 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); 8884 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) { 8885 Inst.addOperand(Inst.getOperand(0)); 8886 } 8887 8888 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 || 8889 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 || 8890 Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 || 8891 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12; 8892 if (IsVOP3CvtSrDpp) { 8893 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) { 8894 Inst.addOperand(MCOperand::createImm(0)); 8895 Inst.addOperand(MCOperand::createReg(0)); 8896 } 8897 } 8898 8899 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8900 MCOI::TIED_TO); 8901 if (TiedTo != -1) { 8902 assert((unsigned)TiedTo < Inst.getNumOperands()); 8903 // handle tied old or src2 for MAC instructions 8904 Inst.addOperand(Inst.getOperand(TiedTo)); 8905 } 8906 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8907 // Add the register arguments 8908 if (IsDPP8 && Op.isDppFI()) { 8909 Fi = Op.getImm(); 8910 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8911 
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
      assert(!Op.isImmKindLiteral() && "Cannot use literal with DPP");
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);

  if (Desc.TSFlags & SIInstrFlags::VOP3P)
    cvtVOP3P(Inst, Operands, OptionalIdx);
  else if (Desc.TSFlags & SIInstrFlags::VOP3)
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);
  else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
  }

  if (IsDPP8) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);

    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
  }
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle tied old or src2 for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
      // Skip it.
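      // For example (illustrative syntax), in
      //   v_add_co_ci_u32_dpp v0, vcc, v1, v2, vcc quad_perm:[0,1,2,3]
      // the explicit vcc operands are implied by the opcode and are not
      // separately encoded, so they are dropped here.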
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDppFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
                                          StringRef Prefix,
                                          AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;

  SMLoc StringLoc;
  ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff)
    return Error(StringLoc, "invalid " + Twine(Prefix) + " value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;

  SMLoc StringLoc;
  ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff)
    return Error(StringLoc, "invalid dst_unused value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
  return ParseStatus::Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClampSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstUnused,
                              DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
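      // Append the optional VOP2 sdwa operands in MCInst operand order.
      // Anything the user omitted gets a default: 0 for clamp/omod, DWORD for
      // the selects, and UNUSED_PRESERVE for dst_unused.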
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);

      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}: they have a src2 register operand that
  // is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
                                                unsigned MCK) {
  switch (MCK) {
  case MCK_addr64:
    return parseTokenOp("addr64", Operands);
  case MCK_done:
    return parseTokenOp("done", Operands);
  case MCK_idxen:
    return parseTokenOp("idxen", Operands);
  case MCK_lds:
    return parseTokenOp("lds", Operands);
  case MCK_offen:
    return parseTokenOp("offen", Operands);
  case MCK_off:
    return parseTokenOp("off", Operands);
  case MCK_row_95_en:
    return parseTokenOp("row_en", Operands);
  case MCK_gds:
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  case MCK_tfe:
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  }
  return tryCustomParseOperand(Operands, MCK);
}

// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand(),
  // but MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // where the corresponding token class is expected.
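  // For example (illustrative syntax), the trailing "gds" in
  // "ds_add_u32 v1, v2 gds" is parsed into an ImmTyGDS immediate, while the
  // matcher table expects the MCK_gds token class; the MCK_gds case below
  // accepts the immediate form.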
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_tfe:
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be usable with
    // 64-bit operands. The following code enables it for SReg_64 operands used
    // as source and destination. Remaining source operands are handled in
    // isInlinableImm.
    return Operand.isNull() ?
               Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm))
    return Error(S, "expected a 16-bit value");

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

bool AMDGPUOperand::isWaitVAVDst() const {
  return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
}

bool AMDGPUOperand::isWaitVMVSrc() const {
  return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}

//===----------------------------------------------------------------------===//
// Split Barrier
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }