1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCExpr.h" 11 #include "MCTargetDesc/AMDGPUMCKernelDescriptor.h" 12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 13 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 14 #include "SIDefines.h" 15 #include "SIInstrInfo.h" 16 #include "SIRegisterInfo.h" 17 #include "TargetInfo/AMDGPUTargetInfo.h" 18 #include "Utils/AMDGPUAsmUtils.h" 19 #include "Utils/AMDGPUBaseInfo.h" 20 #include "Utils/AMDKernelCodeTUtils.h" 21 #include "llvm/ADT/APFloat.h" 22 #include "llvm/ADT/SmallBitVector.h" 23 #include "llvm/ADT/StringSet.h" 24 #include "llvm/ADT/Twine.h" 25 #include "llvm/BinaryFormat/ELF.h" 26 #include "llvm/CodeGenTypes/MachineValueType.h" 27 #include "llvm/MC/MCAsmInfo.h" 28 #include "llvm/MC/MCContext.h" 29 #include "llvm/MC/MCExpr.h" 30 #include "llvm/MC/MCInst.h" 31 #include "llvm/MC/MCInstrDesc.h" 32 #include "llvm/MC/MCParser/MCAsmLexer.h" 33 #include "llvm/MC/MCParser/MCAsmParser.h" 34 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 35 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 36 #include "llvm/MC/MCSymbol.h" 37 #include "llvm/MC/TargetRegistry.h" 38 #include "llvm/Support/AMDGPUMetadata.h" 39 #include "llvm/Support/AMDHSAKernelDescriptor.h" 40 #include "llvm/Support/Casting.h" 41 #include "llvm/Support/MathExtras.h" 42 #include "llvm/TargetParser/TargetParser.h" 43 #include <optional> 44 45 using namespace llvm; 46 using namespace llvm::AMDGPU; 47 using namespace llvm::amdhsa; 48 49 namespace { 50 51 class AMDGPUAsmParser; 52 53 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 54 55 //===----------------------------------------------------------------------===// 56 // Operand 57 //===----------------------------------------------------------------------===// 58 59 class AMDGPUOperand : public MCParsedAsmOperand { 60 enum KindTy { 61 Token, 62 Immediate, 63 Register, 64 Expression 65 } Kind; 66 67 SMLoc StartLoc, EndLoc; 68 const AMDGPUAsmParser *AsmParser; 69 70 public: 71 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 72 : Kind(Kind_), AsmParser(AsmParser_) {} 73 74 using Ptr = std::unique_ptr<AMDGPUOperand>; 75 76 struct Modifiers { 77 bool Abs = false; 78 bool Neg = false; 79 bool Sext = false; 80 bool Lit = false; 81 82 bool hasFPModifiers() const { return Abs || Neg; } 83 bool hasIntModifiers() const { return Sext; } 84 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 85 86 int64_t getFPModifiersOperand() const { 87 int64_t Operand = 0; 88 Operand |= Abs ? SISrcMods::ABS : 0u; 89 Operand |= Neg ? SISrcMods::NEG : 0u; 90 return Operand; 91 } 92 93 int64_t getIntModifiersOperand() const { 94 int64_t Operand = 0; 95 Operand |= Sext ? 
SISrcMods::SEXT : 0u; 96 return Operand; 97 } 98 99 int64_t getModifiersOperand() const { 100 assert(!(hasFPModifiers() && hasIntModifiers()) 101 && "fp and int modifiers should not be used simultaneously"); 102 if (hasFPModifiers()) 103 return getFPModifiersOperand(); 104 if (hasIntModifiers()) 105 return getIntModifiersOperand(); 106 return 0; 107 } 108 109 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 110 }; 111 112 enum ImmTy { 113 ImmTyNone, 114 ImmTyGDS, 115 ImmTyLDS, 116 ImmTyOffen, 117 ImmTyIdxen, 118 ImmTyAddr64, 119 ImmTyOffset, 120 ImmTyInstOffset, 121 ImmTyOffset0, 122 ImmTyOffset1, 123 ImmTySMEMOffsetMod, 124 ImmTyCPol, 125 ImmTyTFE, 126 ImmTyD16, 127 ImmTyClamp, 128 ImmTyOModSI, 129 ImmTySDWADstSel, 130 ImmTySDWASrc0Sel, 131 ImmTySDWASrc1Sel, 132 ImmTySDWADstUnused, 133 ImmTyDMask, 134 ImmTyDim, 135 ImmTyUNorm, 136 ImmTyDA, 137 ImmTyR128A16, 138 ImmTyA16, 139 ImmTyLWE, 140 ImmTyExpTgt, 141 ImmTyExpCompr, 142 ImmTyExpVM, 143 ImmTyFORMAT, 144 ImmTyHwreg, 145 ImmTyOff, 146 ImmTySendMsg, 147 ImmTyInterpSlot, 148 ImmTyInterpAttr, 149 ImmTyInterpAttrChan, 150 ImmTyOpSel, 151 ImmTyOpSelHi, 152 ImmTyNegLo, 153 ImmTyNegHi, 154 ImmTyIndexKey8bit, 155 ImmTyIndexKey16bit, 156 ImmTyDPP8, 157 ImmTyDppCtrl, 158 ImmTyDppRowMask, 159 ImmTyDppBankMask, 160 ImmTyDppBoundCtrl, 161 ImmTyDppFI, 162 ImmTySwizzle, 163 ImmTyGprIdxMode, 164 ImmTyHigh, 165 ImmTyBLGP, 166 ImmTyCBSZ, 167 ImmTyABID, 168 ImmTyEndpgm, 169 ImmTyWaitVDST, 170 ImmTyWaitEXP, 171 ImmTyWaitVAVDst, 172 ImmTyWaitVMVSrc, 173 ImmTyByteSel, 174 }; 175 176 // Immediate operand kind. 177 // It helps to identify the location of an offending operand after an error. 178 // Note that regular literals and mandatory literals (KImm) must be handled 179 // differently. When looking for an offending operand, we should usually 180 // ignore mandatory literals because they are part of the instruction and 181 // cannot be changed. Report location of mandatory operands only for VOPD, 182 // when both OpX and OpY have a KImm and there are no other literals. 
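// For example (illustrative, not exhaustive): the 0x1234 in
// "v_add_f32 v0, 0x1234, v1" is a regular literal (ImmKindTyLiteral below),
// while the trailing constant required by an opcode such as v_madmk_f32 is a
// mandatory literal (ImmKindTyMandatoryLiteral) and is normally skipped when
// searching for an offending operand.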
183 enum ImmKindTy { 184 ImmKindTyNone, 185 ImmKindTyLiteral, 186 ImmKindTyMandatoryLiteral, 187 ImmKindTyConst, 188 }; 189 190 private: 191 struct TokOp { 192 const char *Data; 193 unsigned Length; 194 }; 195 196 struct ImmOp { 197 int64_t Val; 198 ImmTy Type; 199 bool IsFPImm; 200 mutable ImmKindTy Kind; 201 Modifiers Mods; 202 }; 203 204 struct RegOp { 205 unsigned RegNo; 206 Modifiers Mods; 207 }; 208 209 union { 210 TokOp Tok; 211 ImmOp Imm; 212 RegOp Reg; 213 const MCExpr *Expr; 214 }; 215 216 public: 217 bool isToken() const override { return Kind == Token; } 218 219 bool isSymbolRefExpr() const { 220 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 221 } 222 223 bool isImm() const override { 224 return Kind == Immediate; 225 } 226 227 void setImmKindNone() const { 228 assert(isImm()); 229 Imm.Kind = ImmKindTyNone; 230 } 231 232 void setImmKindLiteral() const { 233 assert(isImm()); 234 Imm.Kind = ImmKindTyLiteral; 235 } 236 237 void setImmKindMandatoryLiteral() const { 238 assert(isImm()); 239 Imm.Kind = ImmKindTyMandatoryLiteral; 240 } 241 242 void setImmKindConst() const { 243 assert(isImm()); 244 Imm.Kind = ImmKindTyConst; 245 } 246 247 bool IsImmKindLiteral() const { 248 return isImm() && Imm.Kind == ImmKindTyLiteral; 249 } 250 251 bool IsImmKindMandatoryLiteral() const { 252 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral; 253 } 254 255 bool isImmKindConst() const { 256 return isImm() && Imm.Kind == ImmKindTyConst; 257 } 258 259 bool isInlinableImm(MVT type) const; 260 bool isLiteralImm(MVT type) const; 261 262 bool isRegKind() const { 263 return Kind == Register; 264 } 265 266 bool isReg() const override { 267 return isRegKind() && !hasModifiers(); 268 } 269 270 bool isRegOrInline(unsigned RCID, MVT type) const { 271 return isRegClass(RCID) || isInlinableImm(type); 272 } 273 274 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 275 return isRegOrInline(RCID, type) || isLiteralImm(type); 276 } 277 278 bool isRegOrImmWithInt16InputMods() const { 279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 280 } 281 282 bool isRegOrImmWithIntT16InputMods() const { 283 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16); 284 } 285 286 bool isRegOrImmWithInt32InputMods() const { 287 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 288 } 289 290 bool isRegOrInlineImmWithInt16InputMods() const { 291 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); 292 } 293 294 bool isRegOrInlineImmWithInt32InputMods() const { 295 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); 296 } 297 298 bool isRegOrImmWithInt64InputMods() const { 299 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 300 } 301 302 bool isRegOrImmWithFP16InputMods() const { 303 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 304 } 305 306 bool isRegOrImmWithFPT16InputMods() const { 307 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16); 308 } 309 310 bool isRegOrImmWithFP32InputMods() const { 311 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 312 } 313 314 bool isRegOrImmWithFP64InputMods() const { 315 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 316 } 317 318 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const { 319 return isRegOrInline( 320 IsFake16 ? 
AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16); 321 } 322 323 bool isRegOrInlineImmWithFP32InputMods() const { 324 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); 325 } 326 327 bool isPackedFP16InputMods() const { 328 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16); 329 } 330 331 bool isVReg() const { 332 return isRegClass(AMDGPU::VGPR_32RegClassID) || 333 isRegClass(AMDGPU::VReg_64RegClassID) || 334 isRegClass(AMDGPU::VReg_96RegClassID) || 335 isRegClass(AMDGPU::VReg_128RegClassID) || 336 isRegClass(AMDGPU::VReg_160RegClassID) || 337 isRegClass(AMDGPU::VReg_192RegClassID) || 338 isRegClass(AMDGPU::VReg_256RegClassID) || 339 isRegClass(AMDGPU::VReg_512RegClassID) || 340 isRegClass(AMDGPU::VReg_1024RegClassID); 341 } 342 343 bool isVReg32() const { 344 return isRegClass(AMDGPU::VGPR_32RegClassID); 345 } 346 347 bool isVReg32OrOff() const { 348 return isOff() || isVReg32(); 349 } 350 351 bool isNull() const { 352 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 353 } 354 355 bool isVRegWithInputMods() const; 356 template <bool IsFake16> bool isT16VRegWithInputMods() const; 357 358 bool isSDWAOperand(MVT type) const; 359 bool isSDWAFP16Operand() const; 360 bool isSDWAFP32Operand() const; 361 bool isSDWAInt16Operand() const; 362 bool isSDWAInt32Operand() const; 363 364 bool isImmTy(ImmTy ImmT) const { 365 return isImm() && Imm.Type == ImmT; 366 } 367 368 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); } 369 370 bool isImmLiteral() const { return isImmTy(ImmTyNone); } 371 372 bool isImmModifier() const { 373 return isImm() && Imm.Type != ImmTyNone; 374 } 375 376 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 377 bool isDim() const { return isImmTy(ImmTyDim); } 378 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 379 bool isOff() const { return isImmTy(ImmTyOff); } 380 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 381 bool isOffen() const { return isImmTy(ImmTyOffen); } 382 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 383 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 384 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); } 385 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 386 bool isGDS() const { return isImmTy(ImmTyGDS); } 387 bool isLDS() const { return isImmTy(ImmTyLDS); } 388 bool isCPol() const { return isImmTy(ImmTyCPol); } 389 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); } 390 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); } 391 bool isTFE() const { return isImmTy(ImmTyTFE); } 392 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 393 bool isDppFI() const { return isImmTy(ImmTyDppFI); } 394 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); } 395 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); } 396 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); } 397 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); } 398 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 399 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 400 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); } 401 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 402 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 403 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 404 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 405 406 bool isRegOrImm() const { 407 return isReg() || isImm(); 408 } 409 
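// Informal summary of the predicate naming used below (descriptive only):
// "S"-prefixed predicates accept SGPRs, "V"-prefixed ones accept VGPRs or
// SGPRs (the VS_* register classes), and "A"-prefixed ones accept AGPRs.
// A "C" variant accepts registers and inline constants only, while the plain
// form also accepts a 32-bit literal. For example, isVCSrc_f32() matches a
// VGPR/SGPR or an inline f32 constant, and isVSrc_f32() additionally matches
// an f32 literal.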
410 bool isRegClass(unsigned RCID) const; 411 412 bool isInlineValue() const; 413 414 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 415 return isRegOrInline(RCID, type) && !hasModifiers(); 416 } 417 418 bool isSCSrcB16() const { 419 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 420 } 421 422 bool isSCSrcV2B16() const { 423 return isSCSrcB16(); 424 } 425 426 bool isSCSrc_b32() const { 427 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 428 } 429 430 bool isSCSrc_b64() const { 431 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 432 } 433 434 bool isBoolReg() const; 435 436 bool isSCSrcF16() const { 437 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 438 } 439 440 bool isSCSrcV2F16() const { 441 return isSCSrcF16(); 442 } 443 444 bool isSCSrcF32() const { 445 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 446 } 447 448 bool isSCSrcF64() const { 449 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 450 } 451 452 bool isSSrc_b32() const { 453 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr(); 454 } 455 456 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); } 457 458 bool isSSrcV2B16() const { 459 llvm_unreachable("cannot happen"); 460 return isSSrc_b16(); 461 } 462 463 bool isSSrc_b64() const { 464 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 465 // See isVSrc64(). 466 return isSCSrc_b64() || isLiteralImm(MVT::i64); 467 } 468 469 bool isSSrc_f32() const { 470 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr(); 471 } 472 473 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); } 474 475 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); } 476 477 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); } 478 479 bool isSSrcV2F16() const { 480 llvm_unreachable("cannot happen"); 481 return isSSrc_f16(); 482 } 483 484 bool isSSrcV2FP32() const { 485 llvm_unreachable("cannot happen"); 486 return isSSrc_f32(); 487 } 488 489 bool isSCSrcV2FP32() const { 490 llvm_unreachable("cannot happen"); 491 return isSCSrcF32(); 492 } 493 494 bool isSSrcV2INT32() const { 495 llvm_unreachable("cannot happen"); 496 return isSSrc_b32(); 497 } 498 499 bool isSCSrcV2INT32() const { 500 llvm_unreachable("cannot happen"); 501 return isSCSrc_b32(); 502 } 503 504 bool isSSrcOrLds_b32() const { 505 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 506 isLiteralImm(MVT::i32) || isExpr(); 507 } 508 509 bool isVCSrc_b32() const { 510 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 511 } 512 513 bool isVCSrcB64() const { 514 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 515 } 516 517 bool isVCSrcTB16() const { 518 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16); 519 } 520 521 bool isVCSrcTB16_Lo128() const { 522 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16); 523 } 524 525 bool isVCSrcFake16B16_Lo128() const { 526 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16); 527 } 528 529 bool isVCSrc_b16() const { 530 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 531 } 532 533 bool isVCSrc_v2b16() const { return isVCSrc_b16(); } 534 535 bool isVCSrc_f32() const { 536 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 537 } 538 539 bool isVCSrcF64() const { 540 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 541 } 542 543 bool isVCSrcTBF16() const { 
544 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16); 545 } 546 547 bool isVCSrcTF16() const { 548 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16); 549 } 550 551 bool isVCSrcTBF16_Lo128() const { 552 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16); 553 } 554 555 bool isVCSrcTF16_Lo128() const { 556 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16); 557 } 558 559 bool isVCSrcFake16BF16_Lo128() const { 560 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16); 561 } 562 563 bool isVCSrcFake16F16_Lo128() const { 564 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16); 565 } 566 567 bool isVCSrc_bf16() const { 568 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16); 569 } 570 571 bool isVCSrc_f16() const { 572 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 573 } 574 575 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); } 576 577 bool isVCSrc_v2f16() const { return isVCSrc_f16(); } 578 579 bool isVSrc_b32() const { 580 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr(); 581 } 582 583 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); } 584 585 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); } 586 587 bool isVSrcT_b16_Lo128() const { 588 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16); 589 } 590 591 bool isVSrcFake16_b16_Lo128() const { 592 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16); 593 } 594 595 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); } 596 597 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); } 598 599 bool isVCSrcV2FP32() const { 600 return isVCSrcF64(); 601 } 602 603 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); } 604 605 bool isVCSrcV2INT32() const { 606 return isVCSrcB64(); 607 } 608 609 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); } 610 611 bool isVSrc_f32() const { 612 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr(); 613 } 614 615 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); } 616 617 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); } 618 619 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); } 620 621 bool isVSrcT_bf16_Lo128() const { 622 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16); 623 } 624 625 bool isVSrcT_f16_Lo128() const { 626 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16); 627 } 628 629 bool isVSrcFake16_bf16_Lo128() const { 630 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16); 631 } 632 633 bool isVSrcFake16_f16_Lo128() const { 634 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16); 635 } 636 637 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); } 638 639 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); } 640 641 bool isVSrc_v2bf16() const { 642 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16); 643 } 644 645 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); } 646 647 bool isVISrcB32() const { 648 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 649 } 650 651 bool isVISrcB16() const { 652 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 653 } 654 655 bool isVISrcV2B16() const { 656 return isVISrcB16(); 657 } 658 659 bool isVISrcF32() const { 660 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 661 } 662 
663 bool isVISrcF16() const { 664 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 665 } 666 667 bool isVISrcV2F16() const { 668 return isVISrcF16() || isVISrcB32(); 669 } 670 671 bool isVISrc_64_bf16() const { 672 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16); 673 } 674 675 bool isVISrc_64_f16() const { 676 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16); 677 } 678 679 bool isVISrc_64_b32() const { 680 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 681 } 682 683 bool isVISrc_64B64() const { 684 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 685 } 686 687 bool isVISrc_64_f64() const { 688 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 689 } 690 691 bool isVISrc_64V2FP32() const { 692 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 693 } 694 695 bool isVISrc_64V2INT32() const { 696 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 697 } 698 699 bool isVISrc_256_b32() const { 700 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 701 } 702 703 bool isVISrc_256_f32() const { 704 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 705 } 706 707 bool isVISrc_256B64() const { 708 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 709 } 710 711 bool isVISrc_256_f64() const { 712 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 713 } 714 715 bool isVISrc_128B16() const { 716 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 717 } 718 719 bool isVISrc_128V2B16() const { 720 return isVISrc_128B16(); 721 } 722 723 bool isVISrc_128_b32() const { 724 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 725 } 726 727 bool isVISrc_128_f32() const { 728 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 729 } 730 731 bool isVISrc_256V2FP32() const { 732 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 733 } 734 735 bool isVISrc_256V2INT32() const { 736 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 737 } 738 739 bool isVISrc_512_b32() const { 740 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 741 } 742 743 bool isVISrc_512B16() const { 744 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 745 } 746 747 bool isVISrc_512V2B16() const { 748 return isVISrc_512B16(); 749 } 750 751 bool isVISrc_512_f32() const { 752 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 753 } 754 755 bool isVISrc_512F16() const { 756 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 757 } 758 759 bool isVISrc_512V2F16() const { 760 return isVISrc_512F16() || isVISrc_512_b32(); 761 } 762 763 bool isVISrc_1024_b32() const { 764 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 765 } 766 767 bool isVISrc_1024B16() const { 768 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 769 } 770 771 bool isVISrc_1024V2B16() const { 772 return isVISrc_1024B16(); 773 } 774 775 bool isVISrc_1024_f32() const { 776 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 777 } 778 779 bool isVISrc_1024F16() const { 780 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 781 } 782 783 bool isVISrc_1024V2F16() const { 784 return isVISrc_1024F16() || isVISrc_1024_b32(); 785 } 786 787 bool isAISrcB32() const { 788 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 789 } 790 791 bool isAISrcB16() const { 792 return 
isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 793 } 794 795 bool isAISrcV2B16() const { 796 return isAISrcB16(); 797 } 798 799 bool isAISrcF32() const { 800 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 801 } 802 803 bool isAISrcF16() const { 804 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 805 } 806 807 bool isAISrcV2F16() const { 808 return isAISrcF16() || isAISrcB32(); 809 } 810 811 bool isAISrc_64B64() const { 812 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 813 } 814 815 bool isAISrc_64_f64() const { 816 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 817 } 818 819 bool isAISrc_128_b32() const { 820 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 821 } 822 823 bool isAISrc_128B16() const { 824 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 825 } 826 827 bool isAISrc_128V2B16() const { 828 return isAISrc_128B16(); 829 } 830 831 bool isAISrc_128_f32() const { 832 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 833 } 834 835 bool isAISrc_128F16() const { 836 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 837 } 838 839 bool isAISrc_128V2F16() const { 840 return isAISrc_128F16() || isAISrc_128_b32(); 841 } 842 843 bool isVISrc_128_bf16() const { 844 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16); 845 } 846 847 bool isVISrc_128_f16() const { 848 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 849 } 850 851 bool isVISrc_128V2F16() const { 852 return isVISrc_128_f16() || isVISrc_128_b32(); 853 } 854 855 bool isAISrc_256B64() const { 856 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 857 } 858 859 bool isAISrc_256_f64() const { 860 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 861 } 862 863 bool isAISrc_512_b32() const { 864 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 865 } 866 867 bool isAISrc_512B16() const { 868 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 869 } 870 871 bool isAISrc_512V2B16() const { 872 return isAISrc_512B16(); 873 } 874 875 bool isAISrc_512_f32() const { 876 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 877 } 878 879 bool isAISrc_512F16() const { 880 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 881 } 882 883 bool isAISrc_512V2F16() const { 884 return isAISrc_512F16() || isAISrc_512_b32(); 885 } 886 887 bool isAISrc_1024_b32() const { 888 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 889 } 890 891 bool isAISrc_1024B16() const { 892 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 893 } 894 895 bool isAISrc_1024V2B16() const { 896 return isAISrc_1024B16(); 897 } 898 899 bool isAISrc_1024_f32() const { 900 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 901 } 902 903 bool isAISrc_1024F16() const { 904 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 905 } 906 907 bool isAISrc_1024V2F16() const { 908 return isAISrc_1024F16() || isAISrc_1024_b32(); 909 } 910 911 bool isKImmFP32() const { 912 return isLiteralImm(MVT::f32); 913 } 914 915 bool isKImmFP16() const { 916 return isLiteralImm(MVT::f16); 917 } 918 919 bool isMem() const override { 920 return false; 921 } 922 923 bool isExpr() const { 924 return Kind == Expression; 925 } 926 927 bool isSOPPBrTarget() const { return isExpr() || isImm(); } 928 929 bool isSWaitCnt() const; 930 bool isDepCtr() const; 931 bool isSDelayALU() const; 
932 bool isHwreg() const; 933 bool isSendMsg() const; 934 bool isSplitBarrier() const; 935 bool isSwizzle() const; 936 bool isSMRDOffset8() const; 937 bool isSMEMOffset() const; 938 bool isSMRDLiteralOffset() const; 939 bool isDPP8() const; 940 bool isDPPCtrl() const; 941 bool isBLGP() const; 942 bool isGPRIdxMode() const; 943 bool isS16Imm() const; 944 bool isU16Imm() const; 945 bool isEndpgm() const; 946 947 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const { 948 return [=](){ return P(*this); }; 949 } 950 951 StringRef getToken() const { 952 assert(isToken()); 953 return StringRef(Tok.Data, Tok.Length); 954 } 955 956 int64_t getImm() const { 957 assert(isImm()); 958 return Imm.Val; 959 } 960 961 void setImm(int64_t Val) { 962 assert(isImm()); 963 Imm.Val = Val; 964 } 965 966 ImmTy getImmTy() const { 967 assert(isImm()); 968 return Imm.Type; 969 } 970 971 MCRegister getReg() const override { 972 assert(isRegKind()); 973 return Reg.RegNo; 974 } 975 976 SMLoc getStartLoc() const override { 977 return StartLoc; 978 } 979 980 SMLoc getEndLoc() const override { 981 return EndLoc; 982 } 983 984 SMRange getLocRange() const { 985 return SMRange(StartLoc, EndLoc); 986 } 987 988 Modifiers getModifiers() const { 989 assert(isRegKind() || isImmTy(ImmTyNone)); 990 return isRegKind() ? Reg.Mods : Imm.Mods; 991 } 992 993 void setModifiers(Modifiers Mods) { 994 assert(isRegKind() || isImmTy(ImmTyNone)); 995 if (isRegKind()) 996 Reg.Mods = Mods; 997 else 998 Imm.Mods = Mods; 999 } 1000 1001 bool hasModifiers() const { 1002 return getModifiers().hasModifiers(); 1003 } 1004 1005 bool hasFPModifiers() const { 1006 return getModifiers().hasFPModifiers(); 1007 } 1008 1009 bool hasIntModifiers() const { 1010 return getModifiers().hasIntModifiers(); 1011 } 1012 1013 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 1014 1015 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 1016 1017 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 1018 1019 void addRegOperands(MCInst &Inst, unsigned N) const; 1020 1021 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 1022 if (isRegKind()) 1023 addRegOperands(Inst, N); 1024 else 1025 addImmOperands(Inst, N); 1026 } 1027 1028 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 1029 Modifiers Mods = getModifiers(); 1030 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 1031 if (isRegKind()) { 1032 addRegOperands(Inst, N); 1033 } else { 1034 addImmOperands(Inst, N, false); 1035 } 1036 } 1037 1038 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 1039 assert(!hasIntModifiers()); 1040 addRegOrImmWithInputModsOperands(Inst, N); 1041 } 1042 1043 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 1044 assert(!hasFPModifiers()); 1045 addRegOrImmWithInputModsOperands(Inst, N); 1046 } 1047 1048 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 1049 Modifiers Mods = getModifiers(); 1050 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 1051 assert(isRegKind()); 1052 addRegOperands(Inst, N); 1053 } 1054 1055 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 1056 assert(!hasIntModifiers()); 1057 addRegWithInputModsOperands(Inst, N); 1058 } 1059 1060 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 1061 assert(!hasFPModifiers()); 1062 addRegWithInputModsOperands(Inst, N); 1063 } 1064 1065 static void printImmTy(raw_ostream& 
OS, ImmTy Type) { 1066 // clang-format off 1067 switch (Type) { 1068 case ImmTyNone: OS << "None"; break; 1069 case ImmTyGDS: OS << "GDS"; break; 1070 case ImmTyLDS: OS << "LDS"; break; 1071 case ImmTyOffen: OS << "Offen"; break; 1072 case ImmTyIdxen: OS << "Idxen"; break; 1073 case ImmTyAddr64: OS << "Addr64"; break; 1074 case ImmTyOffset: OS << "Offset"; break; 1075 case ImmTyInstOffset: OS << "InstOffset"; break; 1076 case ImmTyOffset0: OS << "Offset0"; break; 1077 case ImmTyOffset1: OS << "Offset1"; break; 1078 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break; 1079 case ImmTyCPol: OS << "CPol"; break; 1080 case ImmTyIndexKey8bit: OS << "index_key"; break; 1081 case ImmTyIndexKey16bit: OS << "index_key"; break; 1082 case ImmTyTFE: OS << "TFE"; break; 1083 case ImmTyD16: OS << "D16"; break; 1084 case ImmTyFORMAT: OS << "FORMAT"; break; 1085 case ImmTyClamp: OS << "Clamp"; break; 1086 case ImmTyOModSI: OS << "OModSI"; break; 1087 case ImmTyDPP8: OS << "DPP8"; break; 1088 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1089 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1090 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1091 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1092 case ImmTyDppFI: OS << "DppFI"; break; 1093 case ImmTySDWADstSel: OS << "SDWADstSel"; break; 1094 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break; 1095 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break; 1096 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break; 1097 case ImmTyDMask: OS << "DMask"; break; 1098 case ImmTyDim: OS << "Dim"; break; 1099 case ImmTyUNorm: OS << "UNorm"; break; 1100 case ImmTyDA: OS << "DA"; break; 1101 case ImmTyR128A16: OS << "R128A16"; break; 1102 case ImmTyA16: OS << "A16"; break; 1103 case ImmTyLWE: OS << "LWE"; break; 1104 case ImmTyOff: OS << "Off"; break; 1105 case ImmTyExpTgt: OS << "ExpTgt"; break; 1106 case ImmTyExpCompr: OS << "ExpCompr"; break; 1107 case ImmTyExpVM: OS << "ExpVM"; break; 1108 case ImmTyHwreg: OS << "Hwreg"; break; 1109 case ImmTySendMsg: OS << "SendMsg"; break; 1110 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1111 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1112 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break; 1113 case ImmTyOpSel: OS << "OpSel"; break; 1114 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1115 case ImmTyNegLo: OS << "NegLo"; break; 1116 case ImmTyNegHi: OS << "NegHi"; break; 1117 case ImmTySwizzle: OS << "Swizzle"; break; 1118 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1119 case ImmTyHigh: OS << "High"; break; 1120 case ImmTyBLGP: OS << "BLGP"; break; 1121 case ImmTyCBSZ: OS << "CBSZ"; break; 1122 case ImmTyABID: OS << "ABID"; break; 1123 case ImmTyEndpgm: OS << "Endpgm"; break; 1124 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1125 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1126 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break; 1127 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break; 1128 case ImmTyByteSel: OS << "ByteSel" ; break; 1129 } 1130 // clang-format on 1131 } 1132 1133 void print(raw_ostream &OS) const override { 1134 switch (Kind) { 1135 case Register: 1136 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1137 break; 1138 case Immediate: 1139 OS << '<' << getImm(); 1140 if (getImmTy() != ImmTyNone) { 1141 OS << " type: "; printImmTy(OS, getImmTy()); 1142 } 1143 OS << " mods: " << Imm.Mods << '>'; 1144 break; 1145 case Token: 1146 OS << '\'' << getToken() << '\''; 1147 break; 1148 case Expression: 1149 OS << "<expr " << *Expr << '>'; 1150 break; 1151 } 1152 } 1153 1154 static 
AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
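// As an illustrative example (assuming a target without AGPR/MAI support):
// for a kernel whose body is just
//   s_mov_b32 s5, 0
//   v_mov_b32 v7, 0
// the tracking below leaves .kernel.sgpr_count = 6 and .kernel.vgpr_count = 8,
// i.e. one past the highest register index referenced.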
1213 class KernelScopeInfo { 1214 int SgprIndexUnusedMin = -1; 1215 int VgprIndexUnusedMin = -1; 1216 int AgprIndexUnusedMin = -1; 1217 MCContext *Ctx = nullptr; 1218 MCSubtargetInfo const *MSTI = nullptr; 1219 1220 void usesSgprAt(int i) { 1221 if (i >= SgprIndexUnusedMin) { 1222 SgprIndexUnusedMin = ++i; 1223 if (Ctx) { 1224 MCSymbol* const Sym = 1225 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1226 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1227 } 1228 } 1229 } 1230 1231 void usesVgprAt(int i) { 1232 if (i >= VgprIndexUnusedMin) { 1233 VgprIndexUnusedMin = ++i; 1234 if (Ctx) { 1235 MCSymbol* const Sym = 1236 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1237 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1238 VgprIndexUnusedMin); 1239 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1240 } 1241 } 1242 } 1243 1244 void usesAgprAt(int i) { 1245 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1246 if (!hasMAIInsts(*MSTI)) 1247 return; 1248 1249 if (i >= AgprIndexUnusedMin) { 1250 AgprIndexUnusedMin = ++i; 1251 if (Ctx) { 1252 MCSymbol* const Sym = 1253 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1254 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1255 1256 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1257 MCSymbol* const vSym = 1258 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1259 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1260 VgprIndexUnusedMin); 1261 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1262 } 1263 } 1264 } 1265 1266 public: 1267 KernelScopeInfo() = default; 1268 1269 void initialize(MCContext &Context) { 1270 Ctx = &Context; 1271 MSTI = Ctx->getSubtargetInfo(); 1272 1273 usesSgprAt(SgprIndexUnusedMin = -1); 1274 usesVgprAt(VgprIndexUnusedMin = -1); 1275 if (hasMAIInsts(*MSTI)) { 1276 usesAgprAt(AgprIndexUnusedMin = -1); 1277 } 1278 } 1279 1280 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1281 unsigned RegWidth) { 1282 switch (RegKind) { 1283 case IS_SGPR: 1284 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1285 break; 1286 case IS_AGPR: 1287 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1288 break; 1289 case IS_VGPR: 1290 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1291 break; 1292 default: 1293 break; 1294 } 1295 } 1296 }; 1297 1298 class AMDGPUAsmParser : public MCTargetAsmParser { 1299 MCAsmParser &Parser; 1300 1301 unsigned ForcedEncodingSize = 0; 1302 bool ForcedDPP = false; 1303 bool ForcedSDWA = false; 1304 KernelScopeInfo KernelScope; 1305 1306 /// @name Auto-generated Match Functions 1307 /// { 1308 1309 #define GET_ASSEMBLER_HEADER 1310 #include "AMDGPUGenAsmMatcher.inc" 1311 1312 /// } 1313 1314 private: 1315 void createConstantSymbol(StringRef Id, int64_t Val); 1316 1317 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1318 bool OutOfRangeError(SMRange Range); 1319 /// Calculate VGPR/SGPR blocks required for given target, reserved 1320 /// registers, and user-specified NextFreeXGPR values. 1321 /// 1322 /// \param Features [in] Target features, used for bug corrections. 1323 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1324 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1325 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 
1326 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1327 /// descriptor field, if valid. 1328 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1329 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1330 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1331 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1332 /// \param VGPRBlocks [out] Result VGPR block count. 1333 /// \param SGPRBlocks [out] Result SGPR block count. 1334 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed, 1335 const MCExpr *FlatScrUsed, bool XNACKUsed, 1336 std::optional<bool> EnableWavefrontSize32, 1337 const MCExpr *NextFreeVGPR, SMRange VGPRRange, 1338 const MCExpr *NextFreeSGPR, SMRange SGPRRange, 1339 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks); 1340 bool ParseDirectiveAMDGCNTarget(); 1341 bool ParseDirectiveAMDHSACodeObjectVersion(); 1342 bool ParseDirectiveAMDHSAKernel(); 1343 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header); 1344 bool ParseDirectiveAMDKernelCodeT(); 1345 // TODO: Possibly make subtargetHasRegister const. 1346 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1347 bool ParseDirectiveAMDGPUHsaKernel(); 1348 1349 bool ParseDirectiveISAVersion(); 1350 bool ParseDirectiveHSAMetadata(); 1351 bool ParseDirectivePALMetadataBegin(); 1352 bool ParseDirectivePALMetadata(); 1353 bool ParseDirectiveAMDGPULDS(); 1354 1355 /// Common code to parse out a block of text (typically YAML) between start and 1356 /// end directives. 1357 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1358 const char *AssemblerDirectiveEnd, 1359 std::string &CollectString); 1360 1361 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1362 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1363 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1364 unsigned &RegNum, unsigned &RegWidth, 1365 bool RestoreOnFailure = false); 1366 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1367 unsigned &RegNum, unsigned &RegWidth, 1368 SmallVectorImpl<AsmToken> &Tokens); 1369 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1370 unsigned &RegWidth, 1371 SmallVectorImpl<AsmToken> &Tokens); 1372 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1373 unsigned &RegWidth, 1374 SmallVectorImpl<AsmToken> &Tokens); 1375 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1376 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1377 bool ParseRegRange(unsigned& Num, unsigned& Width); 1378 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg, 1379 unsigned RegWidth, SMLoc Loc); 1380 1381 bool isRegister(); 1382 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1383 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1384 void initializeGprCountSymbol(RegisterKind RegKind); 1385 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1386 unsigned RegWidth); 1387 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1388 bool IsAtomic); 1389 1390 public: 1391 enum OperandMode { 1392 OperandMode_Default, 1393 OperandMode_NSA, 1394 }; 1395 1396 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1397 1398 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1399 const MCInstrInfo &MII, 1400 const MCTargetOptions &Options) 1401 : MCTargetAsmParser(Options, STI, MII), 
        Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    FeatureBitset FB = getFeatureBits();
    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {
      // If there is no default wave size it must be a generation before gfx10;
      // these have FeatureWavefrontSize64 in their definition already. For
      // gfx10+, set wave32 as the default.
      copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());

    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1475 bool isGFX90A() const { 1476 return AMDGPU::isGFX90A(getSTI()); 1477 } 1478 1479 bool isGFX940() const { 1480 return AMDGPU::isGFX940(getSTI()); 1481 } 1482 1483 bool isGFX9Plus() const { 1484 return AMDGPU::isGFX9Plus(getSTI()); 1485 } 1486 1487 bool isGFX10() const { 1488 return AMDGPU::isGFX10(getSTI()); 1489 } 1490 1491 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1492 1493 bool isGFX11() const { 1494 return AMDGPU::isGFX11(getSTI()); 1495 } 1496 1497 bool isGFX11Plus() const { 1498 return AMDGPU::isGFX11Plus(getSTI()); 1499 } 1500 1501 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); } 1502 1503 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); } 1504 1505 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); } 1506 1507 bool isGFX10_BEncoding() const { 1508 return AMDGPU::isGFX10_BEncoding(getSTI()); 1509 } 1510 1511 bool hasInv2PiInlineImm() const { 1512 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1513 } 1514 1515 bool hasFlatOffsets() const { 1516 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1517 } 1518 1519 bool hasArchitectedFlatScratch() const { 1520 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1521 } 1522 1523 bool hasSGPR102_SGPR103() const { 1524 return !isVI() && !isGFX9(); 1525 } 1526 1527 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1528 1529 bool hasIntClamp() const { 1530 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1531 } 1532 1533 bool hasPartialNSAEncoding() const { 1534 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding]; 1535 } 1536 1537 unsigned getNSAMaxSize(bool HasSampler = false) const { 1538 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler); 1539 } 1540 1541 unsigned getMaxNumUserSGPRs() const { 1542 return AMDGPU::getMaxNumUserSGPRs(getSTI()); 1543 } 1544 1545 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); } 1546 1547 AMDGPUTargetStreamer &getTargetStreamer() { 1548 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1549 return static_cast<AMDGPUTargetStreamer &>(TS); 1550 } 1551 1552 const MCRegisterInfo *getMRI() const { 1553 // We need this const_cast because for some reason getContext() is not const 1554 // in MCAsmParser. 
1555 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1556 } 1557 1558 const MCInstrInfo *getMII() const { 1559 return &MII; 1560 } 1561 1562 const FeatureBitset &getFeatureBits() const { 1563 return getSTI().getFeatureBits(); 1564 } 1565 1566 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1567 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1568 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1569 1570 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1571 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1572 bool isForcedDPP() const { return ForcedDPP; } 1573 bool isForcedSDWA() const { return ForcedSDWA; } 1574 ArrayRef<unsigned> getMatchedVariants() const; 1575 StringRef getMatchedVariantName() const; 1576 1577 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1578 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1579 bool RestoreOnFailure); 1580 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; 1581 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, 1582 SMLoc &EndLoc) override; 1583 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1584 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1585 unsigned Kind) override; 1586 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1587 OperandVector &Operands, MCStreamer &Out, 1588 uint64_t &ErrorInfo, 1589 bool MatchingInlineAsm) override; 1590 bool ParseDirective(AsmToken DirectiveID) override; 1591 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic, 1592 OperandMode Mode = OperandMode_Default); 1593 StringRef parseMnemonicSuffix(StringRef Name); 1594 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1595 SMLoc NameLoc, OperandVector &Operands) override; 1596 //bool ProcessInstruction(MCInst &Inst); 1597 1598 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands); 1599 1600 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int); 1601 1602 ParseStatus 1603 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1604 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1605 std::function<bool(int64_t &)> ConvertResult = nullptr); 1606 1607 ParseStatus parseOperandArrayWithPrefix( 1608 const char *Prefix, OperandVector &Operands, 1609 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1610 bool (*ConvertResult)(int64_t &) = nullptr); 1611 1612 ParseStatus 1613 parseNamedBit(StringRef Name, OperandVector &Operands, 1614 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1615 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const; 1616 ParseStatus parseCPol(OperandVector &Operands); 1617 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope); 1618 ParseStatus parseTH(OperandVector &Operands, int64_t &TH); 1619 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value, 1620 SMLoc &StringLoc); 1621 1622 bool isModifier(); 1623 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1624 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1625 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1626 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1627 bool parseSP3NegModifier(); 1628 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false, 1629 bool HasLit = 
false); 1630 ParseStatus parseReg(OperandVector &Operands); 1631 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false, 1632 bool HasLit = false); 1633 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands, 1634 bool AllowImm = true); 1635 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands, 1636 bool AllowImm = true); 1637 ParseStatus parseRegWithFPInputMods(OperandVector &Operands); 1638 ParseStatus parseRegWithIntInputMods(OperandVector &Operands); 1639 ParseStatus parseVReg32OrOff(OperandVector &Operands); 1640 ParseStatus tryParseIndexKey(OperandVector &Operands, 1641 AMDGPUOperand::ImmTy ImmTy); 1642 ParseStatus parseIndexKey8bit(OperandVector &Operands); 1643 ParseStatus parseIndexKey16bit(OperandVector &Operands); 1644 1645 ParseStatus parseDfmtNfmt(int64_t &Format); 1646 ParseStatus parseUfmt(int64_t &Format); 1647 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, 1648 int64_t &Format); 1649 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, 1650 int64_t &Format); 1651 ParseStatus parseFORMAT(OperandVector &Operands); 1652 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format); 1653 ParseStatus parseNumericFormat(int64_t &Format); 1654 ParseStatus parseFlatOffset(OperandVector &Operands); 1655 ParseStatus parseR128A16(OperandVector &Operands); 1656 ParseStatus parseBLGP(OperandVector &Operands); 1657 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1658 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1659 1660 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1661 1662 bool parseCnt(int64_t &IntVal); 1663 ParseStatus parseSWaitCnt(OperandVector &Operands); 1664 1665 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1666 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1667 ParseStatus parseDepCtr(OperandVector &Operands); 1668 1669 bool parseDelay(int64_t &Delay); 1670 ParseStatus parseSDelayALU(OperandVector &Operands); 1671 1672 ParseStatus parseHwreg(OperandVector &Operands); 1673 1674 private: 1675 struct OperandInfoTy { 1676 SMLoc Loc; 1677 int64_t Val; 1678 bool IsSymbolic = false; 1679 bool IsDefined = false; 1680 1681 OperandInfoTy(int64_t Val) : Val(Val) {} 1682 }; 1683 1684 struct StructuredOpField : OperandInfoTy { 1685 StringLiteral Id; 1686 StringLiteral Desc; 1687 unsigned Width; 1688 bool IsDefined = false; 1689 1690 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width, 1691 int64_t Default) 1692 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {} 1693 virtual ~StructuredOpField() = default; 1694 1695 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const { 1696 Parser.Error(Loc, "invalid " + Desc + ": " + Err); 1697 return false; 1698 } 1699 1700 virtual bool validate(AMDGPUAsmParser &Parser) const { 1701 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED) 1702 return Error(Parser, "not supported on this GPU"); 1703 if (!isUIntN(Width, Val)) 1704 return Error(Parser, "only " + Twine(Width) + "-bit values are legal"); 1705 return true; 1706 } 1707 }; 1708 1709 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields); 1710 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields); 1711 1712 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1713 bool validateSendMsg(const OperandInfoTy &Msg, 1714 const OperandInfoTy &Op, 1715 const OperandInfoTy &Stream); 1716 1717 ParseStatus parseHwregFunc(OperandInfoTy 
&HwReg, OperandInfoTy &Offset, 1718 OperandInfoTy &Width); 1719 1720 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1721 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1722 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1723 1724 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1725 const OperandVector &Operands) const; 1726 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1727 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1728 SMLoc getLitLoc(const OperandVector &Operands, 1729 bool SearchMandatoryLiterals = false) const; 1730 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const; 1731 SMLoc getConstLoc(const OperandVector &Operands) const; 1732 SMLoc getInstLoc(const OperandVector &Operands) const; 1733 1734 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1735 bool validateOffset(const MCInst &Inst, const OperandVector &Operands); 1736 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1737 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1738 bool validateSOPLiteral(const MCInst &Inst) const; 1739 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1740 bool validateVOPDRegBankConstraints(const MCInst &Inst, 1741 const OperandVector &Operands); 1742 bool validateIntClampSupported(const MCInst &Inst); 1743 bool validateMIMGAtomicDMask(const MCInst &Inst); 1744 bool validateMIMGGatherDMask(const MCInst &Inst); 1745 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1746 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc); 1747 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc); 1748 bool validateMIMGD16(const MCInst &Inst); 1749 bool validateMIMGMSAA(const MCInst &Inst); 1750 bool validateOpSel(const MCInst &Inst); 1751 bool validateNeg(const MCInst &Inst, int OpName); 1752 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1753 bool validateVccOperand(unsigned Reg) const; 1754 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1755 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1756 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands); 1757 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1758 bool validateAGPRLdSt(const MCInst &Inst) const; 1759 bool validateVGPRAlign(const MCInst &Inst) const; 1760 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1761 bool validateDS(const MCInst &Inst, const OperandVector &Operands); 1762 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1763 bool validateDivScale(const MCInst &Inst); 1764 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands); 1765 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1766 const SMLoc &IDLoc); 1767 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands, 1768 const unsigned CPol); 1769 bool validateExeczVcczOperands(const OperandVector &Operands); 1770 bool validateTFE(const MCInst &Inst, const OperandVector &Operands); 1771 std::optional<StringRef> validateLdsDirect(const MCInst &Inst); 1772 unsigned getConstantBusLimit(unsigned Opcode) const; 1773 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1774 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1775 unsigned 
findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1776 1777 bool isSupportedMnemo(StringRef Mnemo, 1778 const FeatureBitset &FBS); 1779 bool isSupportedMnemo(StringRef Mnemo, 1780 const FeatureBitset &FBS, 1781 ArrayRef<unsigned> Variants); 1782 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1783 1784 bool isId(const StringRef Id) const; 1785 bool isId(const AsmToken &Token, const StringRef Id) const; 1786 bool isToken(const AsmToken::TokenKind Kind) const; 1787 StringRef getId() const; 1788 bool trySkipId(const StringRef Id); 1789 bool trySkipId(const StringRef Pref, const StringRef Id); 1790 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1791 bool trySkipToken(const AsmToken::TokenKind Kind); 1792 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1793 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1794 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1795 1796 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1797 AsmToken::TokenKind getTokenKind() const; 1798 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1799 bool parseExpr(OperandVector &Operands); 1800 StringRef getTokenStr() const; 1801 AsmToken peekToken(bool ShouldSkipSpace = true); 1802 AsmToken getToken() const; 1803 SMLoc getLoc() const; 1804 void lex(); 1805 1806 public: 1807 void onBeginOfFile() override; 1808 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override; 1809 1810 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK); 1811 1812 ParseStatus parseExpTgt(OperandVector &Operands); 1813 ParseStatus parseSendMsg(OperandVector &Operands); 1814 ParseStatus parseInterpSlot(OperandVector &Operands); 1815 ParseStatus parseInterpAttr(OperandVector &Operands); 1816 ParseStatus parseSOPPBrTarget(OperandVector &Operands); 1817 ParseStatus parseBoolReg(OperandVector &Operands); 1818 1819 bool parseSwizzleOperand(int64_t &Op, 1820 const unsigned MinVal, 1821 const unsigned MaxVal, 1822 const StringRef ErrMsg, 1823 SMLoc &Loc); 1824 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1825 const unsigned MinVal, 1826 const unsigned MaxVal, 1827 const StringRef ErrMsg); 1828 ParseStatus parseSwizzle(OperandVector &Operands); 1829 bool parseSwizzleOffset(int64_t &Imm); 1830 bool parseSwizzleMacro(int64_t &Imm); 1831 bool parseSwizzleQuadPerm(int64_t &Imm); 1832 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1833 bool parseSwizzleBroadcast(int64_t &Imm); 1834 bool parseSwizzleSwap(int64_t &Imm); 1835 bool parseSwizzleReverse(int64_t &Imm); 1836 1837 ParseStatus parseGPRIdxMode(OperandVector &Operands); 1838 int64_t parseGPRIdxMacro(); 1839 1840 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1841 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1842 1843 ParseStatus parseOModSI(OperandVector &Operands); 1844 1845 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1846 OptionalImmIndexMap &OptionalIdx); 1847 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1848 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1849 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1850 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands); 1851 1852 void cvtVOPD(MCInst &Inst, const OperandVector &Operands); 1853 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 1854 OptionalImmIndexMap &OptionalIdx); 1855 void cvtVOP3P(MCInst &Inst, const 
OperandVector &Operands, 1856 OptionalImmIndexMap &OptionalIdx); 1857 1858 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1859 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1860 1861 bool parseDimId(unsigned &Encoding); 1862 ParseStatus parseDim(OperandVector &Operands); 1863 bool convertDppBoundCtrl(int64_t &BoundCtrl); 1864 ParseStatus parseDPP8(OperandVector &Operands); 1865 ParseStatus parseDPPCtrl(OperandVector &Operands); 1866 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1867 int64_t parseDPPCtrlSel(StringRef Ctrl); 1868 int64_t parseDPPCtrlPerm(); 1869 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1870 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1871 cvtDPP(Inst, Operands, true); 1872 } 1873 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1874 bool IsDPP8 = false); 1875 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1876 cvtVOP3DPP(Inst, Operands, true); 1877 } 1878 1879 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix, 1880 AMDGPUOperand::ImmTy Type); 1881 ParseStatus parseSDWADstUnused(OperandVector &Operands); 1882 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1883 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1884 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1885 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1886 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1887 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1888 uint64_t BasicInstType, 1889 bool SkipDstVcc = false, 1890 bool SkipSrcVcc = false); 1891 1892 ParseStatus parseEndpgm(OperandVector &Operands); 1893 1894 ParseStatus parseVOPD(OperandVector &Operands); 1895 }; 1896 1897 } // end anonymous namespace 1898 1899 // May be called with integer type with equivalent bitwidth. 1900 static const fltSemantics *getFltSemantics(unsigned Size) { 1901 switch (Size) { 1902 case 4: 1903 return &APFloat::IEEEsingle(); 1904 case 8: 1905 return &APFloat::IEEEdouble(); 1906 case 2: 1907 return &APFloat::IEEEhalf(); 1908 default: 1909 llvm_unreachable("unsupported fp type"); 1910 } 1911 } 1912 1913 static const fltSemantics *getFltSemantics(MVT VT) { 1914 return getFltSemantics(VT.getSizeInBits() / 8); 1915 } 1916 1917 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1918 switch (OperandType) { 1919 // When floating-point immediate is used as operand of type i16, the 32-bit 1920 // representation of the constant truncated to the 16 LSBs should be used. 
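  // A minimal illustrative sketch of the rule above (not called by the parser;
  // the value 1.0 is just an assumed example): the literal is bitcast through
  // its 32-bit IEEE-single pattern, and an i16 operand keeps only the 16 LSBs.
  //
  //   APFloat Tmp(1.0);
  //   bool Lost;
  //   Tmp.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Lost);
  //   uint32_t Bits32 = Tmp.bitcastToAPInt().getZExtValue(); // 0x3F800000
  //   uint16_t Bits16 = Bits32 & 0xFFFF;                     // truncated LSBs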
1921 case AMDGPU::OPERAND_REG_IMM_INT16: 1922 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1923 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1924 case AMDGPU::OPERAND_REG_IMM_INT32: 1925 case AMDGPU::OPERAND_REG_IMM_FP32: 1926 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1927 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1928 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1929 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1930 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1931 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1932 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1933 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1934 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1935 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1936 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1937 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1938 case AMDGPU::OPERAND_KIMM32: 1939 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 1940 return &APFloat::IEEEsingle(); 1941 case AMDGPU::OPERAND_REG_IMM_INT64: 1942 case AMDGPU::OPERAND_REG_IMM_FP64: 1943 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1944 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1945 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1946 return &APFloat::IEEEdouble(); 1947 case AMDGPU::OPERAND_REG_IMM_FP16: 1948 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1949 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1950 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1951 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1952 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1953 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1954 case AMDGPU::OPERAND_KIMM16: 1955 return &APFloat::IEEEhalf(); 1956 case AMDGPU::OPERAND_REG_IMM_BF16: 1957 case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: 1958 case AMDGPU::OPERAND_REG_INLINE_C_BF16: 1959 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: 1960 case AMDGPU::OPERAND_REG_INLINE_AC_BF16: 1961 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: 1962 case AMDGPU::OPERAND_REG_IMM_V2BF16: 1963 return &APFloat::BFloat(); 1964 default: 1965 llvm_unreachable("unsupported fp type"); 1966 } 1967 } 1968 1969 //===----------------------------------------------------------------------===// 1970 // Operand 1971 //===----------------------------------------------------------------------===// 1972 1973 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1974 bool Lost; 1975 1976 // Convert literal to single precision 1977 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1978 APFloat::rmNearestTiesToEven, 1979 &Lost); 1980 // We allow precision lost but not overflow or underflow 1981 if (Status != APFloat::opOK && 1982 Lost && 1983 ((Status & APFloat::opOverflow) != 0 || 1984 (Status & APFloat::opUnderflow) != 0)) { 1985 return false; 1986 } 1987 1988 return true; 1989 } 1990 1991 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1992 return isUIntN(Size, Val) || isIntN(Size, Val); 1993 } 1994 1995 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1996 if (VT.getScalarType() == MVT::i16) 1997 return isInlinableLiteral32(Val, HasInv2Pi); 1998 1999 if (VT.getScalarType() == MVT::f16) 2000 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi); 2001 2002 assert(VT.getScalarType() == MVT::bf16); 2003 2004 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi); 2005 } 2006 2007 bool AMDGPUOperand::isInlinableImm(MVT type) const { 2008 2009 // This is a hack to enable named inline values like 2010 // shared_base with both 32-bit and 64-bit operands. 2011 // Note that these values are defined as 2012 // 32-bit operands only. 
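  // Aside: a hedged sketch of the canLosslesslyConvertToFPType() check defined
  // above (illustrative only; the literals 0.1 and 1.0e10 are assumed
  // examples). A mere loss of precision is tolerated, overflow/underflow is
  // not:
  //
  //   bool Lost;
  //   APFloat F(0.1);
  //   APFloat::opStatus S =
  //       F.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Lost);
  //   // S == APFloat::opInexact -> still acceptable as a literal.
  //   APFloat G(1.0e10);
  //   S = G.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Lost);
  //   // S has APFloat::opOverflow set -> rejected.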
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      bool Lost = false;
      switch (type.getScalarType().SimpleTy) {
      default:
        llvm_unreachable("unknown 16-bit type");
      case MVT::bf16:
        FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
                          &Lost);
        break;
      case MVT::f16:
        FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
                          &Lost);
        break;
      case MVT::i16:
        FPLiteral.convert(APFloatBase::IEEEsingle(),
                          APFloat::rmNearestTiesToEven, &Lost);
        break;
      }
      // We need to use the 32-bit representation here because when a
      // floating-point inline constant is used as an i16 operand, its 32-bit
      // representation will be used. We need the 32-bit value to check if
      // it is an FP inline constant.
      uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      return isInlineableLiteralOp16(ImmVal, type,
                                     AsmParser->hasInv2PiInlineImm());
    }

    // Check if the single-precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
  }

  // We got an int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got an int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
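    // A brief illustration of the isSafeTruncation() helper used below (hedged
    // sketch, not executed here): values representable in either the unsigned
    // or the signed N-bit range are accepted.
    //
    //   isSafeTruncation(0xFFFF, 16);   // true  - fits as a 16-bit unsigned value
    //   isSafeTruncation(-1, 16);       // true  - fits as a 16-bit signed value
    //   isSafeTruncation(0x1FFFF, 16);  // false - needs 17 bits either way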
2113 return isSafeTruncation(Imm.Val, Size); 2114 } 2115 2116 // We got fp literal token 2117 if (type == MVT::f64) { // Expected 64-bit fp operand 2118 // We would set low 64-bits of literal to zeroes but we accept this literals 2119 return true; 2120 } 2121 2122 if (type == MVT::i64) { // Expected 64-bit int operand 2123 // We don't allow fp literals in 64-bit integer instructions. It is 2124 // unclear how we should encode them. 2125 return false; 2126 } 2127 2128 // We allow fp literals with f16x2 operands assuming that the specified 2129 // literal goes into the lower half and the upper half is zero. We also 2130 // require that the literal may be losslessly converted to f16. 2131 // 2132 // For i16x2 operands, we assume that the specified literal is encoded as a 2133 // single-precision float. This is pretty odd, but it matches SP3 and what 2134 // happens in hardware. 2135 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 2136 : (type == MVT::v2i16) ? MVT::f32 2137 : (type == MVT::v2f32) ? MVT::f32 2138 : type; 2139 2140 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2141 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2142 } 2143 2144 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2145 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2146 } 2147 2148 bool AMDGPUOperand::isVRegWithInputMods() const { 2149 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2150 // GFX90A allows DPP on 64-bit operands. 2151 (isRegClass(AMDGPU::VReg_64RegClassID) && 2152 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]); 2153 } 2154 2155 template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const { 2156 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID 2157 : AMDGPU::VGPR_16_Lo128RegClassID); 2158 } 2159 2160 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2161 if (AsmParser->isVI()) 2162 return isVReg32(); 2163 if (AsmParser->isGFX9Plus()) 2164 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2165 return false; 2166 } 2167 2168 bool AMDGPUOperand::isSDWAFP16Operand() const { 2169 return isSDWAOperand(MVT::f16); 2170 } 2171 2172 bool AMDGPUOperand::isSDWAFP32Operand() const { 2173 return isSDWAOperand(MVT::f32); 2174 } 2175 2176 bool AMDGPUOperand::isSDWAInt16Operand() const { 2177 return isSDWAOperand(MVT::i16); 2178 } 2179 2180 bool AMDGPUOperand::isSDWAInt32Operand() const { 2181 return isSDWAOperand(MVT::i32); 2182 } 2183 2184 bool AMDGPUOperand::isBoolReg() const { 2185 auto FB = AsmParser->getFeatureBits(); 2186 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) || 2187 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32())); 2188 } 2189 2190 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2191 { 2192 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2193 assert(Size == 2 || Size == 4 || Size == 8); 2194 2195 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2196 2197 if (Imm.Mods.Abs) { 2198 Val &= ~FpSignMask; 2199 } 2200 if (Imm.Mods.Neg) { 2201 Val ^= FpSignMask; 2202 } 2203 2204 return Val; 2205 } 2206 2207 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2208 if (isExpr()) { 2209 Inst.addOperand(MCOperand::createExpr(Expr)); 2210 return; 2211 } 2212 2213 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2214 Inst.getNumOperands())) { 2215 addLiteralImmOperand(Inst, Imm.Val, 2216 ApplyModifiers & 2217 isImmTy(ImmTyNone) && 
                           Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
    setImmKindNone();
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
              "Can't encode literal as exact 64-bit floating-point operand. "
              "Low 32-bits will be set to zero");
          Val &= 0xffffffff00000000u;
        }

        Inst.addOperand(MCOperand::createImm(Val));
        setImmKindLiteral();
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_BF16:
    case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_BF16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
    case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
    case AMDGPU::OPERAND_REG_IMM_V2BF16:
      if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
        // This is the 1/(2*pi) value, which would be truncated to bf16 with a
        // loss of precision. The constant represents the idiomatic fp32 value
        // of 1/(2*pi) = 0.15915494, since bf16 is in effect fp32 with the low
        // 16 bits cleared. Prevent rounding below.
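        // Bit-level sketch of this special case (illustrative arithmetic only;
        // the local names are not part of the parser):
        //
        //   uint64_t DblBits  = 0x3fc45f306725feedull; // ~0.15915494 as fp64 bits
        //   uint32_t Fp32Bits = 0x3e22f983;            // same value rounded to fp32
        //   uint16_t Bf16Bits = uint16_t(Fp32Bits >> 16); // == 0x3e22, emitted below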
        Inst.addOperand(MCOperand::createImm(0x3e22));
        setImmKindLiteral();
        return;
      }
      [[fallthrough]];

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32:
    case AMDGPU::OPERAND_KIMM32:
    case AMDGPU::OPERAND_KIMM16:
    case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the operand's floating-point semantics
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
        setImmKindMandatoryLiteral();
      } else {
        setImmKindLiteral();
      }
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got an int literal token.
  // Only sign extend inline immediates.
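  // For example (hedged sketch; the values are assumed, not taken from real
  // input): with a 32-bit operand an inline constant such as -1 is emitted as
  // the signed value, while a non-inlinable literal such as -200 is emitted as
  // its zero-masked low 32 bits:
  //
  //   int64_t A = -1;    // inlinable  -> createImm(-1)
  //   int64_t B = -200;  // not inline -> createImm(B & 0xffffffff) == 0xFFFFFF38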
2345 switch (OpTy) { 2346 case AMDGPU::OPERAND_REG_IMM_INT32: 2347 case AMDGPU::OPERAND_REG_IMM_FP32: 2348 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2349 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2350 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2351 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2352 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2353 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2354 case AMDGPU::OPERAND_REG_IMM_V2BF16: 2355 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2356 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2357 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2358 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2359 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2360 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 2361 if (isSafeTruncation(Val, 32) && 2362 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2363 AsmParser->hasInv2PiInlineImm())) { 2364 Inst.addOperand(MCOperand::createImm(Val)); 2365 setImmKindConst(); 2366 return; 2367 } 2368 2369 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2370 setImmKindLiteral(); 2371 return; 2372 2373 case AMDGPU::OPERAND_REG_IMM_INT64: 2374 case AMDGPU::OPERAND_REG_IMM_FP64: 2375 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2376 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2377 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2378 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2379 Inst.addOperand(MCOperand::createImm(Val)); 2380 setImmKindConst(); 2381 return; 2382 } 2383 2384 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32 2385 : Lo_32(Val); 2386 2387 Inst.addOperand(MCOperand::createImm(Val)); 2388 setImmKindLiteral(); 2389 return; 2390 2391 case AMDGPU::OPERAND_REG_IMM_INT16: 2392 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2393 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2394 if (isSafeTruncation(Val, 16) && 2395 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) { 2396 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2397 setImmKindConst(); 2398 return; 2399 } 2400 2401 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2402 setImmKindLiteral(); 2403 return; 2404 2405 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2406 case AMDGPU::OPERAND_REG_IMM_FP16: 2407 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2408 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2409 if (isSafeTruncation(Val, 16) && 2410 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val), 2411 AsmParser->hasInv2PiInlineImm())) { 2412 Inst.addOperand(MCOperand::createImm(Val)); 2413 setImmKindConst(); 2414 return; 2415 } 2416 2417 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2418 setImmKindLiteral(); 2419 return; 2420 2421 case AMDGPU::OPERAND_REG_IMM_BF16: 2422 case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: 2423 case AMDGPU::OPERAND_REG_INLINE_C_BF16: 2424 case AMDGPU::OPERAND_REG_INLINE_AC_BF16: 2425 if (isSafeTruncation(Val, 16) && 2426 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val), 2427 AsmParser->hasInv2PiInlineImm())) { 2428 Inst.addOperand(MCOperand::createImm(Val)); 2429 setImmKindConst(); 2430 return; 2431 } 2432 2433 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2434 setImmKindLiteral(); 2435 return; 2436 2437 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2438 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: { 2439 assert(isSafeTruncation(Val, 16)); 2440 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))); 2441 Inst.addOperand(MCOperand::createImm(Val)); 2442 return; 2443 } 2444 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2445 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2446 assert(isSafeTruncation(Val, 16)); 2447 
assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val), 2448 AsmParser->hasInv2PiInlineImm())); 2449 2450 Inst.addOperand(MCOperand::createImm(Val)); 2451 return; 2452 } 2453 2454 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: 2455 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: { 2456 assert(isSafeTruncation(Val, 16)); 2457 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val), 2458 AsmParser->hasInv2PiInlineImm())); 2459 2460 Inst.addOperand(MCOperand::createImm(Val)); 2461 return; 2462 } 2463 2464 case AMDGPU::OPERAND_KIMM32: 2465 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2466 setImmKindMandatoryLiteral(); 2467 return; 2468 case AMDGPU::OPERAND_KIMM16: 2469 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2470 setImmKindMandatoryLiteral(); 2471 return; 2472 default: 2473 llvm_unreachable("invalid operand size"); 2474 } 2475 } 2476 2477 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2478 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2479 } 2480 2481 bool AMDGPUOperand::isInlineValue() const { 2482 return isRegKind() && ::isInlineValue(getReg()); 2483 } 2484 2485 //===----------------------------------------------------------------------===// 2486 // AsmParser 2487 //===----------------------------------------------------------------------===// 2488 2489 void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) { 2490 // TODO: make those pre-defined variables read-only. 2491 // Currently there is none suitable machinery in the core llvm-mc for this. 2492 // MCSymbol::isRedefinable is intended for another purpose, and 2493 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 2494 MCContext &Ctx = getContext(); 2495 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id); 2496 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx)); 2497 } 2498 2499 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2500 if (Is == IS_VGPR) { 2501 switch (RegWidth) { 2502 default: return -1; 2503 case 32: 2504 return AMDGPU::VGPR_32RegClassID; 2505 case 64: 2506 return AMDGPU::VReg_64RegClassID; 2507 case 96: 2508 return AMDGPU::VReg_96RegClassID; 2509 case 128: 2510 return AMDGPU::VReg_128RegClassID; 2511 case 160: 2512 return AMDGPU::VReg_160RegClassID; 2513 case 192: 2514 return AMDGPU::VReg_192RegClassID; 2515 case 224: 2516 return AMDGPU::VReg_224RegClassID; 2517 case 256: 2518 return AMDGPU::VReg_256RegClassID; 2519 case 288: 2520 return AMDGPU::VReg_288RegClassID; 2521 case 320: 2522 return AMDGPU::VReg_320RegClassID; 2523 case 352: 2524 return AMDGPU::VReg_352RegClassID; 2525 case 384: 2526 return AMDGPU::VReg_384RegClassID; 2527 case 512: 2528 return AMDGPU::VReg_512RegClassID; 2529 case 1024: 2530 return AMDGPU::VReg_1024RegClassID; 2531 } 2532 } else if (Is == IS_TTMP) { 2533 switch (RegWidth) { 2534 default: return -1; 2535 case 32: 2536 return AMDGPU::TTMP_32RegClassID; 2537 case 64: 2538 return AMDGPU::TTMP_64RegClassID; 2539 case 128: 2540 return AMDGPU::TTMP_128RegClassID; 2541 case 256: 2542 return AMDGPU::TTMP_256RegClassID; 2543 case 512: 2544 return AMDGPU::TTMP_512RegClassID; 2545 } 2546 } else if (Is == IS_SGPR) { 2547 switch (RegWidth) { 2548 default: return -1; 2549 case 32: 2550 return AMDGPU::SGPR_32RegClassID; 2551 case 64: 2552 return AMDGPU::SGPR_64RegClassID; 2553 case 96: 2554 return AMDGPU::SGPR_96RegClassID; 2555 case 128: 2556 return AMDGPU::SGPR_128RegClassID; 2557 case 160: 2558 return 
AMDGPU::SGPR_160RegClassID; 2559 case 192: 2560 return AMDGPU::SGPR_192RegClassID; 2561 case 224: 2562 return AMDGPU::SGPR_224RegClassID; 2563 case 256: 2564 return AMDGPU::SGPR_256RegClassID; 2565 case 288: 2566 return AMDGPU::SGPR_288RegClassID; 2567 case 320: 2568 return AMDGPU::SGPR_320RegClassID; 2569 case 352: 2570 return AMDGPU::SGPR_352RegClassID; 2571 case 384: 2572 return AMDGPU::SGPR_384RegClassID; 2573 case 512: 2574 return AMDGPU::SGPR_512RegClassID; 2575 } 2576 } else if (Is == IS_AGPR) { 2577 switch (RegWidth) { 2578 default: return -1; 2579 case 32: 2580 return AMDGPU::AGPR_32RegClassID; 2581 case 64: 2582 return AMDGPU::AReg_64RegClassID; 2583 case 96: 2584 return AMDGPU::AReg_96RegClassID; 2585 case 128: 2586 return AMDGPU::AReg_128RegClassID; 2587 case 160: 2588 return AMDGPU::AReg_160RegClassID; 2589 case 192: 2590 return AMDGPU::AReg_192RegClassID; 2591 case 224: 2592 return AMDGPU::AReg_224RegClassID; 2593 case 256: 2594 return AMDGPU::AReg_256RegClassID; 2595 case 288: 2596 return AMDGPU::AReg_288RegClassID; 2597 case 320: 2598 return AMDGPU::AReg_320RegClassID; 2599 case 352: 2600 return AMDGPU::AReg_352RegClassID; 2601 case 384: 2602 return AMDGPU::AReg_384RegClassID; 2603 case 512: 2604 return AMDGPU::AReg_512RegClassID; 2605 case 1024: 2606 return AMDGPU::AReg_1024RegClassID; 2607 } 2608 } 2609 return -1; 2610 } 2611 2612 static unsigned getSpecialRegForName(StringRef RegName) { 2613 return StringSwitch<unsigned>(RegName) 2614 .Case("exec", AMDGPU::EXEC) 2615 .Case("vcc", AMDGPU::VCC) 2616 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2617 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2618 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2619 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2620 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2621 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2622 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2623 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2624 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2625 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2626 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2627 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2628 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2629 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2630 .Case("m0", AMDGPU::M0) 2631 .Case("vccz", AMDGPU::SRC_VCCZ) 2632 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2633 .Case("execz", AMDGPU::SRC_EXECZ) 2634 .Case("src_execz", AMDGPU::SRC_EXECZ) 2635 .Case("scc", AMDGPU::SRC_SCC) 2636 .Case("src_scc", AMDGPU::SRC_SCC) 2637 .Case("tba", AMDGPU::TBA) 2638 .Case("tma", AMDGPU::TMA) 2639 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2640 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2641 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2642 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2643 .Case("vcc_lo", AMDGPU::VCC_LO) 2644 .Case("vcc_hi", AMDGPU::VCC_HI) 2645 .Case("exec_lo", AMDGPU::EXEC_LO) 2646 .Case("exec_hi", AMDGPU::EXEC_HI) 2647 .Case("tma_lo", AMDGPU::TMA_LO) 2648 .Case("tma_hi", AMDGPU::TMA_HI) 2649 .Case("tba_lo", AMDGPU::TBA_LO) 2650 .Case("tba_hi", AMDGPU::TBA_HI) 2651 .Case("pc", AMDGPU::PC_REG) 2652 .Case("null", AMDGPU::SGPR_NULL) 2653 .Default(AMDGPU::NoRegister); 2654 } 2655 2656 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, 2657 SMLoc &EndLoc, bool RestoreOnFailure) { 2658 auto R = parseRegister(); 2659 if (!R) return true; 2660 assert(R->isReg()); 2661 RegNo = R->getReg(); 2662 StartLoc = R->getStartLoc(); 2663 EndLoc = R->getEndLoc(); 2664 
return false; 2665 } 2666 2667 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, 2668 SMLoc &EndLoc) { 2669 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2670 } 2671 2672 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, 2673 SMLoc &EndLoc) { 2674 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2675 bool PendingErrors = getParser().hasPendingError(); 2676 getParser().clearPendingErrors(); 2677 if (PendingErrors) 2678 return ParseStatus::Failure; 2679 if (Result) 2680 return ParseStatus::NoMatch; 2681 return ParseStatus::Success; 2682 } 2683 2684 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2685 RegisterKind RegKind, unsigned Reg1, 2686 SMLoc Loc) { 2687 switch (RegKind) { 2688 case IS_SPECIAL: 2689 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2690 Reg = AMDGPU::EXEC; 2691 RegWidth = 64; 2692 return true; 2693 } 2694 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2695 Reg = AMDGPU::FLAT_SCR; 2696 RegWidth = 64; 2697 return true; 2698 } 2699 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2700 Reg = AMDGPU::XNACK_MASK; 2701 RegWidth = 64; 2702 return true; 2703 } 2704 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2705 Reg = AMDGPU::VCC; 2706 RegWidth = 64; 2707 return true; 2708 } 2709 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2710 Reg = AMDGPU::TBA; 2711 RegWidth = 64; 2712 return true; 2713 } 2714 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2715 Reg = AMDGPU::TMA; 2716 RegWidth = 64; 2717 return true; 2718 } 2719 Error(Loc, "register does not fit in the list"); 2720 return false; 2721 case IS_VGPR: 2722 case IS_SGPR: 2723 case IS_AGPR: 2724 case IS_TTMP: 2725 if (Reg1 != Reg + RegWidth / 32) { 2726 Error(Loc, "registers in a list must have consecutive indices"); 2727 return false; 2728 } 2729 RegWidth += 32; 2730 return true; 2731 default: 2732 llvm_unreachable("unexpected register kind"); 2733 } 2734 } 2735 2736 struct RegInfo { 2737 StringLiteral Name; 2738 RegisterKind Kind; 2739 }; 2740 2741 static constexpr RegInfo RegularRegisters[] = { 2742 {{"v"}, IS_VGPR}, 2743 {{"s"}, IS_SGPR}, 2744 {{"ttmp"}, IS_TTMP}, 2745 {{"acc"}, IS_AGPR}, 2746 {{"a"}, IS_AGPR}, 2747 }; 2748 2749 static bool isRegularReg(RegisterKind Kind) { 2750 return Kind == IS_VGPR || 2751 Kind == IS_SGPR || 2752 Kind == IS_TTMP || 2753 Kind == IS_AGPR; 2754 } 2755 2756 static const RegInfo* getRegularRegInfo(StringRef Str) { 2757 for (const RegInfo &Reg : RegularRegisters) 2758 if (Str.starts_with(Reg.Name)) 2759 return &Reg; 2760 return nullptr; 2761 } 2762 2763 static bool getRegNum(StringRef Str, unsigned& Num) { 2764 return !Str.getAsInteger(10, Num); 2765 } 2766 2767 bool 2768 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2769 const AsmToken &NextToken) const { 2770 2771 // A list of consecutive registers: [s0,s1,s2,s3] 2772 if (Token.is(AsmToken::LBrac)) 2773 return true; 2774 2775 if (!Token.is(AsmToken::Identifier)) 2776 return false; 2777 2778 // A single register like s0 or a range of registers like s[0:1] 2779 2780 StringRef Str = Token.getString(); 2781 const RegInfo *Reg = getRegularRegInfo(Str); 2782 if (Reg) { 2783 StringRef RegName = Reg->Name; 2784 StringRef RegSuffix = Str.substr(RegName.size()); 2785 if (!RegSuffix.empty()) { 2786 RegSuffix.consume_back(".l"); 2787 RegSuffix.consume_back(".h"); 2788 unsigned Num; 2789 // A single register with an index: rXX 2790 if 
(getRegNum(RegSuffix, Num)) 2791 return true; 2792 } else { 2793 // A range of registers: r[XX:YY]. 2794 if (NextToken.is(AsmToken::LBrac)) 2795 return true; 2796 } 2797 } 2798 2799 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2800 } 2801 2802 bool 2803 AMDGPUAsmParser::isRegister() 2804 { 2805 return isRegister(getToken(), peekToken()); 2806 } 2807 2808 unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum, 2809 unsigned SubReg, unsigned RegWidth, 2810 SMLoc Loc) { 2811 assert(isRegularReg(RegKind)); 2812 2813 unsigned AlignSize = 1; 2814 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2815 // SGPR and TTMP registers must be aligned. 2816 // Max required alignment is 4 dwords. 2817 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u); 2818 } 2819 2820 if (RegNum % AlignSize != 0) { 2821 Error(Loc, "invalid register alignment"); 2822 return AMDGPU::NoRegister; 2823 } 2824 2825 unsigned RegIdx = RegNum / AlignSize; 2826 int RCID = getRegClass(RegKind, RegWidth); 2827 if (RCID == -1) { 2828 Error(Loc, "invalid or unsupported register size"); 2829 return AMDGPU::NoRegister; 2830 } 2831 2832 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2833 const MCRegisterClass RC = TRI->getRegClass(RCID); 2834 if (RegIdx >= RC.getNumRegs()) { 2835 Error(Loc, "register index is out of range"); 2836 return AMDGPU::NoRegister; 2837 } 2838 2839 unsigned Reg = RC.getRegister(RegIdx); 2840 2841 if (SubReg) { 2842 Reg = TRI->getSubReg(Reg, SubReg); 2843 2844 // Currently all regular registers have their .l and .h subregisters, so 2845 // we should never need to generate an error here. 2846 assert(Reg && "Invalid subregister!"); 2847 } 2848 2849 return Reg; 2850 } 2851 2852 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2853 int64_t RegLo, RegHi; 2854 if (!skipToken(AsmToken::LBrac, "missing register index")) 2855 return false; 2856 2857 SMLoc FirstIdxLoc = getLoc(); 2858 SMLoc SecondIdxLoc; 2859 2860 if (!parseExpr(RegLo)) 2861 return false; 2862 2863 if (trySkipToken(AsmToken::Colon)) { 2864 SecondIdxLoc = getLoc(); 2865 if (!parseExpr(RegHi)) 2866 return false; 2867 } else { 2868 RegHi = RegLo; 2869 } 2870 2871 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2872 return false; 2873 2874 if (!isUInt<32>(RegLo)) { 2875 Error(FirstIdxLoc, "invalid register index"); 2876 return false; 2877 } 2878 2879 if (!isUInt<32>(RegHi)) { 2880 Error(SecondIdxLoc, "invalid register index"); 2881 return false; 2882 } 2883 2884 if (RegLo > RegHi) { 2885 Error(FirstIdxLoc, "first register index should not exceed second index"); 2886 return false; 2887 } 2888 2889 Num = static_cast<unsigned>(RegLo); 2890 RegWidth = 32 * ((RegHi - RegLo) + 1); 2891 return true; 2892 } 2893 2894 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2895 unsigned &RegNum, unsigned &RegWidth, 2896 SmallVectorImpl<AsmToken> &Tokens) { 2897 assert(isToken(AsmToken::Identifier)); 2898 unsigned Reg = getSpecialRegForName(getTokenStr()); 2899 if (Reg) { 2900 RegNum = 0; 2901 RegWidth = 32; 2902 RegKind = IS_SPECIAL; 2903 Tokens.push_back(getToken()); 2904 lex(); // skip register name 2905 } 2906 return Reg; 2907 } 2908 2909 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2910 unsigned &RegNum, unsigned &RegWidth, 2911 SmallVectorImpl<AsmToken> &Tokens) { 2912 assert(isToken(AsmToken::Identifier)); 2913 StringRef RegName = getTokenStr(); 2914 auto Loc = getLoc(); 2915 2916 const RegInfo *RI = getRegularRegInfo(RegName); 2917 if (!RI) 
{ 2918 Error(Loc, "invalid register name"); 2919 return AMDGPU::NoRegister; 2920 } 2921 2922 Tokens.push_back(getToken()); 2923 lex(); // skip register name 2924 2925 RegKind = RI->Kind; 2926 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2927 unsigned SubReg = NoSubRegister; 2928 if (!RegSuffix.empty()) { 2929 // We don't know the opcode till we are done parsing, so we don't know if 2930 // registers should be 16 or 32 bit. It is therefore mandatory to put .l or 2931 // .h to correctly specify 16 bit registers. We also can't determine class 2932 // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16. 2933 if (RegSuffix.consume_back(".l")) 2934 SubReg = AMDGPU::lo16; 2935 else if (RegSuffix.consume_back(".h")) 2936 SubReg = AMDGPU::hi16; 2937 2938 // Single 32-bit register: vXX. 2939 if (!getRegNum(RegSuffix, RegNum)) { 2940 Error(Loc, "invalid register index"); 2941 return AMDGPU::NoRegister; 2942 } 2943 RegWidth = 32; 2944 } else { 2945 // Range of registers: v[XX:YY]. ":YY" is optional. 2946 if (!ParseRegRange(RegNum, RegWidth)) 2947 return AMDGPU::NoRegister; 2948 } 2949 2950 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc); 2951 } 2952 2953 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2954 unsigned &RegWidth, 2955 SmallVectorImpl<AsmToken> &Tokens) { 2956 unsigned Reg = AMDGPU::NoRegister; 2957 auto ListLoc = getLoc(); 2958 2959 if (!skipToken(AsmToken::LBrac, 2960 "expected a register or a list of registers")) { 2961 return AMDGPU::NoRegister; 2962 } 2963 2964 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2965 2966 auto Loc = getLoc(); 2967 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2968 return AMDGPU::NoRegister; 2969 if (RegWidth != 32) { 2970 Error(Loc, "expected a single 32-bit register"); 2971 return AMDGPU::NoRegister; 2972 } 2973 2974 for (; trySkipToken(AsmToken::Comma); ) { 2975 RegisterKind NextRegKind; 2976 unsigned NextReg, NextRegNum, NextRegWidth; 2977 Loc = getLoc(); 2978 2979 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2980 NextRegNum, NextRegWidth, 2981 Tokens)) { 2982 return AMDGPU::NoRegister; 2983 } 2984 if (NextRegWidth != 32) { 2985 Error(Loc, "expected a single 32-bit register"); 2986 return AMDGPU::NoRegister; 2987 } 2988 if (NextRegKind != RegKind) { 2989 Error(Loc, "registers in a list must be of the same kind"); 2990 return AMDGPU::NoRegister; 2991 } 2992 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2993 return AMDGPU::NoRegister; 2994 } 2995 2996 if (!skipToken(AsmToken::RBrac, 2997 "expected a comma or a closing square bracket")) { 2998 return AMDGPU::NoRegister; 2999 } 3000 3001 if (isRegularReg(RegKind)) 3002 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc); 3003 3004 return Reg; 3005 } 3006 3007 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 3008 unsigned &RegNum, unsigned &RegWidth, 3009 SmallVectorImpl<AsmToken> &Tokens) { 3010 auto Loc = getLoc(); 3011 Reg = AMDGPU::NoRegister; 3012 3013 if (isToken(AsmToken::Identifier)) { 3014 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 3015 if (Reg == AMDGPU::NoRegister) 3016 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 3017 } else { 3018 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 3019 } 3020 3021 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3022 if (Reg == AMDGPU::NoRegister) { 3023 assert(Parser.hasPendingError()); 3024 return false; 3025 } 3026 3027 if (!subtargetHasRegister(*TRI, Reg)) { 3028 if 
(Reg == AMDGPU::SGPR_NULL) { 3029 Error(Loc, "'null' operand is not supported on this GPU"); 3030 } else { 3031 Error(Loc, "register not available on this GPU"); 3032 } 3033 return false; 3034 } 3035 3036 return true; 3037 } 3038 3039 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 3040 unsigned &RegNum, unsigned &RegWidth, 3041 bool RestoreOnFailure /*=false*/) { 3042 Reg = AMDGPU::NoRegister; 3043 3044 SmallVector<AsmToken, 1> Tokens; 3045 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 3046 if (RestoreOnFailure) { 3047 while (!Tokens.empty()) { 3048 getLexer().UnLex(Tokens.pop_back_val()); 3049 } 3050 } 3051 return true; 3052 } 3053 return false; 3054 } 3055 3056 std::optional<StringRef> 3057 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 3058 switch (RegKind) { 3059 case IS_VGPR: 3060 return StringRef(".amdgcn.next_free_vgpr"); 3061 case IS_SGPR: 3062 return StringRef(".amdgcn.next_free_sgpr"); 3063 default: 3064 return std::nullopt; 3065 } 3066 } 3067 3068 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 3069 auto SymbolName = getGprCountSymbolName(RegKind); 3070 assert(SymbolName && "initializing invalid register kind"); 3071 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 3072 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 3073 } 3074 3075 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 3076 unsigned DwordRegIndex, 3077 unsigned RegWidth) { 3078 // Symbols are only defined for GCN targets 3079 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 3080 return true; 3081 3082 auto SymbolName = getGprCountSymbolName(RegKind); 3083 if (!SymbolName) 3084 return true; 3085 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 3086 3087 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 3088 int64_t OldCount; 3089 3090 if (!Sym->isVariable()) 3091 return !Error(getLoc(), 3092 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 3093 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 3094 return !Error( 3095 getLoc(), 3096 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 3097 3098 if (OldCount <= NewMax) 3099 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 3100 3101 return true; 3102 } 3103 3104 std::unique_ptr<AMDGPUOperand> 3105 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 3106 const auto &Tok = getToken(); 3107 SMLoc StartLoc = Tok.getLoc(); 3108 SMLoc EndLoc = Tok.getEndLoc(); 3109 RegisterKind RegKind; 3110 unsigned Reg, RegNum, RegWidth; 3111 3112 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 3113 return nullptr; 3114 } 3115 if (isHsaAbi(getSTI())) { 3116 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 3117 return nullptr; 3118 } else 3119 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 3120 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 3121 } 3122 3123 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands, 3124 bool HasSP3AbsModifier, bool HasLit) { 3125 // TODO: add syntactic sugar for 1/(2*PI) 3126 3127 if (isRegister()) 3128 return ParseStatus::NoMatch; 3129 assert(!isModifier()); 3130 3131 if (!HasLit) { 3132 HasLit = trySkipId("lit"); 3133 if (HasLit) { 3134 if (!skipToken(AsmToken::LParen, "expected left paren after lit")) 3135 return ParseStatus::Failure; 3136 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit); 3137 if (S.isSuccess() && 3138 !skipToken(AsmToken::RParen, "expected closing 
parentheses")) 3139 return ParseStatus::Failure; 3140 return S; 3141 } 3142 } 3143 3144 const auto& Tok = getToken(); 3145 const auto& NextTok = peekToken(); 3146 bool IsReal = Tok.is(AsmToken::Real); 3147 SMLoc S = getLoc(); 3148 bool Negate = false; 3149 3150 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 3151 lex(); 3152 IsReal = true; 3153 Negate = true; 3154 } 3155 3156 AMDGPUOperand::Modifiers Mods; 3157 Mods.Lit = HasLit; 3158 3159 if (IsReal) { 3160 // Floating-point expressions are not supported. 3161 // Can only allow floating-point literals with an 3162 // optional sign. 3163 3164 StringRef Num = getTokenStr(); 3165 lex(); 3166 3167 APFloat RealVal(APFloat::IEEEdouble()); 3168 auto roundMode = APFloat::rmNearestTiesToEven; 3169 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) 3170 return ParseStatus::Failure; 3171 if (Negate) 3172 RealVal.changeSign(); 3173 3174 Operands.push_back( 3175 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 3176 AMDGPUOperand::ImmTyNone, true)); 3177 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3178 Op.setModifiers(Mods); 3179 3180 return ParseStatus::Success; 3181 3182 } else { 3183 int64_t IntVal; 3184 const MCExpr *Expr; 3185 SMLoc S = getLoc(); 3186 3187 if (HasSP3AbsModifier) { 3188 // This is a workaround for handling expressions 3189 // as arguments of SP3 'abs' modifier, for example: 3190 // |1.0| 3191 // |-1| 3192 // |1+x| 3193 // This syntax is not compatible with syntax of standard 3194 // MC expressions (due to the trailing '|'). 3195 SMLoc EndLoc; 3196 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 3197 return ParseStatus::Failure; 3198 } else { 3199 if (Parser.parseExpression(Expr)) 3200 return ParseStatus::Failure; 3201 } 3202 3203 if (Expr->evaluateAsAbsolute(IntVal)) { 3204 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 3205 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3206 Op.setModifiers(Mods); 3207 } else { 3208 if (HasLit) 3209 return ParseStatus::NoMatch; 3210 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3211 } 3212 3213 return ParseStatus::Success; 3214 } 3215 3216 return ParseStatus::NoMatch; 3217 } 3218 3219 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) { 3220 if (!isRegister()) 3221 return ParseStatus::NoMatch; 3222 3223 if (auto R = parseRegister()) { 3224 assert(R->isReg()); 3225 Operands.push_back(std::move(R)); 3226 return ParseStatus::Success; 3227 } 3228 return ParseStatus::Failure; 3229 } 3230 3231 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, 3232 bool HasSP3AbsMod, bool HasLit) { 3233 ParseStatus Res = parseReg(Operands); 3234 if (!Res.isNoMatch()) 3235 return Res; 3236 if (isModifier()) 3237 return ParseStatus::NoMatch; 3238 return parseImm(Operands, HasSP3AbsMod, HasLit); 3239 } 3240 3241 bool 3242 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3243 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3244 const auto &str = Token.getString(); 3245 return str == "abs" || str == "neg" || str == "sext"; 3246 } 3247 return false; 3248 } 3249 3250 bool 3251 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3252 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3253 } 3254 3255 bool 3256 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3257 
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}

bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}

// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
// |...|
// abs(...)
// neg(...)
// sext(...)
// -reg
// -|...|
// -abs(...)
// name:...
//
bool
AMDGPUAsmParser::isModifier() {

  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}

// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal N,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would result in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//     v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//     v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

ParseStatus
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  bool Lit;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
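  // To make the VOP1/VOP3 note above parseSP3NegModifier() concrete (hedged,
  // illustrative arithmetic only): integer negation and the floating-point NEG
  // modifier produce different 32-bit patterns for the same literal "1":
  //
  //   uint32_t IntNeg = uint32_t(-1);      // integer negation:  0xFFFFFFFF
  //   uint32_t FpNeg  = 1u | 0x80000000u;  // fp NEG (sign flip): 0x80000001
  //
  // Parsing "-1" as integer negation keeps the operand identical across the
  // VOP1/2/C and VOP3 forms.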
3338 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) 3339 return Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3340 3341 SP3Neg = parseSP3NegModifier(); 3342 3343 Loc = getLoc(); 3344 Neg = trySkipId("neg"); 3345 if (Neg && SP3Neg) 3346 return Error(Loc, "expected register or immediate"); 3347 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3348 return ParseStatus::Failure; 3349 3350 Abs = trySkipId("abs"); 3351 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3352 return ParseStatus::Failure; 3353 3354 Lit = trySkipId("lit"); 3355 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit")) 3356 return ParseStatus::Failure; 3357 3358 Loc = getLoc(); 3359 SP3Abs = trySkipToken(AsmToken::Pipe); 3360 if (Abs && SP3Abs) 3361 return Error(Loc, "expected register or immediate"); 3362 3363 ParseStatus Res; 3364 if (AllowImm) { 3365 Res = parseRegOrImm(Operands, SP3Abs, Lit); 3366 } else { 3367 Res = parseReg(Operands); 3368 } 3369 if (!Res.isSuccess()) 3370 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res; 3371 3372 if (Lit && !Operands.back()->isImm()) 3373 Error(Loc, "expected immediate with lit modifier"); 3374 3375 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3376 return ParseStatus::Failure; 3377 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3378 return ParseStatus::Failure; 3379 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3380 return ParseStatus::Failure; 3381 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3382 return ParseStatus::Failure; 3383 3384 AMDGPUOperand::Modifiers Mods; 3385 Mods.Abs = Abs || SP3Abs; 3386 Mods.Neg = Neg || SP3Neg; 3387 Mods.Lit = Lit; 3388 3389 if (Mods.hasFPModifiers() || Lit) { 3390 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3391 if (Op.isExpr()) 3392 return Error(Op.getStartLoc(), "expected an absolute expression"); 3393 Op.setModifiers(Mods); 3394 } 3395 return ParseStatus::Success; 3396 } 3397 3398 ParseStatus 3399 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3400 bool AllowImm) { 3401 bool Sext = trySkipId("sext"); 3402 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3403 return ParseStatus::Failure; 3404 3405 ParseStatus Res; 3406 if (AllowImm) { 3407 Res = parseRegOrImm(Operands); 3408 } else { 3409 Res = parseReg(Operands); 3410 } 3411 if (!Res.isSuccess()) 3412 return Sext ? 
ParseStatus::Failure : Res; 3413 3414 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3415 return ParseStatus::Failure; 3416 3417 AMDGPUOperand::Modifiers Mods; 3418 Mods.Sext = Sext; 3419 3420 if (Mods.hasIntModifiers()) { 3421 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3422 if (Op.isExpr()) 3423 return Error(Op.getStartLoc(), "expected an absolute expression"); 3424 Op.setModifiers(Mods); 3425 } 3426 3427 return ParseStatus::Success; 3428 } 3429 3430 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3431 return parseRegOrImmWithFPInputMods(Operands, false); 3432 } 3433 3434 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3435 return parseRegOrImmWithIntInputMods(Operands, false); 3436 } 3437 3438 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3439 auto Loc = getLoc(); 3440 if (trySkipId("off")) { 3441 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3442 AMDGPUOperand::ImmTyOff, false)); 3443 return ParseStatus::Success; 3444 } 3445 3446 if (!isRegister()) 3447 return ParseStatus::NoMatch; 3448 3449 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3450 if (Reg) { 3451 Operands.push_back(std::move(Reg)); 3452 return ParseStatus::Success; 3453 } 3454 3455 return ParseStatus::Failure; 3456 } 3457 3458 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3459 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3460 3461 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3462 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3463 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3464 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3465 return Match_InvalidOperand; 3466 3467 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3468 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3469 // v_mac_f32/16 allow only dst_sel == DWORD; 3470 auto OpNum = 3471 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3472 const auto &Op = Inst.getOperand(OpNum); 3473 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3474 return Match_InvalidOperand; 3475 } 3476 } 3477 3478 return Match_Success; 3479 } 3480 3481 static ArrayRef<unsigned> getAllVariants() { 3482 static const unsigned Variants[] = { 3483 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3484 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3485 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3486 }; 3487 3488 return ArrayRef(Variants); 3489 } 3490 3491 // What asm variants we should check 3492 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3493 if (isForcedDPP() && isForcedVOP3()) { 3494 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3495 return ArrayRef(Variants); 3496 } 3497 if (getForcedEncodingSize() == 32) { 3498 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3499 return ArrayRef(Variants); 3500 } 3501 3502 if (isForcedVOP3()) { 3503 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3504 return ArrayRef(Variants); 3505 } 3506 3507 if (isForcedSDWA()) { 3508 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3509 AMDGPUAsmVariants::SDWA9}; 3510 return ArrayRef(Variants); 3511 } 3512 3513 if (isForcedDPP()) { 3514 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3515 return ArrayRef(Variants); 3516 } 3517 3518 return getAllVariants(); 3519 } 3520 3521 StringRef 
AMDGPUAsmParser::getMatchedVariantName() const { 3522 if (isForcedDPP() && isForcedVOP3()) 3523 return "e64_dpp"; 3524 3525 if (getForcedEncodingSize() == 32) 3526 return "e32"; 3527 3528 if (isForcedVOP3()) 3529 return "e64"; 3530 3531 if (isForcedSDWA()) 3532 return "sdwa"; 3533 3534 if (isForcedDPP()) 3535 return "dpp"; 3536 3537 return ""; 3538 } 3539 3540 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3541 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3542 for (MCPhysReg Reg : Desc.implicit_uses()) { 3543 switch (Reg) { 3544 case AMDGPU::FLAT_SCR: 3545 case AMDGPU::VCC: 3546 case AMDGPU::VCC_LO: 3547 case AMDGPU::VCC_HI: 3548 case AMDGPU::M0: 3549 return Reg; 3550 default: 3551 break; 3552 } 3553 } 3554 return AMDGPU::NoRegister; 3555 } 3556 3557 // NB: This code is correct only when used to check constant 3558 // bus limitations because GFX7 support no f16 inline constants. 3559 // Note that there are no cases when a GFX7 opcode violates 3560 // constant bus limitations due to the use of an f16 constant. 3561 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3562 unsigned OpIdx) const { 3563 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3564 3565 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) || 3566 AMDGPU::isKImmOperand(Desc, OpIdx)) { 3567 return false; 3568 } 3569 3570 const MCOperand &MO = Inst.getOperand(OpIdx); 3571 3572 int64_t Val = MO.getImm(); 3573 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3574 3575 switch (OpSize) { // expected operand size 3576 case 8: 3577 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3578 case 4: 3579 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3580 case 2: { 3581 const unsigned OperandType = Desc.operands()[OpIdx].OperandType; 3582 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3583 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3584 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3585 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm()); 3586 3587 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3588 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3589 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3590 return AMDGPU::isInlinableLiteralV2I16(Val); 3591 3592 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3593 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3594 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3595 return AMDGPU::isInlinableLiteralV2F16(Val); 3596 3597 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 || 3598 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2BF16 || 3599 OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16) 3600 return AMDGPU::isInlinableLiteralV2BF16(Val); 3601 3602 if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 || 3603 OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 || 3604 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 || 3605 OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED) 3606 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm()); 3607 3608 if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 || 3609 OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 || 3610 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 || 3611 OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED) 3612 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm()); 3613 3614 llvm_unreachable("invalid operand type"); 3615 } 3616 default: 3617 llvm_unreachable("invalid operand size"); 3618 } 3619 } 3620 3621 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 
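  // Illustrative usage sketch (an assumption, not taken from the ISA docs): on
  // GFX10+ a VOP3 instruction such as "v_add_f32_e64 v0, s1, s2" may read two
  // scalar operands (limit 2), while the 64-bit shifts listed below are still
  // limited to a single scalar value input even on GFX10+.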
3622 if (!isGFX10Plus()) 3623 return 1; 3624 3625 switch (Opcode) { 3626 // 64-bit shift instructions can use only one scalar value input 3627 case AMDGPU::V_LSHLREV_B64_e64: 3628 case AMDGPU::V_LSHLREV_B64_gfx10: 3629 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3630 case AMDGPU::V_LSHLREV_B64_e32_gfx12: 3631 case AMDGPU::V_LSHLREV_B64_e64_gfx12: 3632 case AMDGPU::V_LSHRREV_B64_e64: 3633 case AMDGPU::V_LSHRREV_B64_gfx10: 3634 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3635 case AMDGPU::V_LSHRREV_B64_e64_gfx12: 3636 case AMDGPU::V_ASHRREV_I64_e64: 3637 case AMDGPU::V_ASHRREV_I64_gfx10: 3638 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3639 case AMDGPU::V_ASHRREV_I64_e64_gfx12: 3640 case AMDGPU::V_LSHL_B64_e64: 3641 case AMDGPU::V_LSHR_B64_e64: 3642 case AMDGPU::V_ASHR_I64_e64: 3643 return 1; 3644 default: 3645 return 2; 3646 } 3647 } 3648 3649 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6; 3650 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>; 3651 3652 // Get regular operand indices in the same order as specified 3653 // in the instruction (but append mandatory literals to the end). 3654 static OperandIndices getSrcOperandIndices(unsigned Opcode, 3655 bool AddMandatoryLiterals = false) { 3656 3657 int16_t ImmIdx = 3658 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1; 3659 3660 if (isVOPD(Opcode)) { 3661 int16_t ImmDeferredIdx = 3662 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred) 3663 : -1; 3664 3665 return {getNamedOperandIdx(Opcode, OpName::src0X), 3666 getNamedOperandIdx(Opcode, OpName::vsrc1X), 3667 getNamedOperandIdx(Opcode, OpName::src0Y), 3668 getNamedOperandIdx(Opcode, OpName::vsrc1Y), 3669 ImmDeferredIdx, 3670 ImmIdx}; 3671 } 3672 3673 return {getNamedOperandIdx(Opcode, OpName::src0), 3674 getNamedOperandIdx(Opcode, OpName::src1), 3675 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx}; 3676 } 3677 3678 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3679 const MCOperand &MO = Inst.getOperand(OpIdx); 3680 if (MO.isImm()) 3681 return !isInlineConstant(Inst, OpIdx); 3682 if (MO.isReg()) { 3683 auto Reg = MO.getReg(); 3684 if (!Reg) 3685 return false; 3686 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3687 auto PReg = mc2PseudoReg(Reg); 3688 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3689 } 3690 return true; 3691 } 3692 3693 // Based on the comment for `AMDGPUInstructionSelector::selectWritelane`: 3694 // Writelane is special in that it can use SGPR and M0 (which would normally 3695 // count as using the constant bus twice - but in this case it is allowed since 3696 // the lane selector doesn't count as a use of the constant bus). However, it is 3697 // still required to abide by the 1 SGPR rule. 
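// Illustrative example (assembly sketch for the GFX6-GFX9 forms handled below):
//   v_writelane_b32 v1, s2, m0   ; m0 as the lane select is not counted as a
//                                ; constant bus use, so only s2 counts here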
3698 static bool checkWriteLane(const MCInst &Inst) { 3699 const unsigned Opcode = Inst.getOpcode(); 3700 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi) 3701 return false; 3702 const MCOperand &LaneSelOp = Inst.getOperand(2); 3703 if (!LaneSelOp.isReg()) 3704 return false; 3705 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg()); 3706 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11; 3707 } 3708 3709 bool AMDGPUAsmParser::validateConstantBusLimitations( 3710 const MCInst &Inst, const OperandVector &Operands) { 3711 const unsigned Opcode = Inst.getOpcode(); 3712 const MCInstrDesc &Desc = MII.get(Opcode); 3713 unsigned LastSGPR = AMDGPU::NoRegister; 3714 unsigned ConstantBusUseCount = 0; 3715 unsigned NumLiterals = 0; 3716 unsigned LiteralSize; 3717 3718 if (!(Desc.TSFlags & 3719 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3720 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) && 3721 !isVOPD(Opcode)) 3722 return true; 3723 3724 if (checkWriteLane(Inst)) 3725 return true; 3726 3727 // Check special imm operands (used by madmk, etc) 3728 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) { 3729 ++NumLiterals; 3730 LiteralSize = 4; 3731 } 3732 3733 SmallDenseSet<unsigned> SGPRsUsed; 3734 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3735 if (SGPRUsed != AMDGPU::NoRegister) { 3736 SGPRsUsed.insert(SGPRUsed); 3737 ++ConstantBusUseCount; 3738 } 3739 3740 OperandIndices OpIndices = getSrcOperandIndices(Opcode); 3741 3742 for (int OpIdx : OpIndices) { 3743 if (OpIdx == -1) 3744 continue; 3745 3746 const MCOperand &MO = Inst.getOperand(OpIdx); 3747 if (usesConstantBus(Inst, OpIdx)) { 3748 if (MO.isReg()) { 3749 LastSGPR = mc2PseudoReg(MO.getReg()); 3750 // Pairs of registers with a partial intersections like these 3751 // s0, s[0:1] 3752 // flat_scratch_lo, flat_scratch 3753 // flat_scratch_lo, flat_scratch_hi 3754 // are theoretically valid but they are disabled anyway. 3755 // Note that this code mimics SIInstrInfo::verifyInstruction 3756 if (SGPRsUsed.insert(LastSGPR).second) { 3757 ++ConstantBusUseCount; 3758 } 3759 } else { // Expression or a literal 3760 3761 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3762 continue; // special operand like VINTERP attr_chan 3763 3764 // An instruction may use only one literal. 3765 // This has been validated on the previous step. 3766 // See validateVOPLiteral. 3767 // This literal may be used as more than one operand. 3768 // If all these operands are of the same size, 3769 // this literal counts as one scalar value. 3770 // Otherwise it counts as 2 scalar values. 3771 // See "GFX10 Shader Programming", section 3.6.2.3. 3772 3773 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3774 if (Size < 4) 3775 Size = 4; 3776 3777 if (NumLiterals == 0) { 3778 NumLiterals = 1; 3779 LiteralSize = Size; 3780 } else if (LiteralSize != Size) { 3781 NumLiterals = 2; 3782 } 3783 } 3784 } 3785 } 3786 ConstantBusUseCount += NumLiterals; 3787 3788 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3789 return true; 3790 3791 SMLoc LitLoc = getLitLoc(Operands); 3792 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3793 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3794 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3795 return false; 3796 } 3797 3798 bool AMDGPUAsmParser::validateVOPDRegBankConstraints( 3799 const MCInst &Inst, const OperandVector &Operands) { 3800 3801 const unsigned Opcode = Inst.getOpcode(); 3802 if (!isVOPD(Opcode)) 3803 return true; 3804 3805 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3806 3807 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) { 3808 const MCOperand &Opr = Inst.getOperand(OperandIdx); 3809 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI)) 3810 ? Opr.getReg() 3811 : MCRegister::NoRegister; 3812 }; 3813 3814 // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. 3815 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; 3816 3817 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); 3818 auto InvalidCompOprIdx = 3819 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc); 3820 if (!InvalidCompOprIdx) 3821 return true; 3822 3823 auto CompOprIdx = *InvalidCompOprIdx; 3824 auto ParsedIdx = 3825 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), 3826 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); 3827 assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); 3828 3829 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); 3830 if (CompOprIdx == VOPD::Component::DST) { 3831 Error(Loc, "one dst register must be even and the other odd"); 3832 } else { 3833 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; 3834 Error(Loc, Twine("src") + Twine(CompSrcIdx) + 3835 " operands must use different VGPR banks"); 3836 } 3837 3838 return false; 3839 } 3840 3841 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3842 3843 const unsigned Opc = Inst.getOpcode(); 3844 const MCInstrDesc &Desc = MII.get(Opc); 3845 3846 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3847 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3848 assert(ClampIdx != -1); 3849 return Inst.getOperand(ClampIdx).getImm() == 0; 3850 } 3851 3852 return true; 3853 } 3854 3855 constexpr uint64_t MIMGFlags = 3856 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE; 3857 3858 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, 3859 const SMLoc &IDLoc) { 3860 3861 const unsigned Opc = Inst.getOpcode(); 3862 const MCInstrDesc &Desc = MII.get(Opc); 3863 3864 if ((Desc.TSFlags & MIMGFlags) == 0) 3865 return true; 3866 3867 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3868 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3869 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3870 3871 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample 3872 return true; 3873 3874 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray 3875 return true; 3876 3877 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3878 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3879 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3880 if (DMask == 0) 3881 DMask = 1; 3882 3883 bool IsPackedD16 = false; 3884 unsigned DataSize = 3885 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : llvm::popcount(DMask); 3886 if (hasPackedD16()) { 3887 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3888 IsPackedD16 = D16Idx >= 0; 3889 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm()) 3890 DataSize = (DataSize + 1) / 2; 3891 } 3892 3893 if ((VDataSize / 4) == DataSize + TFESize) 3894 return true; 3895 3896 StringRef Modifiers; 3897 if (isGFX90A()) 3898 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask"; 3899 else 3900 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe"; 3901 3902 Error(IDLoc, Twine("image data size does not match ") + Modifiers); 3903 return false; 3904 } 3905 3906 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, 3907 const SMLoc &IDLoc) { 3908 const unsigned Opc = Inst.getOpcode(); 3909 const MCInstrDesc &Desc = MII.get(Opc); 3910 3911 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus()) 3912 return true; 3913 3914 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3915 3916 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3917 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3918 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3919 int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc 3920 : AMDGPU::OpName::rsrc; 3921 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName); 3922 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3923 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3924 3925 assert(VAddr0Idx != -1); 3926 assert(SrsrcIdx != -1); 3927 assert(SrsrcIdx > VAddr0Idx); 3928 3929 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3930 if (BaseOpcode->BVH) { 3931 if (IsA16 == BaseOpcode->A16) 3932 return true; 3933 Error(IDLoc, "image address size does not match a16"); 3934 return false; 3935 } 3936 3937 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3938 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3939 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3940 unsigned ActualAddrSize = 3941 IsNSA ? SrsrcIdx - VAddr0Idx 3942 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3943 3944 unsigned ExpectedAddrSize = 3945 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3946 3947 if (IsNSA) { 3948 if (hasPartialNSAEncoding() && 3949 ExpectedAddrSize > 3950 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) { 3951 int VAddrLastIdx = SrsrcIdx - 1; 3952 unsigned VAddrLastSize = 3953 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4; 3954 3955 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; 3956 } 3957 } else { 3958 if (ExpectedAddrSize > 12) 3959 ExpectedAddrSize = 16; 3960 3961 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3962 // This provides backward compatibility for assembly created 3963 // before 160b/192b/224b types were directly supported. 
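    // For example (illustrative sketch): an image instruction whose address
    // needs 6 VGPRs may still be written with an 8-register tuple such as
    // v[0:7]; that padded form is accepted by the check below.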
3964 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3965 return true; 3966 } 3967 3968 if (ActualAddrSize == ExpectedAddrSize) 3969 return true; 3970 3971 Error(IDLoc, "image address size does not match dim and a16"); 3972 return false; 3973 } 3974 3975 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3976 3977 const unsigned Opc = Inst.getOpcode(); 3978 const MCInstrDesc &Desc = MII.get(Opc); 3979 3980 if ((Desc.TSFlags & MIMGFlags) == 0) 3981 return true; 3982 if (!Desc.mayLoad() || !Desc.mayStore()) 3983 return true; // Not atomic 3984 3985 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3986 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3987 3988 // This is an incomplete check because image_atomic_cmpswap 3989 // may only use 0x3 and 0xf while other atomic operations 3990 // may use 0x1 and 0x3. However these limitations are 3991 // verified when we check that dmask matches dst size. 3992 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3993 } 3994 3995 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3996 3997 const unsigned Opc = Inst.getOpcode(); 3998 const MCInstrDesc &Desc = MII.get(Opc); 3999 4000 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 4001 return true; 4002 4003 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 4004 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 4005 4006 // GATHER4 instructions use dmask in a different fashion compared to 4007 // other MIMG instructions. The only useful DMASK values are 4008 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 4009 // (red,red,red,red) etc.) The ISA document doesn't mention 4010 // this. 4011 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 4012 } 4013 4014 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 4015 const unsigned Opc = Inst.getOpcode(); 4016 const MCInstrDesc &Desc = MII.get(Opc); 4017 4018 if ((Desc.TSFlags & MIMGFlags) == 0) 4019 return true; 4020 4021 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 4022 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 4023 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 4024 4025 if (!BaseOpcode->MSAA) 4026 return true; 4027 4028 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 4029 assert(DimIdx != -1); 4030 4031 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 4032 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 4033 4034 return DimInfo->MSAA; 4035 } 4036 4037 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 4038 { 4039 switch (Opcode) { 4040 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 4041 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 4042 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 4043 return true; 4044 default: 4045 return false; 4046 } 4047 } 4048 4049 // movrels* opcodes should only allow VGPRS as src0. 4050 // This is specified in .td description for vop1/vop3, 4051 // but sdwa is handled differently. See isSDWAOperand. 
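// Illustrative example (assembly sketch): "v_movrels_b32_sdwa v0, s0" should be
// rejected below with "source operand must be a VGPR", whereas
// "v_movrels_b32_sdwa v0, v1" passes this check.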
4052 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 4053 const OperandVector &Operands) { 4054 4055 const unsigned Opc = Inst.getOpcode(); 4056 const MCInstrDesc &Desc = MII.get(Opc); 4057 4058 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 4059 return true; 4060 4061 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4062 assert(Src0Idx != -1); 4063 4064 SMLoc ErrLoc; 4065 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 4066 if (Src0.isReg()) { 4067 auto Reg = mc2PseudoReg(Src0.getReg()); 4068 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4069 if (!isSGPR(Reg, TRI)) 4070 return true; 4071 ErrLoc = getRegLoc(Reg, Operands); 4072 } else { 4073 ErrLoc = getConstLoc(Operands); 4074 } 4075 4076 Error(ErrLoc, "source operand must be a VGPR"); 4077 return false; 4078 } 4079 4080 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 4081 const OperandVector &Operands) { 4082 4083 const unsigned Opc = Inst.getOpcode(); 4084 4085 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 4086 return true; 4087 4088 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4089 assert(Src0Idx != -1); 4090 4091 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 4092 if (!Src0.isReg()) 4093 return true; 4094 4095 auto Reg = mc2PseudoReg(Src0.getReg()); 4096 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4097 if (!isGFX90A() && isSGPR(Reg, TRI)) { 4098 Error(getRegLoc(Reg, Operands), 4099 "source operand must be either a VGPR or an inline constant"); 4100 return false; 4101 } 4102 4103 return true; 4104 } 4105 4106 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst, 4107 const OperandVector &Operands) { 4108 unsigned Opcode = Inst.getOpcode(); 4109 const MCInstrDesc &Desc = MII.get(Opcode); 4110 4111 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) || 4112 !getFeatureBits()[FeatureMFMAInlineLiteralBug]) 4113 return true; 4114 4115 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2); 4116 if (Src2Idx == -1) 4117 return true; 4118 4119 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) { 4120 Error(getConstLoc(Operands), 4121 "inline constants are not allowed for this operand"); 4122 return false; 4123 } 4124 4125 return true; 4126 } 4127 4128 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 4129 const OperandVector &Operands) { 4130 const unsigned Opc = Inst.getOpcode(); 4131 const MCInstrDesc &Desc = MII.get(Opc); 4132 4133 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 4134 return true; 4135 4136 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 4137 if (Src2Idx == -1) 4138 return true; 4139 4140 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 4141 if (!Src2.isReg()) 4142 return true; 4143 4144 MCRegister Src2Reg = Src2.getReg(); 4145 MCRegister DstReg = Inst.getOperand(0).getReg(); 4146 if (Src2Reg == DstReg) 4147 return true; 4148 4149 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4150 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128) 4151 return true; 4152 4153 if (TRI->regsOverlap(Src2Reg, DstReg)) { 4154 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 4155 "source 2 operand must not partially overlap with dst"); 4156 return false; 4157 } 4158 4159 return true; 4160 } 4161 4162 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 4163 switch (Inst.getOpcode()) { 4164 default: 4165 return true; 4166 case V_DIV_SCALE_F32_gfx6_gfx7: 4167 case V_DIV_SCALE_F32_vi: 4168 case 
V_DIV_SCALE_F32_gfx10: 4169 case V_DIV_SCALE_F64_gfx6_gfx7: 4170 case V_DIV_SCALE_F64_vi: 4171 case V_DIV_SCALE_F64_gfx10: 4172 break; 4173 } 4174 4175 // TODO: Check that src0 = src1 or src2. 4176 4177 for (auto Name : {AMDGPU::OpName::src0_modifiers, 4178 AMDGPU::OpName::src1_modifiers, 4179 AMDGPU::OpName::src2_modifiers}) { 4180 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 4181 .getImm() & 4182 SISrcMods::ABS) { 4183 return false; 4184 } 4185 } 4186 4187 return true; 4188 } 4189 4190 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 4191 4192 const unsigned Opc = Inst.getOpcode(); 4193 const MCInstrDesc &Desc = MII.get(Opc); 4194 4195 if ((Desc.TSFlags & MIMGFlags) == 0) 4196 return true; 4197 4198 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 4199 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 4200 if (isCI() || isSI()) 4201 return false; 4202 } 4203 4204 return true; 4205 } 4206 4207 static bool IsRevOpcode(const unsigned Opcode) 4208 { 4209 switch (Opcode) { 4210 case AMDGPU::V_SUBREV_F32_e32: 4211 case AMDGPU::V_SUBREV_F32_e64: 4212 case AMDGPU::V_SUBREV_F32_e32_gfx10: 4213 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 4214 case AMDGPU::V_SUBREV_F32_e32_vi: 4215 case AMDGPU::V_SUBREV_F32_e64_gfx10: 4216 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 4217 case AMDGPU::V_SUBREV_F32_e64_vi: 4218 4219 case AMDGPU::V_SUBREV_CO_U32_e32: 4220 case AMDGPU::V_SUBREV_CO_U32_e64: 4221 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 4222 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 4223 4224 case AMDGPU::V_SUBBREV_U32_e32: 4225 case AMDGPU::V_SUBBREV_U32_e64: 4226 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 4227 case AMDGPU::V_SUBBREV_U32_e32_vi: 4228 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 4229 case AMDGPU::V_SUBBREV_U32_e64_vi: 4230 4231 case AMDGPU::V_SUBREV_U32_e32: 4232 case AMDGPU::V_SUBREV_U32_e64: 4233 case AMDGPU::V_SUBREV_U32_e32_gfx9: 4234 case AMDGPU::V_SUBREV_U32_e32_vi: 4235 case AMDGPU::V_SUBREV_U32_e64_gfx9: 4236 case AMDGPU::V_SUBREV_U32_e64_vi: 4237 4238 case AMDGPU::V_SUBREV_F16_e32: 4239 case AMDGPU::V_SUBREV_F16_e64: 4240 case AMDGPU::V_SUBREV_F16_e32_gfx10: 4241 case AMDGPU::V_SUBREV_F16_e32_vi: 4242 case AMDGPU::V_SUBREV_F16_e64_gfx10: 4243 case AMDGPU::V_SUBREV_F16_e64_vi: 4244 4245 case AMDGPU::V_SUBREV_U16_e32: 4246 case AMDGPU::V_SUBREV_U16_e64: 4247 case AMDGPU::V_SUBREV_U16_e32_vi: 4248 case AMDGPU::V_SUBREV_U16_e64_vi: 4249 4250 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 4251 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 4252 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 4253 4254 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 4255 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 4256 4257 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 4258 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 4259 4260 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 4261 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 4262 4263 case AMDGPU::V_LSHRREV_B32_e32: 4264 case AMDGPU::V_LSHRREV_B32_e64: 4265 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 4266 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 4267 case AMDGPU::V_LSHRREV_B32_e32_vi: 4268 case AMDGPU::V_LSHRREV_B32_e64_vi: 4269 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 4270 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 4271 4272 case AMDGPU::V_ASHRREV_I32_e32: 4273 case AMDGPU::V_ASHRREV_I32_e64: 4274 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 4275 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 4276 case AMDGPU::V_ASHRREV_I32_e32_vi: 4277 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 4278 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 4279 case AMDGPU::V_ASHRREV_I32_e64_vi: 4280 4281 case
AMDGPU::V_LSHLREV_B32_e32: 4282 case AMDGPU::V_LSHLREV_B32_e64: 4283 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 4284 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 4285 case AMDGPU::V_LSHLREV_B32_e32_vi: 4286 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 4287 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 4288 case AMDGPU::V_LSHLREV_B32_e64_vi: 4289 4290 case AMDGPU::V_LSHLREV_B16_e32: 4291 case AMDGPU::V_LSHLREV_B16_e64: 4292 case AMDGPU::V_LSHLREV_B16_e32_vi: 4293 case AMDGPU::V_LSHLREV_B16_e64_vi: 4294 case AMDGPU::V_LSHLREV_B16_gfx10: 4295 4296 case AMDGPU::V_LSHRREV_B16_e32: 4297 case AMDGPU::V_LSHRREV_B16_e64: 4298 case AMDGPU::V_LSHRREV_B16_e32_vi: 4299 case AMDGPU::V_LSHRREV_B16_e64_vi: 4300 case AMDGPU::V_LSHRREV_B16_gfx10: 4301 4302 case AMDGPU::V_ASHRREV_I16_e32: 4303 case AMDGPU::V_ASHRREV_I16_e64: 4304 case AMDGPU::V_ASHRREV_I16_e32_vi: 4305 case AMDGPU::V_ASHRREV_I16_e64_vi: 4306 case AMDGPU::V_ASHRREV_I16_gfx10: 4307 4308 case AMDGPU::V_LSHLREV_B64_e64: 4309 case AMDGPU::V_LSHLREV_B64_gfx10: 4310 case AMDGPU::V_LSHLREV_B64_vi: 4311 4312 case AMDGPU::V_LSHRREV_B64_e64: 4313 case AMDGPU::V_LSHRREV_B64_gfx10: 4314 case AMDGPU::V_LSHRREV_B64_vi: 4315 4316 case AMDGPU::V_ASHRREV_I64_e64: 4317 case AMDGPU::V_ASHRREV_I64_gfx10: 4318 case AMDGPU::V_ASHRREV_I64_vi: 4319 4320 case AMDGPU::V_PK_LSHLREV_B16: 4321 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 4322 case AMDGPU::V_PK_LSHLREV_B16_vi: 4323 4324 case AMDGPU::V_PK_LSHRREV_B16: 4325 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 4326 case AMDGPU::V_PK_LSHRREV_B16_vi: 4327 case AMDGPU::V_PK_ASHRREV_I16: 4328 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4329 case AMDGPU::V_PK_ASHRREV_I16_vi: 4330 return true; 4331 default: 4332 return false; 4333 } 4334 } 4335 4336 std::optional<StringRef> 4337 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4338 4339 using namespace SIInstrFlags; 4340 const unsigned Opcode = Inst.getOpcode(); 4341 const MCInstrDesc &Desc = MII.get(Opcode); 4342 4343 // lds_direct register is defined so that it can be used 4344 // with 9-bit operands only. Ignore encodings which do not accept these. 
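  // Illustrative examples of the checks below (assembly sketches, assumptions
  // rather than statements from the ISA docs):
  //   v_mov_b32 v0, lds_direct          ; src0 use - not rejected by this check
  //   v_add_f32_e64 v0, v1, lds_direct  ; rejected: lds_direct may be used as src0 only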
4345 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4346 if ((Desc.TSFlags & Enc) == 0) 4347 return std::nullopt; 4348 4349 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4350 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4351 if (SrcIdx == -1) 4352 break; 4353 const auto &Src = Inst.getOperand(SrcIdx); 4354 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4355 4356 if (isGFX90A() || isGFX11Plus()) 4357 return StringRef("lds_direct is not supported on this GPU"); 4358 4359 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4360 return StringRef("lds_direct cannot be used with this instruction"); 4361 4362 if (SrcName != OpName::src0) 4363 return StringRef("lds_direct may be used as src0 only"); 4364 } 4365 } 4366 4367 return std::nullopt; 4368 } 4369 4370 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4371 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4372 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4373 if (Op.isFlatOffset()) 4374 return Op.getStartLoc(); 4375 } 4376 return getLoc(); 4377 } 4378 4379 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst, 4380 const OperandVector &Operands) { 4381 auto Opcode = Inst.getOpcode(); 4382 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4383 if (OpNum == -1) 4384 return true; 4385 4386 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4387 if ((TSFlags & SIInstrFlags::FLAT)) 4388 return validateFlatOffset(Inst, Operands); 4389 4390 if ((TSFlags & SIInstrFlags::SMRD)) 4391 return validateSMEMOffset(Inst, Operands); 4392 4393 const auto &Op = Inst.getOperand(OpNum); 4394 if (isGFX12Plus() && 4395 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 4396 const unsigned OffsetSize = 24; 4397 if (!isIntN(OffsetSize, Op.getImm())) { 4398 Error(getFlatOffsetLoc(Operands), 4399 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4400 return false; 4401 } 4402 } else { 4403 const unsigned OffsetSize = 16; 4404 if (!isUIntN(OffsetSize, Op.getImm())) { 4405 Error(getFlatOffsetLoc(Operands), 4406 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4407 return false; 4408 } 4409 } 4410 return true; 4411 } 4412 4413 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4414 const OperandVector &Operands) { 4415 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4416 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4417 return true; 4418 4419 auto Opcode = Inst.getOpcode(); 4420 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4421 assert(OpNum != -1); 4422 4423 const auto &Op = Inst.getOperand(OpNum); 4424 if (!hasFlatOffsets() && Op.getImm() != 0) { 4425 Error(getFlatOffsetLoc(Operands), 4426 "flat offset modifier is not supported on this GPU"); 4427 return false; 4428 } 4429 4430 // For pre-GFX12 FLAT instructions the offset must be positive; 4431 // MSB is ignored and forced to zero. 4432 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI()); 4433 bool AllowNegative = 4434 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) || 4435 isGFX12Plus(); 4436 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { 4437 Error(getFlatOffsetLoc(Operands), 4438 Twine("expected a ") + 4439 (AllowNegative ? 
Twine(OffsetSize) + "-bit signed offset" 4440 : Twine(OffsetSize - 1) + "-bit unsigned offset")); 4441 return false; 4442 } 4443 4444 return true; 4445 } 4446 4447 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4448 // Start with second operand because SMEM Offset cannot be dst or src0. 4449 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4450 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4451 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod()) 4452 return Op.getStartLoc(); 4453 } 4454 return getLoc(); 4455 } 4456 4457 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4458 const OperandVector &Operands) { 4459 if (isCI() || isSI()) 4460 return true; 4461 4462 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4463 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4464 return true; 4465 4466 auto Opcode = Inst.getOpcode(); 4467 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4468 if (OpNum == -1) 4469 return true; 4470 4471 const auto &Op = Inst.getOperand(OpNum); 4472 if (!Op.isImm()) 4473 return true; 4474 4475 uint64_t Offset = Op.getImm(); 4476 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4477 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4478 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4479 return true; 4480 4481 Error(getSMEMOffsetLoc(Operands), 4482 isGFX12Plus() ? "expected a 24-bit signed offset" 4483 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" 4484 : "expected a 21-bit signed offset"); 4485 4486 return false; 4487 } 4488 4489 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4490 unsigned Opcode = Inst.getOpcode(); 4491 const MCInstrDesc &Desc = MII.get(Opcode); 4492 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4493 return true; 4494 4495 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4496 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4497 4498 const int OpIndices[] = { Src0Idx, Src1Idx }; 4499 4500 unsigned NumExprs = 0; 4501 unsigned NumLiterals = 0; 4502 uint32_t LiteralValue; 4503 4504 for (int OpIdx : OpIndices) { 4505 if (OpIdx == -1) break; 4506 4507 const MCOperand &MO = Inst.getOperand(OpIdx); 4508 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4509 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4510 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4511 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4512 if (NumLiterals == 0 || LiteralValue != Value) { 4513 LiteralValue = Value; 4514 ++NumLiterals; 4515 } 4516 } else if (MO.isExpr()) { 4517 ++NumExprs; 4518 } 4519 } 4520 } 4521 4522 return NumLiterals + NumExprs <= 1; 4523 } 4524 4525 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4526 const unsigned Opc = Inst.getOpcode(); 4527 if (isPermlane16(Opc)) { 4528 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4529 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4530 4531 if (OpSel & ~3) 4532 return false; 4533 } 4534 4535 uint64_t TSFlags = MII.get(Opc).TSFlags; 4536 4537 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { 4538 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4539 if (OpSelIdx != -1) { 4540 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4541 return false; 4542 } 4543 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4544 if (OpSelHiIdx != -1) { 4545 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4546 return 
false; 4547 } 4548 } 4549 4550 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). 4551 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && 4552 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { 4553 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4554 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4555 if (OpSel & 3) 4556 return false; 4557 } 4558 4559 return true; 4560 } 4561 4562 bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) { 4563 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi); 4564 4565 const unsigned Opc = Inst.getOpcode(); 4566 uint64_t TSFlags = MII.get(Opc).TSFlags; 4567 4568 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2) 4569 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1) 4570 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1) 4571 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand. 4572 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) && 4573 !(TSFlags & SIInstrFlags::IsSWMMAC)) 4574 return true; 4575 4576 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName); 4577 if (NegIdx == -1) 4578 return true; 4579 4580 unsigned Neg = Inst.getOperand(NegIdx).getImm(); 4581 4582 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed 4583 // on some src operands but not allowed on other. 4584 // It is convenient that such instructions don't have src_modifiers operand 4585 // for src operands that don't allow neg because they also don't allow opsel. 4586 4587 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers, 4588 AMDGPU::OpName::src1_modifiers, 4589 AMDGPU::OpName::src2_modifiers}; 4590 4591 for (unsigned i = 0; i < 3; ++i) { 4592 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) { 4593 if (Neg & (1 << i)) 4594 return false; 4595 } 4596 } 4597 4598 return true; 4599 } 4600 4601 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4602 const OperandVector &Operands) { 4603 const unsigned Opc = Inst.getOpcode(); 4604 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4605 if (DppCtrlIdx >= 0) { 4606 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4607 4608 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) && 4609 AMDGPU::isDPALU_DPP(MII.get(Opc))) { 4610 // DP ALU DPP is supported for row_newbcast only on GFX9* 4611 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4612 Error(S, "DP ALU dpp only supports row_newbcast"); 4613 return false; 4614 } 4615 } 4616 4617 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8); 4618 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0; 4619 4620 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) { 4621 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 4622 if (Src1Idx >= 0) { 4623 const MCOperand &Src1 = Inst.getOperand(Src1Idx); 4624 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4625 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) { 4626 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()); 4627 SMLoc S = getRegLoc(Reg, Operands); 4628 Error(S, "invalid operand for instruction"); 4629 return false; 4630 } 4631 if (Src1.isImm()) { 4632 Error(getInstLoc(Operands), 4633 "src1 immediate operand invalid for instruction"); 4634 return false; 4635 } 4636 } 4637 } 4638 4639 return true; 4640 } 4641 4642 // Check if VCC register matches wavefront size 4643 bool AMDGPUAsmParser::validateVccOperand(unsigned 
Reg) const { 4644 auto FB = getFeatureBits(); 4645 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4646 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4647 } 4648 4649 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4650 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4651 const OperandVector &Operands) { 4652 unsigned Opcode = Inst.getOpcode(); 4653 const MCInstrDesc &Desc = MII.get(Opcode); 4654 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1; 4655 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4656 !HasMandatoryLiteral && !isVOPD(Opcode)) 4657 return true; 4658 4659 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral); 4660 4661 unsigned NumExprs = 0; 4662 unsigned NumLiterals = 0; 4663 uint32_t LiteralValue; 4664 4665 for (int OpIdx : OpIndices) { 4666 if (OpIdx == -1) 4667 continue; 4668 4669 const MCOperand &MO = Inst.getOperand(OpIdx); 4670 if (!MO.isImm() && !MO.isExpr()) 4671 continue; 4672 if (!isSISrcOperand(Desc, OpIdx)) 4673 continue; 4674 4675 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4676 uint64_t Value = static_cast<uint64_t>(MO.getImm()); 4677 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) && 4678 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8; 4679 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64); 4680 4681 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) { 4682 Error(getLitLoc(Operands), "invalid operand for instruction"); 4683 return false; 4684 } 4685 4686 if (IsFP64 && IsValid32Op) 4687 Value = Hi_32(Value); 4688 4689 if (NumLiterals == 0 || LiteralValue != Value) { 4690 LiteralValue = Value; 4691 ++NumLiterals; 4692 } 4693 } else if (MO.isExpr()) { 4694 ++NumExprs; 4695 } 4696 } 4697 NumLiterals += NumExprs; 4698 4699 if (!NumLiterals) 4700 return true; 4701 4702 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) { 4703 Error(getLitLoc(Operands), "literal operands are not supported"); 4704 return false; 4705 } 4706 4707 if (NumLiterals > 1) { 4708 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed"); 4709 return false; 4710 } 4711 4712 return true; 4713 } 4714 4715 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4716 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4717 const MCRegisterInfo *MRI) { 4718 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4719 if (OpIdx < 0) 4720 return -1; 4721 4722 const MCOperand &Op = Inst.getOperand(OpIdx); 4723 if (!Op.isReg()) 4724 return -1; 4725 4726 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4727 auto Reg = Sub ? Sub : Op.getReg(); 4728 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4729 return AGPR32.contains(Reg) ? 1 : 0; 4730 } 4731 4732 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4733 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4734 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4735 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4736 SIInstrFlags::DS)) == 0) 4737 return true; 4738 4739 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? 
AMDGPU::OpName::data0 4740 : AMDGPU::OpName::vdata; 4741 4742 const MCRegisterInfo *MRI = getMRI(); 4743 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4744 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4745 4746 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4747 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4748 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4749 return false; 4750 } 4751 4752 auto FB = getFeatureBits(); 4753 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4754 if (DataAreg < 0 || DstAreg < 0) 4755 return true; 4756 return DstAreg == DataAreg; 4757 } 4758 4759 return DstAreg < 1 && DataAreg < 1; 4760 } 4761 4762 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4763 auto FB = getFeatureBits(); 4764 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4765 return true; 4766 4767 const MCRegisterInfo *MRI = getMRI(); 4768 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4769 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4770 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4771 const MCOperand &Op = Inst.getOperand(I); 4772 if (!Op.isReg()) 4773 continue; 4774 4775 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4776 if (!Sub) 4777 continue; 4778 4779 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4780 return false; 4781 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4782 return false; 4783 } 4784 4785 return true; 4786 } 4787 4788 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4789 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4790 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4791 if (Op.isBLGP()) 4792 return Op.getStartLoc(); 4793 } 4794 return SMLoc(); 4795 } 4796 4797 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4798 const OperandVector &Operands) { 4799 unsigned Opc = Inst.getOpcode(); 4800 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4801 if (BlgpIdx == -1) 4802 return true; 4803 SMLoc BLGPLoc = getBLGPLoc(Operands); 4804 if (!BLGPLoc.isValid()) 4805 return true; 4806 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:"); 4807 auto FB = getFeatureBits(); 4808 bool UsesNeg = false; 4809 if (FB[AMDGPU::FeatureGFX940Insts]) { 4810 switch (Opc) { 4811 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4812 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4813 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4814 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4815 UsesNeg = true; 4816 } 4817 } 4818 4819 if (IsNeg == UsesNeg) 4820 return true; 4821 4822 Error(BLGPLoc, 4823 UsesNeg ? 
"invalid modifier: blgp is not supported" 4824 : "invalid modifier: neg is not supported"); 4825 4826 return false; 4827 } 4828 4829 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst, 4830 const OperandVector &Operands) { 4831 if (!isGFX11Plus()) 4832 return true; 4833 4834 unsigned Opc = Inst.getOpcode(); 4835 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 && 4836 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 && 4837 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 && 4838 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11) 4839 return true; 4840 4841 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst); 4842 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg()); 4843 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()); 4844 if (Reg == AMDGPU::SGPR_NULL) 4845 return true; 4846 4847 SMLoc RegLoc = getRegLoc(Reg, Operands); 4848 Error(RegLoc, "src0 must be null"); 4849 return false; 4850 } 4851 4852 bool AMDGPUAsmParser::validateDS(const MCInst &Inst, 4853 const OperandVector &Operands) { 4854 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4855 if ((TSFlags & SIInstrFlags::DS) == 0) 4856 return true; 4857 if (TSFlags & SIInstrFlags::GWS) 4858 return validateGWS(Inst, Operands); 4859 // Only validate GDS for non-GWS instructions. 4860 if (hasGDS()) 4861 return true; 4862 int GDSIdx = 4863 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds); 4864 if (GDSIdx < 0) 4865 return true; 4866 unsigned GDS = Inst.getOperand(GDSIdx).getImm(); 4867 if (GDS) { 4868 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands); 4869 Error(S, "gds modifier is not supported on this GPU"); 4870 return false; 4871 } 4872 return true; 4873 } 4874 4875 // gfx90a has an undocumented limitation: 4876 // DS_GWS opcodes must use even aligned registers. 4877 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4878 const OperandVector &Operands) { 4879 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4880 return true; 4881 4882 int Opc = Inst.getOpcode(); 4883 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4884 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4885 return true; 4886 4887 const MCRegisterInfo *MRI = getMRI(); 4888 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4889 int Data0Pos = 4890 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4891 assert(Data0Pos != -1); 4892 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4893 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4894 if (RegIdx & 1) { 4895 SMLoc RegLoc = getRegLoc(Reg, Operands); 4896 Error(RegLoc, "vgpr must be even aligned"); 4897 return false; 4898 } 4899 4900 return true; 4901 } 4902 4903 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4904 const OperandVector &Operands, 4905 const SMLoc &IDLoc) { 4906 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4907 AMDGPU::OpName::cpol); 4908 if (CPolPos == -1) 4909 return true; 4910 4911 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4912 4913 if (isGFX12Plus()) 4914 return validateTHAndScopeBits(Inst, Operands, CPol); 4915 4916 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4917 if (TSFlags & SIInstrFlags::SMRD) { 4918 if (CPol && (isSI() || isCI())) { 4919 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4920 Error(S, "cache policy is not supported for SMRD instructions"); 4921 return false; 4922 } 4923 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4924 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4925 return false; 4926 } 4927 } 4928 4929 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4930 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF | 4931 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4932 SIInstrFlags::FLAT; 4933 if (!(TSFlags & AllowSCCModifier)) { 4934 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4935 StringRef CStr(S.getPointer()); 4936 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4937 Error(S, 4938 "scc modifier is not supported for this instruction on this GPU"); 4939 return false; 4940 } 4941 } 4942 4943 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4944 return true; 4945 4946 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4947 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4948 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4949 : "instruction must use glc"); 4950 return false; 4951 } 4952 } else { 4953 if (CPol & CPol::GLC) { 4954 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4955 StringRef CStr(S.getPointer()); 4956 S = SMLoc::getFromPointer( 4957 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4958 Error(S, isGFX940() ? 
"instruction must not use sc0" 4959 : "instruction must not use glc"); 4960 return false; 4961 } 4962 } 4963 4964 return true; 4965 } 4966 4967 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst, 4968 const OperandVector &Operands, 4969 const unsigned CPol) { 4970 const unsigned TH = CPol & AMDGPU::CPol::TH; 4971 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE; 4972 4973 const unsigned Opcode = Inst.getOpcode(); 4974 const MCInstrDesc &TID = MII.get(Opcode); 4975 4976 auto PrintError = [&](StringRef Msg) { 4977 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4978 Error(S, Msg); 4979 return false; 4980 }; 4981 4982 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) && 4983 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) && 4984 (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))) 4985 return PrintError("instruction must use th:TH_ATOMIC_RETURN"); 4986 4987 if (TH == 0) 4988 return true; 4989 4990 if ((TID.TSFlags & SIInstrFlags::SMRD) && 4991 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) || 4992 (TH == AMDGPU::CPol::TH_NT_HT))) 4993 return PrintError("invalid th value for SMEM instruction"); 4994 4995 if (TH == AMDGPU::CPol::TH_BYPASS) { 4996 if ((Scope != AMDGPU::CPol::SCOPE_SYS && 4997 CPol & AMDGPU::CPol::TH_REAL_BYPASS) || 4998 (Scope == AMDGPU::CPol::SCOPE_SYS && 4999 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS))) 5000 return PrintError("scope and th combination is not valid"); 5001 } 5002 5003 bool IsStore = TID.mayStore(); 5004 bool IsAtomic = 5005 TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet); 5006 5007 if (IsAtomic) { 5008 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC)) 5009 return PrintError("invalid th value for atomic instructions"); 5010 } else if (IsStore) { 5011 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE)) 5012 return PrintError("invalid th value for store instructions"); 5013 } else { 5014 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD)) 5015 return PrintError("invalid th value for load instructions"); 5016 } 5017 5018 return true; 5019 } 5020 5021 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 5022 if (!isGFX11Plus()) 5023 return true; 5024 for (auto &Operand : Operands) { 5025 if (!Operand->isReg()) 5026 continue; 5027 unsigned Reg = Operand->getReg(); 5028 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 5029 Error(getRegLoc(Reg, Operands), 5030 "execz and vccz are not supported on this GPU"); 5031 return false; 5032 } 5033 } 5034 return true; 5035 } 5036 5037 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, 5038 const OperandVector &Operands) { 5039 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5040 if (Desc.mayStore() && 5041 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 5042 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands); 5043 if (Loc != getInstLoc(Operands)) { 5044 Error(Loc, "TFE modifier has no meaning for store instructions"); 5045 return false; 5046 } 5047 } 5048 5049 return true; 5050 } 5051 5052 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 5053 const SMLoc &IDLoc, 5054 const OperandVector &Operands) { 5055 if (auto ErrMsg = validateLdsDirect(Inst)) { 5056 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 5057 return false; 5058 } 5059 if (!validateSOPLiteral(Inst)) { 5060 Error(getLitLoc(Operands), 5061 "only one unique literal operand is allowed"); 5062 return false; 5063 } 5064 if (!validateVOPLiteral(Inst, Operands)) { 5065 return false; 5066 } 5067 if (!validateConstantBusLimitations(Inst, Operands)) { 5068 return false; 5069 } 
5070 if (!validateVOPDRegBankConstraints(Inst, Operands)) { 5071 return false; 5072 } 5073 if (!validateIntClampSupported(Inst)) { 5074 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands), 5075 "integer clamping is not supported on this GPU"); 5076 return false; 5077 } 5078 if (!validateOpSel(Inst)) { 5079 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 5080 "invalid op_sel operand"); 5081 return false; 5082 } 5083 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) { 5084 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands), 5085 "invalid neg_lo operand"); 5086 return false; 5087 } 5088 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) { 5089 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands), 5090 "invalid neg_hi operand"); 5091 return false; 5092 } 5093 if (!validateDPP(Inst, Operands)) { 5094 return false; 5095 } 5096 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 5097 if (!validateMIMGD16(Inst)) { 5098 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 5099 "d16 modifier is not supported on this GPU"); 5100 return false; 5101 } 5102 if (!validateMIMGMSAA(Inst)) { 5103 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 5104 "invalid dim; must be MSAA type"); 5105 return false; 5106 } 5107 if (!validateMIMGDataSize(Inst, IDLoc)) { 5108 return false; 5109 } 5110 if (!validateMIMGAddrSize(Inst, IDLoc)) 5111 return false; 5112 if (!validateMIMGAtomicDMask(Inst)) { 5113 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 5114 "invalid atomic image dmask"); 5115 return false; 5116 } 5117 if (!validateMIMGGatherDMask(Inst)) { 5118 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 5119 "invalid image_gather dmask: only one bit must be set"); 5120 return false; 5121 } 5122 if (!validateMovrels(Inst, Operands)) { 5123 return false; 5124 } 5125 if (!validateOffset(Inst, Operands)) { 5126 return false; 5127 } 5128 if (!validateMAIAccWrite(Inst, Operands)) { 5129 return false; 5130 } 5131 if (!validateMAISrc2(Inst, Operands)) { 5132 return false; 5133 } 5134 if (!validateMFMA(Inst, Operands)) { 5135 return false; 5136 } 5137 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 5138 return false; 5139 } 5140 5141 if (!validateAGPRLdSt(Inst)) { 5142 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 5143 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 5144 : "invalid register class: agpr loads and stores not supported on this GPU" 5145 ); 5146 return false; 5147 } 5148 if (!validateVGPRAlign(Inst)) { 5149 Error(IDLoc, 5150 "invalid register class: vgpr tuples must be 64 bit aligned"); 5151 return false; 5152 } 5153 if (!validateDS(Inst, Operands)) { 5154 return false; 5155 } 5156 5157 if (!validateBLGP(Inst, Operands)) { 5158 return false; 5159 } 5160 5161 if (!validateDivScale(Inst)) { 5162 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 5163 return false; 5164 } 5165 if (!validateWaitCnt(Inst, Operands)) { 5166 return false; 5167 } 5168 if (!validateExeczVcczOperands(Operands)) { 5169 return false; 5170 } 5171 if (!validateTFE(Inst, Operands)) { 5172 return false; 5173 } 5174 5175 return true; 5176 } 5177 5178 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 5179 const FeatureBitset &FBS, 5180 unsigned VariantID = 0); 5181 5182 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 5183 const FeatureBitset &AvailableFeatures, 5184 unsigned VariantID); 5185 5186 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 5187 const FeatureBitset &FBS) { 5188 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 5189 } 5190 5191 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 5192 const FeatureBitset &FBS, 5193 ArrayRef<unsigned> Variants) { 5194 for (auto Variant : Variants) { 5195 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 5196 return true; 5197 } 5198 5199 return false; 5200 } 5201 5202 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 5203 const SMLoc &IDLoc) { 5204 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits()); 5205 5206 // Check if requested instruction variant is supported. 5207 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 5208 return false; 5209 5210 // This instruction is not supported. 5211 // Clear any other pending errors because they are no longer relevant. 5212 getParser().clearPendingErrors(); 5213 5214 // Requested instruction variant is not supported. 5215 // Check if any other variants are supported. 5216 StringRef VariantName = getMatchedVariantName(); 5217 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 5218 return Error(IDLoc, 5219 Twine(VariantName, 5220 " variant of this instruction is not supported")); 5221 } 5222 5223 // Check if this instruction may be used with a different wavesize. 5224 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && 5225 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { 5226 5227 FeatureBitset FeaturesWS32 = getFeatureBits(); 5228 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64) 5229 .flip(AMDGPU::FeatureWavefrontSize32); 5230 FeatureBitset AvailableFeaturesWS32 = 5231 ComputeAvailableFeatures(FeaturesWS32); 5232 5233 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants())) 5234 return Error(IDLoc, "instruction requires wavesize=32"); 5235 } 5236 5237 // Finally check if this instruction is supported on any other GPU. 5238 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 5239 return Error(IDLoc, "instruction not supported on this GPU"); 5240 } 5241 5242 // Instruction not supported on any GPU. Probably a typo. 
5243 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 5244 return Error(IDLoc, "invalid instruction" + Suggestion); 5245 } 5246 5247 static bool isInvalidVOPDY(const OperandVector &Operands, 5248 uint64_t InvalidOprIdx) { 5249 assert(InvalidOprIdx < Operands.size()); 5250 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]); 5251 if (Op.isToken() && InvalidOprIdx > 1) { 5252 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]); 5253 return PrevOp.isToken() && PrevOp.getToken() == "::"; 5254 } 5255 return false; 5256 } 5257 5258 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 5259 OperandVector &Operands, 5260 MCStreamer &Out, 5261 uint64_t &ErrorInfo, 5262 bool MatchingInlineAsm) { 5263 MCInst Inst; 5264 unsigned Result = Match_Success; 5265 for (auto Variant : getMatchedVariants()) { 5266 uint64_t EI; 5267 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 5268 Variant); 5269 // We order match statuses from least to most specific and use the most 5270 // specific status as the result: 5271 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature 5272 if (R == Match_Success || R == Match_MissingFeature || 5273 (R == Match_InvalidOperand && Result != Match_MissingFeature) || 5274 (R == Match_MnemonicFail && Result != Match_InvalidOperand && 5275 Result != Match_MissingFeature)) { 5276 Result = R; 5277 ErrorInfo = EI; 5278 } 5279 if (R == Match_Success) 5280 break; 5281 } 5282 5283 if (Result == Match_Success) { 5284 if (!validateInstruction(Inst, IDLoc, Operands)) { 5285 return true; 5286 } 5287 Inst.setLoc(IDLoc); 5288 Out.emitInstruction(Inst, getSTI()); 5289 return false; 5290 } 5291 5292 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5293 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 5294 return true; 5295 } 5296 5297 switch (Result) { 5298 default: break; 5299 case Match_MissingFeature: 5300 // It has been verified that the specified instruction 5301 // mnemonic is valid. A match was found but it requires 5302 // features which are not supported on this GPU.
5303 return Error(IDLoc, "operands are not valid for this GPU or mode"); 5304 5305 case Match_InvalidOperand: { 5306 SMLoc ErrorLoc = IDLoc; 5307 if (ErrorInfo != ~0ULL) { 5308 if (ErrorInfo >= Operands.size()) { 5309 return Error(IDLoc, "too few operands for instruction"); 5310 } 5311 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 5312 if (ErrorLoc == SMLoc()) 5313 ErrorLoc = IDLoc; 5314 5315 if (isInvalidVOPDY(Operands, ErrorInfo)) 5316 return Error(ErrorLoc, "invalid VOPDY instruction"); 5317 } 5318 return Error(ErrorLoc, "invalid operand for instruction"); 5319 } 5320 5321 case Match_MnemonicFail: 5322 llvm_unreachable("Invalid instructions should have been handled already"); 5323 } 5324 llvm_unreachable("Implement any new match types added!"); 5325 } 5326 5327 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 5328 int64_t Tmp = -1; 5329 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 5330 return true; 5331 } 5332 if (getParser().parseAbsoluteExpression(Tmp)) { 5333 return true; 5334 } 5335 Ret = static_cast<uint32_t>(Tmp); 5336 return false; 5337 } 5338 5339 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 5340 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 5341 return TokError("directive only supported for amdgcn architecture"); 5342 5343 std::string TargetIDDirective; 5344 SMLoc TargetStart = getTok().getLoc(); 5345 if (getParser().parseEscapedString(TargetIDDirective)) 5346 return true; 5347 5348 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 5349 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5350 return getParser().Error(TargetRange.Start, 5351 (Twine(".amdgcn_target directive's target id ") + 5352 Twine(TargetIDDirective) + 5353 Twine(" does not match the specified target id ") + 5354 Twine(getTargetStreamer().getTargetID()->toString())).str()); 5355 5356 return false; 5357 } 5358 5359 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 5360 return Error(Range.Start, "value out of range", Range); 5361 } 5362 5363 bool AMDGPUAsmParser::calculateGPRBlocks( 5364 const FeatureBitset &Features, const MCExpr *VCCUsed, 5365 const MCExpr *FlatScrUsed, bool XNACKUsed, 5366 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR, 5367 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange, 5368 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) { 5369 // TODO(scott.linder): These calculations are duplicated from 5370 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
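// A worked example of the block computation below (hypothetical values):
// with NextFreeVGPR = 10 and a VGPR encoding granule of 4,
// alignTo(max(1, 10), 4) / 4 - 1 = 12 / 4 - 1 = 2, so VGPRBlocks encodes 2.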
5371 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 5372 MCContext &Ctx = getContext(); 5373 5374 const MCExpr *NumSGPRs = NextFreeSGPR; 5375 int64_t EvaluatedSGPRs; 5376 5377 if (Version.Major >= 10) 5378 NumSGPRs = MCConstantExpr::create(0, Ctx); 5379 else { 5380 unsigned MaxAddressableNumSGPRs = 5381 IsaInfo::getAddressableNumSGPRs(&getSTI()); 5382 5383 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 && 5384 !Features.test(FeatureSGPRInitBug) && 5385 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) 5386 return OutOfRangeError(SGPRRange); 5387 5388 const MCExpr *ExtraSGPRs = 5389 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx); 5390 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx); 5391 5392 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && 5393 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 5394 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) 5395 return OutOfRangeError(SGPRRange); 5396 5397 if (Features.test(FeatureSGPRInitBug)) 5398 NumSGPRs = 5399 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx); 5400 } 5401 5402 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks: 5403 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1 5404 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR, 5405 unsigned Granule) -> const MCExpr * { 5406 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx); 5407 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx); 5408 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx); 5409 const MCExpr *AlignToGPR = 5410 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx); 5411 const MCExpr *DivGPR = 5412 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx); 5413 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx); 5414 return SubGPR; 5415 }; 5416 5417 VGPRBlocks = GetNumGPRBlocks( 5418 NextFreeVGPR, 5419 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32)); 5420 SGPRBlocks = 5421 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI())); 5422 5423 return false; 5424 } 5425 5426 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 5427 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 5428 return TokError("directive only supported for amdgcn architecture"); 5429 5430 if (!isHsaAbi(getSTI())) 5431 return TokError("directive only supported for amdhsa OS"); 5432 5433 StringRef KernelName; 5434 if (getParser().parseIdentifier(KernelName)) 5435 return true; 5436 5437 AMDGPU::MCKernelDescriptor KD = 5438 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor( 5439 &getSTI(), getContext()); 5440 5441 StringSet<> Seen; 5442 5443 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 5444 5445 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext()); 5446 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext()); 5447 5448 SMRange VGPRRange; 5449 const MCExpr *NextFreeVGPR = ZeroExpr; 5450 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext()); 5451 uint64_t SharedVGPRCount = 0; 5452 uint64_t PreloadLength = 0; 5453 uint64_t PreloadOffset = 0; 5454 SMRange SGPRRange; 5455 const MCExpr *NextFreeSGPR = ZeroExpr; 5456 5457 // Count the number of user SGPRs implied from the enabled feature bits. 5458 unsigned ImpliedUserSGPRCount = 0; 5459 5460 // Track if the asm explicitly contains the directive for the user SGPR 5461 // count. 
5462 std::optional<unsigned> ExplicitUserSGPRCount; 5463 const MCExpr *ReserveVCC = OneExpr; 5464 const MCExpr *ReserveFlatScr = OneExpr; 5465 std::optional<bool> EnableWavefrontSize32; 5466 5467 while (true) { 5468 while (trySkipToken(AsmToken::EndOfStatement)); 5469 5470 StringRef ID; 5471 SMRange IDRange = getTok().getLocRange(); 5472 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 5473 return true; 5474 5475 if (ID == ".end_amdhsa_kernel") 5476 break; 5477 5478 if (!Seen.insert(ID).second) 5479 return TokError(".amdhsa_ directives cannot be repeated"); 5480 5481 SMLoc ValStart = getLoc(); 5482 const MCExpr *ExprVal; 5483 if (getParser().parseExpression(ExprVal)) 5484 return true; 5485 SMLoc ValEnd = getLoc(); 5486 SMRange ValRange = SMRange(ValStart, ValEnd); 5487 5488 int64_t IVal = 0; 5489 uint64_t Val = IVal; 5490 bool EvaluatableExpr; 5491 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) { 5492 if (IVal < 0) 5493 return OutOfRangeError(ValRange); 5494 Val = IVal; 5495 } 5496 5497 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 5498 if (!isUInt<ENTRY##_WIDTH>(Val)) \ 5499 return OutOfRangeError(RANGE); \ 5500 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \ 5501 getContext()); 5502 5503 // Some fields use the parsed value immediately which requires the expression to 5504 // be solvable. 5505 #define EXPR_RESOLVE_OR_ERROR(RESOLVED) \ 5506 if (!(RESOLVED)) \ 5507 return Error(IDRange.Start, "directive should have resolvable expression", \ 5508 IDRange); 5509 5510 if (ID == ".amdhsa_group_segment_fixed_size") { 5511 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) * 5512 CHAR_BIT>(Val)) 5513 return OutOfRangeError(ValRange); 5514 KD.group_segment_fixed_size = ExprVal; 5515 } else if (ID == ".amdhsa_private_segment_fixed_size") { 5516 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) * 5517 CHAR_BIT>(Val)) 5518 return OutOfRangeError(ValRange); 5519 KD.private_segment_fixed_size = ExprVal; 5520 } else if (ID == ".amdhsa_kernarg_size") { 5521 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val)) 5522 return OutOfRangeError(ValRange); 5523 KD.kernarg_size = ExprVal; 5524 } else if (ID == ".amdhsa_user_sgpr_count") { 5525 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5526 ExplicitUserSGPRCount = Val; 5527 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 5528 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5529 if (hasArchitectedFlatScratch()) 5530 return Error(IDRange.Start, 5531 "directive is not supported with architected flat scratch", 5532 IDRange); 5533 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5534 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 5535 ExprVal, ValRange); 5536 if (Val) 5537 ImpliedUserSGPRCount += 4; 5538 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") { 5539 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5540 if (!hasKernargPreload()) 5541 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5542 5543 if (Val > getMaxNumUserSGPRs()) 5544 return OutOfRangeError(ValRange); 5545 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal, 5546 ValRange); 5547 if (Val) { 5548 ImpliedUserSGPRCount += Val; 5549 PreloadLength = Val; 5550 } 5551 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") { 5552 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5553 if (!hasKernargPreload()) 5554 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5555 5556 if (Val >= 1024) 5557 return 
OutOfRangeError(ValRange); 5558 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal, 5559 ValRange); 5560 if (Val) 5561 PreloadOffset = Val; 5562 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 5563 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5564 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5565 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal, 5566 ValRange); 5567 if (Val) 5568 ImpliedUserSGPRCount += 2; 5569 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 5570 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5571 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5572 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal, 5573 ValRange); 5574 if (Val) 5575 ImpliedUserSGPRCount += 2; 5576 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 5577 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5578 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5579 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 5580 ExprVal, ValRange); 5581 if (Val) 5582 ImpliedUserSGPRCount += 2; 5583 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 5584 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5585 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5586 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal, 5587 ValRange); 5588 if (Val) 5589 ImpliedUserSGPRCount += 2; 5590 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 5591 if (hasArchitectedFlatScratch()) 5592 return Error(IDRange.Start, 5593 "directive is not supported with architected flat scratch", 5594 IDRange); 5595 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5596 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5597 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5598 ExprVal, ValRange); 5599 if (Val) 5600 ImpliedUserSGPRCount += 2; 5601 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 5602 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5603 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5604 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 5605 ExprVal, ValRange); 5606 if (Val) 5607 ImpliedUserSGPRCount += 1; 5608 } else if (ID == ".amdhsa_wavefront_size32") { 5609 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5610 if (IVersion.Major < 10) 5611 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5612 EnableWavefrontSize32 = Val; 5613 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5614 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal, 5615 ValRange); 5616 } else if (ID == ".amdhsa_uses_dynamic_stack") { 5617 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5618 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal, 5619 ValRange); 5620 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5621 if (hasArchitectedFlatScratch()) 5622 return Error(IDRange.Start, 5623 "directive is not supported with architected flat scratch", 5624 IDRange); 5625 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5626 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, 5627 ValRange); 5628 } else if (ID == ".amdhsa_enable_private_segment") { 5629 if (!hasArchitectedFlatScratch()) 5630 return Error( 5631 IDRange.Start, 5632 "directive is not supported without architected flat scratch", 5633 IDRange); 5634 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5635 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, 5636 ValRange); 5637 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5638 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5639 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal, 5640 ValRange); 5641 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5642 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5643 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, 
ExprVal, 5644 ValRange); 5645 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5646 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5647 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal, 5648 ValRange); 5649 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5650 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5651 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal, 5652 ValRange); 5653 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5654 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5655 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal, 5656 ValRange); 5657 } else if (ID == ".amdhsa_next_free_vgpr") { 5658 VGPRRange = ValRange; 5659 NextFreeVGPR = ExprVal; 5660 } else if (ID == ".amdhsa_next_free_sgpr") { 5661 SGPRRange = ValRange; 5662 NextFreeSGPR = ExprVal; 5663 } else if (ID == ".amdhsa_accum_offset") { 5664 if (!isGFX90A()) 5665 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5666 AccumOffset = ExprVal; 5667 } else if (ID == ".amdhsa_reserve_vcc") { 5668 if (EvaluatableExpr && !isUInt<1>(Val)) 5669 return OutOfRangeError(ValRange); 5670 ReserveVCC = ExprVal; 5671 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5672 if (IVersion.Major < 7) 5673 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5674 if (hasArchitectedFlatScratch()) 5675 return Error(IDRange.Start, 5676 "directive is not supported with architected flat scratch", 5677 IDRange); 5678 if (EvaluatableExpr && !isUInt<1>(Val)) 5679 return OutOfRangeError(ValRange); 5680 ReserveFlatScr = ExprVal; 5681 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5682 if (IVersion.Major < 8) 5683 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5684 if (!isUInt<1>(Val)) 5685 return OutOfRangeError(ValRange); 5686 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5687 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5688 IDRange); 5689 } else if (ID == ".amdhsa_float_round_mode_32") { 5690 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5691 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal, 5692 ValRange); 5693 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5694 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5695 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal, 5696 ValRange); 5697 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5698 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5699 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal, 5700 ValRange); 5701 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5702 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5703 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal, 5704 ValRange); 5705 } else if (ID == ".amdhsa_dx10_clamp") { 5706 if (IVersion.Major >= 12) 5707 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 5708 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5709 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal, 5710 ValRange); 5711 } else if (ID == ".amdhsa_ieee_mode") { 5712 if (IVersion.Major >= 12) 5713 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 5714 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5715 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal, 5716 ValRange); 5717 } else if (ID == ".amdhsa_fp16_overflow") { 5718 if (IVersion.Major < 9) 5719 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5720 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5721 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal, 5722 ValRange); 5723 } else if (ID == ".amdhsa_tg_split") { 5724 if (!isGFX90A()) 5725 return Error(IDRange.Start, "directive requires 
gfx90a+", IDRange); 5726 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, 5727 ExprVal, ValRange); 5728 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5729 if (IVersion.Major < 10) 5730 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5731 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5732 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal, 5733 ValRange); 5734 } else if (ID == ".amdhsa_memory_ordered") { 5735 if (IVersion.Major < 10) 5736 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5737 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5738 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal, 5739 ValRange); 5740 } else if (ID == ".amdhsa_forward_progress") { 5741 if (IVersion.Major < 10) 5742 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5743 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5744 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal, 5745 ValRange); 5746 } else if (ID == ".amdhsa_shared_vgpr_count") { 5747 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5748 if (IVersion.Major < 10 || IVersion.Major >= 12) 5749 return Error(IDRange.Start, "directive requires gfx10 or gfx11", 5750 IDRange); 5751 SharedVGPRCount = Val; 5752 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5753 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal, 5754 ValRange); 5755 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5756 PARSE_BITS_ENTRY( 5757 KD.compute_pgm_rsrc2, 5758 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 5759 ExprVal, ValRange); 5760 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5761 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5762 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5763 ExprVal, ValRange); 5764 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5765 PARSE_BITS_ENTRY( 5766 KD.compute_pgm_rsrc2, 5767 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 5768 ExprVal, ValRange); 5769 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5771 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5772 ExprVal, ValRange); 5773 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5775 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5776 ExprVal, ValRange); 5777 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5778 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5779 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5780 ExprVal, ValRange); 5781 } else if (ID == ".amdhsa_exception_int_div_zero") { 5782 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5783 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5784 ExprVal, ValRange); 5785 } else if (ID == ".amdhsa_round_robin_scheduling") { 5786 if (IVersion.Major < 12) 5787 return Error(IDRange.Start, "directive requires gfx12+", IDRange); 5788 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5789 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal, 5790 ValRange); 5791 } else { 5792 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5793 } 5794 5795 #undef PARSE_BITS_ENTRY 5796 } 5797 5798 if (!Seen.contains(".amdhsa_next_free_vgpr")) 5799 return TokError(".amdhsa_next_free_vgpr directive is required"); 5800 5801 if (!Seen.contains(".amdhsa_next_free_sgpr")) 5802 return TokError(".amdhsa_next_free_sgpr directive is required"); 5803 5804 const MCExpr *VGPRBlocks; 5805 const MCExpr *SGPRBlocks; 5806 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5807 
getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5808 EnableWavefrontSize32, NextFreeVGPR, 5809 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5810 SGPRBlocks)) 5811 return true; 5812 5813 int64_t EvaluatedVGPRBlocks; 5814 bool VGPRBlocksEvaluatable = 5815 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks); 5816 if (VGPRBlocksEvaluatable && 5817 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5818 static_cast<uint64_t>(EvaluatedVGPRBlocks))) { 5819 return OutOfRangeError(VGPRRange); 5820 } 5821 AMDGPU::MCKernelDescriptor::bits_set( 5822 KD.compute_pgm_rsrc1, VGPRBlocks, 5823 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT, 5824 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext()); 5825 5826 int64_t EvaluatedSGPRBlocks; 5827 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) && 5828 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5829 static_cast<uint64_t>(EvaluatedSGPRBlocks))) 5830 return OutOfRangeError(SGPRRange); 5831 AMDGPU::MCKernelDescriptor::bits_set( 5832 KD.compute_pgm_rsrc1, SGPRBlocks, 5833 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT, 5834 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext()); 5835 5836 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5837 return TokError("amdgpu_user_sgpr_count smaller than implied by " 5838 "enabled user SGPRs"); 5839 5840 unsigned UserSGPRCount = 5841 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5842 5843 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5844 return TokError("too many user SGPRs enabled"); 5845 AMDGPU::MCKernelDescriptor::bits_set( 5846 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()), 5847 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT, 5848 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext()); 5849 5850 int64_t IVal = 0; 5851 if (!KD.kernarg_size->evaluateAsAbsolute(IVal)) 5852 return TokError("Kernarg size should be resolvable"); 5853 uint64_t kernarg_size = IVal; 5854 if (PreloadLength && kernarg_size && 5855 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size)) 5856 return TokError("Kernarg preload length + offset is larger than the " 5857 "kernarg segment size"); 5858 5859 if (isGFX90A()) { 5860 if (!Seen.contains(".amdhsa_accum_offset")) 5861 return TokError(".amdhsa_accum_offset directive is required"); 5862 int64_t EvaluatedAccum; 5863 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum); 5864 uint64_t UEvaluatedAccum = EvaluatedAccum; 5865 if (AccumEvaluatable && 5866 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3))) 5867 return TokError("accum_offset should be in range [4..256] in " 5868 "increments of 4"); 5869 5870 int64_t EvaluatedNumVGPR; 5871 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) && 5872 AccumEvaluatable && 5873 UEvaluatedAccum > 5874 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4)) 5875 return TokError("accum_offset exceeds total VGPR allocation"); 5876 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub( 5877 MCBinaryExpr::createDiv( 5878 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()), 5879 MCConstantExpr::create(1, getContext()), getContext()); 5880 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum, 5881 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, 5882 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5883 getContext()); 5884 } 5885 5886 if (IVersion.Major >= 10 && IVersion.Major < 12) { 5887 // SharedVGPRCount < 16 checked by 
PARSE_BITS_ENTRY 5888 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { 5889 return TokError("shared_vgpr_count directive not valid on " 5890 "wavefront size 32"); 5891 } 5892 5893 if (VGPRBlocksEvaluatable && 5894 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) > 5895 63)) { 5896 return TokError("shared_vgpr_count*2 + " 5897 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5898 "exceed 63\n"); 5899 } 5900 } 5901 5902 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD, 5903 NextFreeVGPR, NextFreeSGPR, 5904 ReserveVCC, ReserveFlatScr); 5905 return false; 5906 } 5907 5908 bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() { 5909 uint32_t Version; 5910 if (ParseAsAbsoluteExpression(Version)) 5911 return true; 5912 5913 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version); 5914 return false; 5915 } 5916 5917 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5918 AMDGPUMCKernelCodeT &C) { 5919 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5920 // assembly for backwards compatibility. 5921 if (ID == "max_scratch_backing_memory_byte_size") { 5922 Parser.eatToEndOfStatement(); 5923 return false; 5924 } 5925 5926 SmallString<40> ErrStr; 5927 raw_svector_ostream Err(ErrStr); 5928 if (!C.ParseKernelCodeT(ID, getParser(), Err)) { 5929 return TokError(Err.str()); 5930 } 5931 Lex(); 5932 5933 if (ID == "enable_wavefront_size32") { 5934 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5935 if (!isGFX10Plus()) 5936 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5937 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5938 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5939 } else { 5940 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5941 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5942 } 5943 } 5944 5945 if (ID == "wavefront_size") { 5946 if (C.wavefront_size == 5) { 5947 if (!isGFX10Plus()) 5948 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5949 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5950 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5951 } else if (C.wavefront_size == 6) { 5952 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5953 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5954 } 5955 } 5956 5957 return false; 5958 } 5959 5960 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5961 AMDGPUMCKernelCodeT KernelCode; 5962 KernelCode.initDefault(&getSTI(), getContext()); 5963 5964 while (true) { 5965 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5966 // will set the current token to EndOfStatement.
5967 while(trySkipToken(AsmToken::EndOfStatement)); 5968 5969 StringRef ID; 5970 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5971 return true; 5972 5973 if (ID == ".end_amd_kernel_code_t") 5974 break; 5975 5976 if (ParseAMDKernelCodeTValue(ID, KernelCode)) 5977 return true; 5978 } 5979 5980 KernelCode.validate(&getSTI(), getContext()); 5981 getTargetStreamer().EmitAMDKernelCodeT(KernelCode); 5982 5983 return false; 5984 } 5985 5986 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5987 StringRef KernelName; 5988 if (!parseId(KernelName, "expected symbol name")) 5989 return true; 5990 5991 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5992 ELF::STT_AMDGPU_HSA_KERNEL); 5993 5994 KernelScope.initialize(getContext()); 5995 return false; 5996 } 5997 5998 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5999 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 6000 return Error(getLoc(), 6001 ".amd_amdgpu_isa directive is not available on non-amdgcn " 6002 "architectures"); 6003 } 6004 6005 auto TargetIDDirective = getLexer().getTok().getStringContents(); 6006 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 6007 return Error(getParser().getTok().getLoc(), "target id must match options"); 6008 6009 getTargetStreamer().EmitISAVersion(); 6010 Lex(); 6011 6012 return false; 6013 } 6014 6015 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 6016 assert(isHsaAbi(getSTI())); 6017 6018 std::string HSAMetadataString; 6019 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin, 6020 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString)) 6021 return true; 6022 6023 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 6024 return Error(getLoc(), "invalid HSA metadata"); 6025 6026 return false; 6027 } 6028 6029 /// Common code to parse out a block of text (typically YAML) between start and 6030 /// end directives. 6031 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 6032 const char *AssemblerDirectiveEnd, 6033 std::string &CollectString) { 6034 6035 raw_string_ostream CollectStream(CollectString); 6036 6037 getLexer().setSkipSpace(false); 6038 6039 bool FoundEnd = false; 6040 while (!isToken(AsmToken::Eof)) { 6041 while (isToken(AsmToken::Space)) { 6042 CollectStream << getTokenStr(); 6043 Lex(); 6044 } 6045 6046 if (trySkipId(AssemblerDirectiveEnd)) { 6047 FoundEnd = true; 6048 break; 6049 } 6050 6051 CollectStream << Parser.parseStringToEndOfStatement() 6052 << getContext().getAsmInfo()->getSeparatorString(); 6053 6054 Parser.eatToEndOfStatement(); 6055 } 6056 6057 getLexer().setSkipSpace(true); 6058 6059 if (isToken(AsmToken::Eof) && !FoundEnd) { 6060 return TokError(Twine("expected directive ") + 6061 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 6062 } 6063 6064 CollectStream.flush(); 6065 return false; 6066 } 6067 6068 /// Parse the assembler directive for new MsgPack-format PAL metadata. 6069 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 6070 std::string String; 6071 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 6072 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 6073 return true; 6074 6075 auto PALMetadata = getTargetStreamer().getPALMetadata(); 6076 if (!PALMetadata->setFromString(String)) 6077 return Error(getLoc(), "invalid PAL metadata"); 6078 return false; 6079 } 6080 6081 /// Parse the assembler directive for old linear-format PAL metadata. 
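/// The legacy form expects an even-length, comma-separated list of
/// register/value pairs, e.g. (hypothetical values): 0x2c0a, 0x0, 0x2c0b, 0x42;
/// each pair is forwarded to PALMetadata->setRegister(Key, Value).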
6082 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 6083 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 6084 return Error(getLoc(), 6085 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 6086 "not available on non-amdpal OSes")).str()); 6087 } 6088 6089 auto PALMetadata = getTargetStreamer().getPALMetadata(); 6090 PALMetadata->setLegacy(); 6091 for (;;) { 6092 uint32_t Key, Value; 6093 if (ParseAsAbsoluteExpression(Key)) { 6094 return TokError(Twine("invalid value in ") + 6095 Twine(PALMD::AssemblerDirective)); 6096 } 6097 if (!trySkipToken(AsmToken::Comma)) { 6098 return TokError(Twine("expected an even number of values in ") + 6099 Twine(PALMD::AssemblerDirective)); 6100 } 6101 if (ParseAsAbsoluteExpression(Value)) { 6102 return TokError(Twine("invalid value in ") + 6103 Twine(PALMD::AssemblerDirective)); 6104 } 6105 PALMetadata->setRegister(Key, Value); 6106 if (!trySkipToken(AsmToken::Comma)) 6107 break; 6108 } 6109 return false; 6110 } 6111 6112 /// ParseDirectiveAMDGPULDS 6113 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 6114 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 6115 if (getParser().checkForValidSection()) 6116 return true; 6117 6118 StringRef Name; 6119 SMLoc NameLoc = getLoc(); 6120 if (getParser().parseIdentifier(Name)) 6121 return TokError("expected identifier in directive"); 6122 6123 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 6124 if (getParser().parseComma()) 6125 return true; 6126 6127 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 6128 6129 int64_t Size; 6130 SMLoc SizeLoc = getLoc(); 6131 if (getParser().parseAbsoluteExpression(Size)) 6132 return true; 6133 if (Size < 0) 6134 return Error(SizeLoc, "size must be non-negative"); 6135 if (Size > LocalMemorySize) 6136 return Error(SizeLoc, "size is too large"); 6137 6138 int64_t Alignment = 4; 6139 if (trySkipToken(AsmToken::Comma)) { 6140 SMLoc AlignLoc = getLoc(); 6141 if (getParser().parseAbsoluteExpression(Alignment)) 6142 return true; 6143 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 6144 return Error(AlignLoc, "alignment must be a power of two"); 6145 6146 // Alignment larger than the size of LDS is possible in theory, as long 6147 // as the linker manages to place the symbol at address 0, but we do want 6148 // to make sure the alignment fits nicely into a 32-bit integer. 6149 if (Alignment >= 1u << 31) 6150 return Error(AlignLoc, "alignment is too large"); 6151 } 6152 6153 if (parseEOL()) 6154 return true; 6155 6156 Symbol->redefineIfPossible(); 6157 if (!Symbol->isUndefined()) 6158 return Error(NameLoc, "invalid symbol redefinition"); 6159 6160 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 6161 return false; 6162 } 6163 6164 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 6165 StringRef IDVal = DirectiveID.getString(); 6166 6167 if (isHsaAbi(getSTI())) { 6168 if (IDVal == ".amdhsa_kernel") 6169 return ParseDirectiveAMDHSAKernel(); 6170 6171 if (IDVal == ".amdhsa_code_object_version") 6172 return ParseDirectiveAMDHSACodeObjectVersion(); 6173 6174 // TODO: Restructure/combine with PAL metadata directive.
6175 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 6176 return ParseDirectiveHSAMetadata(); 6177 } else { 6178 if (IDVal == ".amd_kernel_code_t") 6179 return ParseDirectiveAMDKernelCodeT(); 6180 6181 if (IDVal == ".amdgpu_hsa_kernel") 6182 return ParseDirectiveAMDGPUHsaKernel(); 6183 6184 if (IDVal == ".amd_amdgpu_isa") 6185 return ParseDirectiveISAVersion(); 6186 6187 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) { 6188 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) + 6189 Twine(" directive is " 6190 "not available on non-amdhsa OSes")) 6191 .str()); 6192 } 6193 } 6194 6195 if (IDVal == ".amdgcn_target") 6196 return ParseDirectiveAMDGCNTarget(); 6197 6198 if (IDVal == ".amdgpu_lds") 6199 return ParseDirectiveAMDGPULDS(); 6200 6201 if (IDVal == PALMD::AssemblerDirectiveBegin) 6202 return ParseDirectivePALMetadataBegin(); 6203 6204 if (IDVal == PALMD::AssemblerDirective) 6205 return ParseDirectivePALMetadata(); 6206 6207 return true; 6208 } 6209 6210 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 6211 unsigned RegNo) { 6212 6213 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 6214 return isGFX9Plus(); 6215 6216 // GFX10+ has 2 more SGPRs 104 and 105. 6217 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 6218 return hasSGPR104_SGPR105(); 6219 6220 switch (RegNo) { 6221 case AMDGPU::SRC_SHARED_BASE_LO: 6222 case AMDGPU::SRC_SHARED_BASE: 6223 case AMDGPU::SRC_SHARED_LIMIT_LO: 6224 case AMDGPU::SRC_SHARED_LIMIT: 6225 case AMDGPU::SRC_PRIVATE_BASE_LO: 6226 case AMDGPU::SRC_PRIVATE_BASE: 6227 case AMDGPU::SRC_PRIVATE_LIMIT_LO: 6228 case AMDGPU::SRC_PRIVATE_LIMIT: 6229 return isGFX9Plus(); 6230 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 6231 return isGFX9Plus() && !isGFX11Plus(); 6232 case AMDGPU::TBA: 6233 case AMDGPU::TBA_LO: 6234 case AMDGPU::TBA_HI: 6235 case AMDGPU::TMA: 6236 case AMDGPU::TMA_LO: 6237 case AMDGPU::TMA_HI: 6238 return !isGFX9Plus(); 6239 case AMDGPU::XNACK_MASK: 6240 case AMDGPU::XNACK_MASK_LO: 6241 case AMDGPU::XNACK_MASK_HI: 6242 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 6243 case AMDGPU::SGPR_NULL: 6244 return isGFX10Plus(); 6245 default: 6246 break; 6247 } 6248 6249 if (isCI()) 6250 return true; 6251 6252 if (isSI() || isGFX10Plus()) { 6253 // No flat_scr on SI. 6254 // On GFX10Plus flat scratch is not a valid register operand and can only be 6255 // accessed with s_setreg/s_getreg. 6256 switch (RegNo) { 6257 case AMDGPU::FLAT_SCR: 6258 case AMDGPU::FLAT_SCR_LO: 6259 case AMDGPU::FLAT_SCR_HI: 6260 return false; 6261 default: 6262 return true; 6263 } 6264 } 6265 6266 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 6267 // SI/CI have. 6268 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 6269 return hasSGPR102_SGPR103(); 6270 6271 return true; 6272 } 6273 6274 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands, 6275 StringRef Mnemonic, 6276 OperandMode Mode) { 6277 ParseStatus Res = parseVOPD(Operands); 6278 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement)) 6279 return Res; 6280 6281 // Try to parse with a custom parser 6282 Res = MatchOperandParserImpl(Operands, Mnemonic); 6283 6284 // If we successfully parsed the operand or if there was an error parsing, 6285 // we are done. 6286 // 6287 // If we are parsing after we reach EndOfStatement then this means we 6288 // are appending default values to the Operands list.
This is only done 6289 // by custom parser, so we shouldn't continue on to the generic parsing. 6290 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement)) 6291 return Res; 6292 6293 SMLoc RBraceLoc; 6294 SMLoc LBraceLoc = getLoc(); 6295 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 6296 unsigned Prefix = Operands.size(); 6297 6298 for (;;) { 6299 auto Loc = getLoc(); 6300 Res = parseReg(Operands); 6301 if (Res.isNoMatch()) 6302 Error(Loc, "expected a register"); 6303 if (!Res.isSuccess()) 6304 return ParseStatus::Failure; 6305 6306 RBraceLoc = getLoc(); 6307 if (trySkipToken(AsmToken::RBrac)) 6308 break; 6309 6310 if (!skipToken(AsmToken::Comma, 6311 "expected a comma or a closing square bracket")) 6312 return ParseStatus::Failure; 6313 } 6314 6315 if (Operands.size() - Prefix > 1) { 6316 Operands.insert(Operands.begin() + Prefix, 6317 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 6318 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 6319 } 6320 6321 return ParseStatus::Success; 6322 } 6323 6324 return parseRegOrImm(Operands); 6325 } 6326 6327 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 6328 // Clear any forced encodings from the previous instruction. 6329 setForcedEncodingSize(0); 6330 setForcedDPP(false); 6331 setForcedSDWA(false); 6332 6333 if (Name.ends_with("_e64_dpp")) { 6334 setForcedDPP(true); 6335 setForcedEncodingSize(64); 6336 return Name.substr(0, Name.size() - 8); 6337 } 6338 if (Name.ends_with("_e64")) { 6339 setForcedEncodingSize(64); 6340 return Name.substr(0, Name.size() - 4); 6341 } 6342 if (Name.ends_with("_e32")) { 6343 setForcedEncodingSize(32); 6344 return Name.substr(0, Name.size() - 4); 6345 } 6346 if (Name.ends_with("_dpp")) { 6347 setForcedDPP(true); 6348 return Name.substr(0, Name.size() - 4); 6349 } 6350 if (Name.ends_with("_sdwa")) { 6351 setForcedSDWA(true); 6352 return Name.substr(0, Name.size() - 5); 6353 } 6354 return Name; 6355 } 6356 6357 static void applyMnemonicAliases(StringRef &Mnemonic, 6358 const FeatureBitset &Features, 6359 unsigned VariantID); 6360 6361 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 6362 StringRef Name, 6363 SMLoc NameLoc, OperandVector &Operands) { 6364 // Add the instruction mnemonic 6365 Name = parseMnemonicSuffix(Name); 6366 6367 // If the target architecture uses MnemonicAlias, call it here to parse 6368 // operands correctly. 6369 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 6370 6371 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 6372 6373 bool IsMIMG = Name.starts_with("image_"); 6374 6375 while (!trySkipToken(AsmToken::EndOfStatement)) { 6376 OperandMode Mode = OperandMode_Default; 6377 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 6378 Mode = OperandMode_NSA; 6379 ParseStatus Res = parseOperand(Operands, Name, Mode); 6380 6381 if (!Res.isSuccess()) { 6382 checkUnsupportedInstruction(Name, NameLoc); 6383 if (!Parser.hasPendingError()) { 6384 // FIXME: use real operand location rather than the current location. 6385 StringRef Msg = Res.isFailure() ? "failed parsing operand." 6386 : "not a valid operand."; 6387 Error(getLoc(), Msg); 6388 } 6389 while (!trySkipToken(AsmToken::EndOfStatement)) { 6390 lex(); 6391 } 6392 return true; 6393 } 6394 6395 // Eat the comma or space if there is one. 
6396 trySkipToken(AsmToken::Comma); 6397 } 6398 6399 return false; 6400 } 6401 6402 //===----------------------------------------------------------------------===// 6403 // Utility functions 6404 //===----------------------------------------------------------------------===// 6405 6406 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name, 6407 OperandVector &Operands) { 6408 SMLoc S = getLoc(); 6409 if (!trySkipId(Name)) 6410 return ParseStatus::NoMatch; 6411 6412 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S)); 6413 return ParseStatus::Success; 6414 } 6415 6416 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, 6417 int64_t &IntVal) { 6418 6419 if (!trySkipId(Prefix, AsmToken::Colon)) 6420 return ParseStatus::NoMatch; 6421 6422 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure; 6423 } 6424 6425 ParseStatus AMDGPUAsmParser::parseIntWithPrefix( 6426 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6427 std::function<bool(int64_t &)> ConvertResult) { 6428 SMLoc S = getLoc(); 6429 int64_t Value = 0; 6430 6431 ParseStatus Res = parseIntWithPrefix(Prefix, Value); 6432 if (!Res.isSuccess()) 6433 return Res; 6434 6435 if (ConvertResult && !ConvertResult(Value)) { 6436 Error(S, "invalid " + StringRef(Prefix) + " value."); 6437 } 6438 6439 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 6440 return ParseStatus::Success; 6441 } 6442 6443 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix( 6444 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6445 bool (*ConvertResult)(int64_t &)) { 6446 SMLoc S = getLoc(); 6447 if (!trySkipId(Prefix, AsmToken::Colon)) 6448 return ParseStatus::NoMatch; 6449 6450 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 6451 return ParseStatus::Failure; 6452 6453 unsigned Val = 0; 6454 const unsigned MaxSize = 4; 6455 6456 // FIXME: How to verify the number of elements matches the number of src 6457 // operands? 
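// Packing example (hypothetical operand): "neg:[1,0,1]" places element I in
// bit I of Val, giving Val = 0b101; at most MaxSize (4) elements are accepted.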
6458 for (int I = 0; ; ++I) { 6459 int64_t Op; 6460 SMLoc Loc = getLoc(); 6461 if (!parseExpr(Op)) 6462 return ParseStatus::Failure; 6463 6464 if (Op != 0 && Op != 1) 6465 return Error(Loc, "invalid " + StringRef(Prefix) + " value."); 6466 6467 Val |= (Op << I); 6468 6469 if (trySkipToken(AsmToken::RBrac)) 6470 break; 6471 6472 if (I + 1 == MaxSize) 6473 return Error(getLoc(), "expected a closing square bracket"); 6474 6475 if (!skipToken(AsmToken::Comma, "expected a comma")) 6476 return ParseStatus::Failure; 6477 } 6478 6479 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 6480 return ParseStatus::Success; 6481 } 6482 6483 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name, 6484 OperandVector &Operands, 6485 AMDGPUOperand::ImmTy ImmTy) { 6486 int64_t Bit; 6487 SMLoc S = getLoc(); 6488 6489 if (trySkipId(Name)) { 6490 Bit = 1; 6491 } else if (trySkipId("no", Name)) { 6492 Bit = 0; 6493 } else { 6494 return ParseStatus::NoMatch; 6495 } 6496 6497 if (Name == "r128" && !hasMIMG_R128()) 6498 return Error(S, "r128 modifier is not supported on this GPU"); 6499 if (Name == "a16" && !hasA16()) 6500 return Error(S, "a16 modifier is not supported on this GPU"); 6501 6502 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 6503 ImmTy = AMDGPUOperand::ImmTyR128A16; 6504 6505 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 6506 return ParseStatus::Success; 6507 } 6508 6509 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, 6510 bool &Disabling) const { 6511 Disabling = Id.consume_front("no"); 6512 6513 if (isGFX940() && !Mnemo.starts_with("s_")) { 6514 return StringSwitch<unsigned>(Id) 6515 .Case("nt", AMDGPU::CPol::NT) 6516 .Case("sc0", AMDGPU::CPol::SC0) 6517 .Case("sc1", AMDGPU::CPol::SC1) 6518 .Default(0); 6519 } 6520 6521 return StringSwitch<unsigned>(Id) 6522 .Case("dlc", AMDGPU::CPol::DLC) 6523 .Case("glc", AMDGPU::CPol::GLC) 6524 .Case("scc", AMDGPU::CPol::SCC) 6525 .Case("slc", AMDGPU::CPol::SLC) 6526 .Default(0); 6527 } 6528 6529 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 6530 if (isGFX12Plus()) { 6531 SMLoc StringLoc = getLoc(); 6532 6533 int64_t CPolVal = 0; 6534 ParseStatus ResTH = ParseStatus::NoMatch; 6535 ParseStatus ResScope = ParseStatus::NoMatch; 6536 6537 for (;;) { 6538 if (ResTH.isNoMatch()) { 6539 int64_t TH; 6540 ResTH = parseTH(Operands, TH); 6541 if (ResTH.isFailure()) 6542 return ResTH; 6543 if (ResTH.isSuccess()) { 6544 CPolVal |= TH; 6545 continue; 6546 } 6547 } 6548 6549 if (ResScope.isNoMatch()) { 6550 int64_t Scope; 6551 ResScope = parseScope(Operands, Scope); 6552 if (ResScope.isFailure()) 6553 return ResScope; 6554 if (ResScope.isSuccess()) { 6555 CPolVal |= Scope; 6556 continue; 6557 } 6558 } 6559 6560 break; 6561 } 6562 6563 if (ResTH.isNoMatch() && ResScope.isNoMatch()) 6564 return ParseStatus::NoMatch; 6565 6566 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc, 6567 AMDGPUOperand::ImmTyCPol)); 6568 return ParseStatus::Success; 6569 } 6570 6571 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 6572 SMLoc OpLoc = getLoc(); 6573 unsigned Enabled = 0, Seen = 0; 6574 for (;;) { 6575 SMLoc S = getLoc(); 6576 bool Disabling; 6577 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling); 6578 if (!CPol) 6579 break; 6580 6581 lex(); 6582 6583 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) 6584 return Error(S, "dlc modifier is not supported on this GPU"); 6585 6586 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) 6587 return Error(S, "scc modifier is not supported 
on this GPU"); 6588 6589 if (Seen & CPol) 6590 return Error(S, "duplicate cache policy modifier"); 6591 6592 if (!Disabling) 6593 Enabled |= CPol; 6594 6595 Seen |= CPol; 6596 } 6597 6598 if (!Seen) 6599 return ParseStatus::NoMatch; 6600 6601 Operands.push_back( 6602 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol)); 6603 return ParseStatus::Success; 6604 } 6605 6606 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands, 6607 int64_t &Scope) { 6608 Scope = AMDGPU::CPol::SCOPE_CU; // default; 6609 6610 StringRef Value; 6611 SMLoc StringLoc; 6612 ParseStatus Res; 6613 6614 Res = parseStringWithPrefix("scope", Value, StringLoc); 6615 if (!Res.isSuccess()) 6616 return Res; 6617 6618 Scope = StringSwitch<int64_t>(Value) 6619 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU) 6620 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE) 6621 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV) 6622 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS) 6623 .Default(0xffffffff); 6624 6625 if (Scope == 0xffffffff) 6626 return Error(StringLoc, "invalid scope value"); 6627 6628 return ParseStatus::Success; 6629 } 6630 6631 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { 6632 TH = AMDGPU::CPol::TH_RT; // default 6633 6634 StringRef Value; 6635 SMLoc StringLoc; 6636 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc); 6637 if (!Res.isSuccess()) 6638 return Res; 6639 6640 if (Value == "TH_DEFAULT") 6641 TH = AMDGPU::CPol::TH_RT; 6642 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" || 6643 Value == "TH_LOAD_NT_WB") { 6644 return Error(StringLoc, "invalid th value"); 6645 } else if (Value.consume_front("TH_ATOMIC_")) { 6646 TH = AMDGPU::CPol::TH_TYPE_ATOMIC; 6647 } else if (Value.consume_front("TH_LOAD_")) { 6648 TH = AMDGPU::CPol::TH_TYPE_LOAD; 6649 } else if (Value.consume_front("TH_STORE_")) { 6650 TH = AMDGPU::CPol::TH_TYPE_STORE; 6651 } else { 6652 return Error(StringLoc, "invalid th value"); 6653 } 6654 6655 if (Value == "BYPASS") 6656 TH |= AMDGPU::CPol::TH_REAL_BYPASS; 6657 6658 if (TH != 0) { 6659 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC) 6660 TH |= StringSwitch<int64_t>(Value) 6661 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 6662 .Case("RT", AMDGPU::CPol::TH_RT) 6663 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 6664 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT) 6665 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT | 6666 AMDGPU::CPol::TH_ATOMIC_RETURN) 6667 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE) 6668 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE | 6669 AMDGPU::CPol::TH_ATOMIC_NT) 6670 .Default(0xffffffff); 6671 else 6672 TH |= StringSwitch<int64_t>(Value) 6673 .Case("RT", AMDGPU::CPol::TH_RT) 6674 .Case("NT", AMDGPU::CPol::TH_NT) 6675 .Case("HT", AMDGPU::CPol::TH_HT) 6676 .Case("LU", AMDGPU::CPol::TH_LU) 6677 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB) 6678 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT) 6679 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT) 6680 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT) 6681 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB) 6682 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS) 6683 .Default(0xffffffff); 6684 } 6685 6686 if (TH == 0xffffffff) 6687 return Error(StringLoc, "invalid th value"); 6688 6689 return ParseStatus::Success; 6690 } 6691 6692 static void addOptionalImmOperand( 6693 MCInst& Inst, const OperandVector& Operands, 6694 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 6695 AMDGPUOperand::ImmTy ImmT, 6696 int64_t Default = 0) { 6697 auto i = OptionalIdx.find(ImmT); 6698 if (i != OptionalIdx.end()) { 6699 unsigned Idx = i->second; 
6700 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 6701 } else { 6702 Inst.addOperand(MCOperand::createImm(Default)); 6703 } 6704 } 6705 6706 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 6707 StringRef &Value, 6708 SMLoc &StringLoc) { 6709 if (!trySkipId(Prefix, AsmToken::Colon)) 6710 return ParseStatus::NoMatch; 6711 6712 StringLoc = getLoc(); 6713 return parseId(Value, "expected an identifier") ? ParseStatus::Success 6714 : ParseStatus::Failure; 6715 } 6716 6717 //===----------------------------------------------------------------------===// 6718 // MTBUF format 6719 //===----------------------------------------------------------------------===// 6720 6721 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6722 int64_t MaxVal, 6723 int64_t &Fmt) { 6724 int64_t Val; 6725 SMLoc Loc = getLoc(); 6726 6727 auto Res = parseIntWithPrefix(Pref, Val); 6728 if (Res.isFailure()) 6729 return false; 6730 if (Res.isNoMatch()) 6731 return true; 6732 6733 if (Val < 0 || Val > MaxVal) { 6734 Error(Loc, Twine("out of range ", StringRef(Pref))); 6735 return false; 6736 } 6737 6738 Fmt = Val; 6739 return true; 6740 } 6741 6742 ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands, 6743 AMDGPUOperand::ImmTy ImmTy) { 6744 const char *Pref = "index_key"; 6745 int64_t ImmVal = 0; 6746 SMLoc Loc = getLoc(); 6747 auto Res = parseIntWithPrefix(Pref, ImmVal); 6748 if (!Res.isSuccess()) 6749 return Res; 6750 6751 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1)) 6752 return Error(Loc, Twine("out of range ", StringRef(Pref))); 6753 6754 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3)) 6755 return Error(Loc, Twine("out of range ", StringRef(Pref))); 6756 6757 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy)); 6758 return ParseStatus::Success; 6759 } 6760 6761 ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) { 6762 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit); 6763 } 6764 6765 ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) { 6766 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit); 6767 } 6768 6769 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6770 // values to live in a joint format operand in the MCInst encoding. 6771 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6772 using namespace llvm::AMDGPU::MTBUFFormat; 6773 6774 int64_t Dfmt = DFMT_UNDEF; 6775 int64_t Nfmt = NFMT_UNDEF; 6776 6777 // dfmt and nfmt can appear in either order, and each is optional. 6778 for (int I = 0; I < 2; ++I) { 6779 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6780 return ParseStatus::Failure; 6781 6782 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) 6783 return ParseStatus::Failure; 6784 6785 // Skip optional comma between dfmt/nfmt 6786 // but guard against 2 commas following each other. 6787 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6788 !peekToken().is(AsmToken::Comma)) { 6789 trySkipToken(AsmToken::Comma); 6790 } 6791 } 6792 6793 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6794 return ParseStatus::NoMatch; 6795 6796 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6797 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6798 6799 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6800 return ParseStatus::Success; 6801 } 6802 6803 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6804 using namespace llvm::AMDGPU::MTBUFFormat; 6805 6806 int64_t Fmt = UFMT_UNDEF; 6807 6808 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6809 return ParseStatus::Failure; 6810 6811 if (Fmt == UFMT_UNDEF) 6812 return ParseStatus::NoMatch; 6813 6814 Format = Fmt; 6815 return ParseStatus::Success; 6816 } 6817 6818 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6819 int64_t &Nfmt, 6820 StringRef FormatStr, 6821 SMLoc Loc) { 6822 using namespace llvm::AMDGPU::MTBUFFormat; 6823 int64_t Format; 6824 6825 Format = getDfmt(FormatStr); 6826 if (Format != DFMT_UNDEF) { 6827 Dfmt = Format; 6828 return true; 6829 } 6830 6831 Format = getNfmt(FormatStr, getSTI()); 6832 if (Format != NFMT_UNDEF) { 6833 Nfmt = Format; 6834 return true; 6835 } 6836 6837 Error(Loc, "unsupported format"); 6838 return false; 6839 } 6840 6841 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6842 SMLoc FormatLoc, 6843 int64_t &Format) { 6844 using namespace llvm::AMDGPU::MTBUFFormat; 6845 6846 int64_t Dfmt = DFMT_UNDEF; 6847 int64_t Nfmt = NFMT_UNDEF; 6848 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6849 return ParseStatus::Failure; 6850 6851 if (trySkipToken(AsmToken::Comma)) { 6852 StringRef Str; 6853 SMLoc Loc = getLoc(); 6854 if (!parseId(Str, "expected a format string") || 6855 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) 6856 return ParseStatus::Failure; 6857 if (Dfmt == DFMT_UNDEF) 6858 return Error(Loc, "duplicate numeric format"); 6859 if (Nfmt == NFMT_UNDEF) 6860 return Error(Loc, "duplicate data format"); 6861 } 6862 6863 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6864 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6865 6866 if (isGFX10Plus()) { 6867 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6868 if (Ufmt == UFMT_UNDEF) 6869 return Error(FormatLoc, "unsupported format"); 6870 Format = Ufmt; 6871 } else { 6872 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6873 } 6874 6875 return ParseStatus::Success; 6876 } 6877 6878 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6879 SMLoc Loc, 6880 int64_t &Format) { 6881 using namespace llvm::AMDGPU::MTBUFFormat; 6882 6883 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6884 if (Id == UFMT_UNDEF) 6885 return ParseStatus::NoMatch; 6886 6887 if (!isGFX10Plus()) 6888 return Error(Loc, "unified format is not supported on this GPU"); 6889 6890 Format = Id; 6891 return ParseStatus::Success; 6892 } 6893 6894 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6895 using namespace llvm::AMDGPU::MTBUFFormat; 6896 SMLoc Loc = getLoc(); 6897 6898 if (!parseExpr(Format)) 6899 return ParseStatus::Failure; 6900 if (!isValidFormatEncoding(Format, getSTI())) 6901 return Error(Loc, "out of range format"); 6902 6903 return ParseStatus::Success; 6904 } 6905 6906 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6907 using namespace llvm::AMDGPU::MTBUFFormat; 6908 6909 if (!trySkipId("format", AsmToken::Colon)) 6910 return ParseStatus::NoMatch; 6911 6912 if (trySkipToken(AsmToken::LBrac)) { 6913 StringRef FormatStr; 6914 SMLoc Loc = getLoc(); 6915 if (!parseId(FormatStr, "expected a format string")) 6916 return ParseStatus::Failure; 6917 6918 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6919 if (Res.isNoMatch()) 6920 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6921 if (!Res.isSuccess()) 6922 return Res; 6923 6924 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6925 return ParseStatus::Failure; 6926 6927 return ParseStatus::Success; 6928 } 6929 6930 return parseNumericFormat(Format); 6931 } 6932 6933 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6934 using namespace llvm::AMDGPU::MTBUFFormat; 6935 6936 int64_t Format = getDefaultFormatEncoding(getSTI()); 6937 ParseStatus Res; 6938 SMLoc Loc = getLoc(); 6939 6940 // Parse legacy format syntax. 6941 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6942 if (Res.isFailure()) 6943 return Res; 6944 6945 bool FormatFound = Res.isSuccess(); 6946 6947 Operands.push_back( 6948 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6949 6950 if (FormatFound) 6951 trySkipToken(AsmToken::Comma); 6952 6953 if (isToken(AsmToken::EndOfStatement)) { 6954 // We are expecting an soffset operand, 6955 // but let matcher handle the error. 6956 return ParseStatus::Success; 6957 } 6958 6959 // Parse soffset. 
6960 Res = parseRegOrImm(Operands); 6961 if (!Res.isSuccess()) 6962 return Res; 6963 6964 trySkipToken(AsmToken::Comma); 6965 6966 if (!FormatFound) { 6967 Res = parseSymbolicOrNumericFormat(Format); 6968 if (Res.isFailure()) 6969 return Res; 6970 if (Res.isSuccess()) { 6971 auto Size = Operands.size(); 6972 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6973 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6974 Op.setImm(Format); 6975 } 6976 return ParseStatus::Success; 6977 } 6978 6979 if (isId("format") && peekToken().is(AsmToken::Colon)) 6980 return Error(getLoc(), "duplicate format"); 6981 return ParseStatus::Success; 6982 } 6983 6984 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) { 6985 ParseStatus Res = 6986 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); 6987 if (Res.isNoMatch()) { 6988 Res = parseIntWithPrefix("inst_offset", Operands, 6989 AMDGPUOperand::ImmTyInstOffset); 6990 } 6991 return Res; 6992 } 6993 6994 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) { 6995 ParseStatus Res = 6996 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16); 6997 if (Res.isNoMatch()) 6998 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16); 6999 return Res; 7000 } 7001 7002 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) { 7003 ParseStatus Res = 7004 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP); 7005 if (Res.isNoMatch()) { 7006 Res = 7007 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP); 7008 } 7009 return Res; 7010 } 7011 7012 //===----------------------------------------------------------------------===// 7013 // Exp 7014 //===----------------------------------------------------------------------===// 7015 7016 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 7017 OptionalImmIndexMap OptionalIdx; 7018 7019 unsigned OperandIdx[4]; 7020 unsigned EnMask = 0; 7021 int SrcIdx = 0; 7022 7023 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7024 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7025 7026 // Add the register arguments 7027 if (Op.isReg()) { 7028 assert(SrcIdx < 4); 7029 OperandIdx[SrcIdx] = Inst.size(); 7030 Op.addRegOperands(Inst, 1); 7031 ++SrcIdx; 7032 continue; 7033 } 7034 7035 if (Op.isOff()) { 7036 assert(SrcIdx < 4); 7037 OperandIdx[SrcIdx] = Inst.size(); 7038 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 7039 ++SrcIdx; 7040 continue; 7041 } 7042 7043 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 7044 Op.addImmOperands(Inst, 1); 7045 continue; 7046 } 7047 7048 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 7049 continue; 7050 7051 // Handle optional arguments 7052 OptionalIdx[Op.getImmTy()] = i; 7053 } 7054 7055 assert(SrcIdx == 4); 7056 7057 bool Compr = false; 7058 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 7059 Compr = true; 7060 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 7061 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 7062 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 7063 } 7064 7065 for (auto i = 0; i < SrcIdx; ++i) { 7066 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 7067 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 7068 } 7069 } 7070 7071 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 7072 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 7073 7074 Inst.addOperand(MCOperand::createImm(EnMask)); 7075 } 7076 7077 //===----------------------------------------------------------------------===// 7078 // s_waitcnt 7079 //===----------------------------------------------------------------------===// 7080 7081 static bool 7082 encodeCnt( 7083 const AMDGPU::IsaVersion ISA, 7084 int64_t &IntVal, 7085 int64_t CntVal, 7086 bool Saturate, 7087 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 7088 unsigned (*decode)(const IsaVersion &Version, unsigned)) 7089 { 7090 bool Failed = false; 7091 7092 IntVal = encode(ISA, IntVal, CntVal); 7093 if (CntVal != decode(ISA, IntVal)) { 7094 if (Saturate) { 7095 IntVal = encode(ISA, IntVal, -1); 7096 } else { 7097 Failed = true; 7098 } 7099 } 7100 return Failed; 7101 } 7102 7103 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 7104 7105 SMLoc CntLoc = getLoc(); 7106 StringRef CntName = getTokenStr(); 7107 7108 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 7109 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 7110 return false; 7111 7112 int64_t CntVal; 7113 SMLoc ValLoc = getLoc(); 7114 if (!parseExpr(CntVal)) 7115 return false; 7116 7117 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 7118 7119 bool Failed = true; 7120 bool Sat = CntName.ends_with("_sat"); 7121 7122 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 7123 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 7124 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 7125 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 7126 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 7127 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 7128 } else { 7129 Error(CntLoc, "invalid counter name " + CntName); 7130 return false; 7131 } 7132 7133 if (Failed) { 7134 Error(ValLoc, "too large value for " + CntName); 7135 return false; 7136 } 7137 7138 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 7139 return false; 7140 7141 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 7142 if (isToken(AsmToken::EndOfStatement)) { 7143 Error(getLoc(), "expected a counter name"); 7144 return false; 7145 } 7146 } 7147 7148 return true; 7149 } 7150 7151 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) { 7152 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 7153 int64_t Waitcnt = getWaitcntBitMask(ISA); 7154 SMLoc S = getLoc(); 7155 7156 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 7157 while (!isToken(AsmToken::EndOfStatement)) { 7158 if (!parseCnt(Waitcnt)) 7159 return ParseStatus::Failure; 7160 } 7161 } else { 7162 if (!parseExpr(Waitcnt)) 7163 return ParseStatus::Failure; 7164 } 7165 7166 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 7167 return ParseStatus::Success; 7168 } 7169 7170 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 7171 SMLoc FieldLoc = getLoc(); 7172 StringRef FieldName = getTokenStr(); 7173 if (!skipToken(AsmToken::Identifier, "expected a field name") || 7174 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 7175 return false; 7176 7177 SMLoc ValueLoc = getLoc(); 7178 StringRef ValueName = getTokenStr(); 7179 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 7180 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 7181 return false; 7182 7183 unsigned Shift; 7184 if (FieldName == "instid0") { 7185 Shift = 0; 7186 } else if (FieldName == "instskip") { 7187 Shift = 4; 7188 } else if (FieldName == "instid1") { 7189 Shift = 7; 7190 } else { 7191 Error(FieldLoc, "invalid field name " + FieldName); 7192 return false; 7193 } 7194 7195 int Value; 7196 if (Shift == 4) { 7197 // Parse values for instskip. 7198 Value = StringSwitch<int>(ValueName) 7199 .Case("SAME", 0) 7200 .Case("NEXT", 1) 7201 .Case("SKIP_1", 2) 7202 .Case("SKIP_2", 3) 7203 .Case("SKIP_3", 4) 7204 .Case("SKIP_4", 5) 7205 .Default(-1); 7206 } else { 7207 // Parse values for instid0 and instid1. 7208 Value = StringSwitch<int>(ValueName) 7209 .Case("NO_DEP", 0) 7210 .Case("VALU_DEP_1", 1) 7211 .Case("VALU_DEP_2", 2) 7212 .Case("VALU_DEP_3", 3) 7213 .Case("VALU_DEP_4", 4) 7214 .Case("TRANS32_DEP_1", 5) 7215 .Case("TRANS32_DEP_2", 6) 7216 .Case("TRANS32_DEP_3", 7) 7217 .Case("FMA_ACCUM_CYCLE_1", 8) 7218 .Case("SALU_CYCLE_1", 9) 7219 .Case("SALU_CYCLE_2", 10) 7220 .Case("SALU_CYCLE_3", 11) 7221 .Default(-1); 7222 } 7223 if (Value < 0) { 7224 Error(ValueLoc, "invalid value name " + ValueName); 7225 return false; 7226 } 7227 7228 Delay |= Value << Shift; 7229 return true; 7230 } 7231 7232 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) { 7233 int64_t Delay = 0; 7234 SMLoc S = getLoc(); 7235 7236 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 7237 do { 7238 if (!parseDelay(Delay)) 7239 return ParseStatus::Failure; 7240 } while (trySkipToken(AsmToken::Pipe)); 7241 } else { 7242 if (!parseExpr(Delay)) 7243 return ParseStatus::Failure; 7244 } 7245 7246 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 7247 return ParseStatus::Success; 7248 } 7249 7250 bool 7251 AMDGPUOperand::isSWaitCnt() const { 7252 return isImm(); 7253 } 7254 7255 bool AMDGPUOperand::isSDelayALU() const { return isImm(); } 7256 7257 //===----------------------------------------------------------------------===// 7258 // DepCtr 7259 //===----------------------------------------------------------------------===// 7260 7261 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 7262 StringRef DepCtrName) { 7263 switch (ErrorId) { 7264 case OPR_ID_UNKNOWN: 7265 Error(Loc, Twine("invalid counter name ", DepCtrName)); 7266 return; 7267 case OPR_ID_UNSUPPORTED: 7268 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 7269 return; 7270 case OPR_ID_DUPLICATE: 7271 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 7272 return; 7273 case OPR_VAL_INVALID: 7274 Error(Loc, Twine("invalid value for ", DepCtrName)); 7275 return; 7276 default: 7277 assert(false); 7278 } 7279 } 7280 7281 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 7282 7283 using namespace llvm::AMDGPU::DepCtr; 7284 7285 SMLoc DepCtrLoc = getLoc(); 7286 StringRef DepCtrName = getTokenStr(); 7287 7288 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 7289 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 7290 return false; 7291 7292 int64_t ExprVal; 7293 if (!parseExpr(ExprVal)) 7294 return false; 7295 7296 unsigned PrevOprMask = UsedOprMask; 7297 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 7298 7299 if (CntVal < 0) { 7300 depCtrError(DepCtrLoc, CntVal, DepCtrName); 7301 return false; 7302 } 7303 7304 if (!skipToken(AsmToken::RParen, "expected 
a closing parenthesis")) 7305 return false; 7306 7307 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 7308 if (isToken(AsmToken::EndOfStatement)) { 7309 Error(getLoc(), "expected a counter name"); 7310 return false; 7311 } 7312 } 7313 7314 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 7315 DepCtr = (DepCtr & ~CntValMask) | CntVal; 7316 return true; 7317 } 7318 7319 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) { 7320 using namespace llvm::AMDGPU::DepCtr; 7321 7322 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 7323 SMLoc Loc = getLoc(); 7324 7325 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 7326 unsigned UsedOprMask = 0; 7327 while (!isToken(AsmToken::EndOfStatement)) { 7328 if (!parseDepCtr(DepCtr, UsedOprMask)) 7329 return ParseStatus::Failure; 7330 } 7331 } else { 7332 if (!parseExpr(DepCtr)) 7333 return ParseStatus::Failure; 7334 } 7335 7336 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 7337 return ParseStatus::Success; 7338 } 7339 7340 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 7341 7342 //===----------------------------------------------------------------------===// 7343 // hwreg 7344 //===----------------------------------------------------------------------===// 7345 7346 ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg, 7347 OperandInfoTy &Offset, 7348 OperandInfoTy &Width) { 7349 using namespace llvm::AMDGPU::Hwreg; 7350 7351 if (!trySkipId("hwreg", AsmToken::LParen)) 7352 return ParseStatus::NoMatch; 7353 7354 // The register may be specified by name or using a numeric code 7355 HwReg.Loc = getLoc(); 7356 if (isToken(AsmToken::Identifier) && 7357 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7358 HwReg.IsSymbolic = true; 7359 lex(); // skip register name 7360 } else if (!parseExpr(HwReg.Val, "a register name")) { 7361 return ParseStatus::Failure; 7362 } 7363 7364 if (trySkipToken(AsmToken::RParen)) 7365 return ParseStatus::Success; 7366 7367 // parse optional params 7368 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 7369 return ParseStatus::Failure; 7370 7371 Offset.Loc = getLoc(); 7372 if (!parseExpr(Offset.Val)) 7373 return ParseStatus::Failure; 7374 7375 if (!skipToken(AsmToken::Comma, "expected a comma")) 7376 return ParseStatus::Failure; 7377 7378 Width.Loc = getLoc(); 7379 if (!parseExpr(Width.Val) || 7380 !skipToken(AsmToken::RParen, "expected a closing parenthesis")) 7381 return ParseStatus::Failure; 7382 7383 return ParseStatus::Success; 7384 } 7385 7386 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 7387 using namespace llvm::AMDGPU::Hwreg; 7388 7389 int64_t ImmVal = 0; 7390 SMLoc Loc = getLoc(); 7391 7392 StructuredOpField HwReg("id", "hardware register", HwregId::Width, 7393 HwregId::Default); 7394 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width, 7395 HwregOffset::Default); 7396 struct : StructuredOpField { 7397 using StructuredOpField::StructuredOpField; 7398 bool validate(AMDGPUAsmParser &Parser) const override { 7399 if (!isUIntN(Width, Val - 1)) 7400 return Error(Parser, "only values from 1 to 32 are legal"); 7401 return true; 7402 } 7403 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default); 7404 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width}); 7405 7406 if (Res.isNoMatch()) 7407 Res = parseHwregFunc(HwReg, Offset, Width); 7408 7409 if (Res.isSuccess()) { 7410 if (!validateStructuredOpFields({&HwReg, 
&Offset, &Width})) 7411 return ParseStatus::Failure; 7412 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val); 7413 } 7414 7415 if (Res.isNoMatch() && 7416 parseExpr(ImmVal, "a hwreg macro, structured immediate")) 7417 Res = ParseStatus::Success; 7418 7419 if (!Res.isSuccess()) 7420 return ParseStatus::Failure; 7421 7422 if (!isUInt<16>(ImmVal)) 7423 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7424 Operands.push_back( 7425 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 7426 return ParseStatus::Success; 7427 } 7428 7429 bool AMDGPUOperand::isHwreg() const { 7430 return isImmTy(ImmTyHwreg); 7431 } 7432 7433 //===----------------------------------------------------------------------===// 7434 // sendmsg 7435 //===----------------------------------------------------------------------===// 7436 7437 bool 7438 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 7439 OperandInfoTy &Op, 7440 OperandInfoTy &Stream) { 7441 using namespace llvm::AMDGPU::SendMsg; 7442 7443 Msg.Loc = getLoc(); 7444 if (isToken(AsmToken::Identifier) && 7445 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7446 Msg.IsSymbolic = true; 7447 lex(); // skip message name 7448 } else if (!parseExpr(Msg.Val, "a message name")) { 7449 return false; 7450 } 7451 7452 if (trySkipToken(AsmToken::Comma)) { 7453 Op.IsDefined = true; 7454 Op.Loc = getLoc(); 7455 if (isToken(AsmToken::Identifier) && 7456 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) != 7457 OPR_ID_UNKNOWN) { 7458 lex(); // skip operation name 7459 } else if (!parseExpr(Op.Val, "an operation name")) { 7460 return false; 7461 } 7462 7463 if (trySkipToken(AsmToken::Comma)) { 7464 Stream.IsDefined = true; 7465 Stream.Loc = getLoc(); 7466 if (!parseExpr(Stream.Val)) 7467 return false; 7468 } 7469 } 7470 7471 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7472 } 7473 7474 bool 7475 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 7476 const OperandInfoTy &Op, 7477 const OperandInfoTy &Stream) { 7478 using namespace llvm::AMDGPU::SendMsg; 7479 7480 // Validation strictness depends on whether message is specified 7481 // in a symbolic or in a numeric form. In the latter case 7482 // only encoding possibility is checked. 
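  // E.g. for "sendmsg(MSG_GS_DONE, GS_OP_NOP)" the operation and stream id
  // are checked against the rules for that message, while a plain integer
  // operand only has to fit the encoding.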
7483 bool Strict = Msg.IsSymbolic; 7484 7485 if (Strict) { 7486 if (Msg.Val == OPR_ID_UNSUPPORTED) { 7487 Error(Msg.Loc, "specified message id is not supported on this GPU"); 7488 return false; 7489 } 7490 } else { 7491 if (!isValidMsgId(Msg.Val, getSTI())) { 7492 Error(Msg.Loc, "invalid message id"); 7493 return false; 7494 } 7495 } 7496 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) { 7497 if (Op.IsDefined) { 7498 Error(Op.Loc, "message does not support operations"); 7499 } else { 7500 Error(Msg.Loc, "missing message operation"); 7501 } 7502 return false; 7503 } 7504 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) { 7505 if (Op.Val == OPR_ID_UNSUPPORTED) 7506 Error(Op.Loc, "specified operation id is not supported on this GPU"); 7507 else 7508 Error(Op.Loc, "invalid operation id"); 7509 return false; 7510 } 7511 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) && 7512 Stream.IsDefined) { 7513 Error(Stream.Loc, "message operation does not support streams"); 7514 return false; 7515 } 7516 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) { 7517 Error(Stream.Loc, "invalid message stream id"); 7518 return false; 7519 } 7520 return true; 7521 } 7522 7523 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) { 7524 using namespace llvm::AMDGPU::SendMsg; 7525 7526 int64_t ImmVal = 0; 7527 SMLoc Loc = getLoc(); 7528 7529 if (trySkipId("sendmsg", AsmToken::LParen)) { 7530 OperandInfoTy Msg(OPR_ID_UNKNOWN); 7531 OperandInfoTy Op(OP_NONE_); 7532 OperandInfoTy Stream(STREAM_ID_NONE_); 7533 if (parseSendMsgBody(Msg, Op, Stream) && 7534 validateSendMsg(Msg, Op, Stream)) { 7535 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val); 7536 } else { 7537 return ParseStatus::Failure; 7538 } 7539 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 7540 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 7541 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7542 } else { 7543 return ParseStatus::Failure; 7544 } 7545 7546 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 7547 return ParseStatus::Success; 7548 } 7549 7550 bool AMDGPUOperand::isSendMsg() const { 7551 return isImmTy(ImmTySendMsg); 7552 } 7553 7554 //===----------------------------------------------------------------------===// 7555 // v_interp 7556 //===----------------------------------------------------------------------===// 7557 7558 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 7559 StringRef Str; 7560 SMLoc S = getLoc(); 7561 7562 if (!parseId(Str)) 7563 return ParseStatus::NoMatch; 7564 7565 int Slot = StringSwitch<int>(Str) 7566 .Case("p10", 0) 7567 .Case("p20", 1) 7568 .Case("p0", 2) 7569 .Default(-1); 7570 7571 if (Slot == -1) 7572 return Error(S, "invalid interpolation slot"); 7573 7574 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 7575 AMDGPUOperand::ImmTyInterpSlot)); 7576 return ParseStatus::Success; 7577 } 7578 7579 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 7580 StringRef Str; 7581 SMLoc S = getLoc(); 7582 7583 if (!parseId(Str)) 7584 return ParseStatus::NoMatch; 7585 7586 if (!Str.starts_with("attr")) 7587 return Error(S, "invalid interpolation attribute"); 7588 7589 StringRef Chan = Str.take_back(2); 7590 int AttrChan = StringSwitch<int>(Chan) 7591 .Case(".x", 0) 7592 .Case(".y", 1) 7593 .Case(".z", 2) 7594 .Case(".w", 3) 7595 .Default(-1); 7596 if (AttrChan == -1) 7597 return Error(S, "invalid or missing interpolation attribute channel"); 7598 7599 
Str = Str.drop_back(2).drop_front(4); 7600 7601 uint8_t Attr; 7602 if (Str.getAsInteger(10, Attr)) 7603 return Error(S, "invalid or missing interpolation attribute number"); 7604 7605 if (Attr > 32) 7606 return Error(S, "out of bounds interpolation attribute number"); 7607 7608 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 7609 7610 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 7611 AMDGPUOperand::ImmTyInterpAttr)); 7612 Operands.push_back(AMDGPUOperand::CreateImm( 7613 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan)); 7614 return ParseStatus::Success; 7615 } 7616 7617 //===----------------------------------------------------------------------===// 7618 // exp 7619 //===----------------------------------------------------------------------===// 7620 7621 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 7622 using namespace llvm::AMDGPU::Exp; 7623 7624 StringRef Str; 7625 SMLoc S = getLoc(); 7626 7627 if (!parseId(Str)) 7628 return ParseStatus::NoMatch; 7629 7630 unsigned Id = getTgtId(Str); 7631 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) 7632 return Error(S, (Id == ET_INVALID) 7633 ? "invalid exp target" 7634 : "exp target is not supported on this GPU"); 7635 7636 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 7637 AMDGPUOperand::ImmTyExpTgt)); 7638 return ParseStatus::Success; 7639 } 7640 7641 //===----------------------------------------------------------------------===// 7642 // parser helpers 7643 //===----------------------------------------------------------------------===// 7644 7645 bool 7646 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 7647 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 7648 } 7649 7650 bool 7651 AMDGPUAsmParser::isId(const StringRef Id) const { 7652 return isId(getToken(), Id); 7653 } 7654 7655 bool 7656 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 7657 return getTokenKind() == Kind; 7658 } 7659 7660 StringRef AMDGPUAsmParser::getId() const { 7661 return isToken(AsmToken::Identifier) ? 
getTokenStr() : StringRef(); 7662 } 7663 7664 bool 7665 AMDGPUAsmParser::trySkipId(const StringRef Id) { 7666 if (isId(Id)) { 7667 lex(); 7668 return true; 7669 } 7670 return false; 7671 } 7672 7673 bool 7674 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7675 if (isToken(AsmToken::Identifier)) { 7676 StringRef Tok = getTokenStr(); 7677 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) { 7678 lex(); 7679 return true; 7680 } 7681 } 7682 return false; 7683 } 7684 7685 bool 7686 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7687 if (isId(Id) && peekToken().is(Kind)) { 7688 lex(); 7689 lex(); 7690 return true; 7691 } 7692 return false; 7693 } 7694 7695 bool 7696 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7697 if (isToken(Kind)) { 7698 lex(); 7699 return true; 7700 } 7701 return false; 7702 } 7703 7704 bool 7705 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7706 const StringRef ErrMsg) { 7707 if (!trySkipToken(Kind)) { 7708 Error(getLoc(), ErrMsg); 7709 return false; 7710 } 7711 return true; 7712 } 7713 7714 bool 7715 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7716 SMLoc S = getLoc(); 7717 7718 const MCExpr *Expr; 7719 if (Parser.parseExpression(Expr)) 7720 return false; 7721 7722 if (Expr->evaluateAsAbsolute(Imm)) 7723 return true; 7724 7725 if (Expected.empty()) { 7726 Error(S, "expected absolute expression"); 7727 } else { 7728 Error(S, Twine("expected ", Expected) + 7729 Twine(" or an absolute expression")); 7730 } 7731 return false; 7732 } 7733 7734 bool 7735 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7736 SMLoc S = getLoc(); 7737 7738 const MCExpr *Expr; 7739 if (Parser.parseExpression(Expr)) 7740 return false; 7741 7742 int64_t IntVal; 7743 if (Expr->evaluateAsAbsolute(IntVal)) { 7744 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7745 } else { 7746 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7747 } 7748 return true; 7749 } 7750 7751 bool 7752 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7753 if (isToken(AsmToken::String)) { 7754 Val = getToken().getStringContents(); 7755 lex(); 7756 return true; 7757 } 7758 Error(getLoc(), ErrMsg); 7759 return false; 7760 } 7761 7762 bool 7763 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7764 if (isToken(AsmToken::Identifier)) { 7765 Val = getTokenStr(); 7766 lex(); 7767 return true; 7768 } 7769 if (!ErrMsg.empty()) 7770 Error(getLoc(), ErrMsg); 7771 return false; 7772 } 7773 7774 AsmToken 7775 AMDGPUAsmParser::getToken() const { 7776 return Parser.getTok(); 7777 } 7778 7779 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 7780 return isToken(AsmToken::EndOfStatement) 7781 ? 
getToken() 7782 : getLexer().peekTok(ShouldSkipSpace); 7783 } 7784 7785 void 7786 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7787 auto TokCount = getLexer().peekTokens(Tokens); 7788 7789 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7790 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7791 } 7792 7793 AsmToken::TokenKind 7794 AMDGPUAsmParser::getTokenKind() const { 7795 return getLexer().getKind(); 7796 } 7797 7798 SMLoc 7799 AMDGPUAsmParser::getLoc() const { 7800 return getToken().getLoc(); 7801 } 7802 7803 StringRef 7804 AMDGPUAsmParser::getTokenStr() const { 7805 return getToken().getString(); 7806 } 7807 7808 void 7809 AMDGPUAsmParser::lex() { 7810 Parser.Lex(); 7811 } 7812 7813 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const { 7814 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7815 } 7816 7817 SMLoc 7818 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7819 const OperandVector &Operands) const { 7820 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7821 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7822 if (Test(Op)) 7823 return Op.getStartLoc(); 7824 } 7825 return getInstLoc(Operands); 7826 } 7827 7828 SMLoc 7829 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7830 const OperandVector &Operands) const { 7831 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7832 return getOperandLoc(Test, Operands); 7833 } 7834 7835 SMLoc 7836 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7837 const OperandVector &Operands) const { 7838 auto Test = [=](const AMDGPUOperand& Op) { 7839 return Op.isRegKind() && Op.getReg() == Reg; 7840 }; 7841 return getOperandLoc(Test, Operands); 7842 } 7843 7844 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands, 7845 bool SearchMandatoryLiterals) const { 7846 auto Test = [](const AMDGPUOperand& Op) { 7847 return Op.IsImmKindLiteral() || Op.isExpr(); 7848 }; 7849 SMLoc Loc = getOperandLoc(Test, Operands); 7850 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands)) 7851 Loc = getMandatoryLitLoc(Operands); 7852 return Loc; 7853 } 7854 7855 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const { 7856 auto Test = [](const AMDGPUOperand &Op) { 7857 return Op.IsImmKindMandatoryLiteral(); 7858 }; 7859 return getOperandLoc(Test, Operands); 7860 } 7861 7862 SMLoc 7863 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7864 auto Test = [](const AMDGPUOperand& Op) { 7865 return Op.isImmKindConst(); 7866 }; 7867 return getOperandLoc(Test, Operands); 7868 } 7869 7870 ParseStatus 7871 AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) { 7872 if (!trySkipToken(AsmToken::LCurly)) 7873 return ParseStatus::NoMatch; 7874 7875 bool First = true; 7876 while (!trySkipToken(AsmToken::RCurly)) { 7877 if (!First && 7878 !skipToken(AsmToken::Comma, "comma or closing brace expected")) 7879 return ParseStatus::Failure; 7880 7881 StringRef Id = getTokenStr(); 7882 SMLoc IdLoc = getLoc(); 7883 if (!skipToken(AsmToken::Identifier, "field name expected") || 7884 !skipToken(AsmToken::Colon, "colon expected")) 7885 return ParseStatus::Failure; 7886 7887 auto I = 7888 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; }); 7889 if (I == Fields.end()) 7890 return Error(IdLoc, "unknown field"); 7891 if ((*I)->IsDefined) 7892 return Error(IdLoc, "duplicate field"); 7893 7894 // TODO: Support symbolic values. 
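    // Parse the field value, e.g. the "32" in "{id: 6, offset: 0, size: 32}"
    // (an illustrative hwreg-style structured operand).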
7895 (*I)->Loc = getLoc(); 7896 if (!parseExpr((*I)->Val)) 7897 return ParseStatus::Failure; 7898 (*I)->IsDefined = true; 7899 7900 First = false; 7901 } 7902 return ParseStatus::Success; 7903 } 7904 7905 bool AMDGPUAsmParser::validateStructuredOpFields( 7906 ArrayRef<const StructuredOpField *> Fields) { 7907 return all_of(Fields, [this](const StructuredOpField *F) { 7908 return F->validate(*this); 7909 }); 7910 } 7911 7912 //===----------------------------------------------------------------------===// 7913 // swizzle 7914 //===----------------------------------------------------------------------===// 7915 7916 LLVM_READNONE 7917 static unsigned 7918 encodeBitmaskPerm(const unsigned AndMask, 7919 const unsigned OrMask, 7920 const unsigned XorMask) { 7921 using namespace llvm::AMDGPU::Swizzle; 7922 7923 return BITMASK_PERM_ENC | 7924 (AndMask << BITMASK_AND_SHIFT) | 7925 (OrMask << BITMASK_OR_SHIFT) | 7926 (XorMask << BITMASK_XOR_SHIFT); 7927 } 7928 7929 bool 7930 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7931 const unsigned MinVal, 7932 const unsigned MaxVal, 7933 const StringRef ErrMsg, 7934 SMLoc &Loc) { 7935 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7936 return false; 7937 } 7938 Loc = getLoc(); 7939 if (!parseExpr(Op)) { 7940 return false; 7941 } 7942 if (Op < MinVal || Op > MaxVal) { 7943 Error(Loc, ErrMsg); 7944 return false; 7945 } 7946 7947 return true; 7948 } 7949 7950 bool 7951 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7952 const unsigned MinVal, 7953 const unsigned MaxVal, 7954 const StringRef ErrMsg) { 7955 SMLoc Loc; 7956 for (unsigned i = 0; i < OpNum; ++i) { 7957 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7958 return false; 7959 } 7960 7961 return true; 7962 } 7963 7964 bool 7965 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7966 using namespace llvm::AMDGPU::Swizzle; 7967 7968 int64_t Lane[LANE_NUM]; 7969 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7970 "expected a 2-bit lane id")) { 7971 Imm = QUAD_PERM_ENC; 7972 for (unsigned I = 0; I < LANE_NUM; ++I) { 7973 Imm |= Lane[I] << (LANE_SHIFT * I); 7974 } 7975 return true; 7976 } 7977 return false; 7978 } 7979 7980 bool 7981 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7982 using namespace llvm::AMDGPU::Swizzle; 7983 7984 SMLoc Loc; 7985 int64_t GroupSize; 7986 int64_t LaneIdx; 7987 7988 if (!parseSwizzleOperand(GroupSize, 7989 2, 32, 7990 "group size must be in the interval [2,32]", 7991 Loc)) { 7992 return false; 7993 } 7994 if (!isPowerOf2_64(GroupSize)) { 7995 Error(Loc, "group size must be a power of two"); 7996 return false; 7997 } 7998 if (parseSwizzleOperand(LaneIdx, 7999 0, GroupSize - 1, 8000 "lane id must be in the interval [0,group size - 1]", 8001 Loc)) { 8002 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 8003 return true; 8004 } 8005 return false; 8006 } 8007 8008 bool 8009 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 8010 using namespace llvm::AMDGPU::Swizzle; 8011 8012 SMLoc Loc; 8013 int64_t GroupSize; 8014 8015 if (!parseSwizzleOperand(GroupSize, 8016 2, 32, 8017 "group size must be in the interval [2,32]", 8018 Loc)) { 8019 return false; 8020 } 8021 if (!isPowerOf2_64(GroupSize)) { 8022 Error(Loc, "group size must be a power of two"); 8023 return false; 8024 } 8025 8026 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 8027 return true; 8028 } 8029 8030 bool 8031 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 8032 using namespace llvm::AMDGPU::Swizzle; 8033 8034 SMLoc Loc; 
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           1, 16,
                           "group size must be in the interval [1,16]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro")) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
  }

  return false;
}

ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ?
ParseStatus::Success : ParseStatus::Failure; 8161 } 8162 return ParseStatus::NoMatch; 8163 } 8164 8165 bool 8166 AMDGPUOperand::isSwizzle() const { 8167 return isImmTy(ImmTySwizzle); 8168 } 8169 8170 //===----------------------------------------------------------------------===// 8171 // VGPR Index Mode 8172 //===----------------------------------------------------------------------===// 8173 8174 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 8175 8176 using namespace llvm::AMDGPU::VGPRIndexMode; 8177 8178 if (trySkipToken(AsmToken::RParen)) { 8179 return OFF; 8180 } 8181 8182 int64_t Imm = 0; 8183 8184 while (true) { 8185 unsigned Mode = 0; 8186 SMLoc S = getLoc(); 8187 8188 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 8189 if (trySkipId(IdSymbolic[ModeId])) { 8190 Mode = 1 << ModeId; 8191 break; 8192 } 8193 } 8194 8195 if (Mode == 0) { 8196 Error(S, (Imm == 0)? 8197 "expected a VGPR index mode or a closing parenthesis" : 8198 "expected a VGPR index mode"); 8199 return UNDEF; 8200 } 8201 8202 if (Imm & Mode) { 8203 Error(S, "duplicate VGPR index mode"); 8204 return UNDEF; 8205 } 8206 Imm |= Mode; 8207 8208 if (trySkipToken(AsmToken::RParen)) 8209 break; 8210 if (!skipToken(AsmToken::Comma, 8211 "expected a comma or a closing parenthesis")) 8212 return UNDEF; 8213 } 8214 8215 return Imm; 8216 } 8217 8218 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 8219 8220 using namespace llvm::AMDGPU::VGPRIndexMode; 8221 8222 int64_t Imm = 0; 8223 SMLoc S = getLoc(); 8224 8225 if (trySkipId("gpr_idx", AsmToken::LParen)) { 8226 Imm = parseGPRIdxMacro(); 8227 if (Imm == UNDEF) 8228 return ParseStatus::Failure; 8229 } else { 8230 if (getParser().parseAbsoluteExpression(Imm)) 8231 return ParseStatus::Failure; 8232 if (Imm < 0 || !isUInt<4>(Imm)) 8233 return Error(S, "invalid immediate: only 4-bit values are legal"); 8234 } 8235 8236 Operands.push_back( 8237 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 8238 return ParseStatus::Success; 8239 } 8240 8241 bool AMDGPUOperand::isGPRIdxMode() const { 8242 return isImmTy(ImmTyGprIdxMode); 8243 } 8244 8245 //===----------------------------------------------------------------------===// 8246 // sopp branch targets 8247 //===----------------------------------------------------------------------===// 8248 8249 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) { 8250 8251 // Make sure we are not parsing something 8252 // that looks like a label or an expression but is not. 8253 // This will improve error messages. 8254 if (isRegister() || isModifier()) 8255 return ParseStatus::NoMatch; 8256 8257 if (!parseExpr(Operands)) 8258 return ParseStatus::Failure; 8259 8260 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 8261 assert(Opr.isImm() || Opr.isExpr()); 8262 SMLoc Loc = Opr.getStartLoc(); 8263 8264 // Currently we do not support arbitrary expressions as branch targets. 8265 // Only labels and absolute expressions are accepted. 
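  // E.g. "s_branch loop_start" and "s_branch 16" are accepted, while an
  // unresolved expression such as "s_branch loop_start+4" is rejected.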
8266 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 8267 Error(Loc, "expected an absolute expression or a label"); 8268 } else if (Opr.isImm() && !Opr.isS16Imm()) { 8269 Error(Loc, "expected a 16-bit signed jump offset"); 8270 } 8271 8272 return ParseStatus::Success; 8273 } 8274 8275 //===----------------------------------------------------------------------===// 8276 // Boolean holding registers 8277 //===----------------------------------------------------------------------===// 8278 8279 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 8280 return parseReg(Operands); 8281 } 8282 8283 //===----------------------------------------------------------------------===// 8284 // mubuf 8285 //===----------------------------------------------------------------------===// 8286 8287 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 8288 const OperandVector &Operands, 8289 bool IsAtomic) { 8290 OptionalImmIndexMap OptionalIdx; 8291 unsigned FirstOperandIdx = 1; 8292 bool IsAtomicReturn = false; 8293 8294 if (IsAtomic) { 8295 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 8296 SIInstrFlags::IsAtomicRet; 8297 } 8298 8299 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 8300 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8301 8302 // Add the register arguments 8303 if (Op.isReg()) { 8304 Op.addRegOperands(Inst, 1); 8305 // Insert a tied src for atomic return dst. 8306 // This cannot be postponed as subsequent calls to 8307 // addImmOperands rely on correct number of MC operands. 8308 if (IsAtomicReturn && i == FirstOperandIdx) 8309 Op.addRegOperands(Inst, 1); 8310 continue; 8311 } 8312 8313 // Handle the case where soffset is an immediate 8314 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 8315 Op.addImmOperands(Inst, 1); 8316 continue; 8317 } 8318 8319 // Handle tokens like 'offen' which are sometimes hard-coded into the 8320 // asm string. There are no MCInst operands for these. 8321 if (Op.isToken()) { 8322 continue; 8323 } 8324 assert(Op.isImm()); 8325 8326 // Handle optional arguments 8327 OptionalIdx[Op.getImmTy()] = i; 8328 } 8329 8330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 8331 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 8332 } 8333 8334 //===----------------------------------------------------------------------===// 8335 // smrd 8336 //===----------------------------------------------------------------------===// 8337 8338 bool AMDGPUOperand::isSMRDOffset8() const { 8339 return isImmLiteral() && isUInt<8>(getImm()); 8340 } 8341 8342 bool AMDGPUOperand::isSMEMOffset() const { 8343 // Offset range is checked later by validator. 8344 return isImmLiteral(); 8345 } 8346 8347 bool AMDGPUOperand::isSMRDLiteralOffset() const { 8348 // 32-bit literals are only supported on CI and we only want to use them 8349 // when the offset is > 8-bits. 
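  // E.g. an offset value that needs more than 8 bits, such as 0x1fff0,
  // matches this (CI-only) literal-offset operand instead.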
8350 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 8351 } 8352 8353 //===----------------------------------------------------------------------===// 8354 // vop3 8355 //===----------------------------------------------------------------------===// 8356 8357 static bool ConvertOmodMul(int64_t &Mul) { 8358 if (Mul != 1 && Mul != 2 && Mul != 4) 8359 return false; 8360 8361 Mul >>= 1; 8362 return true; 8363 } 8364 8365 static bool ConvertOmodDiv(int64_t &Div) { 8366 if (Div == 1) { 8367 Div = 0; 8368 return true; 8369 } 8370 8371 if (Div == 2) { 8372 Div = 3; 8373 return true; 8374 } 8375 8376 return false; 8377 } 8378 8379 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 8380 // This is intentional and ensures compatibility with sp3. 8381 // See bug 35397 for details. 8382 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) { 8383 if (BoundCtrl == 0 || BoundCtrl == 1) { 8384 if (!isGFX11Plus()) 8385 BoundCtrl = 1; 8386 return true; 8387 } 8388 return false; 8389 } 8390 8391 void AMDGPUAsmParser::onBeginOfFile() { 8392 if (!getParser().getStreamer().getTargetStreamer() || 8393 getSTI().getTargetTriple().getArch() == Triple::r600) 8394 return; 8395 8396 if (!getTargetStreamer().getTargetID()) 8397 getTargetStreamer().initializeTargetID(getSTI(), 8398 getSTI().getFeatureString()); 8399 8400 if (isHsaAbi(getSTI())) 8401 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 8402 } 8403 8404 /// Parse AMDGPU specific expressions. 8405 /// 8406 /// expr ::= or(expr, ...) | 8407 /// max(expr, ...) 8408 /// 8409 bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { 8410 using AGVK = AMDGPUMCExpr::VariantKind; 8411 8412 if (isToken(AsmToken::Identifier)) { 8413 StringRef TokenId = getTokenStr(); 8414 AGVK VK = StringSwitch<AGVK>(TokenId) 8415 .Case("max", AGVK::AGVK_Max) 8416 .Case("or", AGVK::AGVK_Or) 8417 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs) 8418 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs) 8419 .Case("alignto", AGVK::AGVK_AlignTo) 8420 .Case("occupancy", AGVK::AGVK_Occupancy) 8421 .Default(AGVK::AGVK_None); 8422 8423 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) { 8424 SmallVector<const MCExpr *, 4> Exprs; 8425 uint64_t CommaCount = 0; 8426 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.) 
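      // What follows is a comma-separated list of sub-expressions terminated
      // by ')', e.g. (illustrative) "max(.kernel.num_vgpr, 32)".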
8427 lex(); // Eat '(' 8428 while (true) { 8429 if (trySkipToken(AsmToken::RParen)) { 8430 if (Exprs.empty()) { 8431 Error(getToken().getLoc(), 8432 "empty " + Twine(TokenId) + " expression"); 8433 return true; 8434 } 8435 if (CommaCount + 1 != Exprs.size()) { 8436 Error(getToken().getLoc(), 8437 "mismatch of commas in " + Twine(TokenId) + " expression"); 8438 return true; 8439 } 8440 Res = AMDGPUMCExpr::create(VK, Exprs, getContext()); 8441 return false; 8442 } 8443 const MCExpr *Expr; 8444 if (getParser().parseExpression(Expr, EndLoc)) 8445 return true; 8446 Exprs.push_back(Expr); 8447 bool LastTokenWasComma = trySkipToken(AsmToken::Comma); 8448 if (LastTokenWasComma) 8449 CommaCount++; 8450 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) { 8451 Error(getToken().getLoc(), 8452 "unexpected token in " + Twine(TokenId) + " expression"); 8453 return true; 8454 } 8455 } 8456 } 8457 } 8458 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr); 8459 } 8460 8461 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) { 8462 StringRef Name = getTokenStr(); 8463 if (Name == "mul") { 8464 return parseIntWithPrefix("mul", Operands, 8465 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8466 } 8467 8468 if (Name == "div") { 8469 return parseIntWithPrefix("div", Operands, 8470 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8471 } 8472 8473 return ParseStatus::NoMatch; 8474 } 8475 8476 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to 8477 // the number of src operands present, then copies that bit into src0_modifiers. 8478 static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) { 8479 int Opc = Inst.getOpcode(); 8480 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8481 if (OpSelIdx == -1) 8482 return; 8483 8484 int SrcNum; 8485 const int Ops[] = { AMDGPU::OpName::src0, 8486 AMDGPU::OpName::src1, 8487 AMDGPU::OpName::src2 }; 8488 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]); 8489 ++SrcNum) 8490 ; 8491 assert(SrcNum > 0); 8492 8493 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8494 8495 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); 8496 if (DstIdx == -1) 8497 return; 8498 8499 const MCOperand &DstOp = Inst.getOperand(DstIdx); 8500 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8501 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8502 if (DstOp.isReg() && 8503 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) { 8504 if (AMDGPU::isHi(DstOp.getReg(), MRI)) 8505 ModVal |= SISrcMods::DST_OP_SEL; 8506 } else { 8507 if ((OpSel & (1 << SrcNum)) != 0) 8508 ModVal |= SISrcMods::DST_OP_SEL; 8509 } 8510 Inst.getOperand(ModIdx).setImm(ModVal); 8511 } 8512 8513 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, 8514 const OperandVector &Operands) { 8515 cvtVOP3P(Inst, Operands); 8516 cvtVOP3DstOpSelOnly(Inst, *getMRI()); 8517 } 8518 8519 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 8520 OptionalImmIndexMap &OptionalIdx) { 8521 cvtVOP3P(Inst, Operands, OptionalIdx); 8522 cvtVOP3DstOpSelOnly(Inst, *getMRI()); 8523 } 8524 8525 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8526 return 8527 // 1. This operand is input modifiers 8528 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8529 // 2. This is not last operand 8530 && Desc.NumOperands > (OpNum + 1) 8531 // 3. 
Next operand is register class 8532 && Desc.operands()[OpNum + 1].RegClass != -1 8533 // 4. Next register is not tied to any other operand 8534 && Desc.getOperandConstraint(OpNum + 1, 8535 MCOI::OperandConstraint::TIED_TO) == -1; 8536 } 8537 8538 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8539 { 8540 OptionalImmIndexMap OptionalIdx; 8541 unsigned Opc = Inst.getOpcode(); 8542 8543 unsigned I = 1; 8544 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8545 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8546 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8547 } 8548 8549 for (unsigned E = Operands.size(); I != E; ++I) { 8550 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8551 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8552 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8553 } else if (Op.isInterpSlot() || Op.isInterpAttr() || 8554 Op.isInterpAttrChan()) { 8555 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8556 } else if (Op.isImmModifier()) { 8557 OptionalIdx[Op.getImmTy()] = I; 8558 } else { 8559 llvm_unreachable("unhandled operand type"); 8560 } 8561 } 8562 8563 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high)) 8564 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8565 AMDGPUOperand::ImmTyHigh); 8566 8567 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8568 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8569 AMDGPUOperand::ImmTyClamp); 8570 8571 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8572 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8573 AMDGPUOperand::ImmTyOModSI); 8574 } 8575 8576 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8577 { 8578 OptionalImmIndexMap OptionalIdx; 8579 unsigned Opc = Inst.getOpcode(); 8580 8581 unsigned I = 1; 8582 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8583 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8584 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8585 } 8586 8587 for (unsigned E = Operands.size(); I != E; ++I) { 8588 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8589 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8590 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8591 } else if (Op.isImmModifier()) { 8592 OptionalIdx[Op.getImmTy()] = I; 8593 } else { 8594 llvm_unreachable("unhandled operand type"); 8595 } 8596 } 8597 8598 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp); 8599 8600 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8601 if (OpSelIdx != -1) 8602 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8603 8604 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8605 8606 if (OpSelIdx == -1) 8607 return; 8608 8609 const int Ops[] = { AMDGPU::OpName::src0, 8610 AMDGPU::OpName::src1, 8611 AMDGPU::OpName::src2 }; 8612 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8613 AMDGPU::OpName::src1_modifiers, 8614 AMDGPU::OpName::src2_modifiers }; 8615 8616 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8617 8618 for (int J = 0; J < 3; ++J) { 8619 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8620 if (OpIdx == -1) 8621 break; 8622 8623 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8624 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8625 8626 if ((OpSel & (1 << J)) != 0) 8627 ModVal |= SISrcMods::OP_SEL_0; 8628 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8629 (OpSel & (1 << 3)) != 0) 8630 ModVal |= 
SISrcMods::DST_OP_SEL; 8631 8632 Inst.getOperand(ModIdx).setImm(ModVal); 8633 } 8634 } 8635 8636 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8637 OptionalImmIndexMap &OptionalIdx) { 8638 unsigned Opc = Inst.getOpcode(); 8639 8640 unsigned I = 1; 8641 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8642 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8643 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8644 } 8645 8646 for (unsigned E = Operands.size(); I != E; ++I) { 8647 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8648 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8649 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8650 } else if (Op.isImmModifier()) { 8651 OptionalIdx[Op.getImmTy()] = I; 8652 } else if (Op.isRegOrImm()) { 8653 Op.addRegOrImmOperands(Inst, 1); 8654 } else { 8655 llvm_unreachable("unhandled operand type"); 8656 } 8657 } 8658 8659 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) { 8660 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) 8661 Inst.addOperand(Inst.getOperand(0)); 8662 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8663 AMDGPUOperand::ImmTyByteSel); 8664 } 8665 8666 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8667 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8668 AMDGPUOperand::ImmTyClamp); 8669 8670 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8671 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8672 AMDGPUOperand::ImmTyOModSI); 8673 8674 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8675 // it has src2 register operand that is tied to dst operand 8676 // we don't allow modifiers for this operand in assembler so src2_modifiers 8677 // should be 0. 8678 if (isMAC(Opc)) { 8679 auto it = Inst.begin(); 8680 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8681 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8682 ++it; 8683 // Copy the operand to ensure it's not invalidated when Inst grows. 8684 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8685 } 8686 } 8687 8688 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8689 OptionalImmIndexMap OptionalIdx; 8690 cvtVOP3(Inst, Operands, OptionalIdx); 8691 } 8692 8693 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8694 OptionalImmIndexMap &OptIdx) { 8695 const int Opc = Inst.getOpcode(); 8696 const MCInstrDesc &Desc = MII.get(Opc); 8697 8698 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8699 8700 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || 8701 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || 8702 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 || 8703 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) { 8704 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods 8705 Inst.addOperand(Inst.getOperand(0)); 8706 } 8707 8708 // Adding vdst_in operand is already covered for these DPP instructions in 8709 // cvtVOP3DPP. 
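  // For the remaining opcodes with a tied vdst_in operand, replicate the dst
  // operand (operand 0) so the MCInst gets the expected operand count.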
8710 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) && 8711 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 || 8712 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 || 8713 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 || 8714 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 || 8715 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 || 8716 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || 8717 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || 8718 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) { 8719 assert(!IsPacked); 8720 Inst.addOperand(Inst.getOperand(0)); 8721 } 8722 8723 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8724 // instruction, and then figure out where to actually put the modifiers 8725 8726 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8727 if (OpSelIdx != -1) { 8728 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8729 } 8730 8731 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8732 if (OpSelHiIdx != -1) { 8733 int DefaultVal = IsPacked ? -1 : 0; 8734 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8735 DefaultVal); 8736 } 8737 8738 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8739 if (NegLoIdx != -1) 8740 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8741 8742 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8743 if (NegHiIdx != -1) 8744 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8745 8746 const int Ops[] = { AMDGPU::OpName::src0, 8747 AMDGPU::OpName::src1, 8748 AMDGPU::OpName::src2 }; 8749 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8750 AMDGPU::OpName::src1_modifiers, 8751 AMDGPU::OpName::src2_modifiers }; 8752 8753 unsigned OpSel = 0; 8754 unsigned OpSelHi = 0; 8755 unsigned NegLo = 0; 8756 unsigned NegHi = 0; 8757 8758 if (OpSelIdx != -1) 8759 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8760 8761 if (OpSelHiIdx != -1) 8762 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8763 8764 if (NegLoIdx != -1) 8765 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8766 8767 if (NegHiIdx != -1) 8768 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8769 8770 for (int J = 0; J < 3; ++J) { 8771 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8772 if (OpIdx == -1) 8773 break; 8774 8775 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8776 8777 if (ModIdx == -1) 8778 continue; 8779 8780 uint32_t ModVal = 0; 8781 8782 const MCOperand &SrcOp = Inst.getOperand(OpIdx); 8783 if (SrcOp.isReg() && getMRI() 8784 ->getRegClass(AMDGPU::VGPR_16RegClassID) 8785 .contains(SrcOp.getReg())) { 8786 bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI()); 8787 if (VGPRSuffixIsHi) 8788 ModVal |= SISrcMods::OP_SEL_0; 8789 } else { 8790 if ((OpSel & (1 << J)) != 0) 8791 ModVal |= SISrcMods::OP_SEL_0; 8792 } 8793 8794 if ((OpSelHi & (1 << J)) != 0) 8795 ModVal |= SISrcMods::OP_SEL_1; 8796 8797 if ((NegLo & (1 << J)) != 0) 8798 ModVal |= SISrcMods::NEG; 8799 8800 if ((NegHi & (1 << J)) != 0) 8801 ModVal |= SISrcMods::NEG_HI; 8802 8803 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8804 } 8805 } 8806 8807 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8808 OptionalImmIndexMap OptIdx; 8809 cvtVOP3(Inst, Operands, OptIdx); 8810 cvtVOP3P(Inst, Operands, OptIdx); 8811 } 8812 8813 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, 8814 
unsigned i, unsigned Opc, unsigned OpName) { 8815 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1) 8816 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2); 8817 else 8818 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1); 8819 } 8820 8821 void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) { 8822 unsigned Opc = Inst.getOpcode(); 8823 8824 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); 8825 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers); 8826 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers); 8827 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef 8828 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2 8829 8830 OptionalImmIndexMap OptIdx; 8831 for (unsigned i = 5; i < Operands.size(); ++i) { 8832 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8833 OptIdx[Op.getImmTy()] = i; 8834 } 8835 8836 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit)) 8837 addOptionalImmOperand(Inst, Operands, OptIdx, 8838 AMDGPUOperand::ImmTyIndexKey8bit); 8839 8840 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit)) 8841 addOptionalImmOperand(Inst, Operands, OptIdx, 8842 AMDGPUOperand::ImmTyIndexKey16bit); 8843 8844 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8845 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp); 8846 8847 cvtVOP3P(Inst, Operands, OptIdx); 8848 } 8849 8850 //===----------------------------------------------------------------------===// 8851 // VOPD 8852 //===----------------------------------------------------------------------===// 8853 8854 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { 8855 if (!hasVOPD(getSTI())) 8856 return ParseStatus::NoMatch; 8857 8858 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { 8859 SMLoc S = getLoc(); 8860 lex(); 8861 lex(); 8862 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); 8863 SMLoc OpYLoc = getLoc(); 8864 StringRef OpYName; 8865 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) { 8866 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc)); 8867 return ParseStatus::Success; 8868 } 8869 return Error(OpYLoc, "expected a VOPDY instruction after ::"); 8870 } 8871 return ParseStatus::NoMatch; 8872 } 8873 8874 // Create VOPD MCInst operands using parsed assembler operands. 
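// E.g. "v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v3, v4, v5" (illustrative)
// is converted into a single MCInst covering both the X and Y components.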
8875 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { 8876 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer 8877 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); 8878 if (Op.isReg()) { 8879 Op.addRegOperands(Inst, 1); 8880 return; 8881 } 8882 if (Op.isImm()) { 8883 Op.addImmOperands(Inst, 1); 8884 return; 8885 } 8886 llvm_unreachable("Unhandled operand type in cvtVOPD"); 8887 }; 8888 8889 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII); 8890 8891 // MCInst operands are ordered as follows: 8892 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 8893 8894 for (auto CompIdx : VOPD::COMPONENTS) { 8895 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands()); 8896 } 8897 8898 for (auto CompIdx : VOPD::COMPONENTS) { 8899 const auto &CInfo = InstInfo[CompIdx]; 8900 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum(); 8901 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx) 8902 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx)); 8903 if (CInfo.hasSrc2Acc()) 8904 addOp(CInfo.getIndexOfDstInParsedOperands()); 8905 } 8906 } 8907 8908 //===----------------------------------------------------------------------===// 8909 // dpp 8910 //===----------------------------------------------------------------------===// 8911 8912 bool AMDGPUOperand::isDPP8() const { 8913 return isImmTy(ImmTyDPP8); 8914 } 8915 8916 bool AMDGPUOperand::isDPPCtrl() const { 8917 using namespace AMDGPU::DPP; 8918 8919 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8920 if (result) { 8921 int64_t Imm = getImm(); 8922 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8923 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8924 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8925 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8926 (Imm == DppCtrl::WAVE_SHL1) || 8927 (Imm == DppCtrl::WAVE_ROL1) || 8928 (Imm == DppCtrl::WAVE_SHR1) || 8929 (Imm == DppCtrl::WAVE_ROR1) || 8930 (Imm == DppCtrl::ROW_MIRROR) || 8931 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8932 (Imm == DppCtrl::BCAST15) || 8933 (Imm == DppCtrl::BCAST31) || 8934 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8935 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8936 } 8937 return false; 8938 } 8939 8940 //===----------------------------------------------------------------------===// 8941 // mAI 8942 //===----------------------------------------------------------------------===// 8943 8944 bool AMDGPUOperand::isBLGP() const { 8945 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8946 } 8947 8948 bool AMDGPUOperand::isS16Imm() const { 8949 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8950 } 8951 8952 bool AMDGPUOperand::isU16Imm() const { 8953 return isImmLiteral() && isUInt<16>(getImm()); 8954 } 8955 8956 //===----------------------------------------------------------------------===// 8957 // dim 8958 //===----------------------------------------------------------------------===// 8959 8960 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8961 // We want to allow "dim:1D" etc., 8962 // but the initial 1 is tokenized as an integer. 
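// For example (illustrative), "1D" arrives here as the integer token "1"
// immediately followed by the identifier "D". The two pieces are glued back
// together below (only if the identifier starts exactly where the integer
// ended), an optional "SQ_RSRC_IMG_" prefix is dropped, and the result is
// looked up as a MIMG dim assembler suffix.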
8963 std::string Token; 8964 if (isToken(AsmToken::Integer)) { 8965 SMLoc Loc = getToken().getEndLoc(); 8966 Token = std::string(getTokenStr()); 8967 lex(); 8968 if (getLoc() != Loc) 8969 return false; 8970 } 8971 8972 StringRef Suffix; 8973 if (!parseId(Suffix)) 8974 return false; 8975 Token += Suffix; 8976 8977 StringRef DimId = Token; 8978 if (DimId.starts_with("SQ_RSRC_IMG_")) 8979 DimId = DimId.drop_front(12); 8980 8981 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8982 if (!DimInfo) 8983 return false; 8984 8985 Encoding = DimInfo->Encoding; 8986 return true; 8987 } 8988 8989 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8990 if (!isGFX10Plus()) 8991 return ParseStatus::NoMatch; 8992 8993 SMLoc S = getLoc(); 8994 8995 if (!trySkipId("dim", AsmToken::Colon)) 8996 return ParseStatus::NoMatch; 8997 8998 unsigned Encoding; 8999 SMLoc Loc = getLoc(); 9000 if (!parseDimId(Encoding)) 9001 return Error(Loc, "invalid dim value"); 9002 9003 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 9004 AMDGPUOperand::ImmTyDim)); 9005 return ParseStatus::Success; 9006 } 9007 9008 //===----------------------------------------------------------------------===// 9009 // dpp 9010 //===----------------------------------------------------------------------===// 9011 9012 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 9013 SMLoc S = getLoc(); 9014 9015 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 9016 return ParseStatus::NoMatch; 9017 9018 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 9019 9020 int64_t Sels[8]; 9021 9022 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 9023 return ParseStatus::Failure; 9024 9025 for (size_t i = 0; i < 8; ++i) { 9026 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 9027 return ParseStatus::Failure; 9028 9029 SMLoc Loc = getLoc(); 9030 if (getParser().parseAbsoluteExpression(Sels[i])) 9031 return ParseStatus::Failure; 9032 if (0 > Sels[i] || 7 < Sels[i]) 9033 return Error(Loc, "expected a 3-bit value"); 9034 } 9035 9036 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 9037 return ParseStatus::Failure; 9038 9039 unsigned DPP8 = 0; 9040 for (size_t i = 0; i < 8; ++i) 9041 DPP8 |= (Sels[i] << (i * 3)); 9042 9043 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 9044 return ParseStatus::Success; 9045 } 9046 9047 bool 9048 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 9049 const OperandVector &Operands) { 9050 if (Ctrl == "row_newbcast") 9051 return isGFX90A(); 9052 9053 if (Ctrl == "row_share" || 9054 Ctrl == "row_xmask") 9055 return isGFX10Plus(); 9056 9057 if (Ctrl == "wave_shl" || 9058 Ctrl == "wave_shr" || 9059 Ctrl == "wave_rol" || 9060 Ctrl == "wave_ror" || 9061 Ctrl == "row_bcast") 9062 return isVI() || isGFX9(); 9063 9064 return Ctrl == "row_mirror" || 9065 Ctrl == "row_half_mirror" || 9066 Ctrl == "quad_perm" || 9067 Ctrl == "row_shl" || 9068 Ctrl == "row_shr" || 9069 Ctrl == "row_ror"; 9070 } 9071 9072 int64_t 9073 AMDGPUAsmParser::parseDPPCtrlPerm() { 9074 // quad_perm:[%d,%d,%d,%d] 9075 9076 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 9077 return -1; 9078 9079 int64_t Val = 0; 9080 for (int i = 0; i < 4; ++i) { 9081 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 9082 return -1; 9083 9084 int64_t Temp; 9085 SMLoc Loc = getLoc(); 9086 if (getParser().parseAbsoluteExpression(Temp)) 9087 return -1; 9088 if (Temp < 0 || Temp > 3) { 9089 Error(Loc, 
"expected a 2-bit value"); 9090 return -1; 9091 } 9092 9093 Val += (Temp << i * 2); 9094 } 9095 9096 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 9097 return -1; 9098 9099 return Val; 9100 } 9101 9102 int64_t 9103 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 9104 using namespace AMDGPU::DPP; 9105 9106 // sel:%d 9107 9108 int64_t Val; 9109 SMLoc Loc = getLoc(); 9110 9111 if (getParser().parseAbsoluteExpression(Val)) 9112 return -1; 9113 9114 struct DppCtrlCheck { 9115 int64_t Ctrl; 9116 int Lo; 9117 int Hi; 9118 }; 9119 9120 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 9121 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 9122 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 9123 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 9124 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 9125 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 9126 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 9127 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 9128 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 9129 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 9130 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 9131 .Default({-1, 0, 0}); 9132 9133 bool Valid; 9134 if (Check.Ctrl == -1) { 9135 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 9136 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 9137 } else { 9138 Valid = Check.Lo <= Val && Val <= Check.Hi; 9139 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 9140 } 9141 9142 if (!Valid) { 9143 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 9144 return -1; 9145 } 9146 9147 return Val; 9148 } 9149 9150 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 9151 using namespace AMDGPU::DPP; 9152 9153 if (!isToken(AsmToken::Identifier) || 9154 !isSupportedDPPCtrl(getTokenStr(), Operands)) 9155 return ParseStatus::NoMatch; 9156 9157 SMLoc S = getLoc(); 9158 int64_t Val = -1; 9159 StringRef Ctrl; 9160 9161 parseId(Ctrl); 9162 9163 if (Ctrl == "row_mirror") { 9164 Val = DppCtrl::ROW_MIRROR; 9165 } else if (Ctrl == "row_half_mirror") { 9166 Val = DppCtrl::ROW_HALF_MIRROR; 9167 } else { 9168 if (skipToken(AsmToken::Colon, "expected a colon")) { 9169 if (Ctrl == "quad_perm") { 9170 Val = parseDPPCtrlPerm(); 9171 } else { 9172 Val = parseDPPCtrlSel(Ctrl); 9173 } 9174 } 9175 } 9176 9177 if (Val == -1) 9178 return ParseStatus::Failure; 9179 9180 Operands.push_back( 9181 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 9182 return ParseStatus::Success; 9183 } 9184 9185 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 9186 bool IsDPP8) { 9187 OptionalImmIndexMap OptionalIdx; 9188 unsigned Opc = Inst.getOpcode(); 9189 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 9190 9191 // MAC instructions are special because they have 'old' 9192 // operand which is not tied to dst (but assumed to be). 9193 // They also have dummy unused src2_modifiers. 
9194 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old); 9195 int Src2ModIdx = 9196 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); 9197 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 && 9198 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1; 9199 9200 unsigned I = 1; 9201 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 9202 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 9203 } 9204 9205 int Fi = 0; 9206 for (unsigned E = Operands.size(); I != E; ++I) { 9207 9208 if (IsMAC) { 9209 int NumOperands = Inst.getNumOperands(); 9210 if (OldIdx == NumOperands) { 9211 // Handle old operand 9212 constexpr int DST_IDX = 0; 9213 Inst.addOperand(Inst.getOperand(DST_IDX)); 9214 } else if (Src2ModIdx == NumOperands) { 9215 // Add unused dummy src2_modifiers 9216 Inst.addOperand(MCOperand::createImm(0)); 9217 } 9218 } 9219 9220 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); 9221 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) { 9222 Inst.addOperand(Inst.getOperand(0)); 9223 } 9224 9225 bool IsVOP3CvtSrDpp = 9226 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 || 9227 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || 9228 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || 9229 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12; 9230 if (IsVOP3CvtSrDpp) { 9231 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) { 9232 Inst.addOperand(MCOperand::createImm(0)); 9233 Inst.addOperand(MCOperand::createReg(0)); 9234 } 9235 } 9236 9237 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 9238 MCOI::TIED_TO); 9239 if (TiedTo != -1) { 9240 assert((unsigned)TiedTo < Inst.getNumOperands()); 9241 // handle tied old or src2 for MAC instructions 9242 Inst.addOperand(Inst.getOperand(TiedTo)); 9243 } 9244 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 9245 // Add the register arguments 9246 if (IsDPP8 && Op.isDppFI()) { 9247 Fi = Op.getImm(); 9248 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 9249 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 9250 } else if (Op.isReg()) { 9251 Op.addRegOperands(Inst, 1); 9252 } else if (Op.isImm() && 9253 Desc.operands()[Inst.getNumOperands()].RegClass != -1) { 9254 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 9255 Op.addImmOperands(Inst, 1); 9256 } else if (Op.isImm()) { 9257 OptionalIdx[Op.getImmTy()] = I; 9258 } else { 9259 llvm_unreachable("unhandled operand type"); 9260 } 9261 } 9262 9263 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) 9264 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9265 AMDGPUOperand::ImmTyByteSel); 9266 9267 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 9268 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9269 AMDGPUOperand::ImmTyClamp); 9270 9271 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 9272 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 9273 9274 if (Desc.TSFlags & SIInstrFlags::VOP3P) 9275 cvtVOP3P(Inst, Operands, OptionalIdx); 9276 else if (Desc.TSFlags & SIInstrFlags::VOP3) 9277 cvtVOP3OpSel(Inst, Operands, OptionalIdx); 9278 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { 9279 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 9280 } 9281 9282 if (IsDPP8) { 9283 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 9284 using namespace llvm::AMDGPU::DPP; 9285 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 9286 } else { 9287 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 9288 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 9289 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 9290 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 9291 9292 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) 9293 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9294 AMDGPUOperand::ImmTyDppFI); 9295 } 9296 } 9297 9298 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 9299 OptionalImmIndexMap OptionalIdx; 9300 9301 unsigned I = 1; 9302 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 9303 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 9304 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 9305 } 9306 9307 int Fi = 0; 9308 for (unsigned E = Operands.size(); I != E; ++I) { 9309 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 9310 MCOI::TIED_TO); 9311 if (TiedTo != -1) { 9312 assert((unsigned)TiedTo < Inst.getNumOperands()); 9313 // handle tied old or src2 for MAC instructions 9314 Inst.addOperand(Inst.getOperand(TiedTo)); 9315 } 9316 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 9317 // Add the register arguments 9318 if (Op.isReg() && validateVccOperand(Op.getReg())) { 9319 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 9320 // Skip it. 9321 continue; 9322 } 9323 9324 if (IsDPP8) { 9325 if (Op.isDPP8()) { 9326 Op.addImmOperands(Inst, 1); 9327 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 9328 Op.addRegWithFPInputModsOperands(Inst, 2); 9329 } else if (Op.isDppFI()) { 9330 Fi = Op.getImm(); 9331 } else if (Op.isReg()) { 9332 Op.addRegOperands(Inst, 1); 9333 } else { 9334 llvm_unreachable("Invalid operand type"); 9335 } 9336 } else { 9337 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 9338 Op.addRegWithFPInputModsOperands(Inst, 2); 9339 } else if (Op.isReg()) { 9340 Op.addRegOperands(Inst, 1); 9341 } else if (Op.isDPPCtrl()) { 9342 Op.addImmOperands(Inst, 1); 9343 } else if (Op.isImm()) { 9344 // Handle optional arguments 9345 OptionalIdx[Op.getImmTy()] = I; 9346 } else { 9347 llvm_unreachable("Invalid operand type"); 9348 } 9349 } 9350 } 9351 9352 if (IsDPP8) { 9353 using namespace llvm::AMDGPU::DPP; 9354 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 9355 } else { 9356 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 9357 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 9358 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 9359 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) { 9360 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9361 AMDGPUOperand::ImmTyDppFI); 9362 } 9363 } 9364 } 9365 9366 //===----------------------------------------------------------------------===// 9367 // sdwa 9368 //===----------------------------------------------------------------------===// 9369 9370 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, 9371 StringRef Prefix, 9372 AMDGPUOperand::ImmTy Type) { 9373 using namespace llvm::AMDGPU::SDWA; 9374 9375 SMLoc S = getLoc(); 9376 StringRef Value; 9377 9378 SMLoc StringLoc; 9379 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc); 9380 if (!Res.isSuccess()) 9381 return Res; 9382 9383 int64_t Int; 9384 Int = StringSwitch<int64_t>(Value) 9385 .Case("BYTE_0", SdwaSel::BYTE_0) 9386 .Case("BYTE_1", SdwaSel::BYTE_1) 9387 .Case("BYTE_2", SdwaSel::BYTE_2) 9388 .Case("BYTE_3", SdwaSel::BYTE_3) 9389 .Case("WORD_0", SdwaSel::WORD_0) 9390 .Case("WORD_1", SdwaSel::WORD_1) 9391 .Case("DWORD", SdwaSel::DWORD) 9392 .Default(0xffffffff); 9393 9394 if (Int == 0xffffffff) 9395 return Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 9396 9397 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 9398 return ParseStatus::Success; 9399 } 9400 9401 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 9402 using namespace llvm::AMDGPU::SDWA; 9403 9404 SMLoc S = getLoc(); 9405 StringRef Value; 9406 9407 SMLoc StringLoc; 9408 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc); 9409 if (!Res.isSuccess()) 9410 return Res; 9411 9412 int64_t Int; 9413 Int = StringSwitch<int64_t>(Value) 9414 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 9415 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 9416 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 9417 .Default(0xffffffff); 9418 9419 if (Int == 0xffffffff) 9420 return Error(StringLoc, "invalid dst_unused value"); 9421 9422 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused)); 9423 return ParseStatus::Success; 9424 } 9425 9426 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 9427 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 9428 } 9429 9430 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 9431 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 9432 } 9433 9434 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 9435 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 9436 } 9437 9438 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 9439 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 9440 } 9441 9442 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 9443 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 9444 } 9445 9446 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 9447 uint64_t BasicInstType, 9448 bool SkipDstVcc, 9449 bool SkipSrcVcc) { 9450 using namespace llvm::AMDGPU::SDWA; 9451 9452 OptionalImmIndexMap OptionalIdx; 9453 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 9454 bool SkippedVcc = false; 9455 9456 
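  // Copy the explicit defs first, then walk the remaining parsed operands:
  // the "vcc" token used by VOP2b/VOPC sdwa forms is skipped, sources are
  // added together with their modifiers, and optional immediates are only
  // recorded here so they can be appended afterwards in the order the sdwa
  // encoding expects (clamp, omod, dst_sel, dst_unused and the src sels, as
  // applicable).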
unsigned I = 1; 9457 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 9458 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 9459 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 9460 } 9461 9462 for (unsigned E = Operands.size(); I != E; ++I) { 9463 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 9464 if (SkipVcc && !SkippedVcc && Op.isReg() && 9465 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 9466 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 9467 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 9468 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 9469 // Skip VCC only if we didn't skip it on previous iteration. 9470 // Note that src0 and src1 occupy 2 slots each because of modifiers. 9471 if (BasicInstType == SIInstrFlags::VOP2 && 9472 ((SkipDstVcc && Inst.getNumOperands() == 1) || 9473 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 9474 SkippedVcc = true; 9475 continue; 9476 } 9477 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) { 9478 SkippedVcc = true; 9479 continue; 9480 } 9481 } 9482 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 9483 Op.addRegOrImmWithInputModsOperands(Inst, 2); 9484 } else if (Op.isImm()) { 9485 // Handle optional arguments 9486 OptionalIdx[Op.getImmTy()] = I; 9487 } else { 9488 llvm_unreachable("Invalid operand type"); 9489 } 9490 SkippedVcc = false; 9491 } 9492 9493 const unsigned Opc = Inst.getOpcode(); 9494 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 && 9495 Opc != AMDGPU::V_NOP_sdwa_vi) { 9496 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 9497 switch (BasicInstType) { 9498 case SIInstrFlags::VOP1: 9499 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 9500 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9501 AMDGPUOperand::ImmTyClamp, 0); 9502 9503 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 9504 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9505 AMDGPUOperand::ImmTyOModSI, 0); 9506 9507 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel)) 9508 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9509 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); 9510 9511 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused)) 9512 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9513 AMDGPUOperand::ImmTySDWADstUnused, 9514 DstUnused::UNUSED_PRESERVE); 9515 9516 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 9517 break; 9518 9519 case SIInstrFlags::VOP2: 9520 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9521 AMDGPUOperand::ImmTyClamp, 0); 9522 9523 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod)) 9524 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 9525 9526 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); 9527 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE); 9528 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 9529 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD); 9530 break; 9531 9532 case SIInstrFlags::VOPC: 9533 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp)) 9534 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9535 AMDGPUOperand::ImmTyClamp, 0); 9536 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 9537 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD); 9538 break; 9539 9540 default: 9541 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); 9542 } 9543 } 9544 9545 // special case v_mac_{f16, f32}: 9546 // it has src2 register operand that is tied to dst operand 9547 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 9548 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 9549 auto it = Inst.begin(); 9550 std::advance( 9551 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 9552 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 9553 } 9554 } 9555 9556 /// Force static initialization. 9557 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 9558 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target()); 9559 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 9560 } 9561 9562 #define GET_REGISTER_MATCHER 9563 #define GET_MATCHER_IMPLEMENTATION 9564 #define GET_MNEMONIC_SPELL_CHECKER 9565 #define GET_MNEMONIC_CHECKER 9566 #include "AMDGPUGenAsmMatcher.inc" 9567 9568 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands, 9569 unsigned MCK) { 9570 switch (MCK) { 9571 case MCK_addr64: 9572 return parseTokenOp("addr64", Operands); 9573 case MCK_done: 9574 return parseTokenOp("done", Operands); 9575 case MCK_idxen: 9576 return parseTokenOp("idxen", Operands); 9577 case MCK_lds: 9578 return parseTokenOp("lds", Operands); 9579 case MCK_offen: 9580 return parseTokenOp("offen", Operands); 9581 case MCK_off: 9582 return parseTokenOp("off", Operands); 9583 case MCK_row_95_en: 9584 return parseTokenOp("row_en", Operands); 9585 case MCK_gds: 9586 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS); 9587 case MCK_tfe: 9588 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE); 9589 } 9590 return tryCustomParseOperand(Operands, MCK); 9591 } 9592 9593 // This function should be defined after auto-generated include so that we have 9594 // MatchClassKind enum defined 9595 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 9596 unsigned Kind) { 9597 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 9598 // But MatchInstructionImpl() expects to meet token and fails to validate 9599 // operand. This method checks if we are given immediate operand but expect to 9600 // get corresponding token. 9601 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 9602 switch (Kind) { 9603 case MCK_addr64: 9604 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 9605 case MCK_gds: 9606 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 9607 case MCK_lds: 9608 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 9609 case MCK_idxen: 9610 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 9611 case MCK_offen: 9612 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 9613 case MCK_tfe: 9614 return Operand.isTFE() ? Match_Success : Match_InvalidOperand; 9615 case MCK_SSrc_b32: 9616 // When operands have expression values, they will return true for isToken, 9617 // because it is not possible to distinguish between a token and an 9618 // expression at parse time. MatchInstructionImpl() will always try to 9619 // match an operand as a token, when isToken returns true, and when the 9620 // name of the expression is not a valid token, the match will fail, 9621 // so we need to handle it here. 
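    // (Illustrative) an operand such as "sym" in "s_mov_b32 s0, sym" reaches
    // this point as an expression; accepting it as SSrc_b32 lets the match
    // succeed so the symbol can be resolved later.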
9622 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand; 9623 case MCK_SSrc_f32: 9624 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand; 9625 case MCK_SOPPBrTarget: 9626 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand; 9627 case MCK_VReg32OrOff: 9628 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 9629 case MCK_InterpSlot: 9630 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 9631 case MCK_InterpAttr: 9632 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 9633 case MCK_InterpAttrChan: 9634 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand; 9635 case MCK_SReg_64: 9636 case MCK_SReg_64_XEXEC: 9637 // Null is defined as a 32-bit register but 9638 // it should also be enabled with 64-bit operands. 9639 // The following code enables it for SReg_64 operands 9640 // used as source and destination. Remaining source 9641 // operands are handled in isInlinableImm. 9642 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 9643 default: 9644 return Match_InvalidOperand; 9645 } 9646 } 9647 9648 //===----------------------------------------------------------------------===// 9649 // endpgm 9650 //===----------------------------------------------------------------------===// 9651 9652 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) { 9653 SMLoc S = getLoc(); 9654 int64_t Imm = 0; 9655 9656 if (!parseExpr(Imm)) { 9657 // The operand is optional, if not present default to 0 9658 Imm = 0; 9659 } 9660 9661 if (!isUInt<16>(Imm)) 9662 return Error(S, "expected a 16-bit value"); 9663 9664 Operands.push_back( 9665 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 9666 return ParseStatus::Success; 9667 } 9668 9669 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 9670 9671 //===----------------------------------------------------------------------===// 9672 // Split Barrier 9673 //===----------------------------------------------------------------------===// 9674 9675 bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); } 9676