//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ?
                 SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report location of mandatory operands only for VOPD,
  // when both OpX and OpY have a KImm and there are no other literals.
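  // Illustration of the rule above (a hedged example, not a verbatim
  // diagnostic from this parser): in "v_fmaak_f32 v0, v1, v2, 0x1234" the
  // trailing 0x1234 is a mandatory KImm and is normally skipped when
  // searching for the offending literal; only a regular literal supplied for
  // another operand would have its location reported.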
177 enum ImmKindTy { 178 ImmKindTyNone, 179 ImmKindTyLiteral, 180 ImmKindTyMandatoryLiteral, 181 ImmKindTyConst, 182 }; 183 184 private: 185 struct TokOp { 186 const char *Data; 187 unsigned Length; 188 }; 189 190 struct ImmOp { 191 int64_t Val; 192 ImmTy Type; 193 bool IsFPImm; 194 mutable ImmKindTy Kind; 195 Modifiers Mods; 196 }; 197 198 struct RegOp { 199 unsigned RegNo; 200 Modifiers Mods; 201 }; 202 203 union { 204 TokOp Tok; 205 ImmOp Imm; 206 RegOp Reg; 207 const MCExpr *Expr; 208 }; 209 210 public: 211 bool isToken() const override { return Kind == Token; } 212 213 bool isSymbolRefExpr() const { 214 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 215 } 216 217 bool isImm() const override { 218 return Kind == Immediate; 219 } 220 221 void setImmKindNone() const { 222 assert(isImm()); 223 Imm.Kind = ImmKindTyNone; 224 } 225 226 void setImmKindLiteral() const { 227 assert(isImm()); 228 Imm.Kind = ImmKindTyLiteral; 229 } 230 231 void setImmKindMandatoryLiteral() const { 232 assert(isImm()); 233 Imm.Kind = ImmKindTyMandatoryLiteral; 234 } 235 236 void setImmKindConst() const { 237 assert(isImm()); 238 Imm.Kind = ImmKindTyConst; 239 } 240 241 bool IsImmKindLiteral() const { 242 return isImm() && Imm.Kind == ImmKindTyLiteral; 243 } 244 245 bool IsImmKindMandatoryLiteral() const { 246 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral; 247 } 248 249 bool isImmKindConst() const { 250 return isImm() && Imm.Kind == ImmKindTyConst; 251 } 252 253 bool isInlinableImm(MVT type) const; 254 bool isLiteralImm(MVT type) const; 255 256 bool isRegKind() const { 257 return Kind == Register; 258 } 259 260 bool isReg() const override { 261 return isRegKind() && !hasModifiers(); 262 } 263 264 bool isRegOrInline(unsigned RCID, MVT type) const { 265 return isRegClass(RCID) || isInlinableImm(type); 266 } 267 268 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 269 return isRegOrInline(RCID, type) || isLiteralImm(type); 270 } 271 272 bool isRegOrImmWithInt16InputMods() const { 273 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 274 } 275 276 bool isRegOrImmWithInt32InputMods() const { 277 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 278 } 279 280 bool isRegOrInlineImmWithInt16InputMods() const { 281 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); 282 } 283 284 bool isRegOrInlineImmWithInt32InputMods() const { 285 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); 286 } 287 288 bool isRegOrImmWithInt64InputMods() const { 289 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 290 } 291 292 bool isRegOrImmWithFP16InputMods() const { 293 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 294 } 295 296 bool isRegOrImmWithFP32InputMods() const { 297 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 298 } 299 300 bool isRegOrImmWithFP64InputMods() const { 301 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 302 } 303 304 bool isRegOrInlineImmWithFP16InputMods() const { 305 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); 306 } 307 308 bool isRegOrInlineImmWithFP32InputMods() const { 309 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); 310 } 311 312 313 bool isVReg() const { 314 return isRegClass(AMDGPU::VGPR_32RegClassID) || 315 isRegClass(AMDGPU::VReg_64RegClassID) || 316 isRegClass(AMDGPU::VReg_96RegClassID) || 317 isRegClass(AMDGPU::VReg_128RegClassID) || 318 isRegClass(AMDGPU::VReg_160RegClassID) || 319 isRegClass(AMDGPU::VReg_192RegClassID) || 320 
isRegClass(AMDGPU::VReg_256RegClassID) || 321 isRegClass(AMDGPU::VReg_512RegClassID) || 322 isRegClass(AMDGPU::VReg_1024RegClassID); 323 } 324 325 bool isVReg32() const { 326 return isRegClass(AMDGPU::VGPR_32RegClassID); 327 } 328 329 bool isVReg32OrOff() const { 330 return isOff() || isVReg32(); 331 } 332 333 bool isNull() const { 334 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 335 } 336 337 bool isVRegWithInputMods() const; 338 bool isT16VRegWithInputMods() const; 339 340 bool isSDWAOperand(MVT type) const; 341 bool isSDWAFP16Operand() const; 342 bool isSDWAFP32Operand() const; 343 bool isSDWAInt16Operand() const; 344 bool isSDWAInt32Operand() const; 345 346 bool isImmTy(ImmTy ImmT) const { 347 return isImm() && Imm.Type == ImmT; 348 } 349 350 bool isImmModifier() const { 351 return isImm() && Imm.Type != ImmTyNone; 352 } 353 354 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 355 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 356 bool isDMask() const { return isImmTy(ImmTyDMask); } 357 bool isDim() const { return isImmTy(ImmTyDim); } 358 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 359 bool isDA() const { return isImmTy(ImmTyDA); } 360 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 361 bool isA16() const { return isImmTy(ImmTyA16); } 362 bool isLWE() const { return isImmTy(ImmTyLWE); } 363 bool isOff() const { return isImmTy(ImmTyOff); } 364 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 365 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 366 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 367 bool isOffen() const { return isImmTy(ImmTyOffen); } 368 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 369 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 370 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 371 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 372 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 373 374 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 375 bool isGDS() const { return isImmTy(ImmTyGDS); } 376 bool isLDS() const { return isImmTy(ImmTyLDS); } 377 bool isCPol() const { return isImmTy(ImmTyCPol); } 378 bool isSWZ() const { return isImmTy(ImmTySWZ); } 379 bool isTFE() const { return isImmTy(ImmTyTFE); } 380 bool isD16() const { return isImmTy(ImmTyD16); } 381 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 382 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 383 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 384 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 385 bool isFI() const { return isImmTy(ImmTyDppFi); } 386 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 387 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 388 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 389 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 390 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 391 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 392 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 393 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 394 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 395 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 396 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 397 bool isHigh() const { return isImmTy(ImmTyHigh); } 398 399 bool isRegOrImm() 
const { 400 return isReg() || isImm(); 401 } 402 403 bool isRegClass(unsigned RCID) const; 404 405 bool isInlineValue() const; 406 407 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 408 return isRegOrInline(RCID, type) && !hasModifiers(); 409 } 410 411 bool isSCSrcB16() const { 412 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 413 } 414 415 bool isSCSrcV2B16() const { 416 return isSCSrcB16(); 417 } 418 419 bool isSCSrcB32() const { 420 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 421 } 422 423 bool isSCSrcB64() const { 424 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 425 } 426 427 bool isBoolReg() const; 428 429 bool isSCSrcF16() const { 430 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 431 } 432 433 bool isSCSrcV2F16() const { 434 return isSCSrcF16(); 435 } 436 437 bool isSCSrcF32() const { 438 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 439 } 440 441 bool isSCSrcF64() const { 442 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 443 } 444 445 bool isSSrcB32() const { 446 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 447 } 448 449 bool isSSrcB16() const { 450 return isSCSrcB16() || isLiteralImm(MVT::i16); 451 } 452 453 bool isSSrcV2B16() const { 454 llvm_unreachable("cannot happen"); 455 return isSSrcB16(); 456 } 457 458 bool isSSrcB64() const { 459 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 460 // See isVSrc64(). 461 return isSCSrcB64() || isLiteralImm(MVT::i64); 462 } 463 464 bool isSSrcF32() const { 465 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 466 } 467 468 bool isSSrcF64() const { 469 return isSCSrcB64() || isLiteralImm(MVT::f64); 470 } 471 472 bool isSSrcF16() const { 473 return isSCSrcB16() || isLiteralImm(MVT::f16); 474 } 475 476 bool isSSrcV2F16() const { 477 llvm_unreachable("cannot happen"); 478 return isSSrcF16(); 479 } 480 481 bool isSSrcV2FP32() const { 482 llvm_unreachable("cannot happen"); 483 return isSSrcF32(); 484 } 485 486 bool isSCSrcV2FP32() const { 487 llvm_unreachable("cannot happen"); 488 return isSCSrcF32(); 489 } 490 491 bool isSSrcV2INT32() const { 492 llvm_unreachable("cannot happen"); 493 return isSSrcB32(); 494 } 495 496 bool isSCSrcV2INT32() const { 497 llvm_unreachable("cannot happen"); 498 return isSCSrcB32(); 499 } 500 501 bool isSSrcOrLdsB32() const { 502 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 503 isLiteralImm(MVT::i32) || isExpr(); 504 } 505 506 bool isVCSrcB32() const { 507 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 508 } 509 510 bool isVCSrcB64() const { 511 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 512 } 513 514 bool isVCSrcTB16_Lo128() const { 515 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16); 516 } 517 518 bool isVCSrcB16() const { 519 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 520 } 521 522 bool isVCSrcV2B16() const { 523 return isVCSrcB16(); 524 } 525 526 bool isVCSrcF32() const { 527 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 528 } 529 530 bool isVCSrcF64() const { 531 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 532 } 533 534 bool isVCSrcTF16_Lo128() const { 535 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16); 536 } 537 538 bool isVCSrcF16() const { 539 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 540 } 541 542 bool isVCSrcV2F16() const { 543 return isVCSrcF16(); 544 } 
545 546 bool isVSrcB32() const { 547 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 548 } 549 550 bool isVSrcB64() const { 551 return isVCSrcF64() || isLiteralImm(MVT::i64); 552 } 553 554 bool isVSrcTB16_Lo128() const { 555 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16); 556 } 557 558 bool isVSrcB16() const { 559 return isVCSrcB16() || isLiteralImm(MVT::i16); 560 } 561 562 bool isVSrcV2B16() const { 563 return isVSrcB16() || isLiteralImm(MVT::v2i16); 564 } 565 566 bool isVCSrcV2FP32() const { 567 return isVCSrcF64(); 568 } 569 570 bool isVSrcV2FP32() const { 571 return isVSrcF64() || isLiteralImm(MVT::v2f32); 572 } 573 574 bool isVCSrcV2INT32() const { 575 return isVCSrcB64(); 576 } 577 578 bool isVSrcV2INT32() const { 579 return isVSrcB64() || isLiteralImm(MVT::v2i32); 580 } 581 582 bool isVSrcF32() const { 583 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 584 } 585 586 bool isVSrcF64() const { 587 return isVCSrcF64() || isLiteralImm(MVT::f64); 588 } 589 590 bool isVSrcTF16_Lo128() const { 591 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16); 592 } 593 594 bool isVSrcF16() const { 595 return isVCSrcF16() || isLiteralImm(MVT::f16); 596 } 597 598 bool isVSrcV2F16() const { 599 return isVSrcF16() || isLiteralImm(MVT::v2f16); 600 } 601 602 bool isVISrcB32() const { 603 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 604 } 605 606 bool isVISrcB16() const { 607 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 608 } 609 610 bool isVISrcV2B16() const { 611 return isVISrcB16(); 612 } 613 614 bool isVISrcF32() const { 615 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 616 } 617 618 bool isVISrcF16() const { 619 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 620 } 621 622 bool isVISrcV2F16() const { 623 return isVISrcF16() || isVISrcB32(); 624 } 625 626 bool isVISrc_64B64() const { 627 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 628 } 629 630 bool isVISrc_64F64() const { 631 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 632 } 633 634 bool isVISrc_64V2FP32() const { 635 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 636 } 637 638 bool isVISrc_64V2INT32() const { 639 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 640 } 641 642 bool isVISrc_256B64() const { 643 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 644 } 645 646 bool isVISrc_256F64() const { 647 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 648 } 649 650 bool isVISrc_128B16() const { 651 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 652 } 653 654 bool isVISrc_128V2B16() const { 655 return isVISrc_128B16(); 656 } 657 658 bool isVISrc_128B32() const { 659 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 660 } 661 662 bool isVISrc_128F32() const { 663 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 664 } 665 666 bool isVISrc_256V2FP32() const { 667 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 668 } 669 670 bool isVISrc_256V2INT32() const { 671 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 672 } 673 674 bool isVISrc_512B32() const { 675 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 676 } 677 678 bool isVISrc_512B16() const { 679 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 680 } 681 682 bool isVISrc_512V2B16() const { 683 return isVISrc_512B16(); 684 } 685 686 bool isVISrc_512F32() const { 687 
return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 688 } 689 690 bool isVISrc_512F16() const { 691 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 692 } 693 694 bool isVISrc_512V2F16() const { 695 return isVISrc_512F16() || isVISrc_512B32(); 696 } 697 698 bool isVISrc_1024B32() const { 699 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 700 } 701 702 bool isVISrc_1024B16() const { 703 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 704 } 705 706 bool isVISrc_1024V2B16() const { 707 return isVISrc_1024B16(); 708 } 709 710 bool isVISrc_1024F32() const { 711 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 712 } 713 714 bool isVISrc_1024F16() const { 715 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 716 } 717 718 bool isVISrc_1024V2F16() const { 719 return isVISrc_1024F16() || isVISrc_1024B32(); 720 } 721 722 bool isAISrcB32() const { 723 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 724 } 725 726 bool isAISrcB16() const { 727 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 728 } 729 730 bool isAISrcV2B16() const { 731 return isAISrcB16(); 732 } 733 734 bool isAISrcF32() const { 735 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 736 } 737 738 bool isAISrcF16() const { 739 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 740 } 741 742 bool isAISrcV2F16() const { 743 return isAISrcF16() || isAISrcB32(); 744 } 745 746 bool isAISrc_64B64() const { 747 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 748 } 749 750 bool isAISrc_64F64() const { 751 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 752 } 753 754 bool isAISrc_128B32() const { 755 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 756 } 757 758 bool isAISrc_128B16() const { 759 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 760 } 761 762 bool isAISrc_128V2B16() const { 763 return isAISrc_128B16(); 764 } 765 766 bool isAISrc_128F32() const { 767 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 768 } 769 770 bool isAISrc_128F16() const { 771 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 772 } 773 774 bool isAISrc_128V2F16() const { 775 return isAISrc_128F16() || isAISrc_128B32(); 776 } 777 778 bool isVISrc_128F16() const { 779 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 780 } 781 782 bool isVISrc_128V2F16() const { 783 return isVISrc_128F16() || isVISrc_128B32(); 784 } 785 786 bool isAISrc_256B64() const { 787 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 788 } 789 790 bool isAISrc_256F64() const { 791 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 792 } 793 794 bool isAISrc_512B32() const { 795 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 796 } 797 798 bool isAISrc_512B16() const { 799 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 800 } 801 802 bool isAISrc_512V2B16() const { 803 return isAISrc_512B16(); 804 } 805 806 bool isAISrc_512F32() const { 807 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 808 } 809 810 bool isAISrc_512F16() const { 811 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 812 } 813 814 bool isAISrc_512V2F16() const { 815 return isAISrc_512F16() || isAISrc_512B32(); 816 } 817 818 bool isAISrc_1024B32() const { 819 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 820 } 821 822 bool 
isAISrc_1024B16() const { 823 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 824 } 825 826 bool isAISrc_1024V2B16() const { 827 return isAISrc_1024B16(); 828 } 829 830 bool isAISrc_1024F32() const { 831 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 832 } 833 834 bool isAISrc_1024F16() const { 835 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 836 } 837 838 bool isAISrc_1024V2F16() const { 839 return isAISrc_1024F16() || isAISrc_1024B32(); 840 } 841 842 bool isKImmFP32() const { 843 return isLiteralImm(MVT::f32); 844 } 845 846 bool isKImmFP16() const { 847 return isLiteralImm(MVT::f16); 848 } 849 850 bool isMem() const override { 851 return false; 852 } 853 854 bool isExpr() const { 855 return Kind == Expression; 856 } 857 858 bool isSoppBrTarget() const { 859 return isExpr() || isImm(); 860 } 861 862 bool isSWaitCnt() const; 863 bool isDepCtr() const; 864 bool isSDelayAlu() const; 865 bool isHwreg() const; 866 bool isSendMsg() const; 867 bool isSwizzle() const; 868 bool isSMRDOffset8() const; 869 bool isSMEMOffset() const; 870 bool isSMRDLiteralOffset() const; 871 bool isDPP8() const; 872 bool isDPPCtrl() const; 873 bool isBLGP() const; 874 bool isCBSZ() const; 875 bool isABID() const; 876 bool isGPRIdxMode() const; 877 bool isS16Imm() const; 878 bool isU16Imm() const; 879 bool isEndpgm() const; 880 bool isWaitVDST() const; 881 bool isWaitEXP() const; 882 883 StringRef getToken() const { 884 assert(isToken()); 885 return StringRef(Tok.Data, Tok.Length); 886 } 887 888 int64_t getImm() const { 889 assert(isImm()); 890 return Imm.Val; 891 } 892 893 void setImm(int64_t Val) { 894 assert(isImm()); 895 Imm.Val = Val; 896 } 897 898 ImmTy getImmTy() const { 899 assert(isImm()); 900 return Imm.Type; 901 } 902 903 unsigned getReg() const override { 904 assert(isRegKind()); 905 return Reg.RegNo; 906 } 907 908 SMLoc getStartLoc() const override { 909 return StartLoc; 910 } 911 912 SMLoc getEndLoc() const override { 913 return EndLoc; 914 } 915 916 SMRange getLocRange() const { 917 return SMRange(StartLoc, EndLoc); 918 } 919 920 Modifiers getModifiers() const { 921 assert(isRegKind() || isImmTy(ImmTyNone)); 922 return isRegKind() ? 
Reg.Mods : Imm.Mods; 923 } 924 925 void setModifiers(Modifiers Mods) { 926 assert(isRegKind() || isImmTy(ImmTyNone)); 927 if (isRegKind()) 928 Reg.Mods = Mods; 929 else 930 Imm.Mods = Mods; 931 } 932 933 bool hasModifiers() const { 934 return getModifiers().hasModifiers(); 935 } 936 937 bool hasFPModifiers() const { 938 return getModifiers().hasFPModifiers(); 939 } 940 941 bool hasIntModifiers() const { 942 return getModifiers().hasIntModifiers(); 943 } 944 945 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 946 947 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 948 949 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 950 951 template <unsigned Bitwidth> 952 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 953 954 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 955 addKImmFPOperands<16>(Inst, N); 956 } 957 958 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 959 addKImmFPOperands<32>(Inst, N); 960 } 961 962 void addRegOperands(MCInst &Inst, unsigned N) const; 963 964 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 965 addRegOperands(Inst, N); 966 } 967 968 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 969 if (isRegKind()) 970 addRegOperands(Inst, N); 971 else if (isExpr()) 972 Inst.addOperand(MCOperand::createExpr(Expr)); 973 else 974 addImmOperands(Inst, N); 975 } 976 977 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 978 Modifiers Mods = getModifiers(); 979 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 980 if (isRegKind()) { 981 addRegOperands(Inst, N); 982 } else { 983 addImmOperands(Inst, N, false); 984 } 985 } 986 987 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 988 assert(!hasIntModifiers()); 989 addRegOrImmWithInputModsOperands(Inst, N); 990 } 991 992 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 993 assert(!hasFPModifiers()); 994 addRegOrImmWithInputModsOperands(Inst, N); 995 } 996 997 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 998 Modifiers Mods = getModifiers(); 999 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 1000 assert(isRegKind()); 1001 addRegOperands(Inst, N); 1002 } 1003 1004 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 1005 assert(!hasIntModifiers()); 1006 addRegWithInputModsOperands(Inst, N); 1007 } 1008 1009 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 1010 assert(!hasFPModifiers()); 1011 addRegWithInputModsOperands(Inst, N); 1012 } 1013 1014 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 1015 if (isImm()) 1016 addImmOperands(Inst, N); 1017 else { 1018 assert(isExpr()); 1019 Inst.addOperand(MCOperand::createExpr(Expr)); 1020 } 1021 } 1022 1023 static void printImmTy(raw_ostream& OS, ImmTy Type) { 1024 switch (Type) { 1025 case ImmTyNone: OS << "None"; break; 1026 case ImmTyGDS: OS << "GDS"; break; 1027 case ImmTyLDS: OS << "LDS"; break; 1028 case ImmTyOffen: OS << "Offen"; break; 1029 case ImmTyIdxen: OS << "Idxen"; break; 1030 case ImmTyAddr64: OS << "Addr64"; break; 1031 case ImmTyOffset: OS << "Offset"; break; 1032 case ImmTyInstOffset: OS << "InstOffset"; break; 1033 case ImmTyOffset0: OS << "Offset0"; break; 1034 case ImmTyOffset1: OS << "Offset1"; break; 1035 case ImmTyCPol: OS << "CPol"; break; 1036 case ImmTySWZ: OS << "SWZ"; break; 1037 case ImmTyTFE: OS << "TFE"; break; 1038 case ImmTyD16: OS << "D16"; break; 1039 
case ImmTyFORMAT: OS << "FORMAT"; break; 1040 case ImmTyClampSI: OS << "ClampSI"; break; 1041 case ImmTyOModSI: OS << "OModSI"; break; 1042 case ImmTyDPP8: OS << "DPP8"; break; 1043 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1044 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1045 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1046 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1047 case ImmTyDppFi: OS << "FI"; break; 1048 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1049 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1050 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1051 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1052 case ImmTyDMask: OS << "DMask"; break; 1053 case ImmTyDim: OS << "Dim"; break; 1054 case ImmTyUNorm: OS << "UNorm"; break; 1055 case ImmTyDA: OS << "DA"; break; 1056 case ImmTyR128A16: OS << "R128A16"; break; 1057 case ImmTyA16: OS << "A16"; break; 1058 case ImmTyLWE: OS << "LWE"; break; 1059 case ImmTyOff: OS << "Off"; break; 1060 case ImmTyExpTgt: OS << "ExpTgt"; break; 1061 case ImmTyExpCompr: OS << "ExpCompr"; break; 1062 case ImmTyExpVM: OS << "ExpVM"; break; 1063 case ImmTyHwreg: OS << "Hwreg"; break; 1064 case ImmTySendMsg: OS << "SendMsg"; break; 1065 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1066 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1067 case ImmTyAttrChan: OS << "AttrChan"; break; 1068 case ImmTyOpSel: OS << "OpSel"; break; 1069 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1070 case ImmTyNegLo: OS << "NegLo"; break; 1071 case ImmTyNegHi: OS << "NegHi"; break; 1072 case ImmTySwizzle: OS << "Swizzle"; break; 1073 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1074 case ImmTyHigh: OS << "High"; break; 1075 case ImmTyBLGP: OS << "BLGP"; break; 1076 case ImmTyCBSZ: OS << "CBSZ"; break; 1077 case ImmTyABID: OS << "ABID"; break; 1078 case ImmTyEndpgm: OS << "Endpgm"; break; 1079 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1080 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1081 } 1082 } 1083 1084 void print(raw_ostream &OS) const override { 1085 switch (Kind) { 1086 case Register: 1087 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1088 break; 1089 case Immediate: 1090 OS << '<' << getImm(); 1091 if (getImmTy() != ImmTyNone) { 1092 OS << " type: "; printImmTy(OS, getImmTy()); 1093 } 1094 OS << " mods: " << Imm.Mods << '>'; 1095 break; 1096 case Token: 1097 OS << '\'' << getToken() << '\''; 1098 break; 1099 case Expression: 1100 OS << "<expr " << *Expr << '>'; 1101 break; 1102 } 1103 } 1104 1105 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1106 int64_t Val, SMLoc Loc, 1107 ImmTy Type = ImmTyNone, 1108 bool IsFPImm = false) { 1109 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1110 Op->Imm.Val = Val; 1111 Op->Imm.IsFPImm = IsFPImm; 1112 Op->Imm.Kind = ImmKindTyNone; 1113 Op->Imm.Type = Type; 1114 Op->Imm.Mods = Modifiers(); 1115 Op->StartLoc = Loc; 1116 Op->EndLoc = Loc; 1117 return Op; 1118 } 1119 1120 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1121 StringRef Str, SMLoc Loc, 1122 bool HasExplicitEncodingSize = true) { 1123 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1124 Res->Tok.Data = Str.data(); 1125 Res->Tok.Length = Str.size(); 1126 Res->StartLoc = Loc; 1127 Res->EndLoc = Loc; 1128 return Res; 1129 } 1130 1131 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1132 unsigned RegNo, SMLoc S, 1133 SMLoc E) { 1134 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1135 Op->Reg.RegNo 
        = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize =
      0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
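      // A hedged illustration (not a test taken from this file) of how
      // assembly can consume the symbols defined below, e.g. to guard
      // generation-specific code:
      //   .if .amdgcn.gfx_generation_number >= 10
      //     ; gfx10+-only instructions
      //   .endif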
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1430 bool isGFX90A() const { 1431 return AMDGPU::isGFX90A(getSTI()); 1432 } 1433 1434 bool isGFX940() const { 1435 return AMDGPU::isGFX940(getSTI()); 1436 } 1437 1438 bool isGFX9Plus() const { 1439 return AMDGPU::isGFX9Plus(getSTI()); 1440 } 1441 1442 bool isGFX10() const { 1443 return AMDGPU::isGFX10(getSTI()); 1444 } 1445 1446 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1447 1448 bool isGFX11() const { 1449 return AMDGPU::isGFX11(getSTI()); 1450 } 1451 1452 bool isGFX11Plus() const { 1453 return AMDGPU::isGFX11Plus(getSTI()); 1454 } 1455 1456 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); } 1457 1458 bool isGFX10_BEncoding() const { 1459 return AMDGPU::isGFX10_BEncoding(getSTI()); 1460 } 1461 1462 bool hasInv2PiInlineImm() const { 1463 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1464 } 1465 1466 bool hasFlatOffsets() const { 1467 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1468 } 1469 1470 bool hasArchitectedFlatScratch() const { 1471 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1472 } 1473 1474 bool hasSGPR102_SGPR103() const { 1475 return !isVI() && !isGFX9(); 1476 } 1477 1478 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1479 1480 bool hasIntClamp() const { 1481 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1482 } 1483 1484 AMDGPUTargetStreamer &getTargetStreamer() { 1485 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1486 return static_cast<AMDGPUTargetStreamer &>(TS); 1487 } 1488 1489 const MCRegisterInfo *getMRI() const { 1490 // We need this const_cast because for some reason getContext() is not const 1491 // in MCAsmParser. 1492 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1493 } 1494 1495 const MCInstrInfo *getMII() const { 1496 return &MII; 1497 } 1498 1499 const FeatureBitset &getFeatureBits() const { 1500 return getSTI().getFeatureBits(); 1501 } 1502 1503 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1504 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1505 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1506 1507 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1508 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1509 bool isForcedDPP() const { return ForcedDPP; } 1510 bool isForcedSDWA() const { return ForcedSDWA; } 1511 ArrayRef<unsigned> getMatchedVariants() const; 1512 StringRef getMatchedVariantName() const; 1513 1514 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1515 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1516 bool RestoreOnFailure); 1517 bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc, 1518 SMLoc &EndLoc) override; 1519 OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc, 1520 SMLoc &EndLoc) override; 1521 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1522 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1523 unsigned Kind) override; 1524 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1525 OperandVector &Operands, MCStreamer &Out, 1526 uint64_t &ErrorInfo, 1527 bool MatchingInlineAsm) override; 1528 bool ParseDirective(AsmToken DirectiveID) override; 1529 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1530 OperandMode Mode = OperandMode_Default); 1531 StringRef parseMnemonicSuffix(StringRef Name); 1532 bool ParseInstruction(ParseInstructionInfo &Info, 
StringRef Name, 1533 SMLoc NameLoc, OperandVector &Operands) override; 1534 //bool ProcessInstruction(MCInst &Inst); 1535 1536 OperandMatchResultTy parseTokenOp(StringRef Name, OperandVector &Operands); 1537 1538 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1539 1540 OperandMatchResultTy 1541 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1542 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1543 bool (*ConvertResult)(int64_t &) = nullptr); 1544 1545 OperandMatchResultTy 1546 parseOperandArrayWithPrefix(const char *Prefix, 1547 OperandVector &Operands, 1548 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1549 bool (*ConvertResult)(int64_t&) = nullptr); 1550 1551 OperandMatchResultTy 1552 parseNamedBit(StringRef Name, OperandVector &Operands, 1553 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1554 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const; 1555 OperandMatchResultTy parseCPol(OperandVector &Operands); 1556 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1557 StringRef &Value, 1558 SMLoc &StringLoc); 1559 1560 bool isModifier(); 1561 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1562 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1563 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1564 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1565 bool parseSP3NegModifier(); 1566 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1567 OperandMatchResultTy parseReg(OperandVector &Operands); 1568 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1569 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1570 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1571 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1572 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1573 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1574 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1575 OperandMatchResultTy parseUfmt(int64_t &Format); 1576 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1577 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1578 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1579 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1580 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1581 OperandMatchResultTy parseFlatOffset(OperandVector &Operands); 1582 OperandMatchResultTy parseR128A16(OperandVector &Operands); 1583 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1584 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1585 1586 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1587 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1588 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1589 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1590 1591 bool parseCnt(int64_t &IntVal); 1592 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1593 1594 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1595 void 
depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1596 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1597 1598 bool parseDelay(int64_t &Delay); 1599 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1600 1601 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1602 1603 private: 1604 struct OperandInfoTy { 1605 SMLoc Loc; 1606 int64_t Id; 1607 bool IsSymbolic = false; 1608 bool IsDefined = false; 1609 1610 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1611 }; 1612 1613 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1614 bool validateSendMsg(const OperandInfoTy &Msg, 1615 const OperandInfoTy &Op, 1616 const OperandInfoTy &Stream); 1617 1618 bool parseHwregBody(OperandInfoTy &HwReg, 1619 OperandInfoTy &Offset, 1620 OperandInfoTy &Width); 1621 bool validateHwreg(const OperandInfoTy &HwReg, 1622 const OperandInfoTy &Offset, 1623 const OperandInfoTy &Width); 1624 1625 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1626 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1627 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1628 1629 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1630 const OperandVector &Operands) const; 1631 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1632 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1633 SMLoc getLitLoc(const OperandVector &Operands, 1634 bool SearchMandatoryLiterals = false) const; 1635 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const; 1636 SMLoc getConstLoc(const OperandVector &Operands) const; 1637 SMLoc getInstLoc(const OperandVector &Operands) const; 1638 1639 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1640 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1641 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1642 bool validateSOPLiteral(const MCInst &Inst) const; 1643 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1644 bool validateVOPDRegBankConstraints(const MCInst &Inst, 1645 const OperandVector &Operands); 1646 bool validateIntClampSupported(const MCInst &Inst); 1647 bool validateMIMGAtomicDMask(const MCInst &Inst); 1648 bool validateMIMGGatherDMask(const MCInst &Inst); 1649 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1650 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc); 1651 bool validateMIMGAddrSize(const MCInst &Inst); 1652 bool validateMIMGD16(const MCInst &Inst); 1653 bool validateMIMGMSAA(const MCInst &Inst); 1654 bool validateOpSel(const MCInst &Inst); 1655 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1656 bool validateVccOperand(unsigned Reg) const; 1657 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1658 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1659 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands); 1660 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1661 bool validateAGPRLdSt(const MCInst &Inst) const; 1662 bool validateVGPRAlign(const MCInst &Inst) const; 1663 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1664 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1665 bool validateDivScale(const MCInst &Inst); 1666 bool validateWaitCnt(const MCInst &Inst, const OperandVector 
&Operands); 1667 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1668 const SMLoc &IDLoc); 1669 bool validateExeczVcczOperands(const OperandVector &Operands); 1670 bool validateTFE(const MCInst &Inst, const OperandVector &Operands); 1671 std::optional<StringRef> validateLdsDirect(const MCInst &Inst); 1672 unsigned getConstantBusLimit(unsigned Opcode) const; 1673 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1674 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1675 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1676 1677 bool isSupportedMnemo(StringRef Mnemo, 1678 const FeatureBitset &FBS); 1679 bool isSupportedMnemo(StringRef Mnemo, 1680 const FeatureBitset &FBS, 1681 ArrayRef<unsigned> Variants); 1682 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1683 1684 bool isId(const StringRef Id) const; 1685 bool isId(const AsmToken &Token, const StringRef Id) const; 1686 bool isToken(const AsmToken::TokenKind Kind) const; 1687 StringRef getId() const; 1688 bool trySkipId(const StringRef Id); 1689 bool trySkipId(const StringRef Pref, const StringRef Id); 1690 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1691 bool trySkipToken(const AsmToken::TokenKind Kind); 1692 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1693 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1694 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1695 1696 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1697 AsmToken::TokenKind getTokenKind() const; 1698 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1699 bool parseExpr(OperandVector &Operands); 1700 StringRef getTokenStr() const; 1701 AsmToken peekToken(bool ShouldSkipSpace = true); 1702 AsmToken getToken() const; 1703 SMLoc getLoc() const; 1704 void lex(); 1705 1706 public: 1707 void onBeginOfFile() override; 1708 1709 OperandMatchResultTy parseCustomOperand(OperandVector &Operands, 1710 unsigned MCK); 1711 1712 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1713 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1714 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1715 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1716 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1717 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1718 1719 bool parseSwizzleOperand(int64_t &Op, 1720 const unsigned MinVal, 1721 const unsigned MaxVal, 1722 const StringRef ErrMsg, 1723 SMLoc &Loc); 1724 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1725 const unsigned MinVal, 1726 const unsigned MaxVal, 1727 const StringRef ErrMsg); 1728 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1729 bool parseSwizzleOffset(int64_t &Imm); 1730 bool parseSwizzleMacro(int64_t &Imm); 1731 bool parseSwizzleQuadPerm(int64_t &Imm); 1732 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1733 bool parseSwizzleBroadcast(int64_t &Imm); 1734 bool parseSwizzleSwap(int64_t &Imm); 1735 bool parseSwizzleReverse(int64_t &Imm); 1736 1737 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1738 int64_t parseGPRIdxMacro(); 1739 1740 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1741 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1742 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1743 1744 
AMDGPUOperand::Ptr defaultCPol() const; 1745 1746 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1747 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1748 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1749 AMDGPUOperand::Ptr defaultFlatOffset() const; 1750 1751 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1752 1753 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1754 OptionalImmIndexMap &OptionalIdx); 1755 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1756 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1757 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1758 void cvtVOPD(MCInst &Inst, const OperandVector &Operands); 1759 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 1760 OptionalImmIndexMap &OptionalIdx); 1761 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1762 OptionalImmIndexMap &OptionalIdx); 1763 1764 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1765 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1766 1767 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1768 bool IsAtomic = false); 1769 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1770 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1771 1772 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1773 1774 bool parseDimId(unsigned &Encoding); 1775 OperandMatchResultTy parseDim(OperandVector &Operands); 1776 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1777 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1778 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1779 int64_t parseDPPCtrlSel(StringRef Ctrl); 1780 int64_t parseDPPCtrlPerm(); 1781 AMDGPUOperand::Ptr defaultRowMask() const; 1782 AMDGPUOperand::Ptr defaultBankMask() const; 1783 AMDGPUOperand::Ptr defaultDppBoundCtrl() const; 1784 AMDGPUOperand::Ptr defaultFI() const; 1785 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1786 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1787 cvtDPP(Inst, Operands, true); 1788 } 1789 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1790 bool IsDPP8 = false); 1791 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1792 cvtVOP3DPP(Inst, Operands, true); 1793 } 1794 1795 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1796 AMDGPUOperand::ImmTy Type); 1797 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1798 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1799 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1800 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1801 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1802 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1803 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1804 uint64_t BasicInstType, 1805 bool SkipDstVcc = false, 1806 bool SkipSrcVcc = false); 1807 1808 AMDGPUOperand::Ptr defaultBLGP() const; 1809 AMDGPUOperand::Ptr defaultCBSZ() const; 1810 AMDGPUOperand::Ptr defaultABID() const; 1811 1812 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1813 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1814 1815 AMDGPUOperand::Ptr defaultWaitVDST() const; 1816 AMDGPUOperand::Ptr defaultWaitEXP() const; 1817 OperandMatchResultTy parseVOPD(OperandVector &Operands); 1818 }; 1819 1820 } // end anonymous namespace 1821 1822 // May be called with 
integer type with equivalent bitwidth. 1823 static const fltSemantics *getFltSemantics(unsigned Size) { 1824 switch (Size) { 1825 case 4: 1826 return &APFloat::IEEEsingle(); 1827 case 8: 1828 return &APFloat::IEEEdouble(); 1829 case 2: 1830 return &APFloat::IEEEhalf(); 1831 default: 1832 llvm_unreachable("unsupported fp type"); 1833 } 1834 } 1835 1836 static const fltSemantics *getFltSemantics(MVT VT) { 1837 return getFltSemantics(VT.getSizeInBits() / 8); 1838 } 1839 1840 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1841 switch (OperandType) { 1842 case AMDGPU::OPERAND_REG_IMM_INT32: 1843 case AMDGPU::OPERAND_REG_IMM_FP32: 1844 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1845 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1846 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1847 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1848 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1849 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1850 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1851 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1852 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1853 case AMDGPU::OPERAND_KIMM32: 1854 return &APFloat::IEEEsingle(); 1855 case AMDGPU::OPERAND_REG_IMM_INT64: 1856 case AMDGPU::OPERAND_REG_IMM_FP64: 1857 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1858 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1859 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1860 return &APFloat::IEEEdouble(); 1861 case AMDGPU::OPERAND_REG_IMM_INT16: 1862 case AMDGPU::OPERAND_REG_IMM_FP16: 1863 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1864 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1865 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1866 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1867 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1868 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1869 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1870 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1871 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1872 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1873 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1874 case AMDGPU::OPERAND_KIMM16: 1875 return &APFloat::IEEEhalf(); 1876 default: 1877 llvm_unreachable("unsupported fp type"); 1878 } 1879 } 1880 1881 //===----------------------------------------------------------------------===// 1882 // Operand 1883 //===----------------------------------------------------------------------===// 1884 1885 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1886 bool Lost; 1887 1888 // Convert literal to single precision 1889 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1890 APFloat::rmNearestTiesToEven, 1891 &Lost); 1892 // We allow precision lost but not overflow or underflow 1893 if (Status != APFloat::opOK && 1894 Lost && 1895 ((Status & APFloat::opOverflow) != 0 || 1896 (Status & APFloat::opUnderflow) != 0)) { 1897 return false; 1898 } 1899 1900 return true; 1901 } 1902 1903 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1904 return isUIntN(Size, Val) || isIntN(Size, Val); 1905 } 1906 1907 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1908 if (VT.getScalarType() == MVT::i16) { 1909 // FP immediate values are broken. 1910 return isInlinableIntLiteral(Val); 1911 } 1912 1913 // f16/v2f16 operands work correctly for all values. 1914 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1915 } 1916 1917 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1918 1919 // This is a hack to enable named inline values like 1920 // shared_base with both 32-bit and 64-bit operands. 
1921 // Note that these values are defined as 1922 // 32-bit operands only. 1923 if (isInlineValue()) { 1924 return true; 1925 } 1926 1927 if (!isImmTy(ImmTyNone)) { 1928 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1929 return false; 1930 } 1931 // TODO: We should avoid using host float here. It would be better to 1932 // check the float bit values which is what a few other places do. 1933 // We've had bot failures before due to weird NaN support on mips hosts. 1934 1935 APInt Literal(64, Imm.Val); 1936 1937 if (Imm.IsFPImm) { // We got fp literal token 1938 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1939 return AMDGPU::isInlinableLiteral64(Imm.Val, 1940 AsmParser->hasInv2PiInlineImm()); 1941 } 1942 1943 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1944 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1945 return false; 1946 1947 if (type.getScalarSizeInBits() == 16) { 1948 return isInlineableLiteralOp16( 1949 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1950 type, AsmParser->hasInv2PiInlineImm()); 1951 } 1952 1953 // Check if single precision literal is inlinable 1954 return AMDGPU::isInlinableLiteral32( 1955 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1956 AsmParser->hasInv2PiInlineImm()); 1957 } 1958 1959 // We got int literal token. 1960 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1961 return AMDGPU::isInlinableLiteral64(Imm.Val, 1962 AsmParser->hasInv2PiInlineImm()); 1963 } 1964 1965 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1966 return false; 1967 } 1968 1969 if (type.getScalarSizeInBits() == 16) { 1970 return isInlineableLiteralOp16( 1971 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1972 type, AsmParser->hasInv2PiInlineImm()); 1973 } 1974 1975 return AMDGPU::isInlinableLiteral32( 1976 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1977 AsmParser->hasInv2PiInlineImm()); 1978 } 1979 1980 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1981 // Check that this immediate can be added as literal 1982 if (!isImmTy(ImmTyNone)) { 1983 return false; 1984 } 1985 1986 if (!Imm.IsFPImm) { 1987 // We got int literal token. 1988 1989 if (type == MVT::f64 && hasFPModifiers()) { 1990 // Cannot apply fp modifiers to int literals preserving the same semantics 1991 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1992 // disable these cases. 1993 return false; 1994 } 1995 1996 unsigned Size = type.getSizeInBits(); 1997 if (Size == 64) 1998 Size = 32; 1999 2000 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 2001 // types. 2002 return isSafeTruncation(Imm.Val, Size); 2003 } 2004 2005 // We got fp literal token 2006 if (type == MVT::f64) { // Expected 64-bit fp operand 2007 // We would set low 64-bits of literal to zeroes but we accept this literals 2008 return true; 2009 } 2010 2011 if (type == MVT::i64) { // Expected 64-bit int operand 2012 // We don't allow fp literals in 64-bit integer instructions. It is 2013 // unclear how we should encode them. 2014 return false; 2015 } 2016 2017 // We allow fp literals with f16x2 operands assuming that the specified 2018 // literal goes into the lower half and the upper half is zero. We also 2019 // require that the literal may be losslessly converted to f16. 2020 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 2021 (type == MVT::v2i16)? MVT::i16 : 2022 (type == MVT::v2f32)? 
MVT::f32 : type; 2023 2024 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2025 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2026 } 2027 2028 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2029 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2030 } 2031 2032 bool AMDGPUOperand::isVRegWithInputMods() const { 2033 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2034 // GFX90A allows DPP on 64-bit operands. 2035 (isRegClass(AMDGPU::VReg_64RegClassID) && 2036 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 2037 } 2038 2039 bool AMDGPUOperand::isT16VRegWithInputMods() const { 2040 return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID); 2041 } 2042 2043 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2044 if (AsmParser->isVI()) 2045 return isVReg32(); 2046 else if (AsmParser->isGFX9Plus()) 2047 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2048 else 2049 return false; 2050 } 2051 2052 bool AMDGPUOperand::isSDWAFP16Operand() const { 2053 return isSDWAOperand(MVT::f16); 2054 } 2055 2056 bool AMDGPUOperand::isSDWAFP32Operand() const { 2057 return isSDWAOperand(MVT::f32); 2058 } 2059 2060 bool AMDGPUOperand::isSDWAInt16Operand() const { 2061 return isSDWAOperand(MVT::i16); 2062 } 2063 2064 bool AMDGPUOperand::isSDWAInt32Operand() const { 2065 return isSDWAOperand(MVT::i32); 2066 } 2067 2068 bool AMDGPUOperand::isBoolReg() const { 2069 auto FB = AsmParser->getFeatureBits(); 2070 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2071 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2072 } 2073 2074 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2075 { 2076 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2077 assert(Size == 2 || Size == 4 || Size == 8); 2078 2079 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2080 2081 if (Imm.Mods.Abs) { 2082 Val &= ~FpSignMask; 2083 } 2084 if (Imm.Mods.Neg) { 2085 Val ^= FpSignMask; 2086 } 2087 2088 return Val; 2089 } 2090 2091 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2092 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2093 Inst.getNumOperands())) { 2094 addLiteralImmOperand(Inst, Imm.Val, 2095 ApplyModifiers & 2096 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2097 } else { 2098 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2099 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2100 setImmKindNone(); 2101 } 2102 } 2103 2104 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2105 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2106 auto OpNum = Inst.getNumOperands(); 2107 // Check that this operand accepts literals 2108 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2109 2110 if (ApplyModifiers) { 2111 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2112 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2113 Val = applyInputFPModifiers(Val, Size); 2114 } 2115 2116 APInt Literal(64, Val); 2117 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType; 2118 2119 if (Imm.IsFPImm) { // We got fp literal token 2120 switch (OpTy) { 2121 case AMDGPU::OPERAND_REG_IMM_INT64: 2122 case AMDGPU::OPERAND_REG_IMM_FP64: 2123 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2124 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2125 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2126 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2127 AsmParser->hasInv2PiInlineImm())) { 2128 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2129 setImmKindConst(); 2130 return; 2131 } 2132 2133 // Non-inlineable 2134 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2135 // For fp operands we check if low 32 bits are zeros 2136 if (Literal.getLoBits(32) != 0) { 2137 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2138 "Can't encode literal as exact 64-bit floating-point operand. " 2139 "Low 32-bits will be set to zero"); 2140 } 2141 2142 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2143 setImmKindLiteral(); 2144 return; 2145 } 2146 2147 // We don't allow fp literals in 64-bit integer instructions. It is 2148 // unclear how we should encode them. This case should be checked earlier 2149 // in predicate methods (isLiteralImm()) 2150 llvm_unreachable("fp literal in 64-bit integer instruction."); 2151 2152 case AMDGPU::OPERAND_REG_IMM_INT32: 2153 case AMDGPU::OPERAND_REG_IMM_FP32: 2154 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2155 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2156 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2157 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2158 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2159 case AMDGPU::OPERAND_REG_IMM_INT16: 2160 case AMDGPU::OPERAND_REG_IMM_FP16: 2161 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2162 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2163 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2164 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2165 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2166 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2167 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2168 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2169 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2170 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2171 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2172 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2173 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2174 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2175 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2176 case AMDGPU::OPERAND_KIMM32: 2177 case AMDGPU::OPERAND_KIMM16: { 2178 bool lost; 2179 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2180 // Convert literal to single precision 2181 FPLiteral.convert(*getOpFltSemantics(OpTy), 2182 APFloat::rmNearestTiesToEven, &lost); 2183 // We allow precision lost but not overflow or underflow. This should be 2184 // checked earlier in isLiteralImm() 2185 2186 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2187 Inst.addOperand(MCOperand::createImm(ImmVal)); 2188 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) { 2189 setImmKindMandatoryLiteral(); 2190 } else { 2191 setImmKindLiteral(); 2192 } 2193 return; 2194 } 2195 default: 2196 llvm_unreachable("invalid operand size"); 2197 } 2198 2199 return; 2200 } 2201 2202 // We got int literal token. 2203 // Only sign extend inline immediates. 
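  // For instance (illustrative): with a 32-bit operand type, "-1" and "64"
  // survive isSafeTruncation() and are inline constants, so they are emitted
  // unchanged below; "0x12345" is not inlinable, so its low 32 bits are
  // emitted and the operand is marked as a literal.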
2204 switch (OpTy) { 2205 case AMDGPU::OPERAND_REG_IMM_INT32: 2206 case AMDGPU::OPERAND_REG_IMM_FP32: 2207 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2208 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2209 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2210 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2211 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2212 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2213 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2214 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2215 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2216 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2217 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2218 if (isSafeTruncation(Val, 32) && 2219 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2220 AsmParser->hasInv2PiInlineImm())) { 2221 Inst.addOperand(MCOperand::createImm(Val)); 2222 setImmKindConst(); 2223 return; 2224 } 2225 2226 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2227 setImmKindLiteral(); 2228 return; 2229 2230 case AMDGPU::OPERAND_REG_IMM_INT64: 2231 case AMDGPU::OPERAND_REG_IMM_FP64: 2232 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2233 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2234 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2235 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2236 Inst.addOperand(MCOperand::createImm(Val)); 2237 setImmKindConst(); 2238 return; 2239 } 2240 2241 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2242 setImmKindLiteral(); 2243 return; 2244 2245 case AMDGPU::OPERAND_REG_IMM_INT16: 2246 case AMDGPU::OPERAND_REG_IMM_FP16: 2247 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2248 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2249 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2250 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2251 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2252 if (isSafeTruncation(Val, 16) && 2253 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2254 AsmParser->hasInv2PiInlineImm())) { 2255 Inst.addOperand(MCOperand::createImm(Val)); 2256 setImmKindConst(); 2257 return; 2258 } 2259 2260 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2261 setImmKindLiteral(); 2262 return; 2263 2264 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2265 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2266 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2267 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2268 assert(isSafeTruncation(Val, 16)); 2269 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2270 AsmParser->hasInv2PiInlineImm())); 2271 2272 Inst.addOperand(MCOperand::createImm(Val)); 2273 return; 2274 } 2275 case AMDGPU::OPERAND_KIMM32: 2276 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2277 setImmKindMandatoryLiteral(); 2278 return; 2279 case AMDGPU::OPERAND_KIMM16: 2280 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2281 setImmKindMandatoryLiteral(); 2282 return; 2283 default: 2284 llvm_unreachable("invalid operand size"); 2285 } 2286 } 2287 2288 template <unsigned Bitwidth> 2289 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2290 APInt Literal(64, Imm.Val); 2291 setImmKindMandatoryLiteral(); 2292 2293 if (!Imm.IsFPImm) { 2294 // We got int literal token. 
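    // For instance (illustrative): an integer token such as "100" simply
    // contributes its low Bitwidth bits here (0x0064 for a 16-bit KImm); for
    // an fp token the conversion below narrows the IEEEdouble value to the
    // operand width, so "1.0" becomes the f16 bit pattern 0x3C00 for a
    // 16-bit KImm and 0x3F800000 for a 32-bit one.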
2295 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2296 return; 2297 } 2298 2299 bool Lost; 2300 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2301 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2302 APFloat::rmNearestTiesToEven, &Lost); 2303 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2304 } 2305 2306 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2307 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2308 } 2309 2310 bool AMDGPUOperand::isInlineValue() const { 2311 return isRegKind() && ::isInlineValue(getReg()); 2312 } 2313 2314 //===----------------------------------------------------------------------===// 2315 // AsmParser 2316 //===----------------------------------------------------------------------===// 2317 2318 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2319 if (Is == IS_VGPR) { 2320 switch (RegWidth) { 2321 default: return -1; 2322 case 32: 2323 return AMDGPU::VGPR_32RegClassID; 2324 case 64: 2325 return AMDGPU::VReg_64RegClassID; 2326 case 96: 2327 return AMDGPU::VReg_96RegClassID; 2328 case 128: 2329 return AMDGPU::VReg_128RegClassID; 2330 case 160: 2331 return AMDGPU::VReg_160RegClassID; 2332 case 192: 2333 return AMDGPU::VReg_192RegClassID; 2334 case 224: 2335 return AMDGPU::VReg_224RegClassID; 2336 case 256: 2337 return AMDGPU::VReg_256RegClassID; 2338 case 288: 2339 return AMDGPU::VReg_288RegClassID; 2340 case 320: 2341 return AMDGPU::VReg_320RegClassID; 2342 case 352: 2343 return AMDGPU::VReg_352RegClassID; 2344 case 384: 2345 return AMDGPU::VReg_384RegClassID; 2346 case 512: 2347 return AMDGPU::VReg_512RegClassID; 2348 case 1024: 2349 return AMDGPU::VReg_1024RegClassID; 2350 } 2351 } else if (Is == IS_TTMP) { 2352 switch (RegWidth) { 2353 default: return -1; 2354 case 32: 2355 return AMDGPU::TTMP_32RegClassID; 2356 case 64: 2357 return AMDGPU::TTMP_64RegClassID; 2358 case 128: 2359 return AMDGPU::TTMP_128RegClassID; 2360 case 256: 2361 return AMDGPU::TTMP_256RegClassID; 2362 case 512: 2363 return AMDGPU::TTMP_512RegClassID; 2364 } 2365 } else if (Is == IS_SGPR) { 2366 switch (RegWidth) { 2367 default: return -1; 2368 case 32: 2369 return AMDGPU::SGPR_32RegClassID; 2370 case 64: 2371 return AMDGPU::SGPR_64RegClassID; 2372 case 96: 2373 return AMDGPU::SGPR_96RegClassID; 2374 case 128: 2375 return AMDGPU::SGPR_128RegClassID; 2376 case 160: 2377 return AMDGPU::SGPR_160RegClassID; 2378 case 192: 2379 return AMDGPU::SGPR_192RegClassID; 2380 case 224: 2381 return AMDGPU::SGPR_224RegClassID; 2382 case 256: 2383 return AMDGPU::SGPR_256RegClassID; 2384 case 288: 2385 return AMDGPU::SGPR_288RegClassID; 2386 case 320: 2387 return AMDGPU::SGPR_320RegClassID; 2388 case 352: 2389 return AMDGPU::SGPR_352RegClassID; 2390 case 384: 2391 return AMDGPU::SGPR_384RegClassID; 2392 case 512: 2393 return AMDGPU::SGPR_512RegClassID; 2394 } 2395 } else if (Is == IS_AGPR) { 2396 switch (RegWidth) { 2397 default: return -1; 2398 case 32: 2399 return AMDGPU::AGPR_32RegClassID; 2400 case 64: 2401 return AMDGPU::AReg_64RegClassID; 2402 case 96: 2403 return AMDGPU::AReg_96RegClassID; 2404 case 128: 2405 return AMDGPU::AReg_128RegClassID; 2406 case 160: 2407 return AMDGPU::AReg_160RegClassID; 2408 case 192: 2409 return AMDGPU::AReg_192RegClassID; 2410 case 224: 2411 return AMDGPU::AReg_224RegClassID; 2412 case 256: 2413 return AMDGPU::AReg_256RegClassID; 2414 case 288: 2415 return AMDGPU::AReg_288RegClassID; 2416 case 320: 2417 return 
AMDGPU::AReg_320RegClassID; 2418 case 352: 2419 return AMDGPU::AReg_352RegClassID; 2420 case 384: 2421 return AMDGPU::AReg_384RegClassID; 2422 case 512: 2423 return AMDGPU::AReg_512RegClassID; 2424 case 1024: 2425 return AMDGPU::AReg_1024RegClassID; 2426 } 2427 } 2428 return -1; 2429 } 2430 2431 static unsigned getSpecialRegForName(StringRef RegName) { 2432 return StringSwitch<unsigned>(RegName) 2433 .Case("exec", AMDGPU::EXEC) 2434 .Case("vcc", AMDGPU::VCC) 2435 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2436 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2437 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2438 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2439 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2440 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2441 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2442 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2443 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2444 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2445 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2446 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2447 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2448 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2449 .Case("m0", AMDGPU::M0) 2450 .Case("vccz", AMDGPU::SRC_VCCZ) 2451 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2452 .Case("execz", AMDGPU::SRC_EXECZ) 2453 .Case("src_execz", AMDGPU::SRC_EXECZ) 2454 .Case("scc", AMDGPU::SRC_SCC) 2455 .Case("src_scc", AMDGPU::SRC_SCC) 2456 .Case("tba", AMDGPU::TBA) 2457 .Case("tma", AMDGPU::TMA) 2458 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2459 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2460 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2461 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2462 .Case("vcc_lo", AMDGPU::VCC_LO) 2463 .Case("vcc_hi", AMDGPU::VCC_HI) 2464 .Case("exec_lo", AMDGPU::EXEC_LO) 2465 .Case("exec_hi", AMDGPU::EXEC_HI) 2466 .Case("tma_lo", AMDGPU::TMA_LO) 2467 .Case("tma_hi", AMDGPU::TMA_HI) 2468 .Case("tba_lo", AMDGPU::TBA_LO) 2469 .Case("tba_hi", AMDGPU::TBA_HI) 2470 .Case("pc", AMDGPU::PC_REG) 2471 .Case("null", AMDGPU::SGPR_NULL) 2472 .Default(AMDGPU::NoRegister); 2473 } 2474 2475 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, 2476 SMLoc &EndLoc, bool RestoreOnFailure) { 2477 auto R = parseRegister(); 2478 if (!R) return true; 2479 assert(R->isReg()); 2480 RegNo = R->getReg(); 2481 StartLoc = R->getStartLoc(); 2482 EndLoc = R->getEndLoc(); 2483 return false; 2484 } 2485 2486 bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc, 2487 SMLoc &EndLoc) { 2488 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2489 } 2490 2491 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(MCRegister &RegNo, 2492 SMLoc &StartLoc, 2493 SMLoc &EndLoc) { 2494 bool Result = 2495 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2496 bool PendingErrors = getParser().hasPendingError(); 2497 getParser().clearPendingErrors(); 2498 if (PendingErrors) 2499 return MatchOperand_ParseFail; 2500 if (Result) 2501 return MatchOperand_NoMatch; 2502 return MatchOperand_Success; 2503 } 2504 2505 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2506 RegisterKind RegKind, unsigned Reg1, 2507 SMLoc Loc) { 2508 switch (RegKind) { 2509 case IS_SPECIAL: 2510 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2511 Reg = AMDGPU::EXEC; 2512 RegWidth = 64; 2513 return true; 2514 } 2515 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 
2516 Reg = AMDGPU::FLAT_SCR; 2517 RegWidth = 64; 2518 return true; 2519 } 2520 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2521 Reg = AMDGPU::XNACK_MASK; 2522 RegWidth = 64; 2523 return true; 2524 } 2525 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2526 Reg = AMDGPU::VCC; 2527 RegWidth = 64; 2528 return true; 2529 } 2530 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2531 Reg = AMDGPU::TBA; 2532 RegWidth = 64; 2533 return true; 2534 } 2535 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2536 Reg = AMDGPU::TMA; 2537 RegWidth = 64; 2538 return true; 2539 } 2540 Error(Loc, "register does not fit in the list"); 2541 return false; 2542 case IS_VGPR: 2543 case IS_SGPR: 2544 case IS_AGPR: 2545 case IS_TTMP: 2546 if (Reg1 != Reg + RegWidth / 32) { 2547 Error(Loc, "registers in a list must have consecutive indices"); 2548 return false; 2549 } 2550 RegWidth += 32; 2551 return true; 2552 default: 2553 llvm_unreachable("unexpected register kind"); 2554 } 2555 } 2556 2557 struct RegInfo { 2558 StringLiteral Name; 2559 RegisterKind Kind; 2560 }; 2561 2562 static constexpr RegInfo RegularRegisters[] = { 2563 {{"v"}, IS_VGPR}, 2564 {{"s"}, IS_SGPR}, 2565 {{"ttmp"}, IS_TTMP}, 2566 {{"acc"}, IS_AGPR}, 2567 {{"a"}, IS_AGPR}, 2568 }; 2569 2570 static bool isRegularReg(RegisterKind Kind) { 2571 return Kind == IS_VGPR || 2572 Kind == IS_SGPR || 2573 Kind == IS_TTMP || 2574 Kind == IS_AGPR; 2575 } 2576 2577 static const RegInfo* getRegularRegInfo(StringRef Str) { 2578 for (const RegInfo &Reg : RegularRegisters) 2579 if (Str.startswith(Reg.Name)) 2580 return &Reg; 2581 return nullptr; 2582 } 2583 2584 static bool getRegNum(StringRef Str, unsigned& Num) { 2585 return !Str.getAsInteger(10, Num); 2586 } 2587 2588 bool 2589 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2590 const AsmToken &NextToken) const { 2591 2592 // A list of consecutive registers: [s0,s1,s2,s3] 2593 if (Token.is(AsmToken::LBrac)) 2594 return true; 2595 2596 if (!Token.is(AsmToken::Identifier)) 2597 return false; 2598 2599 // A single register like s0 or a range of registers like s[0:1] 2600 2601 StringRef Str = Token.getString(); 2602 const RegInfo *Reg = getRegularRegInfo(Str); 2603 if (Reg) { 2604 StringRef RegName = Reg->Name; 2605 StringRef RegSuffix = Str.substr(RegName.size()); 2606 if (!RegSuffix.empty()) { 2607 unsigned Num; 2608 // A single register with an index: rXX 2609 if (getRegNum(RegSuffix, Num)) 2610 return true; 2611 } else { 2612 // A range of registers: r[XX:YY]. 2613 if (NextToken.is(AsmToken::LBrac)) 2614 return true; 2615 } 2616 } 2617 2618 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2619 } 2620 2621 bool 2622 AMDGPUAsmParser::isRegister() 2623 { 2624 return isRegister(getToken(), peekToken()); 2625 } 2626 2627 unsigned 2628 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2629 unsigned RegNum, 2630 unsigned RegWidth, 2631 SMLoc Loc) { 2632 2633 assert(isRegularReg(RegKind)); 2634 2635 unsigned AlignSize = 1; 2636 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2637 // SGPR and TTMP registers must be aligned. 2638 // Max required alignment is 4 dwords. 
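    // For instance (illustrative): s[2:3] has RegWidth 64, so AlignSize below
    // is min(64/32, 4) = 2 and the base index must be even; "s[3:4]" is
    // rejected with "invalid register alignment". For s[4:11] (RegWidth 256)
    // AlignSize is capped at 4, so the base index only needs to be a
    // multiple of 4.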
2639 AlignSize = std::min(RegWidth / 32, 4u); 2640 } 2641 2642 if (RegNum % AlignSize != 0) { 2643 Error(Loc, "invalid register alignment"); 2644 return AMDGPU::NoRegister; 2645 } 2646 2647 unsigned RegIdx = RegNum / AlignSize; 2648 int RCID = getRegClass(RegKind, RegWidth); 2649 if (RCID == -1) { 2650 Error(Loc, "invalid or unsupported register size"); 2651 return AMDGPU::NoRegister; 2652 } 2653 2654 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2655 const MCRegisterClass RC = TRI->getRegClass(RCID); 2656 if (RegIdx >= RC.getNumRegs()) { 2657 Error(Loc, "register index is out of range"); 2658 return AMDGPU::NoRegister; 2659 } 2660 2661 return RC.getRegister(RegIdx); 2662 } 2663 2664 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2665 int64_t RegLo, RegHi; 2666 if (!skipToken(AsmToken::LBrac, "missing register index")) 2667 return false; 2668 2669 SMLoc FirstIdxLoc = getLoc(); 2670 SMLoc SecondIdxLoc; 2671 2672 if (!parseExpr(RegLo)) 2673 return false; 2674 2675 if (trySkipToken(AsmToken::Colon)) { 2676 SecondIdxLoc = getLoc(); 2677 if (!parseExpr(RegHi)) 2678 return false; 2679 } else { 2680 RegHi = RegLo; 2681 } 2682 2683 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2684 return false; 2685 2686 if (!isUInt<32>(RegLo)) { 2687 Error(FirstIdxLoc, "invalid register index"); 2688 return false; 2689 } 2690 2691 if (!isUInt<32>(RegHi)) { 2692 Error(SecondIdxLoc, "invalid register index"); 2693 return false; 2694 } 2695 2696 if (RegLo > RegHi) { 2697 Error(FirstIdxLoc, "first register index should not exceed second index"); 2698 return false; 2699 } 2700 2701 Num = static_cast<unsigned>(RegLo); 2702 RegWidth = 32 * ((RegHi - RegLo) + 1); 2703 return true; 2704 } 2705 2706 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2707 unsigned &RegNum, unsigned &RegWidth, 2708 SmallVectorImpl<AsmToken> &Tokens) { 2709 assert(isToken(AsmToken::Identifier)); 2710 unsigned Reg = getSpecialRegForName(getTokenStr()); 2711 if (Reg) { 2712 RegNum = 0; 2713 RegWidth = 32; 2714 RegKind = IS_SPECIAL; 2715 Tokens.push_back(getToken()); 2716 lex(); // skip register name 2717 } 2718 return Reg; 2719 } 2720 2721 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2722 unsigned &RegNum, unsigned &RegWidth, 2723 SmallVectorImpl<AsmToken> &Tokens) { 2724 assert(isToken(AsmToken::Identifier)); 2725 StringRef RegName = getTokenStr(); 2726 auto Loc = getLoc(); 2727 2728 const RegInfo *RI = getRegularRegInfo(RegName); 2729 if (!RI) { 2730 Error(Loc, "invalid register name"); 2731 return AMDGPU::NoRegister; 2732 } 2733 2734 Tokens.push_back(getToken()); 2735 lex(); // skip register name 2736 2737 RegKind = RI->Kind; 2738 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2739 if (!RegSuffix.empty()) { 2740 // Single 32-bit register: vXX. 2741 if (!getRegNum(RegSuffix, RegNum)) { 2742 Error(Loc, "invalid register index"); 2743 return AMDGPU::NoRegister; 2744 } 2745 RegWidth = 32; 2746 } else { 2747 // Range of registers: v[XX:YY]. ":YY" is optional. 
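    // For instance (illustrative): "v[4:7]" makes ParseRegRange() return
    // RegNum = 4 and RegWidth = 32 * (7 - 4 + 1) = 128, which getRegularReg()
    // maps to the VReg_128 register class; a bare "v4" takes the
    // single-register path above with RegWidth = 32.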
2748 if (!ParseRegRange(RegNum, RegWidth)) 2749 return AMDGPU::NoRegister; 2750 } 2751 2752 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2753 } 2754 2755 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2756 unsigned &RegWidth, 2757 SmallVectorImpl<AsmToken> &Tokens) { 2758 unsigned Reg = AMDGPU::NoRegister; 2759 auto ListLoc = getLoc(); 2760 2761 if (!skipToken(AsmToken::LBrac, 2762 "expected a register or a list of registers")) { 2763 return AMDGPU::NoRegister; 2764 } 2765 2766 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2767 2768 auto Loc = getLoc(); 2769 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2770 return AMDGPU::NoRegister; 2771 if (RegWidth != 32) { 2772 Error(Loc, "expected a single 32-bit register"); 2773 return AMDGPU::NoRegister; 2774 } 2775 2776 for (; trySkipToken(AsmToken::Comma); ) { 2777 RegisterKind NextRegKind; 2778 unsigned NextReg, NextRegNum, NextRegWidth; 2779 Loc = getLoc(); 2780 2781 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2782 NextRegNum, NextRegWidth, 2783 Tokens)) { 2784 return AMDGPU::NoRegister; 2785 } 2786 if (NextRegWidth != 32) { 2787 Error(Loc, "expected a single 32-bit register"); 2788 return AMDGPU::NoRegister; 2789 } 2790 if (NextRegKind != RegKind) { 2791 Error(Loc, "registers in a list must be of the same kind"); 2792 return AMDGPU::NoRegister; 2793 } 2794 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2795 return AMDGPU::NoRegister; 2796 } 2797 2798 if (!skipToken(AsmToken::RBrac, 2799 "expected a comma or a closing square bracket")) { 2800 return AMDGPU::NoRegister; 2801 } 2802 2803 if (isRegularReg(RegKind)) 2804 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2805 2806 return Reg; 2807 } 2808 2809 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2810 unsigned &RegNum, unsigned &RegWidth, 2811 SmallVectorImpl<AsmToken> &Tokens) { 2812 auto Loc = getLoc(); 2813 Reg = AMDGPU::NoRegister; 2814 2815 if (isToken(AsmToken::Identifier)) { 2816 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2817 if (Reg == AMDGPU::NoRegister) 2818 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2819 } else { 2820 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2821 } 2822 2823 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2824 if (Reg == AMDGPU::NoRegister) { 2825 assert(Parser.hasPendingError()); 2826 return false; 2827 } 2828 2829 if (!subtargetHasRegister(*TRI, Reg)) { 2830 if (Reg == AMDGPU::SGPR_NULL) { 2831 Error(Loc, "'null' operand is not supported on this GPU"); 2832 } else { 2833 Error(Loc, "register not available on this GPU"); 2834 } 2835 return false; 2836 } 2837 2838 return true; 2839 } 2840 2841 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2842 unsigned &RegNum, unsigned &RegWidth, 2843 bool RestoreOnFailure /*=false*/) { 2844 Reg = AMDGPU::NoRegister; 2845 2846 SmallVector<AsmToken, 1> Tokens; 2847 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2848 if (RestoreOnFailure) { 2849 while (!Tokens.empty()) { 2850 getLexer().UnLex(Tokens.pop_back_val()); 2851 } 2852 } 2853 return true; 2854 } 2855 return false; 2856 } 2857 2858 std::optional<StringRef> 2859 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2860 switch (RegKind) { 2861 case IS_VGPR: 2862 return StringRef(".amdgcn.next_free_vgpr"); 2863 case IS_SGPR: 2864 return StringRef(".amdgcn.next_free_sgpr"); 2865 default: 2866 return std::nullopt; 2867 } 2868 } 2869 2870 
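// The .amdgcn.next_free_{v,s}gpr symbols named above are maintained as
// running maxima: each parsed register raises them to one past the highest
// dword index it touches (see updateGprCountSymbols() below). A minimal
// standalone sketch of that bookkeeping; the helper name is illustrative and
// not used elsewhere in this file.
static inline int64_t exampleNextFreeGprCount(int64_t OldCount,
                                              unsigned DwordRegIndex,
                                              unsigned RegWidth) {
  // Highest dword index used; e.g. v[4:7] (RegWidth = 128) ends at dword 7.
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
  // Keep the larger of the previous count and one past the new maximum.
  return OldCount <= NewMax ? NewMax + 1 : OldCount;
}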
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2871 auto SymbolName = getGprCountSymbolName(RegKind); 2872 assert(SymbolName && "initializing invalid register kind"); 2873 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2874 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2875 } 2876 2877 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2878 unsigned DwordRegIndex, 2879 unsigned RegWidth) { 2880 // Symbols are only defined for GCN targets 2881 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2882 return true; 2883 2884 auto SymbolName = getGprCountSymbolName(RegKind); 2885 if (!SymbolName) 2886 return true; 2887 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2888 2889 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2890 int64_t OldCount; 2891 2892 if (!Sym->isVariable()) 2893 return !Error(getLoc(), 2894 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2895 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2896 return !Error( 2897 getLoc(), 2898 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2899 2900 if (OldCount <= NewMax) 2901 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2902 2903 return true; 2904 } 2905 2906 std::unique_ptr<AMDGPUOperand> 2907 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2908 const auto &Tok = getToken(); 2909 SMLoc StartLoc = Tok.getLoc(); 2910 SMLoc EndLoc = Tok.getEndLoc(); 2911 RegisterKind RegKind; 2912 unsigned Reg, RegNum, RegWidth; 2913 2914 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2915 return nullptr; 2916 } 2917 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2918 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2919 return nullptr; 2920 } else 2921 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2922 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2923 } 2924 2925 OperandMatchResultTy 2926 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2927 // TODO: add syntactic sugar for 1/(2*PI) 2928 2929 if (isRegister()) 2930 return MatchOperand_NoMatch; 2931 assert(!isModifier()); 2932 2933 const auto& Tok = getToken(); 2934 const auto& NextTok = peekToken(); 2935 bool IsReal = Tok.is(AsmToken::Real); 2936 SMLoc S = getLoc(); 2937 bool Negate = false; 2938 2939 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2940 lex(); 2941 IsReal = true; 2942 Negate = true; 2943 } 2944 2945 if (IsReal) { 2946 // Floating-point expressions are not supported. 2947 // Can only allow floating-point literals with an 2948 // optional sign. 
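    // For instance (illustrative): for the token "-1.0" the minus has already
    // been folded into Negate above, so RealVal below holds 1.0, changeSign()
    // flips it, and the operand stores the raw IEEEdouble bit pattern
    // 0xBFF0000000000000 with IsFPImm set; narrowing to the final operand
    // width happens later, e.g. in addLiteralImmOperand().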
2949 2950 StringRef Num = getTokenStr(); 2951 lex(); 2952 2953 APFloat RealVal(APFloat::IEEEdouble()); 2954 auto roundMode = APFloat::rmNearestTiesToEven; 2955 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2956 return MatchOperand_ParseFail; 2957 } 2958 if (Negate) 2959 RealVal.changeSign(); 2960 2961 Operands.push_back( 2962 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2963 AMDGPUOperand::ImmTyNone, true)); 2964 2965 return MatchOperand_Success; 2966 2967 } else { 2968 int64_t IntVal; 2969 const MCExpr *Expr; 2970 SMLoc S = getLoc(); 2971 2972 if (HasSP3AbsModifier) { 2973 // This is a workaround for handling expressions 2974 // as arguments of SP3 'abs' modifier, for example: 2975 // |1.0| 2976 // |-1| 2977 // |1+x| 2978 // This syntax is not compatible with syntax of standard 2979 // MC expressions (due to the trailing '|'). 2980 SMLoc EndLoc; 2981 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2982 return MatchOperand_ParseFail; 2983 } else { 2984 if (Parser.parseExpression(Expr)) 2985 return MatchOperand_ParseFail; 2986 } 2987 2988 if (Expr->evaluateAsAbsolute(IntVal)) { 2989 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2990 } else { 2991 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2992 } 2993 2994 return MatchOperand_Success; 2995 } 2996 2997 return MatchOperand_NoMatch; 2998 } 2999 3000 OperandMatchResultTy 3001 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 3002 if (!isRegister()) 3003 return MatchOperand_NoMatch; 3004 3005 if (auto R = parseRegister()) { 3006 assert(R->isReg()); 3007 Operands.push_back(std::move(R)); 3008 return MatchOperand_Success; 3009 } 3010 return MatchOperand_ParseFail; 3011 } 3012 3013 OperandMatchResultTy 3014 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 3015 auto res = parseReg(Operands); 3016 if (res != MatchOperand_NoMatch) { 3017 return res; 3018 } else if (isModifier()) { 3019 return MatchOperand_NoMatch; 3020 } else { 3021 return parseImm(Operands, HasSP3AbsMod); 3022 } 3023 } 3024 3025 bool 3026 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3027 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3028 const auto &str = Token.getString(); 3029 return str == "abs" || str == "neg" || str == "sext"; 3030 } 3031 return false; 3032 } 3033 3034 bool 3035 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3036 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3037 } 3038 3039 bool 3040 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3041 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3042 } 3043 3044 bool 3045 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3046 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3047 } 3048 3049 // Check if this is an operand modifier or an opcode modifier 3050 // which may look like an expression but it is not. We should 3051 // avoid parsing these modifiers as expressions. Currently 3052 // recognized sequences are: 3053 // |...| 3054 // abs(...) 3055 // neg(...) 3056 // sext(...) 3057 // -reg 3058 // -|...| 3059 // -abs(...) 3060 // name:... 
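// For instance (illustrative): "abs(v0)" is an Identifier followed by '(',
// so it is a named operand modifier; "offset:4095" is an Identifier followed
// by ':', i.e. an opcode modifier with a value; "-v0" and "-|v1|" are caught
// by the Minus look-ahead; a plain "-4" is left to the expression parser and
// is treated as integer negation.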
//
bool
AMDGPUAsmParser::isModifier() {

  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
    (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
    isOpcodeModifierWithVal(Tok, NextToken[0]);
}

// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following contexts:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of the floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//     v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//     v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3156 } 3157 3158 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3159 return MatchOperand_ParseFail; 3160 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3161 return MatchOperand_ParseFail; 3162 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3163 return MatchOperand_ParseFail; 3164 3165 AMDGPUOperand::Modifiers Mods; 3166 Mods.Abs = Abs || SP3Abs; 3167 Mods.Neg = Neg || SP3Neg; 3168 3169 if (Mods.hasFPModifiers()) { 3170 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3171 if (Op.isExpr()) { 3172 Error(Op.getStartLoc(), "expected an absolute expression"); 3173 return MatchOperand_ParseFail; 3174 } 3175 Op.setModifiers(Mods); 3176 } 3177 return MatchOperand_Success; 3178 } 3179 3180 OperandMatchResultTy 3181 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3182 bool AllowImm) { 3183 bool Sext = trySkipId("sext"); 3184 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3185 return MatchOperand_ParseFail; 3186 3187 OperandMatchResultTy Res; 3188 if (AllowImm) { 3189 Res = parseRegOrImm(Operands); 3190 } else { 3191 Res = parseReg(Operands); 3192 } 3193 if (Res != MatchOperand_Success) { 3194 return Sext? MatchOperand_ParseFail : Res; 3195 } 3196 3197 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3198 return MatchOperand_ParseFail; 3199 3200 AMDGPUOperand::Modifiers Mods; 3201 Mods.Sext = Sext; 3202 3203 if (Mods.hasIntModifiers()) { 3204 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3205 if (Op.isExpr()) { 3206 Error(Op.getStartLoc(), "expected an absolute expression"); 3207 return MatchOperand_ParseFail; 3208 } 3209 Op.setModifiers(Mods); 3210 } 3211 3212 return MatchOperand_Success; 3213 } 3214 3215 OperandMatchResultTy 3216 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3217 return parseRegOrImmWithFPInputMods(Operands, false); 3218 } 3219 3220 OperandMatchResultTy 3221 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3222 return parseRegOrImmWithIntInputMods(Operands, false); 3223 } 3224 3225 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3226 auto Loc = getLoc(); 3227 if (trySkipId("off")) { 3228 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3229 AMDGPUOperand::ImmTyOff, false)); 3230 return MatchOperand_Success; 3231 } 3232 3233 if (!isRegister()) 3234 return MatchOperand_NoMatch; 3235 3236 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3237 if (Reg) { 3238 Operands.push_back(std::move(Reg)); 3239 return MatchOperand_Success; 3240 } 3241 3242 return MatchOperand_ParseFail; 3243 3244 } 3245 3246 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3247 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3248 3249 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3250 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3251 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3252 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3253 return Match_InvalidOperand; 3254 3255 if ((TSFlags & SIInstrFlags::VOP3) && 3256 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3257 getForcedEncodingSize() != 64) 3258 return Match_PreferE32; 3259 3260 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3261 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3262 // v_mac_f32/16 allow only dst_sel == DWORD; 3263 auto OpNum = 3264 
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
    AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
  };

  return ArrayRef(Variants);
}

// Which asm variants we should check.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (isForcedDPP() && isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
    return ArrayRef(Variants);
  }
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return ArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return ArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return ArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return ArrayRef(Variants);
  }

  return getAllVariants();
}

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
    return "e64_dpp";

  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (MCPhysReg Reg : Desc.implicit_uses()) {
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
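// A standalone sketch of the integer subset of inline constants that the
// AMDGPU::isInlinableLiteral* helpers used by isInlineConstant() below test
// for (the name is illustrative and unused elsewhere in this file). Besides
// this signed range, those helpers also accept a fixed set of fp values
// (+/-0.5, +/-1.0, +/-2.0, +/-4.0, and 1/(2*pi) on targets with the inv2pi
// inline constant).
static inline bool exampleIsInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}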
3354 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3355 unsigned OpIdx) const { 3356 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3357 3358 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) || 3359 AMDGPU::isKImmOperand(Desc, OpIdx)) { 3360 return false; 3361 } 3362 3363 const MCOperand &MO = Inst.getOperand(OpIdx); 3364 3365 int64_t Val = MO.getImm(); 3366 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3367 3368 switch (OpSize) { // expected operand size 3369 case 8: 3370 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3371 case 4: 3372 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3373 case 2: { 3374 const unsigned OperandType = Desc.operands()[OpIdx].OperandType; 3375 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3376 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3377 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3378 return AMDGPU::isInlinableIntLiteral(Val); 3379 3380 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3381 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3382 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3383 return AMDGPU::isInlinableIntLiteralV216(Val); 3384 3385 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3386 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3387 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3388 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3389 3390 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3391 } 3392 default: 3393 llvm_unreachable("invalid operand size"); 3394 } 3395 } 3396 3397 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3398 if (!isGFX10Plus()) 3399 return 1; 3400 3401 switch (Opcode) { 3402 // 64-bit shift instructions can use only one scalar value input 3403 case AMDGPU::V_LSHLREV_B64_e64: 3404 case AMDGPU::V_LSHLREV_B64_gfx10: 3405 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3406 case AMDGPU::V_LSHRREV_B64_e64: 3407 case AMDGPU::V_LSHRREV_B64_gfx10: 3408 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3409 case AMDGPU::V_ASHRREV_I64_e64: 3410 case AMDGPU::V_ASHRREV_I64_gfx10: 3411 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3412 case AMDGPU::V_LSHL_B64_e64: 3413 case AMDGPU::V_LSHR_B64_e64: 3414 case AMDGPU::V_ASHR_I64_e64: 3415 return 1; 3416 default: 3417 return 2; 3418 } 3419 } 3420 3421 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6; 3422 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>; 3423 3424 // Get regular operand indices in the same order as specified 3425 // in the instruction (but append mandatory literals to the end). 3426 static OperandIndices getSrcOperandIndices(unsigned Opcode, 3427 bool AddMandatoryLiterals = false) { 3428 3429 int16_t ImmIdx = 3430 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1; 3431 3432 if (isVOPD(Opcode)) { 3433 int16_t ImmDeferredIdx = 3434 AddMandatoryLiterals ? 
        getNamedOperandIdx(Opcode, OpName::immDeferred)
                              : -1;

    return {getNamedOperandIdx(Opcode, OpName::src0X),
            getNamedOperandIdx(Opcode, OpName::vsrc1X),
            getNamedOperandIdx(Opcode, OpName::src0Y),
            getNamedOperandIdx(Opcode, OpName::vsrc1Y),
            ImmDeferredIdx,
            ImmIdx};
  }

  return {getNamedOperandIdx(Opcode, OpName::src0),
          getNamedOperandIdx(Opcode, OpName::src1),
          getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
}

bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    auto Reg = MO.getReg();
    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
    auto PReg = mc2PseudoReg(Reg);
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
  } else {
    return true;
  }
}

bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (!(Desc.TSFlags &
        (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
         SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
      !isVOPD(Opcode))
    return true;

  // Check special imm operands (used by madmk, etc)
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
    ++NumLiterals;
    LiteralSize = 4;
  }

  SmallDenseSet<unsigned> SGPRsUsed;
  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }

  OperandIndices OpIndices = getSrcOperandIndices(Opcode);

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (usesConstantBus(Inst, OpIdx)) {
      if (MO.isReg()) {
        LastSGPR = mc2PseudoReg(MO.getReg());
        // Pairs of registers with a partial intersection, like these:
        //   s0, s[0:1]
        //   flat_scratch_lo, flat_scratch
        //   flat_scratch_lo, flat_scratch_hi
        // are theoretically valid but they are disabled anyway.
        // Note that this code mimics SIInstrInfo::verifyInstruction.
        if (SGPRsUsed.insert(LastSGPR).second) {
          ++ConstantBusUseCount;
        }
      } else { // Expression or a literal

        if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
          continue; // special operand like VINTERP attr_chan

        // An instruction may use only one literal.
        // This has been validated in a previous step.
        // See validateVOPLiteral.
        // This literal may be used by more than one operand.
        // If all these operands are of the same size,
        // this literal counts as one scalar value.
        // Otherwise it counts as 2 scalar values.
        // See "GFX10 Shader Programming", section 3.6.2.3.
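        // For instance (illustrative): a VOP3 opcode on GFX10+, where
        // getConstantBusLimit() usually returns 2, may reuse one 32-bit
        // literal for two same-size source operands and still pass this
        // check, because the literal is counted once below; if the same
        // literal fed both a 64-bit and a 32-bit operand it would be counted
        // as two scalar values.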
3524 3525 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3526 if (Size < 4) 3527 Size = 4; 3528 3529 if (NumLiterals == 0) { 3530 NumLiterals = 1; 3531 LiteralSize = Size; 3532 } else if (LiteralSize != Size) { 3533 NumLiterals = 2; 3534 } 3535 } 3536 } 3537 } 3538 ConstantBusUseCount += NumLiterals; 3539 3540 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3541 return true; 3542 3543 SMLoc LitLoc = getLitLoc(Operands); 3544 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3545 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; 3546 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3547 return false; 3548 } 3549 3550 bool AMDGPUAsmParser::validateVOPDRegBankConstraints( 3551 const MCInst &Inst, const OperandVector &Operands) { 3552 3553 const unsigned Opcode = Inst.getOpcode(); 3554 if (!isVOPD(Opcode)) 3555 return true; 3556 3557 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3558 3559 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) { 3560 const MCOperand &Opr = Inst.getOperand(OperandIdx); 3561 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI)) 3562 ? Opr.getReg() 3563 : MCRegister::NoRegister; 3564 }; 3565 3566 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); 3567 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx); 3568 if (!InvalidCompOprIdx) 3569 return true; 3570 3571 auto CompOprIdx = *InvalidCompOprIdx; 3572 auto ParsedIdx = 3573 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), 3574 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); 3575 assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); 3576 3577 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); 3578 if (CompOprIdx == VOPD::Component::DST) { 3579 Error(Loc, "one dst register must be even and the other odd"); 3580 } else { 3581 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; 3582 Error(Loc, Twine("src") + Twine(CompSrcIdx) + 3583 " operands must use different VGPR banks"); 3584 } 3585 3586 return false; 3587 } 3588 3589 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3590 3591 const unsigned Opc = Inst.getOpcode(); 3592 const MCInstrDesc &Desc = MII.get(Opc); 3593 3594 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3595 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3596 assert(ClampIdx != -1); 3597 return Inst.getOperand(ClampIdx).getImm() == 0; 3598 } 3599 3600 return true; 3601 } 3602 3603 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, 3604 const SMLoc &IDLoc) { 3605 3606 const unsigned Opc = Inst.getOpcode(); 3607 const MCInstrDesc &Desc = MII.get(Opc); 3608 3609 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3610 return true; 3611 3612 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3613 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3614 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3615 3616 assert(VDataIdx != -1); 3617 3618 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray 3619 return true; 3620 3621 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3622 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3623 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3624 if (DMask == 0) 3625 DMask = 1; 3626 3627 bool IsPackedD16 = false; 3628 unsigned DataSize = 3629 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : llvm::popcount(DMask); 3630 if (hasPackedD16()) { 3631 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3632 IsPackedD16 = D16Idx >= 0; 3633 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm()) 3634 DataSize = (DataSize + 1) / 2; 3635 } 3636 3637 if ((VDataSize / 4) == DataSize + TFESize) 3638 return true; 3639 3640 StringRef Modifiers; 3641 if (isGFX90A()) 3642 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask"; 3643 else 3644 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe"; 3645 3646 Error(IDLoc, Twine("image data size does not match ") + Modifiers); 3647 return false; 3648 } 3649 3650 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3651 const unsigned Opc = Inst.getOpcode(); 3652 const MCInstrDesc &Desc = MII.get(Opc); 3653 3654 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3655 return true; 3656 3657 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3658 3659 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3660 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3661 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3662 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3663 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3664 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3665 3666 assert(VAddr0Idx != -1); 3667 assert(SrsrcIdx != -1); 3668 assert(SrsrcIdx > VAddr0Idx); 3669 3670 if (DimIdx == -1) 3671 return true; // intersect_ray 3672 3673 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3674 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3675 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3676 unsigned ActualAddrSize = 3677 IsNSA ? SrsrcIdx - VAddr0Idx 3678 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3679 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3680 3681 unsigned ExpectedAddrSize = 3682 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3683 3684 if (!IsNSA) { 3685 if (ExpectedAddrSize > 12) 3686 ExpectedAddrSize = 16; 3687 3688 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3689 // This provides backward compatibility for assembly created 3690 // before 160b/192b/224b types were directly supported. 3691 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3692 return true; 3693 } 3694 3695 return ActualAddrSize == ExpectedAddrSize; 3696 } 3697 3698 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3699 3700 const unsigned Opc = Inst.getOpcode(); 3701 const MCInstrDesc &Desc = MII.get(Opc); 3702 3703 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3704 return true; 3705 if (!Desc.mayLoad() || !Desc.mayStore()) 3706 return true; // Not atomic 3707 3708 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3709 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3710 3711 // This is an incomplete check because image_atomic_cmpswap 3712 // may only use 0x3 and 0xf while other atomic operations 3713 // may use 0x1 and 0x3. However these limitations are 3714 // verified when we check that dmask matches dst size. 
3715 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3716 } 3717 3718 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3719 3720 const unsigned Opc = Inst.getOpcode(); 3721 const MCInstrDesc &Desc = MII.get(Opc); 3722 3723 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3724 return true; 3725 3726 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3727 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3728 3729 // GATHER4 instructions use dmask in a different fashion compared to 3730 // other MIMG instructions. The only useful DMASK values are 3731 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3732 // (red,red,red,red) etc.) The ISA document doesn't mention 3733 // this. 3734 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3735 } 3736 3737 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3738 const unsigned Opc = Inst.getOpcode(); 3739 const MCInstrDesc &Desc = MII.get(Opc); 3740 3741 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3742 return true; 3743 3744 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3745 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3746 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3747 3748 if (!BaseOpcode->MSAA) 3749 return true; 3750 3751 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3752 assert(DimIdx != -1); 3753 3754 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3755 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3756 3757 return DimInfo->MSAA; 3758 } 3759 3760 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3761 { 3762 switch (Opcode) { 3763 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3764 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3765 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3766 return true; 3767 default: 3768 return false; 3769 } 3770 } 3771 3772 // movrels* opcodes should only allow VGPRS as src0. 3773 // This is specified in .td description for vop1/vop3, 3774 // but sdwa is handled differently. See isSDWAOperand. 
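// For example, 'v_movrels_b32_sdwa v0, s0' must be rejected here with a
// "source operand must be a VGPR" error.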
3775 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3776 const OperandVector &Operands) { 3777 3778 const unsigned Opc = Inst.getOpcode(); 3779 const MCInstrDesc &Desc = MII.get(Opc); 3780 3781 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3782 return true; 3783 3784 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3785 assert(Src0Idx != -1); 3786 3787 SMLoc ErrLoc; 3788 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3789 if (Src0.isReg()) { 3790 auto Reg = mc2PseudoReg(Src0.getReg()); 3791 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3792 if (!isSGPR(Reg, TRI)) 3793 return true; 3794 ErrLoc = getRegLoc(Reg, Operands); 3795 } else { 3796 ErrLoc = getConstLoc(Operands); 3797 } 3798 3799 Error(ErrLoc, "source operand must be a VGPR"); 3800 return false; 3801 } 3802 3803 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3804 const OperandVector &Operands) { 3805 3806 const unsigned Opc = Inst.getOpcode(); 3807 3808 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3809 return true; 3810 3811 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3812 assert(Src0Idx != -1); 3813 3814 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3815 if (!Src0.isReg()) 3816 return true; 3817 3818 auto Reg = mc2PseudoReg(Src0.getReg()); 3819 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3820 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3821 Error(getRegLoc(Reg, Operands), 3822 "source operand must be either a VGPR or an inline constant"); 3823 return false; 3824 } 3825 3826 return true; 3827 } 3828 3829 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst, 3830 const OperandVector &Operands) { 3831 unsigned Opcode = Inst.getOpcode(); 3832 const MCInstrDesc &Desc = MII.get(Opcode); 3833 3834 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) || 3835 !getFeatureBits()[FeatureMFMAInlineLiteralBug]) 3836 return true; 3837 3838 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2); 3839 if (Src2Idx == -1) 3840 return true; 3841 3842 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) { 3843 Error(getConstLoc(Operands), 3844 "inline constants are not allowed for this operand"); 3845 return false; 3846 } 3847 3848 return true; 3849 } 3850 3851 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3852 const OperandVector &Operands) { 3853 const unsigned Opc = Inst.getOpcode(); 3854 const MCInstrDesc &Desc = MII.get(Opc); 3855 3856 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3857 return true; 3858 3859 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3860 if (Src2Idx == -1) 3861 return true; 3862 3863 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3864 if (!Src2.isReg()) 3865 return true; 3866 3867 MCRegister Src2Reg = Src2.getReg(); 3868 MCRegister DstReg = Inst.getOperand(0).getReg(); 3869 if (Src2Reg == DstReg) 3870 return true; 3871 3872 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3873 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128) 3874 return true; 3875 3876 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3877 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3878 "source 2 operand must not partially overlap with dst"); 3879 return false; 3880 } 3881 3882 return true; 3883 } 3884 3885 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3886 switch (Inst.getOpcode()) { 3887 default: 3888 return true; 3889 case V_DIV_SCALE_F32_gfx6_gfx7: 3890 case V_DIV_SCALE_F32_vi: 3891 case 
V_DIV_SCALE_F32_gfx10: 3892 case V_DIV_SCALE_F64_gfx6_gfx7: 3893 case V_DIV_SCALE_F64_vi: 3894 case V_DIV_SCALE_F64_gfx10: 3895 break; 3896 } 3897 3898 // TODO: Check that src0 = src1 or src2. 3899 3900 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3901 AMDGPU::OpName::src1_modifiers, 3902 AMDGPU::OpName::src2_modifiers}) { 3903 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3904 .getImm() & 3905 SISrcMods::ABS) { 3906 return false; 3907 } 3908 } 3909 3910 return true; 3911 } 3912 3913 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3914 3915 const unsigned Opc = Inst.getOpcode(); 3916 const MCInstrDesc &Desc = MII.get(Opc); 3917 3918 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3919 return true; 3920 3921 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3922 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3923 if (isCI() || isSI()) 3924 return false; 3925 } 3926 3927 return true; 3928 } 3929 3930 static bool IsRevOpcode(const unsigned Opcode) 3931 { 3932 switch (Opcode) { 3933 case AMDGPU::V_SUBREV_F32_e32: 3934 case AMDGPU::V_SUBREV_F32_e64: 3935 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3936 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3937 case AMDGPU::V_SUBREV_F32_e32_vi: 3938 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3939 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3940 case AMDGPU::V_SUBREV_F32_e64_vi: 3941 3942 case AMDGPU::V_SUBREV_CO_U32_e32: 3943 case AMDGPU::V_SUBREV_CO_U32_e64: 3944 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3945 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3946 3947 case AMDGPU::V_SUBBREV_U32_e32: 3948 case AMDGPU::V_SUBBREV_U32_e64: 3949 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3950 case AMDGPU::V_SUBBREV_U32_e32_vi: 3951 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3952 case AMDGPU::V_SUBBREV_U32_e64_vi: 3953 3954 case AMDGPU::V_SUBREV_U32_e32: 3955 case AMDGPU::V_SUBREV_U32_e64: 3956 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3957 case AMDGPU::V_SUBREV_U32_e32_vi: 3958 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3959 case AMDGPU::V_SUBREV_U32_e64_vi: 3960 3961 case AMDGPU::V_SUBREV_F16_e32: 3962 case AMDGPU::V_SUBREV_F16_e64: 3963 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3964 case AMDGPU::V_SUBREV_F16_e32_vi: 3965 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3966 case AMDGPU::V_SUBREV_F16_e64_vi: 3967 3968 case AMDGPU::V_SUBREV_U16_e32: 3969 case AMDGPU::V_SUBREV_U16_e64: 3970 case AMDGPU::V_SUBREV_U16_e32_vi: 3971 case AMDGPU::V_SUBREV_U16_e64_vi: 3972 3973 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3974 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3975 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3976 3977 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3978 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3979 3980 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3981 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3982 3983 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3984 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3985 3986 case AMDGPU::V_LSHRREV_B32_e32: 3987 case AMDGPU::V_LSHRREV_B32_e64: 3988 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3989 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3990 case AMDGPU::V_LSHRREV_B32_e32_vi: 3991 case AMDGPU::V_LSHRREV_B32_e64_vi: 3992 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3993 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3994 3995 case AMDGPU::V_ASHRREV_I32_e32: 3996 case AMDGPU::V_ASHRREV_I32_e64: 3997 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3998 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3999 case AMDGPU::V_ASHRREV_I32_e32_vi: 4000 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 4001 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 4002 case AMDGPU::V_ASHRREV_I32_e64_vi: 4003
4004 case AMDGPU::V_LSHLREV_B32_e32: 4005 case AMDGPU::V_LSHLREV_B32_e64: 4006 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 4007 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 4008 case AMDGPU::V_LSHLREV_B32_e32_vi: 4009 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 4010 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 4011 case AMDGPU::V_LSHLREV_B32_e64_vi: 4012 4013 case AMDGPU::V_LSHLREV_B16_e32: 4014 case AMDGPU::V_LSHLREV_B16_e64: 4015 case AMDGPU::V_LSHLREV_B16_e32_vi: 4016 case AMDGPU::V_LSHLREV_B16_e64_vi: 4017 case AMDGPU::V_LSHLREV_B16_gfx10: 4018 4019 case AMDGPU::V_LSHRREV_B16_e32: 4020 case AMDGPU::V_LSHRREV_B16_e64: 4021 case AMDGPU::V_LSHRREV_B16_e32_vi: 4022 case AMDGPU::V_LSHRREV_B16_e64_vi: 4023 case AMDGPU::V_LSHRREV_B16_gfx10: 4024 4025 case AMDGPU::V_ASHRREV_I16_e32: 4026 case AMDGPU::V_ASHRREV_I16_e64: 4027 case AMDGPU::V_ASHRREV_I16_e32_vi: 4028 case AMDGPU::V_ASHRREV_I16_e64_vi: 4029 case AMDGPU::V_ASHRREV_I16_gfx10: 4030 4031 case AMDGPU::V_LSHLREV_B64_e64: 4032 case AMDGPU::V_LSHLREV_B64_gfx10: 4033 case AMDGPU::V_LSHLREV_B64_vi: 4034 4035 case AMDGPU::V_LSHRREV_B64_e64: 4036 case AMDGPU::V_LSHRREV_B64_gfx10: 4037 case AMDGPU::V_LSHRREV_B64_vi: 4038 4039 case AMDGPU::V_ASHRREV_I64_e64: 4040 case AMDGPU::V_ASHRREV_I64_gfx10: 4041 case AMDGPU::V_ASHRREV_I64_vi: 4042 4043 case AMDGPU::V_PK_LSHLREV_B16: 4044 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 4045 case AMDGPU::V_PK_LSHLREV_B16_vi: 4046 4047 case AMDGPU::V_PK_LSHRREV_B16: 4048 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 4049 case AMDGPU::V_PK_LSHRREV_B16_vi: 4050 case AMDGPU::V_PK_ASHRREV_I16: 4051 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4052 case AMDGPU::V_PK_ASHRREV_I16_vi: 4053 return true; 4054 default: 4055 return false; 4056 } 4057 } 4058 4059 std::optional<StringRef> 4060 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4061 4062 using namespace SIInstrFlags; 4063 const unsigned Opcode = Inst.getOpcode(); 4064 const MCInstrDesc &Desc = MII.get(Opcode); 4065 4066 // lds_direct register is defined so that it can be used 4067 // with 9-bit operands only. Ignore encodings which do not accept these. 
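// E.g. 'v_mov_b32 v0, lds_direct' encodes lds_direct through the 9-bit src0 field.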
4068 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4069 if ((Desc.TSFlags & Enc) == 0) 4070 return std::nullopt; 4071 4072 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4073 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4074 if (SrcIdx == -1) 4075 break; 4076 const auto &Src = Inst.getOperand(SrcIdx); 4077 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4078 4079 if (isGFX90A() || isGFX11Plus()) 4080 return StringRef("lds_direct is not supported on this GPU"); 4081 4082 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4083 return StringRef("lds_direct cannot be used with this instruction"); 4084 4085 if (SrcName != OpName::src0) 4086 return StringRef("lds_direct may be used as src0 only"); 4087 } 4088 } 4089 4090 return std::nullopt; 4091 } 4092 4093 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4094 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4095 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4096 if (Op.isFlatOffset()) 4097 return Op.getStartLoc(); 4098 } 4099 return getLoc(); 4100 } 4101 4102 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4103 const OperandVector &Operands) { 4104 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4105 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4106 return true; 4107 4108 auto Opcode = Inst.getOpcode(); 4109 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4110 assert(OpNum != -1); 4111 4112 const auto &Op = Inst.getOperand(OpNum); 4113 if (!hasFlatOffsets() && Op.getImm() != 0) { 4114 Error(getFlatOffsetLoc(Operands), 4115 "flat offset modifier is not supported on this GPU"); 4116 return false; 4117 } 4118 4119 // For FLAT segment the offset must be positive; 4120 // MSB is ignored and forced to zero. 4121 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI()); 4122 bool AllowNegative = 4123 TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); 4124 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { 4125 Error(getFlatOffsetLoc(Operands), 4126 Twine("expected a ") + 4127 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset" 4128 : Twine(OffsetSize - 1) + "-bit unsigned offset")); 4129 return false; 4130 } 4131 4132 return true; 4133 } 4134 4135 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4136 // Start with second operand because SMEM Offset cannot be dst or src0. 
4137 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4138 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4139 if (Op.isSMEMOffset()) 4140 return Op.getStartLoc(); 4141 } 4142 return getLoc(); 4143 } 4144 4145 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4146 const OperandVector &Operands) { 4147 if (isCI() || isSI()) 4148 return true; 4149 4150 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4151 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4152 return true; 4153 4154 auto Opcode = Inst.getOpcode(); 4155 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4156 if (OpNum == -1) 4157 return true; 4158 4159 const auto &Op = Inst.getOperand(OpNum); 4160 if (!Op.isImm()) 4161 return true; 4162 4163 uint64_t Offset = Op.getImm(); 4164 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4165 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4166 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4167 return true; 4168 4169 Error(getSMEMOffsetLoc(Operands), 4170 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4171 "expected a 21-bit signed offset"); 4172 4173 return false; 4174 } 4175 4176 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4177 unsigned Opcode = Inst.getOpcode(); 4178 const MCInstrDesc &Desc = MII.get(Opcode); 4179 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4180 return true; 4181 4182 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4183 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4184 4185 const int OpIndices[] = { Src0Idx, Src1Idx }; 4186 4187 unsigned NumExprs = 0; 4188 unsigned NumLiterals = 0; 4189 uint32_t LiteralValue; 4190 4191 for (int OpIdx : OpIndices) { 4192 if (OpIdx == -1) break; 4193 4194 const MCOperand &MO = Inst.getOperand(OpIdx); 4195 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4196 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4197 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4198 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4199 if (NumLiterals == 0 || LiteralValue != Value) { 4200 LiteralValue = Value; 4201 ++NumLiterals; 4202 } 4203 } else if (MO.isExpr()) { 4204 ++NumExprs; 4205 } 4206 } 4207 } 4208 4209 return NumLiterals + NumExprs <= 1; 4210 } 4211 4212 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4213 const unsigned Opc = Inst.getOpcode(); 4214 if (isPermlane16(Opc)) { 4215 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4216 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4217 4218 if (OpSel & ~3) 4219 return false; 4220 } 4221 4222 uint64_t TSFlags = MII.get(Opc).TSFlags; 4223 4224 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { 4225 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4226 if (OpSelIdx != -1) { 4227 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4228 return false; 4229 } 4230 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4231 if (OpSelHiIdx != -1) { 4232 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4233 return false; 4234 } 4235 } 4236 4237 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). 
4238 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && 4239 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { 4240 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4241 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4242 if (OpSel & 3) 4243 return false; 4244 } 4245 4246 return true; 4247 } 4248 4249 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4250 const OperandVector &Operands) { 4251 const unsigned Opc = Inst.getOpcode(); 4252 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4253 if (DppCtrlIdx < 0) 4254 return true; 4255 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4256 4257 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4258 // DPP64 is supported for row_newbcast only. 4259 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4260 if (Src0Idx >= 0 && 4261 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4262 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4263 Error(S, "64 bit dpp only supports row_newbcast"); 4264 return false; 4265 } 4266 } 4267 4268 return true; 4269 } 4270 4271 // Check if VCC register matches wavefront size 4272 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4273 auto FB = getFeatureBits(); 4274 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4275 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4276 } 4277 4278 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4279 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4280 const OperandVector &Operands) { 4281 unsigned Opcode = Inst.getOpcode(); 4282 const MCInstrDesc &Desc = MII.get(Opcode); 4283 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1; 4284 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4285 !HasMandatoryLiteral && !isVOPD(Opcode)) 4286 return true; 4287 4288 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral); 4289 4290 unsigned NumExprs = 0; 4291 unsigned NumLiterals = 0; 4292 uint32_t LiteralValue; 4293 4294 for (int OpIdx : OpIndices) { 4295 if (OpIdx == -1) 4296 continue; 4297 4298 const MCOperand &MO = Inst.getOperand(OpIdx); 4299 if (!MO.isImm() && !MO.isExpr()) 4300 continue; 4301 if (!isSISrcOperand(Desc, OpIdx)) 4302 continue; 4303 4304 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4305 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4306 if (NumLiterals == 0 || LiteralValue != Value) { 4307 LiteralValue = Value; 4308 ++NumLiterals; 4309 } 4310 } else if (MO.isExpr()) { 4311 ++NumExprs; 4312 } 4313 } 4314 NumLiterals += NumExprs; 4315 4316 if (!NumLiterals) 4317 return true; 4318 4319 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) { 4320 Error(getLitLoc(Operands), "literal operands are not supported"); 4321 return false; 4322 } 4323 4324 if (NumLiterals > 1) { 4325 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed"); 4326 return false; 4327 } 4328 4329 return true; 4330 } 4331 4332 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4333 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4334 const MCRegisterInfo *MRI) { 4335 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4336 if (OpIdx < 0) 4337 return -1; 4338 4339 const MCOperand &Op = Inst.getOperand(OpIdx); 4340 if (!Op.isReg()) 4341 return -1; 4342 4343 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4344 auto Reg = Sub ? 
Sub : Op.getReg(); 4345 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4346 return AGPR32.contains(Reg) ? 1 : 0; 4347 } 4348 4349 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4350 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4351 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4352 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4353 SIInstrFlags::DS)) == 0) 4354 return true; 4355 4356 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4357 : AMDGPU::OpName::vdata; 4358 4359 const MCRegisterInfo *MRI = getMRI(); 4360 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4361 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4362 4363 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4364 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4365 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4366 return false; 4367 } 4368 4369 auto FB = getFeatureBits(); 4370 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4371 if (DataAreg < 0 || DstAreg < 0) 4372 return true; 4373 return DstAreg == DataAreg; 4374 } 4375 4376 return DstAreg < 1 && DataAreg < 1; 4377 } 4378 4379 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4380 auto FB = getFeatureBits(); 4381 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4382 return true; 4383 4384 const MCRegisterInfo *MRI = getMRI(); 4385 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4386 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4387 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4388 const MCOperand &Op = Inst.getOperand(I); 4389 if (!Op.isReg()) 4390 continue; 4391 4392 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4393 if (!Sub) 4394 continue; 4395 4396 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4397 return false; 4398 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4399 return false; 4400 } 4401 4402 return true; 4403 } 4404 4405 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4406 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4407 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4408 if (Op.isBLGP()) 4409 return Op.getStartLoc(); 4410 } 4411 return SMLoc(); 4412 } 4413 4414 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4415 const OperandVector &Operands) { 4416 unsigned Opc = Inst.getOpcode(); 4417 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4418 if (BlgpIdx == -1) 4419 return true; 4420 SMLoc BLGPLoc = getBLGPLoc(Operands); 4421 if (!BLGPLoc.isValid()) 4422 return true; 4423 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4424 auto FB = getFeatureBits(); 4425 bool UsesNeg = false; 4426 if (FB[AMDGPU::FeatureGFX940Insts]) { 4427 switch (Opc) { 4428 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4429 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4430 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4431 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4432 UsesNeg = true; 4433 } 4434 } 4435 4436 if (IsNeg == UsesNeg) 4437 return true; 4438 4439 Error(BLGPLoc, 4440 UsesNeg ? 
"invalid modifier: blgp is not supported" 4441 : "invalid modifier: neg is not supported"); 4442 4443 return false; 4444 } 4445 4446 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst, 4447 const OperandVector &Operands) { 4448 if (!isGFX11Plus()) 4449 return true; 4450 4451 unsigned Opc = Inst.getOpcode(); 4452 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 && 4453 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 && 4454 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 && 4455 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11) 4456 return true; 4457 4458 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst); 4459 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg()); 4460 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()); 4461 if (Reg == AMDGPU::SGPR_NULL) 4462 return true; 4463 4464 SMLoc RegLoc = getRegLoc(Reg, Operands); 4465 Error(RegLoc, "src0 must be null"); 4466 return false; 4467 } 4468 4469 // gfx90a has an undocumented limitation: 4470 // DS_GWS opcodes must use even aligned registers. 4471 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4472 const OperandVector &Operands) { 4473 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4474 return true; 4475 4476 int Opc = Inst.getOpcode(); 4477 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4478 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4479 return true; 4480 4481 const MCRegisterInfo *MRI = getMRI(); 4482 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4483 int Data0Pos = 4484 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4485 assert(Data0Pos != -1); 4486 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4487 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4488 if (RegIdx & 1) { 4489 SMLoc RegLoc = getRegLoc(Reg, Operands); 4490 Error(RegLoc, "vgpr must be even aligned"); 4491 return false; 4492 } 4493 4494 return true; 4495 } 4496 4497 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4498 const OperandVector &Operands, 4499 const SMLoc &IDLoc) { 4500 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4501 AMDGPU::OpName::cpol); 4502 if (CPolPos == -1) 4503 return true; 4504 4505 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4506 4507 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4508 if (TSFlags & SIInstrFlags::SMRD) { 4509 if (CPol && (isSI() || isCI())) { 4510 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4511 Error(S, "cache policy is not supported for SMRD instructions"); 4512 return false; 4513 } 4514 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4515 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4516 return false; 4517 } 4518 } 4519 4520 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4521 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4522 StringRef CStr(S.getPointer()); 4523 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4524 Error(S, "scc is not supported on this GPU"); 4525 return false; 4526 } 4527 4528 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4529 return true; 4530 4531 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4532 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4533 Error(IDLoc, isGFX940() ? 
"instruction must use sc0" 4534 : "instruction must use glc"); 4535 return false; 4536 } 4537 } else { 4538 if (CPol & CPol::GLC) { 4539 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4540 StringRef CStr(S.getPointer()); 4541 S = SMLoc::getFromPointer( 4542 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4543 Error(S, isGFX940() ? "instruction must not use sc0" 4544 : "instruction must not use glc"); 4545 return false; 4546 } 4547 } 4548 4549 return true; 4550 } 4551 4552 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4553 if (!isGFX11Plus()) 4554 return true; 4555 for (auto &Operand : Operands) { 4556 if (!Operand->isReg()) 4557 continue; 4558 unsigned Reg = Operand->getReg(); 4559 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4560 Error(getRegLoc(Reg, Operands), 4561 "execz and vccz are not supported on this GPU"); 4562 return false; 4563 } 4564 } 4565 return true; 4566 } 4567 4568 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, 4569 const OperandVector &Operands) { 4570 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4571 if (Desc.mayStore() && 4572 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 4573 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands); 4574 if (Loc != getInstLoc(Operands)) { 4575 Error(Loc, "TFE modifier has no meaning for store instructions"); 4576 return false; 4577 } 4578 } 4579 4580 return true; 4581 } 4582 4583 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4584 const SMLoc &IDLoc, 4585 const OperandVector &Operands) { 4586 if (auto ErrMsg = validateLdsDirect(Inst)) { 4587 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4588 return false; 4589 } 4590 if (!validateSOPLiteral(Inst)) { 4591 Error(getLitLoc(Operands), 4592 "only one unique literal operand is allowed"); 4593 return false; 4594 } 4595 if (!validateVOPLiteral(Inst, Operands)) { 4596 return false; 4597 } 4598 if (!validateConstantBusLimitations(Inst, Operands)) { 4599 return false; 4600 } 4601 if (!validateVOPDRegBankConstraints(Inst, Operands)) { 4602 return false; 4603 } 4604 if (!validateIntClampSupported(Inst)) { 4605 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4606 "integer clamping is not supported on this GPU"); 4607 return false; 4608 } 4609 if (!validateOpSel(Inst)) { 4610 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4611 "invalid op_sel operand"); 4612 return false; 4613 } 4614 if (!validateDPP(Inst, Operands)) { 4615 return false; 4616 } 4617 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
4618 if (!validateMIMGD16(Inst)) { 4619 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4620 "d16 modifier is not supported on this GPU"); 4621 return false; 4622 } 4623 if (!validateMIMGMSAA(Inst)) { 4624 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4625 "invalid dim; must be MSAA type"); 4626 return false; 4627 } 4628 if (!validateMIMGDataSize(Inst, IDLoc)) { 4629 return false; 4630 } 4631 if (!validateMIMGAddrSize(Inst)) { 4632 Error(IDLoc, 4633 "image address size does not match dim and a16"); 4634 return false; 4635 } 4636 if (!validateMIMGAtomicDMask(Inst)) { 4637 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4638 "invalid atomic image dmask"); 4639 return false; 4640 } 4641 if (!validateMIMGGatherDMask(Inst)) { 4642 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4643 "invalid image_gather dmask: only one bit must be set"); 4644 return false; 4645 } 4646 if (!validateMovrels(Inst, Operands)) { 4647 return false; 4648 } 4649 if (!validateFlatOffset(Inst, Operands)) { 4650 return false; 4651 } 4652 if (!validateSMEMOffset(Inst, Operands)) { 4653 return false; 4654 } 4655 if (!validateMAIAccWrite(Inst, Operands)) { 4656 return false; 4657 } 4658 if (!validateMAISrc2(Inst, Operands)) { 4659 return false; 4660 } 4661 if (!validateMFMA(Inst, Operands)) { 4662 return false; 4663 } 4664 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4665 return false; 4666 } 4667 4668 if (!validateAGPRLdSt(Inst)) { 4669 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4670 ? "invalid register class: data and dst should be all VGPR or AGPR" 4671 : "invalid register class: agpr loads and stores not supported on this GPU" 4672 ); 4673 return false; 4674 } 4675 if (!validateVGPRAlign(Inst)) { 4676 Error(IDLoc, 4677 "invalid register class: vgpr tuples must be 64 bit aligned"); 4678 return false; 4679 } 4680 if (!validateGWS(Inst, Operands)) { 4681 return false; 4682 } 4683 4684 if (!validateBLGP(Inst, Operands)) { 4685 return false; 4686 } 4687 4688 if (!validateDivScale(Inst)) { 4689 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4690 return false; 4691 } 4692 if (!validateWaitCnt(Inst, Operands)) { 4693 return false; 4694 } 4695 if (!validateExeczVcczOperands(Operands)) { 4696 return false; 4697 } 4698 if (!validateTFE(Inst, Operands)) { 4699 return false; 4700 } 4701 4702 return true; 4703 } 4704 4705 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4706 const FeatureBitset &FBS, 4707 unsigned VariantID = 0); 4708 4709 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4710 const FeatureBitset &AvailableFeatures, 4711 unsigned VariantID); 4712 4713 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4714 const FeatureBitset &FBS) { 4715 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4716 } 4717 4718 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4719 const FeatureBitset &FBS, 4720 ArrayRef<unsigned> Variants) { 4721 for (auto Variant : Variants) { 4722 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4723 return true; 4724 } 4725 4726 return false; 4727 } 4728 4729 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4730 const SMLoc &IDLoc) { 4731 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits()); 4732 4733 // Check if requested instruction variant is supported. 4734 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4735 return false; 4736 4737 // This instruction is not supported. 4738 // Clear any other pending errors because they are no longer relevant. 
4739 getParser().clearPendingErrors(); 4740 4741 // Requested instruction variant is not supported. 4742 // Check if any other variants are supported. 4743 StringRef VariantName = getMatchedVariantName(); 4744 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4745 return Error(IDLoc, 4746 Twine(VariantName, 4747 " variant of this instruction is not supported")); 4748 } 4749 4750 // Check if this instruction may be used with a different wavesize. 4751 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && 4752 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { 4753 4754 FeatureBitset FeaturesWS32 = getFeatureBits(); 4755 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64) 4756 .flip(AMDGPU::FeatureWavefrontSize32); 4757 FeatureBitset AvailableFeaturesWS32 = 4758 ComputeAvailableFeatures(FeaturesWS32); 4759 4760 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants())) 4761 return Error(IDLoc, "instruction requires wavesize=32"); 4762 } 4763 4764 // Finally check if this instruction is supported on any other GPU. 4765 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4766 return Error(IDLoc, "instruction not supported on this GPU"); 4767 } 4768 4769 // Instruction not supported on any GPU. Probably a typo. 4770 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4771 return Error(IDLoc, "invalid instruction" + Suggestion); 4772 } 4773 4774 static bool isInvalidVOPDY(const OperandVector &Operands, 4775 uint64_t InvalidOprIdx) { 4776 assert(InvalidOprIdx < Operands.size()); 4777 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]); 4778 if (Op.isToken() && InvalidOprIdx > 1) { 4779 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]); 4780 return PrevOp.isToken() && PrevOp.getToken() == "::"; 4781 } 4782 return false; 4783 } 4784 4785 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4786 OperandVector &Operands, 4787 MCStreamer &Out, 4788 uint64_t &ErrorInfo, 4789 bool MatchingInlineAsm) { 4790 MCInst Inst; 4791 unsigned Result = Match_Success; 4792 for (auto Variant : getMatchedVariants()) { 4793 uint64_t EI; 4794 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4795 Variant); 4796 // We order match statuses from least to most specific. We use most specific 4797 // status as resulting 4798 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4799 if ((R == Match_Success) || 4800 (R == Match_PreferE32) || 4801 (R == Match_MissingFeature && Result != Match_PreferE32) || 4802 (R == Match_InvalidOperand && Result != Match_MissingFeature 4803 && Result != Match_PreferE32) || 4804 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4805 && Result != Match_MissingFeature 4806 && Result != Match_PreferE32)) { 4807 Result = R; 4808 ErrorInfo = EI; 4809 } 4810 if (R == Match_Success) 4811 break; 4812 } 4813 4814 if (Result == Match_Success) { 4815 if (!validateInstruction(Inst, IDLoc, Operands)) { 4816 return true; 4817 } 4818 Inst.setLoc(IDLoc); 4819 Out.emitInstruction(Inst, getSTI()); 4820 return false; 4821 } 4822 4823 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4824 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4825 return true; 4826 } 4827 4828 switch (Result) { 4829 default: break; 4830 case Match_MissingFeature: 4831 // It has been verified that the specified instruction 4832 // mnemonic is valid. A match was found but it requires 4833 // features which are not supported on this GPU. 
4834 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4835 4836 case Match_InvalidOperand: { 4837 SMLoc ErrorLoc = IDLoc; 4838 if (ErrorInfo != ~0ULL) { 4839 if (ErrorInfo >= Operands.size()) { 4840 return Error(IDLoc, "too few operands for instruction"); 4841 } 4842 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4843 if (ErrorLoc == SMLoc()) 4844 ErrorLoc = IDLoc; 4845 4846 if (isInvalidVOPDY(Operands, ErrorInfo)) 4847 return Error(ErrorLoc, "invalid VOPDY instruction"); 4848 } 4849 return Error(ErrorLoc, "invalid operand for instruction"); 4850 } 4851 4852 case Match_PreferE32: 4853 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4854 "should be encoded as e32"); 4855 case Match_MnemonicFail: 4856 llvm_unreachable("Invalid instructions should have been handled already"); 4857 } 4858 llvm_unreachable("Implement any new match types added!"); 4859 } 4860 4861 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4862 int64_t Tmp = -1; 4863 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4864 return true; 4865 } 4866 if (getParser().parseAbsoluteExpression(Tmp)) { 4867 return true; 4868 } 4869 Ret = static_cast<uint32_t>(Tmp); 4870 return false; 4871 } 4872 4873 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4874 uint32_t &Minor) { 4875 if (ParseAsAbsoluteExpression(Major)) 4876 return TokError("invalid major version"); 4877 4878 if (!trySkipToken(AsmToken::Comma)) 4879 return TokError("minor version number required, comma expected"); 4880 4881 if (ParseAsAbsoluteExpression(Minor)) 4882 return TokError("invalid minor version"); 4883 4884 return false; 4885 } 4886 4887 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4888 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4889 return TokError("directive only supported for amdgcn architecture"); 4890 4891 std::string TargetIDDirective; 4892 SMLoc TargetStart = getTok().getLoc(); 4893 if (getParser().parseEscapedString(TargetIDDirective)) 4894 return true; 4895 4896 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4897 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4898 return getParser().Error(TargetRange.Start, 4899 (Twine(".amdgcn_target directive's target id ") + 4900 Twine(TargetIDDirective) + 4901 Twine(" does not match the specified target id ") + 4902 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4903 4904 return false; 4905 } 4906 4907 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4908 return Error(Range.Start, "value out of range", Range); 4909 } 4910 4911 bool AMDGPUAsmParser::calculateGPRBlocks( 4912 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4913 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32, 4914 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR, 4915 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4916 // TODO(scott.linder): These calculations are duplicated from 4917 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
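// The returned block values are the register counts rounded up to the
// hardware allocation granule, in the zero-based granulated form encoded
// in COMPUTE_PGM_RSRC1.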
4918 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4919 4920 unsigned NumVGPRs = NextFreeVGPR; 4921 unsigned NumSGPRs = NextFreeSGPR; 4922 4923 if (Version.Major >= 10) 4924 NumSGPRs = 0; 4925 else { 4926 unsigned MaxAddressableNumSGPRs = 4927 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4928 4929 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4930 NumSGPRs > MaxAddressableNumSGPRs) 4931 return OutOfRangeError(SGPRRange); 4932 4933 NumSGPRs += 4934 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4935 4936 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4937 NumSGPRs > MaxAddressableNumSGPRs) 4938 return OutOfRangeError(SGPRRange); 4939 4940 if (Features.test(FeatureSGPRInitBug)) 4941 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4942 } 4943 4944 VGPRBlocks = 4945 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4946 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4947 4948 return false; 4949 } 4950 4951 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4952 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4953 return TokError("directive only supported for amdgcn architecture"); 4954 4955 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4956 return TokError("directive only supported for amdhsa OS"); 4957 4958 StringRef KernelName; 4959 if (getParser().parseIdentifier(KernelName)) 4960 return true; 4961 4962 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4963 4964 StringSet<> Seen; 4965 4966 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4967 4968 SMRange VGPRRange; 4969 uint64_t NextFreeVGPR = 0; 4970 uint64_t AccumOffset = 0; 4971 uint64_t SharedVGPRCount = 0; 4972 SMRange SGPRRange; 4973 uint64_t NextFreeSGPR = 0; 4974 4975 // Count the number of user SGPRs implied from the enabled feature bits. 4976 unsigned ImpliedUserSGPRCount = 0; 4977 4978 // Track if the asm explicitly contains the directive for the user SGPR 4979 // count. 
4980 std::optional<unsigned> ExplicitUserSGPRCount; 4981 bool ReserveVCC = true; 4982 bool ReserveFlatScr = true; 4983 std::optional<bool> EnableWavefrontSize32; 4984 4985 while (true) { 4986 while (trySkipToken(AsmToken::EndOfStatement)); 4987 4988 StringRef ID; 4989 SMRange IDRange = getTok().getLocRange(); 4990 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4991 return true; 4992 4993 if (ID == ".end_amdhsa_kernel") 4994 break; 4995 4996 if (!Seen.insert(ID).second) 4997 return TokError(".amdhsa_ directives cannot be repeated"); 4998 4999 SMLoc ValStart = getLoc(); 5000 int64_t IVal; 5001 if (getParser().parseAbsoluteExpression(IVal)) 5002 return true; 5003 SMLoc ValEnd = getLoc(); 5004 SMRange ValRange = SMRange(ValStart, ValEnd); 5005 5006 if (IVal < 0) 5007 return OutOfRangeError(ValRange); 5008 5009 uint64_t Val = IVal; 5010 5011 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 5012 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 5013 return OutOfRangeError(RANGE); \ 5014 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 5015 5016 if (ID == ".amdhsa_group_segment_fixed_size") { 5017 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 5018 return OutOfRangeError(ValRange); 5019 KD.group_segment_fixed_size = Val; 5020 } else if (ID == ".amdhsa_private_segment_fixed_size") { 5021 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 5022 return OutOfRangeError(ValRange); 5023 KD.private_segment_fixed_size = Val; 5024 } else if (ID == ".amdhsa_kernarg_size") { 5025 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 5026 return OutOfRangeError(ValRange); 5027 KD.kernarg_size = Val; 5028 } else if (ID == ".amdhsa_user_sgpr_count") { 5029 ExplicitUserSGPRCount = Val; 5030 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 5031 if (hasArchitectedFlatScratch()) 5032 return Error(IDRange.Start, 5033 "directive is not supported with architected flat scratch", 5034 IDRange); 5035 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5036 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 5037 Val, ValRange); 5038 if (Val) 5039 ImpliedUserSGPRCount += 4; 5040 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 5041 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5042 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 5043 ValRange); 5044 if (Val) 5045 ImpliedUserSGPRCount += 2; 5046 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 5047 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5048 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 5049 ValRange); 5050 if (Val) 5051 ImpliedUserSGPRCount += 2; 5052 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 5053 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5054 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 5055 Val, ValRange); 5056 if (Val) 5057 ImpliedUserSGPRCount += 2; 5058 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 5059 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5060 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 5061 ValRange); 5062 if (Val) 5063 ImpliedUserSGPRCount += 2; 5064 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 5065 if (hasArchitectedFlatScratch()) 5066 return Error(IDRange.Start, 5067 "directive is not supported with architected flat scratch", 5068 IDRange); 5069 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5070 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 5071 ValRange); 5072 if (Val) 5073 ImpliedUserSGPRCount += 2; 5074 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 5075 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5076 
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 5077 Val, ValRange); 5078 if (Val) 5079 ImpliedUserSGPRCount += 1; 5080 } else if (ID == ".amdhsa_wavefront_size32") { 5081 if (IVersion.Major < 10) 5082 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5083 EnableWavefrontSize32 = Val; 5084 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5085 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5086 Val, ValRange); 5087 } else if (ID == ".amdhsa_uses_dynamic_stack") { 5088 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5089 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange); 5090 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5091 if (hasArchitectedFlatScratch()) 5092 return Error(IDRange.Start, 5093 "directive is not supported with architected flat scratch", 5094 IDRange); 5095 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5096 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5097 } else if (ID == ".amdhsa_enable_private_segment") { 5098 if (!hasArchitectedFlatScratch()) 5099 return Error( 5100 IDRange.Start, 5101 "directive is not supported without architected flat scratch", 5102 IDRange); 5103 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5104 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5105 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5106 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5107 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5108 ValRange); 5109 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5110 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5111 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5112 ValRange); 5113 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5114 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5115 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5116 ValRange); 5117 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5118 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5119 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5120 ValRange); 5121 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5122 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5123 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5124 ValRange); 5125 } else if (ID == ".amdhsa_next_free_vgpr") { 5126 VGPRRange = ValRange; 5127 NextFreeVGPR = Val; 5128 } else if (ID == ".amdhsa_next_free_sgpr") { 5129 SGPRRange = ValRange; 5130 NextFreeSGPR = Val; 5131 } else if (ID == ".amdhsa_accum_offset") { 5132 if (!isGFX90A()) 5133 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5134 AccumOffset = Val; 5135 } else if (ID == ".amdhsa_reserve_vcc") { 5136 if (!isUInt<1>(Val)) 5137 return OutOfRangeError(ValRange); 5138 ReserveVCC = Val; 5139 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5140 if (IVersion.Major < 7) 5141 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5142 if (hasArchitectedFlatScratch()) 5143 return Error(IDRange.Start, 5144 "directive is not supported with architected flat scratch", 5145 IDRange); 5146 if (!isUInt<1>(Val)) 5147 return OutOfRangeError(ValRange); 5148 ReserveFlatScr = Val; 5149 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5150 if (IVersion.Major < 8) 5151 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5152 if (!isUInt<1>(Val)) 5153 return OutOfRangeError(ValRange); 5154 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5155 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5156 IDRange); 5157 } else if (ID == ".amdhsa_float_round_mode_32") { 5158 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 
5159 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5160 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5161 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5162 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5163 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5164 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5165 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5166 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5167 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5168 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5169 ValRange); 5170 } else if (ID == ".amdhsa_dx10_clamp") { 5171 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5172 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5173 } else if (ID == ".amdhsa_ieee_mode") { 5174 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5175 Val, ValRange); 5176 } else if (ID == ".amdhsa_fp16_overflow") { 5177 if (IVersion.Major < 9) 5178 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5179 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5180 ValRange); 5181 } else if (ID == ".amdhsa_tg_split") { 5182 if (!isGFX90A()) 5183 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5184 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5185 ValRange); 5186 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5187 if (IVersion.Major < 10) 5188 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5189 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5190 ValRange); 5191 } else if (ID == ".amdhsa_memory_ordered") { 5192 if (IVersion.Major < 10) 5193 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5194 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5195 ValRange); 5196 } else if (ID == ".amdhsa_forward_progress") { 5197 if (IVersion.Major < 10) 5198 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5199 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5200 ValRange); 5201 } else if (ID == ".amdhsa_shared_vgpr_count") { 5202 if (IVersion.Major < 10) 5203 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5204 SharedVGPRCount = Val; 5205 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5206 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, 5207 ValRange); 5208 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5209 PARSE_BITS_ENTRY( 5210 KD.compute_pgm_rsrc2, 5211 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5212 ValRange); 5213 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5214 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5215 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5216 Val, ValRange); 5217 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5218 PARSE_BITS_ENTRY( 5219 KD.compute_pgm_rsrc2, 5220 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5221 ValRange); 5222 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5223 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5224 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5225 Val, ValRange); 5226 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5227 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5228 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5229 Val, ValRange); 5230 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5231 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5232 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5233 Val, ValRange); 5234 
} else if (ID == ".amdhsa_exception_int_div_zero") { 5235 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5236 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5237 Val, ValRange); 5238 } else { 5239 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5240 } 5241 5242 #undef PARSE_BITS_ENTRY 5243 } 5244 5245 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5246 return TokError(".amdhsa_next_free_vgpr directive is required"); 5247 5248 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5249 return TokError(".amdhsa_next_free_sgpr directive is required"); 5250 5251 unsigned VGPRBlocks; 5252 unsigned SGPRBlocks; 5253 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5254 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5255 EnableWavefrontSize32, NextFreeVGPR, 5256 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5257 SGPRBlocks)) 5258 return true; 5259 5260 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5261 VGPRBlocks)) 5262 return OutOfRangeError(VGPRRange); 5263 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5264 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5265 5266 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5267 SGPRBlocks)) 5268 return OutOfRangeError(SGPRRange); 5269 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5270 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5271 SGPRBlocks); 5272 5273 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5274 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 5275 "enabled user SGPRs"); 5276 5277 unsigned UserSGPRCount = 5278 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5279 5280 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5281 return TokError("too many user SGPRs enabled"); 5282 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5283 UserSGPRCount); 5284 5285 if (isGFX90A()) { 5286 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5287 return TokError(".amdhsa_accum_offset directive is required"); 5288 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5289 return TokError("accum_offset should be in range [4..256] in " 5290 "increments of 4"); 5291 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5292 return TokError("accum_offset exceeds total VGPR allocation"); 5293 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5294 (AccumOffset / 4 - 1)); 5295 } 5296 5297 if (IVersion.Major == 10) { 5298 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS 5299 if (SharedVGPRCount && EnableWavefrontSize32) { 5300 return TokError("shared_vgpr_count directive not valid on " 5301 "wavefront size 32"); 5302 } 5303 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5304 return TokError("shared_vgpr_count*2 + " 5305 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5306 "exceed 63\n"); 5307 } 5308 } 5309 5310 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5311 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5312 ReserveFlatScr); 5313 return false; 5314 } 5315 5316 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5317 uint32_t Major; 5318 uint32_t Minor; 5319 5320 if (ParseDirectiveMajorMinor(Major, Minor)) 5321 return true; 5322 5323 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5324 return false; 5325 } 5326 5327 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5328 uint32_t Major; 5329 uint32_t Minor; 5330 uint32_t 
Stepping; 5331 StringRef VendorName; 5332 StringRef ArchName; 5333 5334 // If this directive has no arguments, then use the ISA version for the 5335 // targeted GPU. 5336 if (isToken(AsmToken::EndOfStatement)) { 5337 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5338 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5339 ISA.Stepping, 5340 "AMD", "AMDGPU"); 5341 return false; 5342 } 5343 5344 if (ParseDirectiveMajorMinor(Major, Minor)) 5345 return true; 5346 5347 if (!trySkipToken(AsmToken::Comma)) 5348 return TokError("stepping version number required, comma expected"); 5349 5350 if (ParseAsAbsoluteExpression(Stepping)) 5351 return TokError("invalid stepping version"); 5352 5353 if (!trySkipToken(AsmToken::Comma)) 5354 return TokError("vendor name required, comma expected"); 5355 5356 if (!parseString(VendorName, "invalid vendor name")) 5357 return true; 5358 5359 if (!trySkipToken(AsmToken::Comma)) 5360 return TokError("arch name required, comma expected"); 5361 5362 if (!parseString(ArchName, "invalid arch name")) 5363 return true; 5364 5365 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5366 VendorName, ArchName); 5367 return false; 5368 } 5369 5370 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5371 amd_kernel_code_t &Header) { 5372 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5373 // assembly for backwards compatibility. 5374 if (ID == "max_scratch_backing_memory_byte_size") { 5375 Parser.eatToEndOfStatement(); 5376 return false; 5377 } 5378 5379 SmallString<40> ErrStr; 5380 raw_svector_ostream Err(ErrStr); 5381 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5382 return TokError(Err.str()); 5383 } 5384 Lex(); 5385 5386 if (ID == "enable_wavefront_size32") { 5387 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5388 if (!isGFX10Plus()) 5389 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5390 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5391 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5392 } else { 5393 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5394 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5395 } 5396 } 5397 5398 if (ID == "wavefront_size") { 5399 if (Header.wavefront_size == 5) { 5400 if (!isGFX10Plus()) 5401 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5402 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5403 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5404 } else if (Header.wavefront_size == 6) { 5405 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5406 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5407 } 5408 } 5409 5410 if (ID == "enable_wgp_mode") { 5411 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5412 !isGFX10Plus()) 5413 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5414 } 5415 5416 if (ID == "enable_mem_ordered") { 5417 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5418 !isGFX10Plus()) 5419 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5420 } 5421 5422 if (ID == "enable_fwd_progress") { 5423 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5424 !isGFX10Plus()) 5425 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5426 } 5427 5428 return false; 5429 } 5430 5431 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5432 
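  // The directive opens a block of "key = value" fields that ends with
  // .end_amd_kernel_code_t. An illustrative fragment (field names follow
  // amd_kernel_code_t; the values here are made up for the example):
  //
  //   .amd_kernel_code_t
  //     enable_sgpr_kernarg_segment_ptr = 1
  //     wavefront_size = 6
  //   .end_amd_kernel_code_t
  //
  // Each field is handed to ParseAMDKernelCodeTValue above for validation.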
amd_kernel_code_t Header; 5433 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5434 5435 while (true) { 5436 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5437 // will set the current token to EndOfStatement. 5438 while(trySkipToken(AsmToken::EndOfStatement)); 5439 5440 StringRef ID; 5441 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5442 return true; 5443 5444 if (ID == ".end_amd_kernel_code_t") 5445 break; 5446 5447 if (ParseAMDKernelCodeTValue(ID, Header)) 5448 return true; 5449 } 5450 5451 getTargetStreamer().EmitAMDKernelCodeT(Header); 5452 5453 return false; 5454 } 5455 5456 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5457 StringRef KernelName; 5458 if (!parseId(KernelName, "expected symbol name")) 5459 return true; 5460 5461 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5462 ELF::STT_AMDGPU_HSA_KERNEL); 5463 5464 KernelScope.initialize(getContext()); 5465 return false; 5466 } 5467 5468 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5469 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5470 return Error(getLoc(), 5471 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5472 "architectures"); 5473 } 5474 5475 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5476 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5477 return Error(getParser().getTok().getLoc(), "target id must match options"); 5478 5479 getTargetStreamer().EmitISAVersion(); 5480 Lex(); 5481 5482 return false; 5483 } 5484 5485 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5486 const char *AssemblerDirectiveBegin; 5487 const char *AssemblerDirectiveEnd; 5488 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5489 isHsaAbiVersion3AndAbove(&getSTI()) 5490 ? std::tuple(HSAMD::V3::AssemblerDirectiveBegin, 5491 HSAMD::V3::AssemblerDirectiveEnd) 5492 : std::tuple(HSAMD::AssemblerDirectiveBegin, 5493 HSAMD::AssemblerDirectiveEnd); 5494 5495 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5496 return Error(getLoc(), 5497 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5498 "not available on non-amdhsa OSes")).str()); 5499 } 5500 5501 std::string HSAMetadataString; 5502 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5503 HSAMetadataString)) 5504 return true; 5505 5506 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5507 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5508 return Error(getLoc(), "invalid HSA metadata"); 5509 } else { 5510 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5511 return Error(getLoc(), "invalid HSA metadata"); 5512 } 5513 5514 return false; 5515 } 5516 5517 /// Common code to parse out a block of text (typically YAML) between start and 5518 /// end directives. 
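///
/// For example (an illustrative sketch, not a verbatim test case), the V3 HSA
/// metadata block is delimited as:
///
///   .amdgpu_metadata
///     amdhsa.version:
///       - 1
///       - 0
///   .end_amdgpu_metadata
///
/// Everything between the two directives, including whitespace, is collected
/// into CollectString; the end directive itself is consumed.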
5519 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5520 const char *AssemblerDirectiveEnd, 5521 std::string &CollectString) { 5522 5523 raw_string_ostream CollectStream(CollectString); 5524 5525 getLexer().setSkipSpace(false); 5526 5527 bool FoundEnd = false; 5528 while (!isToken(AsmToken::Eof)) { 5529 while (isToken(AsmToken::Space)) { 5530 CollectStream << getTokenStr(); 5531 Lex(); 5532 } 5533 5534 if (trySkipId(AssemblerDirectiveEnd)) { 5535 FoundEnd = true; 5536 break; 5537 } 5538 5539 CollectStream << Parser.parseStringToEndOfStatement() 5540 << getContext().getAsmInfo()->getSeparatorString(); 5541 5542 Parser.eatToEndOfStatement(); 5543 } 5544 5545 getLexer().setSkipSpace(true); 5546 5547 if (isToken(AsmToken::Eof) && !FoundEnd) { 5548 return TokError(Twine("expected directive ") + 5549 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5550 } 5551 5552 CollectStream.flush(); 5553 return false; 5554 } 5555 5556 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5557 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5558 std::string String; 5559 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5560 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5561 return true; 5562 5563 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5564 if (!PALMetadata->setFromString(String)) 5565 return Error(getLoc(), "invalid PAL metadata"); 5566 return false; 5567 } 5568 5569 /// Parse the assembler directive for old linear-format PAL metadata. 5570 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5571 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5572 return Error(getLoc(), 5573 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5574 "not available on non-amdpal OSes")).str()); 5575 } 5576 5577 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5578 PALMetadata->setLegacy(); 5579 for (;;) { 5580 uint32_t Key, Value; 5581 if (ParseAsAbsoluteExpression(Key)) { 5582 return TokError(Twine("invalid value in ") + 5583 Twine(PALMD::AssemblerDirective)); 5584 } 5585 if (!trySkipToken(AsmToken::Comma)) { 5586 return TokError(Twine("expected an even number of values in ") + 5587 Twine(PALMD::AssemblerDirective)); 5588 } 5589 if (ParseAsAbsoluteExpression(Value)) { 5590 return TokError(Twine("invalid value in ") + 5591 Twine(PALMD::AssemblerDirective)); 5592 } 5593 PALMetadata->setRegister(Key, Value); 5594 if (!trySkipToken(AsmToken::Comma)) 5595 break; 5596 } 5597 return false; 5598 } 5599 5600 /// ParseDirectiveAMDGPULDS 5601 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5602 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5603 if (getParser().checkForValidSection()) 5604 return true; 5605 5606 StringRef Name; 5607 SMLoc NameLoc = getLoc(); 5608 if (getParser().parseIdentifier(Name)) 5609 return TokError("expected identifier in directive"); 5610 5611 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5612 if (parseToken(AsmToken::Comma, "expected ','")) 5613 return true; 5614 5615 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5616 5617 int64_t Size; 5618 SMLoc SizeLoc = getLoc(); 5619 if (getParser().parseAbsoluteExpression(Size)) 5620 return true; 5621 if (Size < 0) 5622 return Error(SizeLoc, "size must be non-negative"); 5623 if (Size > LocalMemorySize) 5624 return Error(SizeLoc, "size is too large"); 5625 5626 int64_t Alignment = 4; 5627 if (trySkipToken(AsmToken::Comma)) { 5628 SMLoc AlignLoc = getLoc(); 5629 if 
(getParser().parseAbsoluteExpression(Alignment)) 5630 return true; 5631 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5632 return Error(AlignLoc, "alignment must be a power of two"); 5633 5634 // Alignment larger than the size of LDS is possible in theory, as long 5635 // as the linker manages to place to symbol at address 0, but we do want 5636 // to make sure the alignment fits nicely into a 32-bit integer. 5637 if (Alignment >= 1u << 31) 5638 return Error(AlignLoc, "alignment is too large"); 5639 } 5640 5641 if (parseEOL()) 5642 return true; 5643 5644 Symbol->redefineIfPossible(); 5645 if (!Symbol->isUndefined()) 5646 return Error(NameLoc, "invalid symbol redefinition"); 5647 5648 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5649 return false; 5650 } 5651 5652 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5653 StringRef IDVal = DirectiveID.getString(); 5654 5655 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5656 if (IDVal == ".amdhsa_kernel") 5657 return ParseDirectiveAMDHSAKernel(); 5658 5659 // TODO: Restructure/combine with PAL metadata directive. 5660 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5661 return ParseDirectiveHSAMetadata(); 5662 } else { 5663 if (IDVal == ".hsa_code_object_version") 5664 return ParseDirectiveHSACodeObjectVersion(); 5665 5666 if (IDVal == ".hsa_code_object_isa") 5667 return ParseDirectiveHSACodeObjectISA(); 5668 5669 if (IDVal == ".amd_kernel_code_t") 5670 return ParseDirectiveAMDKernelCodeT(); 5671 5672 if (IDVal == ".amdgpu_hsa_kernel") 5673 return ParseDirectiveAMDGPUHsaKernel(); 5674 5675 if (IDVal == ".amd_amdgpu_isa") 5676 return ParseDirectiveISAVersion(); 5677 5678 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5679 return ParseDirectiveHSAMetadata(); 5680 } 5681 5682 if (IDVal == ".amdgcn_target") 5683 return ParseDirectiveAMDGCNTarget(); 5684 5685 if (IDVal == ".amdgpu_lds") 5686 return ParseDirectiveAMDGPULDS(); 5687 5688 if (IDVal == PALMD::AssemblerDirectiveBegin) 5689 return ParseDirectivePALMetadataBegin(); 5690 5691 if (IDVal == PALMD::AssemblerDirective) 5692 return ParseDirectivePALMetadata(); 5693 5694 return true; 5695 } 5696 5697 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5698 unsigned RegNo) { 5699 5700 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5701 return isGFX9Plus(); 5702 5703 // GFX10+ has 2 more SGPRs 104 and 105. 5704 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5705 return hasSGPR104_SGPR105(); 5706 5707 switch (RegNo) { 5708 case AMDGPU::SRC_SHARED_BASE_LO: 5709 case AMDGPU::SRC_SHARED_BASE: 5710 case AMDGPU::SRC_SHARED_LIMIT_LO: 5711 case AMDGPU::SRC_SHARED_LIMIT: 5712 case AMDGPU::SRC_PRIVATE_BASE_LO: 5713 case AMDGPU::SRC_PRIVATE_BASE: 5714 case AMDGPU::SRC_PRIVATE_LIMIT_LO: 5715 case AMDGPU::SRC_PRIVATE_LIMIT: 5716 return isGFX9Plus(); 5717 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5718 return isGFX9Plus() && !isGFX11Plus(); 5719 case AMDGPU::TBA: 5720 case AMDGPU::TBA_LO: 5721 case AMDGPU::TBA_HI: 5722 case AMDGPU::TMA: 5723 case AMDGPU::TMA_LO: 5724 case AMDGPU::TMA_HI: 5725 return !isGFX9Plus(); 5726 case AMDGPU::XNACK_MASK: 5727 case AMDGPU::XNACK_MASK_LO: 5728 case AMDGPU::XNACK_MASK_HI: 5729 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5730 case AMDGPU::SGPR_NULL: 5731 return isGFX10Plus(); 5732 default: 5733 break; 5734 } 5735 5736 if (isCI()) 5737 return true; 5738 5739 if (isSI() || isGFX10Plus()) { 5740 // No flat_scr on SI. 
5741 // On GFX10Plus flat scratch is not a valid register operand and can only be 5742 // accessed with s_setreg/s_getreg. 5743 switch (RegNo) { 5744 case AMDGPU::FLAT_SCR: 5745 case AMDGPU::FLAT_SCR_LO: 5746 case AMDGPU::FLAT_SCR_HI: 5747 return false; 5748 default: 5749 return true; 5750 } 5751 } 5752 5753 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5754 // SI/CI have. 5755 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5756 return hasSGPR102_SGPR103(); 5757 5758 return true; 5759 } 5760 5761 OperandMatchResultTy 5762 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5763 OperandMode Mode) { 5764 OperandMatchResultTy ResTy = parseVOPD(Operands); 5765 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5766 isToken(AsmToken::EndOfStatement)) 5767 return ResTy; 5768 5769 // Try to parse with a custom parser 5770 ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5771 5772 // If we successfully parsed the operand or if there as an error parsing, 5773 // we are done. 5774 // 5775 // If we are parsing after we reach EndOfStatement then this means we 5776 // are appending default values to the Operands list. This is only done 5777 // by custom parser, so we shouldn't continue on to the generic parsing. 5778 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5779 isToken(AsmToken::EndOfStatement)) 5780 return ResTy; 5781 5782 SMLoc RBraceLoc; 5783 SMLoc LBraceLoc = getLoc(); 5784 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5785 unsigned Prefix = Operands.size(); 5786 5787 for (;;) { 5788 auto Loc = getLoc(); 5789 ResTy = parseReg(Operands); 5790 if (ResTy == MatchOperand_NoMatch) 5791 Error(Loc, "expected a register"); 5792 if (ResTy != MatchOperand_Success) 5793 return MatchOperand_ParseFail; 5794 5795 RBraceLoc = getLoc(); 5796 if (trySkipToken(AsmToken::RBrac)) 5797 break; 5798 5799 if (!skipToken(AsmToken::Comma, 5800 "expected a comma or a closing square bracket")) { 5801 return MatchOperand_ParseFail; 5802 } 5803 } 5804 5805 if (Operands.size() - Prefix > 1) { 5806 Operands.insert(Operands.begin() + Prefix, 5807 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5808 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5809 } 5810 5811 return MatchOperand_Success; 5812 } 5813 5814 return parseRegOrImm(Operands); 5815 } 5816 5817 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5818 // Clear any forced encodings from the previous instruction. 
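  // A trailing suffix on the mnemonic selects a specific encoding; for
  // example (illustrative):
  //   v_add_f32_e32     -> force the 32-bit VOP encoding, return "v_add_f32"
  //   v_add_f32_e64     -> force the 64-bit VOP3 encoding
  //   v_mov_b32_dpp     -> force the DPP encoding
  //   v_mov_b32_sdwa    -> force the SDWA encoding
  //   v_add_f32_e64_dpp -> force the 64-bit encoding with DPP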
5819 setForcedEncodingSize(0); 5820 setForcedDPP(false); 5821 setForcedSDWA(false); 5822 5823 if (Name.endswith("_e64_dpp")) { 5824 setForcedDPP(true); 5825 setForcedEncodingSize(64); 5826 return Name.substr(0, Name.size() - 8); 5827 } else if (Name.endswith("_e64")) { 5828 setForcedEncodingSize(64); 5829 return Name.substr(0, Name.size() - 4); 5830 } else if (Name.endswith("_e32")) { 5831 setForcedEncodingSize(32); 5832 return Name.substr(0, Name.size() - 4); 5833 } else if (Name.endswith("_dpp")) { 5834 setForcedDPP(true); 5835 return Name.substr(0, Name.size() - 4); 5836 } else if (Name.endswith("_sdwa")) { 5837 setForcedSDWA(true); 5838 return Name.substr(0, Name.size() - 5); 5839 } 5840 return Name; 5841 } 5842 5843 static void applyMnemonicAliases(StringRef &Mnemonic, 5844 const FeatureBitset &Features, 5845 unsigned VariantID); 5846 5847 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5848 StringRef Name, 5849 SMLoc NameLoc, OperandVector &Operands) { 5850 // Add the instruction mnemonic 5851 Name = parseMnemonicSuffix(Name); 5852 5853 // If the target architecture uses MnemonicAlias, call it here to parse 5854 // operands correctly. 5855 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5856 5857 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5858 5859 bool IsMIMG = Name.startswith("image_"); 5860 5861 while (!trySkipToken(AsmToken::EndOfStatement)) { 5862 OperandMode Mode = OperandMode_Default; 5863 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5864 Mode = OperandMode_NSA; 5865 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5866 5867 if (Res != MatchOperand_Success) { 5868 checkUnsupportedInstruction(Name, NameLoc); 5869 if (!Parser.hasPendingError()) { 5870 // FIXME: use real operand location rather than the current location. 5871 StringRef Msg = 5872 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5873 "not a valid operand."; 5874 Error(getLoc(), Msg); 5875 } 5876 while (!trySkipToken(AsmToken::EndOfStatement)) { 5877 lex(); 5878 } 5879 return true; 5880 } 5881 5882 // Eat the comma or space if there is one. 5883 trySkipToken(AsmToken::Comma); 5884 } 5885 5886 return false; 5887 } 5888 5889 //===----------------------------------------------------------------------===// 5890 // Utility functions 5891 //===----------------------------------------------------------------------===// 5892 5893 OperandMatchResultTy AMDGPUAsmParser::parseTokenOp(StringRef Name, 5894 OperandVector &Operands) { 5895 SMLoc S = getLoc(); 5896 if (!trySkipId(Name)) 5897 return MatchOperand_NoMatch; 5898 5899 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S)); 5900 return MatchOperand_Success; 5901 } 5902 5903 OperandMatchResultTy 5904 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5905 5906 if (!trySkipId(Prefix, AsmToken::Colon)) 5907 return MatchOperand_NoMatch; 5908 5909 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 5910 } 5911 5912 OperandMatchResultTy 5913 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5914 AMDGPUOperand::ImmTy ImmTy, 5915 bool (*ConvertResult)(int64_t&)) { 5916 SMLoc S = getLoc(); 5917 int64_t Value = 0; 5918 5919 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5920 if (Res != MatchOperand_Success) 5921 return Res; 5922 5923 if (ConvertResult && !ConvertResult(Value)) { 5924 Error(S, "invalid " + StringRef(Prefix) + " value."); 5925 } 5926 5927 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5928 return MatchOperand_Success; 5929 } 5930 5931 OperandMatchResultTy 5932 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5933 OperandVector &Operands, 5934 AMDGPUOperand::ImmTy ImmTy, 5935 bool (*ConvertResult)(int64_t&)) { 5936 SMLoc S = getLoc(); 5937 if (!trySkipId(Prefix, AsmToken::Colon)) 5938 return MatchOperand_NoMatch; 5939 5940 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5941 return MatchOperand_ParseFail; 5942 5943 unsigned Val = 0; 5944 const unsigned MaxSize = 4; 5945 5946 // FIXME: How to verify the number of elements matches the number of src 5947 // operands? 5948 for (int I = 0; ; ++I) { 5949 int64_t Op; 5950 SMLoc Loc = getLoc(); 5951 if (!parseExpr(Op)) 5952 return MatchOperand_ParseFail; 5953 5954 if (Op != 0 && Op != 1) { 5955 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5956 return MatchOperand_ParseFail; 5957 } 5958 5959 Val |= (Op << I); 5960 5961 if (trySkipToken(AsmToken::RBrac)) 5962 break; 5963 5964 if (I + 1 == MaxSize) { 5965 Error(getLoc(), "expected a closing square bracket"); 5966 return MatchOperand_ParseFail; 5967 } 5968 5969 if (!skipToken(AsmToken::Comma, "expected a comma")) 5970 return MatchOperand_ParseFail; 5971 } 5972 5973 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5974 return MatchOperand_Success; 5975 } 5976 5977 OperandMatchResultTy 5978 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5979 AMDGPUOperand::ImmTy ImmTy) { 5980 int64_t Bit; 5981 SMLoc S = getLoc(); 5982 5983 if (trySkipId(Name)) { 5984 Bit = 1; 5985 } else if (trySkipId("no", Name)) { 5986 Bit = 0; 5987 } else { 5988 return MatchOperand_NoMatch; 5989 } 5990 5991 if (Name == "r128" && !hasMIMG_R128()) { 5992 Error(S, "r128 modifier is not supported on this GPU"); 5993 return MatchOperand_ParseFail; 5994 } 5995 if (Name == "a16" && !hasA16()) { 5996 Error(S, "a16 modifier is not supported on this GPU"); 5997 return MatchOperand_ParseFail; 5998 } 5999 6000 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 6001 ImmTy = AMDGPUOperand::ImmTyR128A16; 6002 6003 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 6004 return MatchOperand_Success; 6005 } 6006 6007 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, 6008 bool &Disabling) const { 6009 Disabling = Id.startswith("no"); 6010 6011 if (isGFX940() && !Mnemo.startswith("s_")) { 6012 return StringSwitch<unsigned>(Id) 6013 .Case("nt", AMDGPU::CPol::NT) 6014 .Case("nont", AMDGPU::CPol::NT) 6015 .Case("sc0", AMDGPU::CPol::SC0) 6016 .Case("nosc0", AMDGPU::CPol::SC0) 6017 .Case("sc1", AMDGPU::CPol::SC1) 6018 .Case("nosc1", AMDGPU::CPol::SC1) 6019 .Default(0); 6020 } 6021 6022 return StringSwitch<unsigned>(Id) 6023 .Case("dlc", AMDGPU::CPol::DLC) 6024 .Case("nodlc", AMDGPU::CPol::DLC) 6025 .Case("glc", AMDGPU::CPol::GLC) 6026 .Case("noglc", AMDGPU::CPol::GLC) 6027 .Case("scc", AMDGPU::CPol::SCC) 6028 
.Case("noscc", AMDGPU::CPol::SCC) 6029 .Case("slc", AMDGPU::CPol::SLC) 6030 .Case("noslc", AMDGPU::CPol::SLC) 6031 .Default(0); 6032 } 6033 6034 OperandMatchResultTy 6035 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 6036 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 6037 SMLoc OpLoc = getLoc(); 6038 unsigned Enabled = 0, Seen = 0; 6039 for (;;) { 6040 SMLoc S = getLoc(); 6041 bool Disabling; 6042 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling); 6043 if (!CPol) 6044 break; 6045 6046 lex(); 6047 6048 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) { 6049 Error(S, "dlc modifier is not supported on this GPU"); 6050 return MatchOperand_ParseFail; 6051 } 6052 6053 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) { 6054 Error(S, "scc modifier is not supported on this GPU"); 6055 return MatchOperand_ParseFail; 6056 } 6057 6058 if (Seen & CPol) { 6059 Error(S, "duplicate cache policy modifier"); 6060 return MatchOperand_ParseFail; 6061 } 6062 6063 if (!Disabling) 6064 Enabled |= CPol; 6065 6066 Seen |= CPol; 6067 } 6068 6069 if (!Seen) 6070 return MatchOperand_NoMatch; 6071 6072 Operands.push_back( 6073 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol)); 6074 return MatchOperand_Success; 6075 } 6076 6077 static void addOptionalImmOperand( 6078 MCInst& Inst, const OperandVector& Operands, 6079 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 6080 AMDGPUOperand::ImmTy ImmT, 6081 int64_t Default = 0) { 6082 auto i = OptionalIdx.find(ImmT); 6083 if (i != OptionalIdx.end()) { 6084 unsigned Idx = i->second; 6085 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 6086 } else { 6087 Inst.addOperand(MCOperand::createImm(Default)); 6088 } 6089 } 6090 6091 OperandMatchResultTy 6092 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 6093 StringRef &Value, 6094 SMLoc &StringLoc) { 6095 if (!trySkipId(Prefix, AsmToken::Colon)) 6096 return MatchOperand_NoMatch; 6097 6098 StringLoc = getLoc(); 6099 return parseId(Value, "expected an identifier") ? MatchOperand_Success 6100 : MatchOperand_ParseFail; 6101 } 6102 6103 //===----------------------------------------------------------------------===// 6104 // MTBUF format 6105 //===----------------------------------------------------------------------===// 6106 6107 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6108 int64_t MaxVal, 6109 int64_t &Fmt) { 6110 int64_t Val; 6111 SMLoc Loc = getLoc(); 6112 6113 auto Res = parseIntWithPrefix(Pref, Val); 6114 if (Res == MatchOperand_ParseFail) 6115 return false; 6116 if (Res == MatchOperand_NoMatch) 6117 return true; 6118 6119 if (Val < 0 || Val > MaxVal) { 6120 Error(Loc, Twine("out of range ", StringRef(Pref))); 6121 return false; 6122 } 6123 6124 Fmt = Val; 6125 return true; 6126 } 6127 6128 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6129 // values to live in a joint format operand in the MCInst encoding. 6130 OperandMatchResultTy 6131 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6132 using namespace llvm::AMDGPU::MTBUFFormat; 6133 6134 int64_t Dfmt = DFMT_UNDEF; 6135 int64_t Nfmt = NFMT_UNDEF; 6136 6137 // dfmt and nfmt can appear in either order, and each is optional. 
6138 for (int I = 0; I < 2; ++I) { 6139 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6140 return MatchOperand_ParseFail; 6141 6142 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 6143 return MatchOperand_ParseFail; 6144 } 6145 // Skip optional comma between dfmt/nfmt 6146 // but guard against 2 commas following each other. 6147 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6148 !peekToken().is(AsmToken::Comma)) { 6149 trySkipToken(AsmToken::Comma); 6150 } 6151 } 6152 6153 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6154 return MatchOperand_NoMatch; 6155 6156 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6157 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 6158 6159 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6160 return MatchOperand_Success; 6161 } 6162 6163 OperandMatchResultTy 6164 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6165 using namespace llvm::AMDGPU::MTBUFFormat; 6166 6167 int64_t Fmt = UFMT_UNDEF; 6168 6169 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6170 return MatchOperand_ParseFail; 6171 6172 if (Fmt == UFMT_UNDEF) 6173 return MatchOperand_NoMatch; 6174 6175 Format = Fmt; 6176 return MatchOperand_Success; 6177 } 6178 6179 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6180 int64_t &Nfmt, 6181 StringRef FormatStr, 6182 SMLoc Loc) { 6183 using namespace llvm::AMDGPU::MTBUFFormat; 6184 int64_t Format; 6185 6186 Format = getDfmt(FormatStr); 6187 if (Format != DFMT_UNDEF) { 6188 Dfmt = Format; 6189 return true; 6190 } 6191 6192 Format = getNfmt(FormatStr, getSTI()); 6193 if (Format != NFMT_UNDEF) { 6194 Nfmt = Format; 6195 return true; 6196 } 6197 6198 Error(Loc, "unsupported format"); 6199 return false; 6200 } 6201 6202 OperandMatchResultTy 6203 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6204 SMLoc FormatLoc, 6205 int64_t &Format) { 6206 using namespace llvm::AMDGPU::MTBUFFormat; 6207 6208 int64_t Dfmt = DFMT_UNDEF; 6209 int64_t Nfmt = NFMT_UNDEF; 6210 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6211 return MatchOperand_ParseFail; 6212 6213 if (trySkipToken(AsmToken::Comma)) { 6214 StringRef Str; 6215 SMLoc Loc = getLoc(); 6216 if (!parseId(Str, "expected a format string") || 6217 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6218 return MatchOperand_ParseFail; 6219 } 6220 if (Dfmt == DFMT_UNDEF) { 6221 Error(Loc, "duplicate numeric format"); 6222 return MatchOperand_ParseFail; 6223 } else if (Nfmt == NFMT_UNDEF) { 6224 Error(Loc, "duplicate data format"); 6225 return MatchOperand_ParseFail; 6226 } 6227 } 6228 6229 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6230 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6231 6232 if (isGFX10Plus()) { 6233 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6234 if (Ufmt == UFMT_UNDEF) { 6235 Error(FormatLoc, "unsupported format"); 6236 return MatchOperand_ParseFail; 6237 } 6238 Format = Ufmt; 6239 } else { 6240 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6241 } 6242 6243 return MatchOperand_Success; 6244 } 6245 6246 OperandMatchResultTy 6247 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6248 SMLoc Loc, 6249 int64_t &Format) { 6250 using namespace llvm::AMDGPU::MTBUFFormat; 6251 6252 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6253 if (Id == UFMT_UNDEF) 6254 return MatchOperand_NoMatch; 6255 6256 if (!isGFX10Plus()) { 6257 Error(Loc, "unified format is not supported on this GPU"); 6258 return MatchOperand_ParseFail; 6259 } 6260 6261 Format = Id; 6262 return MatchOperand_Success; 6263 } 6264 6265 OperandMatchResultTy 6266 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6267 using namespace llvm::AMDGPU::MTBUFFormat; 6268 SMLoc Loc = getLoc(); 6269 6270 if (!parseExpr(Format)) 6271 return MatchOperand_ParseFail; 6272 if (!isValidFormatEncoding(Format, getSTI())) { 6273 Error(Loc, "out of range format"); 6274 return MatchOperand_ParseFail; 6275 } 6276 6277 return MatchOperand_Success; 6278 } 6279 6280 OperandMatchResultTy 6281 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6282 using namespace llvm::AMDGPU::MTBUFFormat; 6283 6284 if (!trySkipId("format", AsmToken::Colon)) 6285 return MatchOperand_NoMatch; 6286 6287 if (trySkipToken(AsmToken::LBrac)) { 6288 StringRef FormatStr; 6289 SMLoc Loc = getLoc(); 6290 if (!parseId(FormatStr, "expected a format string")) 6291 return MatchOperand_ParseFail; 6292 6293 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6294 if (Res == MatchOperand_NoMatch) 6295 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6296 if (Res != MatchOperand_Success) 6297 return Res; 6298 6299 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6300 return MatchOperand_ParseFail; 6301 6302 return MatchOperand_Success; 6303 } 6304 6305 return parseNumericFormat(Format); 6306 } 6307 6308 OperandMatchResultTy 6309 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6310 using namespace llvm::AMDGPU::MTBUFFormat; 6311 6312 int64_t Format = getDefaultFormatEncoding(getSTI()); 6313 OperandMatchResultTy Res; 6314 SMLoc Loc = getLoc(); 6315 6316 // Parse legacy format syntax. 6317 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6318 if (Res == MatchOperand_ParseFail) 6319 return Res; 6320 6321 bool FormatFound = (Res == MatchOperand_Success); 6322 6323 Operands.push_back( 6324 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6325 6326 if (FormatFound) 6327 trySkipToken(AsmToken::Comma); 6328 6329 if (isToken(AsmToken::EndOfStatement)) { 6330 // We are expecting an soffset operand, 6331 // but let matcher handle the error. 6332 return MatchOperand_Success; 6333 } 6334 6335 // Parse soffset. 
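  // In MTBUF syntax the format modifier may also appear after soffset, e.g.
  // (illustrative):
  //   tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
  // so consume soffset first and then look for a trailing symbolic or
  // numeric format.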
6336 Res = parseRegOrImm(Operands); 6337 if (Res != MatchOperand_Success) 6338 return Res; 6339 6340 trySkipToken(AsmToken::Comma); 6341 6342 if (!FormatFound) { 6343 Res = parseSymbolicOrNumericFormat(Format); 6344 if (Res == MatchOperand_ParseFail) 6345 return Res; 6346 if (Res == MatchOperand_Success) { 6347 auto Size = Operands.size(); 6348 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6349 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6350 Op.setImm(Format); 6351 } 6352 return MatchOperand_Success; 6353 } 6354 6355 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6356 Error(getLoc(), "duplicate format"); 6357 return MatchOperand_ParseFail; 6358 } 6359 return MatchOperand_Success; 6360 } 6361 6362 OperandMatchResultTy AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) { 6363 OperandMatchResultTy Res = 6364 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); 6365 if (Res == MatchOperand_NoMatch) { 6366 Res = parseIntWithPrefix("inst_offset", Operands, 6367 AMDGPUOperand::ImmTyInstOffset); 6368 } 6369 return Res; 6370 } 6371 6372 OperandMatchResultTy AMDGPUAsmParser::parseR128A16(OperandVector &Operands) { 6373 OperandMatchResultTy Res = 6374 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16); 6375 if (Res == MatchOperand_NoMatch) 6376 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16); 6377 return Res; 6378 } 6379 6380 //===----------------------------------------------------------------------===// 6381 // ds 6382 //===----------------------------------------------------------------------===// 6383 6384 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6385 const OperandVector &Operands) { 6386 OptionalImmIndexMap OptionalIdx; 6387 6388 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6389 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6390 6391 // Add the register arguments 6392 if (Op.isReg()) { 6393 Op.addRegOperands(Inst, 1); 6394 continue; 6395 } 6396 6397 // Handle optional arguments 6398 OptionalIdx[Op.getImmTy()] = i; 6399 } 6400 6401 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6402 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6403 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6404 6405 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6406 } 6407 6408 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6409 bool IsGdsHardcoded) { 6410 OptionalImmIndexMap OptionalIdx; 6411 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6412 AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset; 6413 6414 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6415 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6416 6417 auto TiedTo = 6418 Desc.getOperandConstraint(Inst.getNumOperands(), MCOI::TIED_TO); 6419 6420 if (TiedTo != -1) { 6421 assert((unsigned)TiedTo < Inst.getNumOperands()); 6422 Inst.addOperand(Inst.getOperand(TiedTo)); 6423 } 6424 6425 // Add the register arguments 6426 if (Op.isReg()) { 6427 Op.addRegOperands(Inst, 1); 6428 continue; 6429 } 6430 6431 if (Op.isToken() && Op.getToken() == "gds") { 6432 IsGdsHardcoded = true; 6433 continue; 6434 } 6435 6436 // Handle optional arguments 6437 OptionalIdx[Op.getImmTy()] = i; 6438 6439 if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle) 6440 OffsetType = AMDGPUOperand::ImmTySwizzle; 6441 } 6442 6443 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6444 6445 if 
(!IsGdsHardcoded) { 6446 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6447 } 6448 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6449 } 6450 6451 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6452 OptionalImmIndexMap OptionalIdx; 6453 6454 unsigned OperandIdx[4]; 6455 unsigned EnMask = 0; 6456 int SrcIdx = 0; 6457 6458 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6459 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6460 6461 // Add the register arguments 6462 if (Op.isReg()) { 6463 assert(SrcIdx < 4); 6464 OperandIdx[SrcIdx] = Inst.size(); 6465 Op.addRegOperands(Inst, 1); 6466 ++SrcIdx; 6467 continue; 6468 } 6469 6470 if (Op.isOff()) { 6471 assert(SrcIdx < 4); 6472 OperandIdx[SrcIdx] = Inst.size(); 6473 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6474 ++SrcIdx; 6475 continue; 6476 } 6477 6478 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6479 Op.addImmOperands(Inst, 1); 6480 continue; 6481 } 6482 6483 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6484 continue; 6485 6486 // Handle optional arguments 6487 OptionalIdx[Op.getImmTy()] = i; 6488 } 6489 6490 assert(SrcIdx == 4); 6491 6492 bool Compr = false; 6493 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6494 Compr = true; 6495 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6496 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6497 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6498 } 6499 6500 for (auto i = 0; i < SrcIdx; ++i) { 6501 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6502 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); 6503 } 6504 } 6505 6506 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6507 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6508 6509 Inst.addOperand(MCOperand::createImm(EnMask)); 6510 } 6511 6512 //===----------------------------------------------------------------------===// 6513 // s_waitcnt 6514 //===----------------------------------------------------------------------===// 6515 6516 static bool 6517 encodeCnt( 6518 const AMDGPU::IsaVersion ISA, 6519 int64_t &IntVal, 6520 int64_t CntVal, 6521 bool Saturate, 6522 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6523 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6524 { 6525 bool Failed = false; 6526 6527 IntVal = encode(ISA, IntVal, CntVal); 6528 if (CntVal != decode(ISA, IntVal)) { 6529 if (Saturate) { 6530 IntVal = encode(ISA, IntVal, -1); 6531 } else { 6532 Failed = true; 6533 } 6534 } 6535 return Failed; 6536 } 6537 6538 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6539 6540 SMLoc CntLoc = getLoc(); 6541 StringRef CntName = getTokenStr(); 6542 6543 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6544 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6545 return false; 6546 6547 int64_t CntVal; 6548 SMLoc ValLoc = getLoc(); 6549 if (!parseExpr(CntVal)) 6550 return false; 6551 6552 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6553 6554 bool Failed = true; 6555 bool Sat = CntName.endswith("_sat"); 6556 6557 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6558 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6559 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6560 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, 
decodeExpcnt); 6561 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6562 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6563 } else { 6564 Error(CntLoc, "invalid counter name " + CntName); 6565 return false; 6566 } 6567 6568 if (Failed) { 6569 Error(ValLoc, "too large value for " + CntName); 6570 return false; 6571 } 6572 6573 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6574 return false; 6575 6576 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6577 if (isToken(AsmToken::EndOfStatement)) { 6578 Error(getLoc(), "expected a counter name"); 6579 return false; 6580 } 6581 } 6582 6583 return true; 6584 } 6585 6586 OperandMatchResultTy 6587 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6588 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6589 int64_t Waitcnt = getWaitcntBitMask(ISA); 6590 SMLoc S = getLoc(); 6591 6592 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6593 while (!isToken(AsmToken::EndOfStatement)) { 6594 if (!parseCnt(Waitcnt)) 6595 return MatchOperand_ParseFail; 6596 } 6597 } else { 6598 if (!parseExpr(Waitcnt)) 6599 return MatchOperand_ParseFail; 6600 } 6601 6602 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6603 return MatchOperand_Success; 6604 } 6605 6606 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6607 SMLoc FieldLoc = getLoc(); 6608 StringRef FieldName = getTokenStr(); 6609 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6610 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6611 return false; 6612 6613 SMLoc ValueLoc = getLoc(); 6614 StringRef ValueName = getTokenStr(); 6615 if (!skipToken(AsmToken::Identifier, "expected a value name") || 6616 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6617 return false; 6618 6619 unsigned Shift; 6620 if (FieldName == "instid0") { 6621 Shift = 0; 6622 } else if (FieldName == "instskip") { 6623 Shift = 4; 6624 } else if (FieldName == "instid1") { 6625 Shift = 7; 6626 } else { 6627 Error(FieldLoc, "invalid field name " + FieldName); 6628 return false; 6629 } 6630 6631 int Value; 6632 if (Shift == 4) { 6633 // Parse values for instskip. 6634 Value = StringSwitch<int>(ValueName) 6635 .Case("SAME", 0) 6636 .Case("NEXT", 1) 6637 .Case("SKIP_1", 2) 6638 .Case("SKIP_2", 3) 6639 .Case("SKIP_3", 4) 6640 .Case("SKIP_4", 5) 6641 .Default(-1); 6642 } else { 6643 // Parse values for instid0 and instid1. 
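    // instid0/instid1 name the dependency being waited on; e.g. VALU_DEP_1
    // refers to the most recent preceding VALU instruction. A complete
    // operand (illustrative) looks like:
    //   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)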
6644 Value = StringSwitch<int>(ValueName) 6645 .Case("NO_DEP", 0) 6646 .Case("VALU_DEP_1", 1) 6647 .Case("VALU_DEP_2", 2) 6648 .Case("VALU_DEP_3", 3) 6649 .Case("VALU_DEP_4", 4) 6650 .Case("TRANS32_DEP_1", 5) 6651 .Case("TRANS32_DEP_2", 6) 6652 .Case("TRANS32_DEP_3", 7) 6653 .Case("FMA_ACCUM_CYCLE_1", 8) 6654 .Case("SALU_CYCLE_1", 9) 6655 .Case("SALU_CYCLE_2", 10) 6656 .Case("SALU_CYCLE_3", 11) 6657 .Default(-1); 6658 } 6659 if (Value < 0) { 6660 Error(ValueLoc, "invalid value name " + ValueName); 6661 return false; 6662 } 6663 6664 Delay |= Value << Shift; 6665 return true; 6666 } 6667 6668 OperandMatchResultTy 6669 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6670 int64_t Delay = 0; 6671 SMLoc S = getLoc(); 6672 6673 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6674 do { 6675 if (!parseDelay(Delay)) 6676 return MatchOperand_ParseFail; 6677 } while (trySkipToken(AsmToken::Pipe)); 6678 } else { 6679 if (!parseExpr(Delay)) 6680 return MatchOperand_ParseFail; 6681 } 6682 6683 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6684 return MatchOperand_Success; 6685 } 6686 6687 bool 6688 AMDGPUOperand::isSWaitCnt() const { 6689 return isImm(); 6690 } 6691 6692 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6693 6694 //===----------------------------------------------------------------------===// 6695 // DepCtr 6696 //===----------------------------------------------------------------------===// 6697 6698 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6699 StringRef DepCtrName) { 6700 switch (ErrorId) { 6701 case OPR_ID_UNKNOWN: 6702 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6703 return; 6704 case OPR_ID_UNSUPPORTED: 6705 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6706 return; 6707 case OPR_ID_DUPLICATE: 6708 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6709 return; 6710 case OPR_VAL_INVALID: 6711 Error(Loc, Twine("invalid value for ", DepCtrName)); 6712 return; 6713 default: 6714 assert(false); 6715 } 6716 } 6717 6718 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6719 6720 using namespace llvm::AMDGPU::DepCtr; 6721 6722 SMLoc DepCtrLoc = getLoc(); 6723 StringRef DepCtrName = getTokenStr(); 6724 6725 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6726 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6727 return false; 6728 6729 int64_t ExprVal; 6730 if (!parseExpr(ExprVal)) 6731 return false; 6732 6733 unsigned PrevOprMask = UsedOprMask; 6734 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6735 6736 if (CntVal < 0) { 6737 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6738 return false; 6739 } 6740 6741 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6742 return false; 6743 6744 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6745 if (isToken(AsmToken::EndOfStatement)) { 6746 Error(getLoc(), "expected a counter name"); 6747 return false; 6748 } 6749 } 6750 6751 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6752 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6753 return true; 6754 } 6755 6756 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6757 using namespace llvm::AMDGPU::DepCtr; 6758 6759 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6760 SMLoc Loc = getLoc(); 6761 6762 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6763 unsigned UsedOprMask = 0; 6764 while 
(!isToken(AsmToken::EndOfStatement)) { 6765 if (!parseDepCtr(DepCtr, UsedOprMask)) 6766 return MatchOperand_ParseFail; 6767 } 6768 } else { 6769 if (!parseExpr(DepCtr)) 6770 return MatchOperand_ParseFail; 6771 } 6772 6773 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6774 return MatchOperand_Success; 6775 } 6776 6777 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6778 6779 //===----------------------------------------------------------------------===// 6780 // hwreg 6781 //===----------------------------------------------------------------------===// 6782 6783 bool 6784 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6785 OperandInfoTy &Offset, 6786 OperandInfoTy &Width) { 6787 using namespace llvm::AMDGPU::Hwreg; 6788 6789 // The register may be specified by name or using a numeric code 6790 HwReg.Loc = getLoc(); 6791 if (isToken(AsmToken::Identifier) && 6792 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6793 HwReg.IsSymbolic = true; 6794 lex(); // skip register name 6795 } else if (!parseExpr(HwReg.Id, "a register name")) { 6796 return false; 6797 } 6798 6799 if (trySkipToken(AsmToken::RParen)) 6800 return true; 6801 6802 // parse optional params 6803 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6804 return false; 6805 6806 Offset.Loc = getLoc(); 6807 if (!parseExpr(Offset.Id)) 6808 return false; 6809 6810 if (!skipToken(AsmToken::Comma, "expected a comma")) 6811 return false; 6812 6813 Width.Loc = getLoc(); 6814 return parseExpr(Width.Id) && 6815 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6816 } 6817 6818 bool 6819 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6820 const OperandInfoTy &Offset, 6821 const OperandInfoTy &Width) { 6822 6823 using namespace llvm::AMDGPU::Hwreg; 6824 6825 if (HwReg.IsSymbolic) { 6826 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6827 Error(HwReg.Loc, 6828 "specified hardware register is not supported on this GPU"); 6829 return false; 6830 } 6831 } else { 6832 if (!isValidHwreg(HwReg.Id)) { 6833 Error(HwReg.Loc, 6834 "invalid code of hardware register: only 6-bit values are legal"); 6835 return false; 6836 } 6837 } 6838 if (!isValidHwregOffset(Offset.Id)) { 6839 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6840 return false; 6841 } 6842 if (!isValidHwregWidth(Width.Id)) { 6843 Error(Width.Loc, 6844 "invalid bitfield width: only values from 1 to 32 are legal"); 6845 return false; 6846 } 6847 return true; 6848 } 6849 6850 OperandMatchResultTy 6851 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6852 using namespace llvm::AMDGPU::Hwreg; 6853 6854 int64_t ImmVal = 0; 6855 SMLoc Loc = getLoc(); 6856 6857 if (trySkipId("hwreg", AsmToken::LParen)) { 6858 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6859 OperandInfoTy Offset(OFFSET_DEFAULT_); 6860 OperandInfoTy Width(WIDTH_DEFAULT_); 6861 if (parseHwregBody(HwReg, Offset, Width) && 6862 validateHwreg(HwReg, Offset, Width)) { 6863 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6864 } else { 6865 return MatchOperand_ParseFail; 6866 } 6867 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6868 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6869 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6870 return MatchOperand_ParseFail; 6871 } 6872 } else { 6873 return MatchOperand_ParseFail; 6874 } 6875 6876 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6877 return MatchOperand_Success; 6878 } 6879 6880 bool 
AMDGPUOperand::isHwreg() const { 6881 return isImmTy(ImmTyHwreg); 6882 } 6883 6884 //===----------------------------------------------------------------------===// 6885 // sendmsg 6886 //===----------------------------------------------------------------------===// 6887 6888 bool 6889 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6890 OperandInfoTy &Op, 6891 OperandInfoTy &Stream) { 6892 using namespace llvm::AMDGPU::SendMsg; 6893 6894 Msg.Loc = getLoc(); 6895 if (isToken(AsmToken::Identifier) && 6896 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6897 Msg.IsSymbolic = true; 6898 lex(); // skip message name 6899 } else if (!parseExpr(Msg.Id, "a message name")) { 6900 return false; 6901 } 6902 6903 if (trySkipToken(AsmToken::Comma)) { 6904 Op.IsDefined = true; 6905 Op.Loc = getLoc(); 6906 if (isToken(AsmToken::Identifier) && 6907 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6908 lex(); // skip operation name 6909 } else if (!parseExpr(Op.Id, "an operation name")) { 6910 return false; 6911 } 6912 6913 if (trySkipToken(AsmToken::Comma)) { 6914 Stream.IsDefined = true; 6915 Stream.Loc = getLoc(); 6916 if (!parseExpr(Stream.Id)) 6917 return false; 6918 } 6919 } 6920 6921 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6922 } 6923 6924 bool 6925 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6926 const OperandInfoTy &Op, 6927 const OperandInfoTy &Stream) { 6928 using namespace llvm::AMDGPU::SendMsg; 6929 6930 // Validation strictness depends on whether message is specified 6931 // in a symbolic or in a numeric form. In the latter case 6932 // only encoding possibility is checked. 6933 bool Strict = Msg.IsSymbolic; 6934 6935 if (Strict) { 6936 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6937 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6938 return false; 6939 } 6940 } else { 6941 if (!isValidMsgId(Msg.Id, getSTI())) { 6942 Error(Msg.Loc, "invalid message id"); 6943 return false; 6944 } 6945 } 6946 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6947 if (Op.IsDefined) { 6948 Error(Op.Loc, "message does not support operations"); 6949 } else { 6950 Error(Msg.Loc, "missing message operation"); 6951 } 6952 return false; 6953 } 6954 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6955 Error(Op.Loc, "invalid operation id"); 6956 return false; 6957 } 6958 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6959 Stream.IsDefined) { 6960 Error(Stream.Loc, "message operation does not support streams"); 6961 return false; 6962 } 6963 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6964 Error(Stream.Loc, "invalid message stream id"); 6965 return false; 6966 } 6967 return true; 6968 } 6969 6970 OperandMatchResultTy 6971 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6972 using namespace llvm::AMDGPU::SendMsg; 6973 6974 int64_t ImmVal = 0; 6975 SMLoc Loc = getLoc(); 6976 6977 if (trySkipId("sendmsg", AsmToken::LParen)) { 6978 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6979 OperandInfoTy Op(OP_NONE_); 6980 OperandInfoTy Stream(STREAM_ID_NONE_); 6981 if (parseSendMsgBody(Msg, Op, Stream) && 6982 validateSendMsg(Msg, Op, Stream)) { 6983 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6984 } else { 6985 return MatchOperand_ParseFail; 6986 } 6987 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6988 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6989 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6990 return MatchOperand_ParseFail; 6991 } 6992 } else { 6993 return 
MatchOperand_ParseFail; 6994 } 6995 6996 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6997 return MatchOperand_Success; 6998 } 6999 7000 bool AMDGPUOperand::isSendMsg() const { 7001 return isImmTy(ImmTySendMsg); 7002 } 7003 7004 //===----------------------------------------------------------------------===// 7005 // v_interp 7006 //===----------------------------------------------------------------------===// 7007 7008 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 7009 StringRef Str; 7010 SMLoc S = getLoc(); 7011 7012 if (!parseId(Str)) 7013 return MatchOperand_NoMatch; 7014 7015 int Slot = StringSwitch<int>(Str) 7016 .Case("p10", 0) 7017 .Case("p20", 1) 7018 .Case("p0", 2) 7019 .Default(-1); 7020 7021 if (Slot == -1) { 7022 Error(S, "invalid interpolation slot"); 7023 return MatchOperand_ParseFail; 7024 } 7025 7026 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 7027 AMDGPUOperand::ImmTyInterpSlot)); 7028 return MatchOperand_Success; 7029 } 7030 7031 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 7032 StringRef Str; 7033 SMLoc S = getLoc(); 7034 7035 if (!parseId(Str)) 7036 return MatchOperand_NoMatch; 7037 7038 if (!Str.startswith("attr")) { 7039 Error(S, "invalid interpolation attribute"); 7040 return MatchOperand_ParseFail; 7041 } 7042 7043 StringRef Chan = Str.take_back(2); 7044 int AttrChan = StringSwitch<int>(Chan) 7045 .Case(".x", 0) 7046 .Case(".y", 1) 7047 .Case(".z", 2) 7048 .Case(".w", 3) 7049 .Default(-1); 7050 if (AttrChan == -1) { 7051 Error(S, "invalid or missing interpolation attribute channel"); 7052 return MatchOperand_ParseFail; 7053 } 7054 7055 Str = Str.drop_back(2).drop_front(4); 7056 7057 uint8_t Attr; 7058 if (Str.getAsInteger(10, Attr)) { 7059 Error(S, "invalid or missing interpolation attribute number"); 7060 return MatchOperand_ParseFail; 7061 } 7062 7063 if (Attr > 63) { 7064 Error(S, "out of bounds interpolation attribute number"); 7065 return MatchOperand_ParseFail; 7066 } 7067 7068 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 7069 7070 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 7071 AMDGPUOperand::ImmTyInterpAttr)); 7072 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 7073 AMDGPUOperand::ImmTyAttrChan)); 7074 return MatchOperand_Success; 7075 } 7076 7077 //===----------------------------------------------------------------------===// 7078 // exp 7079 //===----------------------------------------------------------------------===// 7080 7081 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 7082 using namespace llvm::AMDGPU::Exp; 7083 7084 StringRef Str; 7085 SMLoc S = getLoc(); 7086 7087 if (!parseId(Str)) 7088 return MatchOperand_NoMatch; 7089 7090 unsigned Id = getTgtId(Str); 7091 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 7092 Error(S, (Id == ET_INVALID) ? 
7093 "invalid exp target" : 7094 "exp target is not supported on this GPU"); 7095 return MatchOperand_ParseFail; 7096 } 7097 7098 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 7099 AMDGPUOperand::ImmTyExpTgt)); 7100 return MatchOperand_Success; 7101 } 7102 7103 //===----------------------------------------------------------------------===// 7104 // parser helpers 7105 //===----------------------------------------------------------------------===// 7106 7107 bool 7108 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 7109 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 7110 } 7111 7112 bool 7113 AMDGPUAsmParser::isId(const StringRef Id) const { 7114 return isId(getToken(), Id); 7115 } 7116 7117 bool 7118 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 7119 return getTokenKind() == Kind; 7120 } 7121 7122 StringRef AMDGPUAsmParser::getId() const { 7123 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef(); 7124 } 7125 7126 bool 7127 AMDGPUAsmParser::trySkipId(const StringRef Id) { 7128 if (isId(Id)) { 7129 lex(); 7130 return true; 7131 } 7132 return false; 7133 } 7134 7135 bool 7136 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7137 if (isToken(AsmToken::Identifier)) { 7138 StringRef Tok = getTokenStr(); 7139 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 7140 lex(); 7141 return true; 7142 } 7143 } 7144 return false; 7145 } 7146 7147 bool 7148 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7149 if (isId(Id) && peekToken().is(Kind)) { 7150 lex(); 7151 lex(); 7152 return true; 7153 } 7154 return false; 7155 } 7156 7157 bool 7158 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7159 if (isToken(Kind)) { 7160 lex(); 7161 return true; 7162 } 7163 return false; 7164 } 7165 7166 bool 7167 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7168 const StringRef ErrMsg) { 7169 if (!trySkipToken(Kind)) { 7170 Error(getLoc(), ErrMsg); 7171 return false; 7172 } 7173 return true; 7174 } 7175 7176 bool 7177 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7178 SMLoc S = getLoc(); 7179 7180 const MCExpr *Expr; 7181 if (Parser.parseExpression(Expr)) 7182 return false; 7183 7184 if (Expr->evaluateAsAbsolute(Imm)) 7185 return true; 7186 7187 if (Expected.empty()) { 7188 Error(S, "expected absolute expression"); 7189 } else { 7190 Error(S, Twine("expected ", Expected) + 7191 Twine(" or an absolute expression")); 7192 } 7193 return false; 7194 } 7195 7196 bool 7197 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7198 SMLoc S = getLoc(); 7199 7200 const MCExpr *Expr; 7201 if (Parser.parseExpression(Expr)) 7202 return false; 7203 7204 int64_t IntVal; 7205 if (Expr->evaluateAsAbsolute(IntVal)) { 7206 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7207 } else { 7208 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7209 } 7210 return true; 7211 } 7212 7213 bool 7214 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7215 if (isToken(AsmToken::String)) { 7216 Val = getToken().getStringContents(); 7217 lex(); 7218 return true; 7219 } else { 7220 Error(getLoc(), ErrMsg); 7221 return false; 7222 } 7223 } 7224 7225 bool 7226 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7227 if (isToken(AsmToken::Identifier)) { 7228 Val = getTokenStr(); 7229 lex(); 7230 return true; 7231 } else { 7232 if (!ErrMsg.empty()) 7233 Error(getLoc(), ErrMsg); 7234 return false; 
7235 } 7236 } 7237 7238 AsmToken 7239 AMDGPUAsmParser::getToken() const { 7240 return Parser.getTok(); 7241 } 7242 7243 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 7244 return isToken(AsmToken::EndOfStatement) 7245 ? getToken() 7246 : getLexer().peekTok(ShouldSkipSpace); 7247 } 7248 7249 void 7250 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7251 auto TokCount = getLexer().peekTokens(Tokens); 7252 7253 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7254 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7255 } 7256 7257 AsmToken::TokenKind 7258 AMDGPUAsmParser::getTokenKind() const { 7259 return getLexer().getKind(); 7260 } 7261 7262 SMLoc 7263 AMDGPUAsmParser::getLoc() const { 7264 return getToken().getLoc(); 7265 } 7266 7267 StringRef 7268 AMDGPUAsmParser::getTokenStr() const { 7269 return getToken().getString(); 7270 } 7271 7272 void 7273 AMDGPUAsmParser::lex() { 7274 Parser.Lex(); 7275 } 7276 7277 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const { 7278 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7279 } 7280 7281 SMLoc 7282 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7283 const OperandVector &Operands) const { 7284 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7285 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7286 if (Test(Op)) 7287 return Op.getStartLoc(); 7288 } 7289 return getInstLoc(Operands); 7290 } 7291 7292 SMLoc 7293 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7294 const OperandVector &Operands) const { 7295 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7296 return getOperandLoc(Test, Operands); 7297 } 7298 7299 SMLoc 7300 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7301 const OperandVector &Operands) const { 7302 auto Test = [=](const AMDGPUOperand& Op) { 7303 return Op.isRegKind() && Op.getReg() == Reg; 7304 }; 7305 return getOperandLoc(Test, Operands); 7306 } 7307 7308 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands, 7309 bool SearchMandatoryLiterals) const { 7310 auto Test = [](const AMDGPUOperand& Op) { 7311 return Op.IsImmKindLiteral() || Op.isExpr(); 7312 }; 7313 SMLoc Loc = getOperandLoc(Test, Operands); 7314 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands)) 7315 Loc = getMandatoryLitLoc(Operands); 7316 return Loc; 7317 } 7318 7319 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const { 7320 auto Test = [](const AMDGPUOperand &Op) { 7321 return Op.IsImmKindMandatoryLiteral(); 7322 }; 7323 return getOperandLoc(Test, Operands); 7324 } 7325 7326 SMLoc 7327 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7328 auto Test = [](const AMDGPUOperand& Op) { 7329 return Op.isImmKindConst(); 7330 }; 7331 return getOperandLoc(Test, Operands); 7332 } 7333 7334 //===----------------------------------------------------------------------===// 7335 // swizzle 7336 //===----------------------------------------------------------------------===// 7337 7338 LLVM_READNONE 7339 static unsigned 7340 encodeBitmaskPerm(const unsigned AndMask, 7341 const unsigned OrMask, 7342 const unsigned XorMask) { 7343 using namespace llvm::AMDGPU::Swizzle; 7344 7345 return BITMASK_PERM_ENC | 7346 (AndMask << BITMASK_AND_SHIFT) | 7347 (OrMask << BITMASK_OR_SHIFT) | 7348 (XorMask << BITMASK_XOR_SHIFT); 7349 } 7350 7351 bool 7352 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7353 const unsigned MinVal, 7354 const unsigned MaxVal, 7355 const StringRef ErrMsg, 7356 SMLoc &Loc) { 7357 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7358 return false; 7359 } 7360 Loc = getLoc(); 7361 if (!parseExpr(Op)) { 7362 return false; 7363 } 7364 if (Op < MinVal || Op > MaxVal) { 7365 Error(Loc, ErrMsg); 7366 return false; 7367 } 7368 7369 return true; 7370 } 7371 7372 bool 7373 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7374 const unsigned MinVal, 7375 const unsigned MaxVal, 7376 const StringRef ErrMsg) { 7377 SMLoc Loc; 7378 for (unsigned i = 0; i < OpNum; ++i) { 7379 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7380 return false; 7381 } 7382 7383 return true; 7384 } 7385 7386 bool 7387 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7388 using namespace llvm::AMDGPU::Swizzle; 7389 7390 int64_t Lane[LANE_NUM]; 7391 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7392 "expected a 2-bit lane id")) { 7393 Imm = QUAD_PERM_ENC; 7394 for (unsigned I = 0; I < LANE_NUM; ++I) { 7395 Imm |= Lane[I] << (LANE_SHIFT * I); 7396 } 7397 return true; 7398 } 7399 return false; 7400 } 7401 7402 bool 7403 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7404 using namespace llvm::AMDGPU::Swizzle; 7405 7406 SMLoc Loc; 7407 int64_t GroupSize; 7408 int64_t LaneIdx; 7409 7410 if (!parseSwizzleOperand(GroupSize, 7411 2, 32, 7412 "group size must be in the interval [2,32]", 7413 Loc)) { 7414 return false; 7415 } 7416 if (!isPowerOf2_64(GroupSize)) { 7417 Error(Loc, "group size must be a power of two"); 7418 return false; 7419 } 7420 if (parseSwizzleOperand(LaneIdx, 7421 0, GroupSize - 1, 7422 "lane id must be in the interval [0,group size - 1]", 7423 Loc)) { 7424 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7425 return true; 7426 } 7427 return false; 7428 } 7429 7430 bool 7431 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7432 using namespace llvm::AMDGPU::Swizzle; 7433 7434 SMLoc Loc; 7435 int64_t GroupSize; 7436 7437 if (!parseSwizzleOperand(GroupSize, 7438 2, 32, 7439 "group size must be in the interval [2,32]", 7440 Loc)) { 7441 return false; 7442 } 7443 if (!isPowerOf2_64(GroupSize)) { 7444 Error(Loc, "group size must be a power of two"); 7445 return false; 7446 } 7447 7448 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7449 return true; 7450 } 7451 7452 bool 7453 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7454 using namespace llvm::AMDGPU::Swizzle; 7455 7456 SMLoc Loc; 7457 int64_t GroupSize; 7458 7459 if (!parseSwizzleOperand(GroupSize, 7460 1, 16, 7461 "group size must be in the interval [1,16]", 7462 Loc)) { 7463 return false; 7464 } 7465 if (!isPowerOf2_64(GroupSize)) { 7466 Error(Loc, "group size must be a power of two"); 7467 return false; 7468 } 7469 7470 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7471 return true; 7472 } 7473 7474 bool 7475 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7476 using namespace llvm::AMDGPU::Swizzle; 7477 7478 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7479 return false; 7480 } 7481 7482 StringRef Ctl; 7483 SMLoc StrLoc = getLoc(); 7484 if (!parseString(Ctl)) { 7485 return false; 7486 } 7487 if (Ctl.size() != BITMASK_WIDTH) { 7488 Error(StrLoc, "expected a 5-character mask"); 7489 return false; 7490 } 7491 7492 unsigned AndMask = 0; 7493 unsigned OrMask = 0; 7494 unsigned XorMask = 0; 7495 7496 for (size_t i = 0; i < Ctl.size(); ++i) { 7497 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7498 switch(Ctl[i]) { 7499 default: 7500 Error(StrLoc, "invalid mask"); 7501 return false; 7502 case '0': 7503 break; 7504 case '1': 7505 
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro")) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
  }
  return MatchOperand_NoMatch;
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
7620 "expected a VGPR index mode or a closing parenthesis" : 7621 "expected a VGPR index mode"); 7622 return UNDEF; 7623 } 7624 7625 if (Imm & Mode) { 7626 Error(S, "duplicate VGPR index mode"); 7627 return UNDEF; 7628 } 7629 Imm |= Mode; 7630 7631 if (trySkipToken(AsmToken::RParen)) 7632 break; 7633 if (!skipToken(AsmToken::Comma, 7634 "expected a comma or a closing parenthesis")) 7635 return UNDEF; 7636 } 7637 7638 return Imm; 7639 } 7640 7641 OperandMatchResultTy 7642 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7643 7644 using namespace llvm::AMDGPU::VGPRIndexMode; 7645 7646 int64_t Imm = 0; 7647 SMLoc S = getLoc(); 7648 7649 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7650 Imm = parseGPRIdxMacro(); 7651 if (Imm == UNDEF) 7652 return MatchOperand_ParseFail; 7653 } else { 7654 if (getParser().parseAbsoluteExpression(Imm)) 7655 return MatchOperand_ParseFail; 7656 if (Imm < 0 || !isUInt<4>(Imm)) { 7657 Error(S, "invalid immediate: only 4-bit values are legal"); 7658 return MatchOperand_ParseFail; 7659 } 7660 } 7661 7662 Operands.push_back( 7663 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7664 return MatchOperand_Success; 7665 } 7666 7667 bool AMDGPUOperand::isGPRIdxMode() const { 7668 return isImmTy(ImmTyGprIdxMode); 7669 } 7670 7671 //===----------------------------------------------------------------------===// 7672 // sopp branch targets 7673 //===----------------------------------------------------------------------===// 7674 7675 OperandMatchResultTy 7676 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7677 7678 // Make sure we are not parsing something 7679 // that looks like a label or an expression but is not. 7680 // This will improve error messages. 7681 if (isRegister() || isModifier()) 7682 return MatchOperand_NoMatch; 7683 7684 if (!parseExpr(Operands)) 7685 return MatchOperand_ParseFail; 7686 7687 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7688 assert(Opr.isImm() || Opr.isExpr()); 7689 SMLoc Loc = Opr.getStartLoc(); 7690 7691 // Currently we do not support arbitrary expressions as branch targets. 7692 // Only labels and absolute expressions are accepted. 
7693 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7694 Error(Loc, "expected an absolute expression or a label"); 7695 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7696 Error(Loc, "expected a 16-bit signed jump offset"); 7697 } 7698 7699 return MatchOperand_Success; 7700 } 7701 7702 //===----------------------------------------------------------------------===// 7703 // Boolean holding registers 7704 //===----------------------------------------------------------------------===// 7705 7706 OperandMatchResultTy 7707 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7708 return parseReg(Operands); 7709 } 7710 7711 //===----------------------------------------------------------------------===// 7712 // mubuf 7713 //===----------------------------------------------------------------------===// 7714 7715 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7716 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7717 } 7718 7719 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7720 const OperandVector &Operands, 7721 bool IsAtomic) { 7722 OptionalImmIndexMap OptionalIdx; 7723 unsigned FirstOperandIdx = 1; 7724 bool IsAtomicReturn = false; 7725 7726 if (IsAtomic) { 7727 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7728 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7729 if (!Op.isCPol()) 7730 continue; 7731 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7732 break; 7733 } 7734 7735 if (!IsAtomicReturn) { 7736 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7737 if (NewOpc != -1) 7738 Inst.setOpcode(NewOpc); 7739 } 7740 7741 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7742 SIInstrFlags::IsAtomicRet; 7743 } 7744 7745 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7746 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7747 7748 // Add the register arguments 7749 if (Op.isReg()) { 7750 Op.addRegOperands(Inst, 1); 7751 // Insert a tied src for atomic return dst. 7752 // This cannot be postponed as subsequent calls to 7753 // addImmOperands rely on correct number of MC operands. 7754 if (IsAtomicReturn && i == FirstOperandIdx) 7755 Op.addRegOperands(Inst, 1); 7756 continue; 7757 } 7758 7759 // Handle the case where soffset is an immediate 7760 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7761 Op.addImmOperands(Inst, 1); 7762 continue; 7763 } 7764 7765 // Handle tokens like 'offen' which are sometimes hard-coded into the 7766 // asm string. There are no MCInst operands for these. 
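    // E.g. the trailing `offen` in `buffer_load_dword v0, v1, s[4:7], s0 offen`
    // is such a token; it is recognized but no MCInst operand is created for it.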
7767 if (Op.isToken()) { 7768 continue; 7769 } 7770 assert(Op.isImm()); 7771 7772 // Handle optional arguments 7773 OptionalIdx[Op.getImmTy()] = i; 7774 } 7775 7776 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7777 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7778 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7779 } 7780 7781 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7782 OptionalImmIndexMap OptionalIdx; 7783 7784 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7785 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7786 7787 // Add the register arguments 7788 if (Op.isReg()) { 7789 Op.addRegOperands(Inst, 1); 7790 continue; 7791 } 7792 7793 // Handle the case where soffset is an immediate 7794 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7795 Op.addImmOperands(Inst, 1); 7796 continue; 7797 } 7798 7799 // Handle tokens like 'offen' which are sometimes hard-coded into the 7800 // asm string. There are no MCInst operands for these. 7801 if (Op.isToken()) { 7802 continue; 7803 } 7804 assert(Op.isImm()); 7805 7806 // Handle optional arguments 7807 OptionalIdx[Op.getImmTy()] = i; 7808 } 7809 7810 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7811 AMDGPUOperand::ImmTyOffset); 7812 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7813 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7814 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7815 } 7816 7817 //===----------------------------------------------------------------------===// 7818 // mimg 7819 //===----------------------------------------------------------------------===// 7820 7821 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7822 bool IsAtomic) { 7823 unsigned I = 1; 7824 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7825 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7826 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7827 } 7828 7829 if (IsAtomic) { 7830 // Add src, same as dst 7831 assert(Desc.getNumDefs() == 1); 7832 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7833 } 7834 7835 OptionalImmIndexMap OptionalIdx; 7836 7837 for (unsigned E = Operands.size(); I != E; ++I) { 7838 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7839 7840 // Add the register arguments 7841 if (Op.isReg()) { 7842 Op.addRegOperands(Inst, 1); 7843 } else if (Op.isImmModifier()) { 7844 OptionalIdx[Op.getImmTy()] = I; 7845 } else if (!Op.isToken()) { 7846 llvm_unreachable("unexpected operand type"); 7847 } 7848 } 7849 7850 bool IsGFX10Plus = isGFX10Plus(); 7851 7852 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7853 if (IsGFX10Plus) 7854 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7855 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7856 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7857 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7858 if (IsGFX10Plus) 7859 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7860 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::tfe)) 7861 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7862 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 
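  // `da` is only encoded on pre-GFX10 MIMG instructions; GFX10+ expresses the
  // array/dimension information through `dim` (added above) instead.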
7863 if (!IsGFX10Plus) 7864 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7865 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7866 } 7867 7868 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7869 cvtMIMG(Inst, Operands, true); 7870 } 7871 7872 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7873 OptionalImmIndexMap OptionalIdx; 7874 bool IsAtomicReturn = false; 7875 7876 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7877 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7878 if (!Op.isCPol()) 7879 continue; 7880 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7881 break; 7882 } 7883 7884 if (!IsAtomicReturn) { 7885 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7886 if (NewOpc != -1) 7887 Inst.setOpcode(NewOpc); 7888 } 7889 7890 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7891 SIInstrFlags::IsAtomicRet; 7892 7893 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7894 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7895 7896 // Add the register arguments 7897 if (Op.isReg()) { 7898 Op.addRegOperands(Inst, 1); 7899 if (IsAtomicReturn && i == 1) 7900 Op.addRegOperands(Inst, 1); 7901 continue; 7902 } 7903 7904 // Handle the case where soffset is an immediate 7905 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7906 Op.addImmOperands(Inst, 1); 7907 continue; 7908 } 7909 7910 // Handle tokens like 'offen' which are sometimes hard-coded into the 7911 // asm string. There are no MCInst operands for these. 7912 if (Op.isToken()) { 7913 continue; 7914 } 7915 assert(Op.isImm()); 7916 7917 // Handle optional arguments 7918 OptionalIdx[Op.getImmTy()] = i; 7919 } 7920 7921 if ((int)Inst.getNumOperands() <= 7922 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7923 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7924 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7925 } 7926 7927 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7928 const OperandVector &Operands) { 7929 for (unsigned I = 1; I < Operands.size(); ++I) { 7930 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7931 if (Operand.isReg()) 7932 Operand.addRegOperands(Inst, 1); 7933 } 7934 7935 Inst.addOperand(MCOperand::createImm(1)); // a16 7936 } 7937 7938 //===----------------------------------------------------------------------===// 7939 // smrd 7940 //===----------------------------------------------------------------------===// 7941 7942 bool AMDGPUOperand::isSMRDOffset8() const { 7943 return isImm() && isUInt<8>(getImm()); 7944 } 7945 7946 bool AMDGPUOperand::isSMEMOffset() const { 7947 return isImmTy(ImmTyNone) || 7948 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7949 } 7950 7951 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7952 // 32-bit literals are only supported on CI and we only want to use them 7953 // when the offset is > 8-bits. 
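  // E.g. an offset of 0x1234 does not fit in 8 bits and needs the 32-bit
  // literal form, while 0x40 can use the ordinary 8-bit SMRD offset.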
7954 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7955 } 7956 7957 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7958 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7959 } 7960 7961 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7962 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7963 } 7964 7965 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7966 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7967 } 7968 7969 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7970 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7971 } 7972 7973 //===----------------------------------------------------------------------===// 7974 // vop3 7975 //===----------------------------------------------------------------------===// 7976 7977 static bool ConvertOmodMul(int64_t &Mul) { 7978 if (Mul != 1 && Mul != 2 && Mul != 4) 7979 return false; 7980 7981 Mul >>= 1; 7982 return true; 7983 } 7984 7985 static bool ConvertOmodDiv(int64_t &Div) { 7986 if (Div == 1) { 7987 Div = 0; 7988 return true; 7989 } 7990 7991 if (Div == 2) { 7992 Div = 3; 7993 return true; 7994 } 7995 7996 return false; 7997 } 7998 7999 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 8000 // This is intentional and ensures compatibility with sp3. 8001 // See bug 35397 for details. 8002 static bool ConvertDppBoundCtrl(int64_t &BoundCtrl) { 8003 if (BoundCtrl == 0 || BoundCtrl == 1) { 8004 BoundCtrl = 1; 8005 return true; 8006 } 8007 return false; 8008 } 8009 8010 void AMDGPUAsmParser::onBeginOfFile() { 8011 if (!getParser().getStreamer().getTargetStreamer() || 8012 getSTI().getTargetTriple().getArch() == Triple::r600) 8013 return; 8014 8015 if (!getTargetStreamer().getTargetID()) 8016 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 8017 8018 if (isHsaAbiVersion3AndAbove(&getSTI())) 8019 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 8020 } 8021 8022 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 8023 StringRef Name = getTokenStr(); 8024 if (Name == "mul") { 8025 return parseIntWithPrefix("mul", Operands, 8026 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8027 } 8028 8029 if (Name == "div") { 8030 return parseIntWithPrefix("div", Operands, 8031 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8032 } 8033 8034 return MatchOperand_NoMatch; 8035 } 8036 8037 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to 8038 // the number of src operands present, then copies that bit into src0_modifiers. 
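// For example, with two source operands DST_OP_SEL is bit 2 of op_sel, so an
// op_sel value with that bit set (e.g. parsed from op_sel:[0,0,1]) results in
// SISrcMods::DST_OP_SEL being ORed into src0_modifiers.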
8039 void cvtVOP3DstOpSelOnly(MCInst &Inst) { 8040 int Opc = Inst.getOpcode(); 8041 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8042 if (OpSelIdx == -1) 8043 return; 8044 8045 int SrcNum; 8046 const int Ops[] = { AMDGPU::OpName::src0, 8047 AMDGPU::OpName::src1, 8048 AMDGPU::OpName::src2 }; 8049 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]); 8050 ++SrcNum) 8051 ; 8052 assert(SrcNum > 0); 8053 8054 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8055 8056 if ((OpSel & (1 << SrcNum)) != 0) { 8057 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8058 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8059 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8060 } 8061 } 8062 8063 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, 8064 const OperandVector &Operands) { 8065 cvtVOP3P(Inst, Operands); 8066 cvtVOP3DstOpSelOnly(Inst); 8067 } 8068 8069 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 8070 OptionalImmIndexMap &OptionalIdx) { 8071 cvtVOP3P(Inst, Operands, OptionalIdx); 8072 cvtVOP3DstOpSelOnly(Inst); 8073 } 8074 8075 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8076 return 8077 // 1. This operand is input modifiers 8078 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8079 // 2. This is not last operand 8080 && Desc.NumOperands > (OpNum + 1) 8081 // 3. Next operand is register class 8082 && Desc.operands()[OpNum + 1].RegClass != -1 8083 // 4. Next register is not tied to any other operand 8084 && Desc.getOperandConstraint(OpNum + 1, 8085 MCOI::OperandConstraint::TIED_TO) == -1; 8086 } 8087 8088 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8089 { 8090 OptionalImmIndexMap OptionalIdx; 8091 unsigned Opc = Inst.getOpcode(); 8092 8093 unsigned I = 1; 8094 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8095 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8096 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8097 } 8098 8099 for (unsigned E = Operands.size(); I != E; ++I) { 8100 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8101 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8102 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8103 } else if (Op.isInterpSlot() || 8104 Op.isInterpAttr() || 8105 Op.isAttrChan()) { 8106 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8107 } else if (Op.isImmModifier()) { 8108 OptionalIdx[Op.getImmTy()] = I; 8109 } else { 8110 llvm_unreachable("unhandled operand type"); 8111 } 8112 } 8113 8114 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high)) 8115 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8116 AMDGPUOperand::ImmTyHigh); 8117 8118 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8119 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8120 AMDGPUOperand::ImmTyClampSI); 8121 8122 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8123 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8124 AMDGPUOperand::ImmTyOModSI); 8125 } 8126 8127 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8128 { 8129 OptionalImmIndexMap OptionalIdx; 8130 unsigned Opc = Inst.getOpcode(); 8131 8132 unsigned I = 1; 8133 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8134 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8135 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8136 } 8137 8138 for (unsigned E = Operands.size(); I != E; ++I) { 8139 AMDGPUOperand &Op = 
((AMDGPUOperand &)*Operands[I]); 8140 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8141 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8142 } else if (Op.isImmModifier()) { 8143 OptionalIdx[Op.getImmTy()] = I; 8144 } else { 8145 llvm_unreachable("unhandled operand type"); 8146 } 8147 } 8148 8149 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8150 8151 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8152 if (OpSelIdx != -1) 8153 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8154 8155 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8156 8157 if (OpSelIdx == -1) 8158 return; 8159 8160 const int Ops[] = { AMDGPU::OpName::src0, 8161 AMDGPU::OpName::src1, 8162 AMDGPU::OpName::src2 }; 8163 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8164 AMDGPU::OpName::src1_modifiers, 8165 AMDGPU::OpName::src2_modifiers }; 8166 8167 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8168 8169 for (int J = 0; J < 3; ++J) { 8170 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8171 if (OpIdx == -1) 8172 break; 8173 8174 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8175 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8176 8177 if ((OpSel & (1 << J)) != 0) 8178 ModVal |= SISrcMods::OP_SEL_0; 8179 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8180 (OpSel & (1 << 3)) != 0) 8181 ModVal |= SISrcMods::DST_OP_SEL; 8182 8183 Inst.getOperand(ModIdx).setImm(ModVal); 8184 } 8185 } 8186 8187 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8188 OptionalImmIndexMap &OptionalIdx) { 8189 unsigned Opc = Inst.getOpcode(); 8190 8191 unsigned I = 1; 8192 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8193 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8194 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8195 } 8196 8197 for (unsigned E = Operands.size(); I != E; ++I) { 8198 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8199 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8200 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8201 } else if (Op.isImmModifier()) { 8202 OptionalIdx[Op.getImmTy()] = I; 8203 } else if (Op.isRegOrImm()) { 8204 Op.addRegOrImmOperands(Inst, 1); 8205 } else { 8206 llvm_unreachable("unhandled operand type"); 8207 } 8208 } 8209 8210 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8211 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8212 AMDGPUOperand::ImmTyClampSI); 8213 8214 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8215 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8216 AMDGPUOperand::ImmTyOModSI); 8217 8218 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8219 // it has src2 register operand that is tied to dst operand 8220 // we don't allow modifiers for this operand in assembler so src2_modifiers 8221 // should be 0. 8222 if (isMAC(Opc)) { 8223 auto it = Inst.begin(); 8224 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8225 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8226 ++it; 8227 // Copy the operand to ensure it's not invalidated when Inst grows. 
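    // E.g. for `v_mac_f32 v0, v1, v2` this appends the dst register v0 again
    // as the tied src2 operand.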
8228 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8229 } 8230 } 8231 8232 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8233 OptionalImmIndexMap OptionalIdx; 8234 cvtVOP3(Inst, Operands, OptionalIdx); 8235 } 8236 8237 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8238 OptionalImmIndexMap &OptIdx) { 8239 const int Opc = Inst.getOpcode(); 8240 const MCInstrDesc &Desc = MII.get(Opc); 8241 8242 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8243 8244 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || 8245 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) { 8246 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods 8247 Inst.addOperand(Inst.getOperand(0)); 8248 } 8249 8250 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) { 8251 assert(!IsPacked); 8252 Inst.addOperand(Inst.getOperand(0)); 8253 } 8254 8255 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8256 // instruction, and then figure out where to actually put the modifiers 8257 8258 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8259 if (OpSelIdx != -1) { 8260 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8261 } 8262 8263 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8264 if (OpSelHiIdx != -1) { 8265 int DefaultVal = IsPacked ? -1 : 0; 8266 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8267 DefaultVal); 8268 } 8269 8270 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8271 if (NegLoIdx != -1) { 8272 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8273 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8274 } 8275 8276 const int Ops[] = { AMDGPU::OpName::src0, 8277 AMDGPU::OpName::src1, 8278 AMDGPU::OpName::src2 }; 8279 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8280 AMDGPU::OpName::src1_modifiers, 8281 AMDGPU::OpName::src2_modifiers }; 8282 8283 unsigned OpSel = 0; 8284 unsigned OpSelHi = 0; 8285 unsigned NegLo = 0; 8286 unsigned NegHi = 0; 8287 8288 if (OpSelIdx != -1) 8289 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8290 8291 if (OpSelHiIdx != -1) 8292 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8293 8294 if (NegLoIdx != -1) { 8295 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8296 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8297 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8298 } 8299 8300 for (int J = 0; J < 3; ++J) { 8301 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8302 if (OpIdx == -1) 8303 break; 8304 8305 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8306 8307 if (ModIdx == -1) 8308 continue; 8309 8310 uint32_t ModVal = 0; 8311 8312 if ((OpSel & (1 << J)) != 0) 8313 ModVal |= SISrcMods::OP_SEL_0; 8314 8315 if ((OpSelHi & (1 << J)) != 0) 8316 ModVal |= SISrcMods::OP_SEL_1; 8317 8318 if ((NegLo & (1 << J)) != 0) 8319 ModVal |= SISrcMods::NEG; 8320 8321 if ((NegHi & (1 << J)) != 0) 8322 ModVal |= SISrcMods::NEG_HI; 8323 8324 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8325 } 8326 } 8327 8328 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8329 OptionalImmIndexMap OptIdx; 8330 cvtVOP3(Inst, Operands, OptIdx); 8331 cvtVOP3P(Inst, Operands, OptIdx); 8332 } 8333 8334 //===----------------------------------------------------------------------===// 8335 // VOPD 8336 
//===----------------------------------------------------------------------===// 8337 8338 OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { 8339 if (!hasVOPD(getSTI())) 8340 return MatchOperand_NoMatch; 8341 8342 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { 8343 SMLoc S = getLoc(); 8344 lex(); 8345 lex(); 8346 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); 8347 SMLoc OpYLoc = getLoc(); 8348 StringRef OpYName; 8349 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) { 8350 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc)); 8351 return MatchOperand_Success; 8352 } 8353 Error(OpYLoc, "expected a VOPDY instruction after ::"); 8354 return MatchOperand_ParseFail; 8355 } 8356 return MatchOperand_NoMatch; 8357 } 8358 8359 // Create VOPD MCInst operands using parsed assembler operands. 8360 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { 8361 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer 8362 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); 8363 if (Op.isReg()) { 8364 Op.addRegOperands(Inst, 1); 8365 return; 8366 } 8367 if (Op.isImm()) { 8368 Op.addImmOperands(Inst, 1); 8369 return; 8370 } 8371 llvm_unreachable("Unhandled operand type in cvtVOPD"); 8372 }; 8373 8374 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII); 8375 8376 // MCInst operands are ordered as follows: 8377 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 8378 8379 for (auto CompIdx : VOPD::COMPONENTS) { 8380 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands()); 8381 } 8382 8383 for (auto CompIdx : VOPD::COMPONENTS) { 8384 const auto &CInfo = InstInfo[CompIdx]; 8385 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum(); 8386 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx) 8387 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx)); 8388 if (CInfo.hasSrc2Acc()) 8389 addOp(CInfo.getIndexOfDstInParsedOperands()); 8390 } 8391 } 8392 8393 //===----------------------------------------------------------------------===// 8394 // dpp 8395 //===----------------------------------------------------------------------===// 8396 8397 bool AMDGPUOperand::isDPP8() const { 8398 return isImmTy(ImmTyDPP8); 8399 } 8400 8401 bool AMDGPUOperand::isDPPCtrl() const { 8402 using namespace AMDGPU::DPP; 8403 8404 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8405 if (result) { 8406 int64_t Imm = getImm(); 8407 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8408 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8409 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8410 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8411 (Imm == DppCtrl::WAVE_SHL1) || 8412 (Imm == DppCtrl::WAVE_ROL1) || 8413 (Imm == DppCtrl::WAVE_SHR1) || 8414 (Imm == DppCtrl::WAVE_ROR1) || 8415 (Imm == DppCtrl::ROW_MIRROR) || 8416 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8417 (Imm == DppCtrl::BCAST15) || 8418 (Imm == DppCtrl::BCAST31) || 8419 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8420 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8421 } 8422 return false; 8423 } 8424 8425 //===----------------------------------------------------------------------===// 8426 // mAI 8427 //===----------------------------------------------------------------------===// 8428 8429 bool 
AMDGPUOperand::isBLGP() const { 8430 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8431 } 8432 8433 bool AMDGPUOperand::isCBSZ() const { 8434 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8435 } 8436 8437 bool AMDGPUOperand::isABID() const { 8438 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8439 } 8440 8441 bool AMDGPUOperand::isS16Imm() const { 8442 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8443 } 8444 8445 bool AMDGPUOperand::isU16Imm() const { 8446 return isImm() && isUInt<16>(getImm()); 8447 } 8448 8449 //===----------------------------------------------------------------------===// 8450 // dim 8451 //===----------------------------------------------------------------------===// 8452 8453 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8454 // We want to allow "dim:1D" etc., 8455 // but the initial 1 is tokenized as an integer. 8456 std::string Token; 8457 if (isToken(AsmToken::Integer)) { 8458 SMLoc Loc = getToken().getEndLoc(); 8459 Token = std::string(getTokenStr()); 8460 lex(); 8461 if (getLoc() != Loc) 8462 return false; 8463 } 8464 8465 StringRef Suffix; 8466 if (!parseId(Suffix)) 8467 return false; 8468 Token += Suffix; 8469 8470 StringRef DimId = Token; 8471 if (DimId.startswith("SQ_RSRC_IMG_")) 8472 DimId = DimId.drop_front(12); 8473 8474 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8475 if (!DimInfo) 8476 return false; 8477 8478 Encoding = DimInfo->Encoding; 8479 return true; 8480 } 8481 8482 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8483 if (!isGFX10Plus()) 8484 return MatchOperand_NoMatch; 8485 8486 SMLoc S = getLoc(); 8487 8488 if (!trySkipId("dim", AsmToken::Colon)) 8489 return MatchOperand_NoMatch; 8490 8491 unsigned Encoding; 8492 SMLoc Loc = getLoc(); 8493 if (!parseDimId(Encoding)) { 8494 Error(Loc, "invalid dim value"); 8495 return MatchOperand_ParseFail; 8496 } 8497 8498 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8499 AMDGPUOperand::ImmTyDim)); 8500 return MatchOperand_Success; 8501 } 8502 8503 //===----------------------------------------------------------------------===// 8504 // dpp 8505 //===----------------------------------------------------------------------===// 8506 8507 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8508 SMLoc S = getLoc(); 8509 8510 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8511 return MatchOperand_NoMatch; 8512 8513 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8514 8515 int64_t Sels[8]; 8516 8517 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8518 return MatchOperand_ParseFail; 8519 8520 for (size_t i = 0; i < 8; ++i) { 8521 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8522 return MatchOperand_ParseFail; 8523 8524 SMLoc Loc = getLoc(); 8525 if (getParser().parseAbsoluteExpression(Sels[i])) 8526 return MatchOperand_ParseFail; 8527 if (0 > Sels[i] || 7 < Sels[i]) { 8528 Error(Loc, "expected a 3-bit value"); 8529 return MatchOperand_ParseFail; 8530 } 8531 } 8532 8533 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8534 return MatchOperand_ParseFail; 8535 8536 unsigned DPP8 = 0; 8537 for (size_t i = 0; i < 8; ++i) 8538 DPP8 |= (Sels[i] << (i * 3)); 8539 8540 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8541 return MatchOperand_Success; 8542 } 8543 8544 bool 8545 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8546 const 
OperandVector &Operands) { 8547 if (Ctrl == "row_newbcast") 8548 return isGFX90A(); 8549 8550 if (Ctrl == "row_share" || 8551 Ctrl == "row_xmask") 8552 return isGFX10Plus(); 8553 8554 if (Ctrl == "wave_shl" || 8555 Ctrl == "wave_shr" || 8556 Ctrl == "wave_rol" || 8557 Ctrl == "wave_ror" || 8558 Ctrl == "row_bcast") 8559 return isVI() || isGFX9(); 8560 8561 return Ctrl == "row_mirror" || 8562 Ctrl == "row_half_mirror" || 8563 Ctrl == "quad_perm" || 8564 Ctrl == "row_shl" || 8565 Ctrl == "row_shr" || 8566 Ctrl == "row_ror"; 8567 } 8568 8569 int64_t 8570 AMDGPUAsmParser::parseDPPCtrlPerm() { 8571 // quad_perm:[%d,%d,%d,%d] 8572 8573 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8574 return -1; 8575 8576 int64_t Val = 0; 8577 for (int i = 0; i < 4; ++i) { 8578 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8579 return -1; 8580 8581 int64_t Temp; 8582 SMLoc Loc = getLoc(); 8583 if (getParser().parseAbsoluteExpression(Temp)) 8584 return -1; 8585 if (Temp < 0 || Temp > 3) { 8586 Error(Loc, "expected a 2-bit value"); 8587 return -1; 8588 } 8589 8590 Val += (Temp << i * 2); 8591 } 8592 8593 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8594 return -1; 8595 8596 return Val; 8597 } 8598 8599 int64_t 8600 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8601 using namespace AMDGPU::DPP; 8602 8603 // sel:%d 8604 8605 int64_t Val; 8606 SMLoc Loc = getLoc(); 8607 8608 if (getParser().parseAbsoluteExpression(Val)) 8609 return -1; 8610 8611 struct DppCtrlCheck { 8612 int64_t Ctrl; 8613 int Lo; 8614 int Hi; 8615 }; 8616 8617 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8618 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8619 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8620 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8621 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8622 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8623 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8624 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8625 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8626 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8627 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8628 .Default({-1, 0, 0}); 8629 8630 bool Valid; 8631 if (Check.Ctrl == -1) { 8632 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8633 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8634 } else { 8635 Valid = Check.Lo <= Val && Val <= Check.Hi; 8636 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8637 } 8638 8639 if (!Valid) { 8640 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8641 return -1; 8642 } 8643 8644 return Val; 8645 } 8646 8647 OperandMatchResultTy 8648 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8649 using namespace AMDGPU::DPP; 8650 8651 if (!isToken(AsmToken::Identifier) || 8652 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8653 return MatchOperand_NoMatch; 8654 8655 SMLoc S = getLoc(); 8656 int64_t Val = -1; 8657 StringRef Ctrl; 8658 8659 parseId(Ctrl); 8660 8661 if (Ctrl == "row_mirror") { 8662 Val = DppCtrl::ROW_MIRROR; 8663 } else if (Ctrl == "row_half_mirror") { 8664 Val = DppCtrl::ROW_HALF_MIRROR; 8665 } else { 8666 if (skipToken(AsmToken::Colon, "expected a colon")) { 8667 if (Ctrl == "quad_perm") { 8668 Val = parseDPPCtrlPerm(); 8669 } else { 8670 Val = parseDPPCtrlSel(Ctrl); 8671 } 8672 } 8673 } 8674 8675 if (Val == -1) 8676 return MatchOperand_ParseFail; 8677 8678 Operands.push_back( 8679 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8680 return MatchOperand_Success; 8681 } 8682 8683 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8684 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8685 } 8686 8687 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8688 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8689 } 8690 8691 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8692 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8693 } 8694 8695 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDppBoundCtrl() const { 8696 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8697 } 8698 8699 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8700 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8701 } 8702 8703 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 8704 bool IsDPP8) { 8705 OptionalImmIndexMap OptionalIdx; 8706 unsigned Opc = Inst.getOpcode(); 8707 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8708 8709 // MAC instructions are special because they have 'old' 8710 // operand which is not tied to dst (but assumed to be). 8711 // They also have dummy unused src2_modifiers. 
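  // The loop below synthesizes both: when the 'old' operand slot is reached it
  // is filled with a copy of the dst register, and the unused src2_modifiers
  // slot gets a zero immediate.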
8712 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old); 8713 int Src2ModIdx = 8714 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); 8715 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 && 8716 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1; 8717 8718 unsigned I = 1; 8719 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8720 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8721 } 8722 8723 int Fi = 0; 8724 for (unsigned E = Operands.size(); I != E; ++I) { 8725 8726 if (IsMAC) { 8727 int NumOperands = Inst.getNumOperands(); 8728 if (OldIdx == NumOperands) { 8729 // Handle old operand 8730 constexpr int DST_IDX = 0; 8731 Inst.addOperand(Inst.getOperand(DST_IDX)); 8732 } else if (Src2ModIdx == NumOperands) { 8733 // Add unused dummy src2_modifiers 8734 Inst.addOperand(MCOperand::createImm(0)); 8735 } 8736 } 8737 8738 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8739 MCOI::TIED_TO); 8740 if (TiedTo != -1) { 8741 assert((unsigned)TiedTo < Inst.getNumOperands()); 8742 // handle tied old or src2 for MAC instructions 8743 Inst.addOperand(Inst.getOperand(TiedTo)); 8744 } 8745 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8746 // Add the register arguments 8747 if (IsDPP8 && Op.isFI()) { 8748 Fi = Op.getImm(); 8749 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8750 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8751 } else if (Op.isReg()) { 8752 Op.addRegOperands(Inst, 1); 8753 } else if (Op.isImm() && 8754 Desc.operands()[Inst.getNumOperands()].RegClass != -1) { 8755 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8756 Op.addImmOperands(Inst, 1); 8757 } else if (Op.isImm()) { 8758 OptionalIdx[Op.getImmTy()] = I; 8759 } else { 8760 llvm_unreachable("unhandled operand type"); 8761 } 8762 } 8763 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8764 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8765 8766 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8767 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8768 8769 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8770 cvtVOP3P(Inst, Operands, OptionalIdx); 8771 else if (Desc.TSFlags & SIInstrFlags::VOP3) 8772 cvtVOP3OpSel(Inst, Operands, OptionalIdx); 8773 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { 8774 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8775 } 8776 8777 if (IsDPP8) { 8778 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8779 using namespace llvm::AMDGPU::DPP; 8780 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8781 } else { 8782 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8783 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8784 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8785 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8786 8787 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) 8788 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8789 AMDGPUOperand::ImmTyDppFi); 8790 } 8791 } 8792 8793 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8794 OptionalImmIndexMap OptionalIdx; 8795 8796 unsigned I = 1; 8797 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8798 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8799 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8800 } 8801 8802 int Fi = 0; 8803 for (unsigned E = Operands.size(); I != E; ++I) { 8804 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8805 MCOI::TIED_TO); 8806 if (TiedTo != -1) { 8807 assert((unsigned)TiedTo < Inst.getNumOperands()); 8808 // handle tied old or src2 for MAC instructions 8809 Inst.addOperand(Inst.getOperand(TiedTo)); 8810 } 8811 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8812 // Add the register arguments 8813 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8814 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8815 // Skip it. 8816 continue; 8817 } 8818 8819 if (IsDPP8) { 8820 if (Op.isDPP8()) { 8821 Op.addImmOperands(Inst, 1); 8822 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8823 Op.addRegWithFPInputModsOperands(Inst, 2); 8824 } else if (Op.isFI()) { 8825 Fi = Op.getImm(); 8826 } else if (Op.isReg()) { 8827 Op.addRegOperands(Inst, 1); 8828 } else { 8829 llvm_unreachable("Invalid operand type"); 8830 } 8831 } else { 8832 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8833 Op.addRegWithFPInputModsOperands(Inst, 2); 8834 } else if (Op.isReg()) { 8835 Op.addRegOperands(Inst, 1); 8836 } else if (Op.isDPPCtrl()) { 8837 Op.addImmOperands(Inst, 1); 8838 } else if (Op.isImm()) { 8839 // Handle optional arguments 8840 OptionalIdx[Op.getImmTy()] = I; 8841 } else { 8842 llvm_unreachable("Invalid operand type"); 8843 } 8844 } 8845 } 8846 8847 if (IsDPP8) { 8848 using namespace llvm::AMDGPU::DPP; 8849 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8850 } else { 8851 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8852 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8853 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8854 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) { 8855 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8856 } 8857 } 8858 } 8859 8860 //===----------------------------------------------------------------------===// 8861 // sdwa 8862 //===----------------------------------------------------------------------===// 8863 8864 OperandMatchResultTy 8865 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8866 AMDGPUOperand::ImmTy Type) { 8867 using namespace llvm::AMDGPU::SDWA; 8868 8869 SMLoc S = getLoc(); 8870 StringRef Value; 8871 OperandMatchResultTy res; 8872 8873 SMLoc StringLoc; 8874 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8875 if (res != MatchOperand_Success) { 8876 return res; 8877 } 8878 8879 int64_t Int; 8880 Int = StringSwitch<int64_t>(Value) 8881 .Case("BYTE_0", SdwaSel::BYTE_0) 8882 .Case("BYTE_1", SdwaSel::BYTE_1) 8883 .Case("BYTE_2", SdwaSel::BYTE_2) 8884 .Case("BYTE_3", SdwaSel::BYTE_3) 8885 .Case("WORD_0", SdwaSel::WORD_0) 8886 .Case("WORD_1", SdwaSel::WORD_1) 8887 .Case("DWORD", SdwaSel::DWORD) 8888 .Default(0xffffffff); 8889 8890 if (Int == 0xffffffff) { 8891 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8892 return MatchOperand_ParseFail; 8893 } 8894 8895 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8896 return MatchOperand_Success; 8897 } 8898 8899 OperandMatchResultTy 8900 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8901 using namespace llvm::AMDGPU::SDWA; 8902 8903 SMLoc S = getLoc(); 8904 StringRef Value; 8905 OperandMatchResultTy res; 8906 8907 SMLoc StringLoc; 8908 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8909 if (res != MatchOperand_Success) { 8910 return res; 8911 } 8912 8913 int64_t Int; 8914 Int = StringSwitch<int64_t>(Value) 8915 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8916 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8917 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8918 .Default(0xffffffff); 8919 8920 if (Int == 0xffffffff) { 8921 Error(StringLoc, "invalid dst_unused value"); 8922 return MatchOperand_ParseFail; 8923 } 8924 8925 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 8926 return MatchOperand_Success; 8927 } 8928 8929 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8930 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8931 } 8932 8933 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8934 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8935 } 8936 8937 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8938 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8939 } 8940 8941 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8942 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8943 } 8944 8945 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8946 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8947 } 8948 8949 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8950 uint64_t BasicInstType, 8951 bool SkipDstVcc, 8952 bool SkipSrcVcc) { 
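  // Build the MCInst for an SDWA instruction from the parsed operands. For
  // VOP2b/VOPC forms the textual `vcc` dst/src (e.g. `v_add_i32_sdwa v1, vcc,
  // v2, v3`) may be skipped below instead of being added as an MCInst operand.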
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClampSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySdwaDstUnused,
                              DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);

      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

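    // VOPC SDWA encodings have no dst_sel/dst_unused operands (the result is
    // written to VCC/SDST), so only clamp (where present) and the source
    // selects are filled in with defaults.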
    case SIInstrFlags::VOPC:
      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

OperandMatchResultTy
AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands, unsigned MCK) {
  switch (MCK) {
  case MCK_addr64:
    return parseTokenOp("addr64", Operands);
  case MCK_done:
    return parseTokenOp("done", Operands);
  case MCK_idxen:
    return parseTokenOp("idxen", Operands);
  case MCK_lds:
    return parseTokenOp("lds", Operands);
  case MCK_offen:
    return parseTokenOp("offen", Operands);
  case MCK_off:
    return parseTokenOp("off", Operands);
  case MCK_row_95_en:
    return parseTokenOp("row_en", Operands);
  case MCK_ImmABID:
    return parseIntWithPrefix("abid", Operands, AMDGPUOperand::ImmTyABID);
  case MCK_ImmBankMask:
    return parseIntWithPrefix("bank_mask", Operands,
                              AMDGPUOperand::ImmTyDppBankMask);
  case MCK_ImmBLGP: {
    OperandMatchResultTy Res =
        parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
    if (Res == MatchOperand_NoMatch) {
      Res = parseOperandArrayWithPrefix("neg", Operands,
                                        AMDGPUOperand::ImmTyBLGP);
    }
    return Res;
  }
  case MCK_ImmCBSZ:
    return parseIntWithPrefix("cbsz", Operands, AMDGPUOperand::ImmTyCBSZ);
  case MCK_ImmCPol:
    return parseCPol(Operands);
  case MCK_ImmFI:
    return parseIntWithPrefix("fi", Operands, AMDGPUOperand::ImmTyDppFi);
  case MCK_gds:
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  case MCK_ImmNegHi:
    return parseOperandArrayWithPrefix("neg_hi", Operands,
                                       AMDGPUOperand::ImmTyNegHi);
  case MCK_ImmNegLo:
    return parseOperandArrayWithPrefix("neg_lo", Operands,
                                       AMDGPUOperand::ImmTyNegLo);
  case MCK_ImmSMEMOffset:
    return parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
  case MCK_ImmOModSI:
    return parseOModOperand(Operands);
  case MCK_ImmOpSel:
    return parseOperandArrayWithPrefix("op_sel", Operands,
                                       AMDGPUOperand::ImmTyOpSel);
  case MCK_ImmOpSelHi:
    return parseOperandArrayWithPrefix("op_sel_hi", Operands,
                                       AMDGPUOperand::ImmTyOpSelHi);
  case MCK_ImmRowMask:
    return parseIntWithPrefix("row_mask", Operands,
                              AMDGPUOperand::ImmTyDppRowMask);
  case MCK_ImmSDWADstSel:
    return parseSDWASel(Operands, "dst_sel", AMDGPUOperand::ImmTySdwaDstSel);
  case MCK_ImmSDWADstUnused:
    return parseSDWADstUnused(Operands);
  case MCK_ImmSDWASrc0Sel:
    return parseSDWASel(Operands, "src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel);
  case MCK_ImmSDWASrc1Sel:
    return parseSDWASel(Operands, "src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel);
  case MCK_tfe:
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  }
  return tryCustomParseOperand(Operands, MCK);
}

// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method handles the case where we were given an immediate
  // operand but the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_tfe:
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
}

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
}

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}