//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }


  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayAlu() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
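// The highest SGPR/VGPR/AGPR indices seen so far are published through the
// .kernel.sgpr_count, .kernel.vgpr_count and .kernel.agpr_count symbols.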
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);

  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
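    // Set when the value was given by a symbolic name rather than a number.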
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
                          const SMLoc &IDLoc);
  bool validateExeczVcczOperands(const OperandVector &Operands);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo, const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken(bool ShouldSkipSpace = true);
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                    OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
  void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtDPP(Inst, Operands, true);
  }
  void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                  bool IsDPP8 = false);
  void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtVOP3DPP(Inst, Operands, true);
  }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;

  AMDGPUOperand::Ptr defaultWaitVDST() const;
  AMDGPUOperand::Ptr defaultWaitEXP() const;
  OperandMatchResultTy parseVOPD(OperandVector &Operands);
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
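// Size is the operand size in bytes (2, 4 or 8).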
1813 static const fltSemantics *getFltSemantics(unsigned Size) { 1814 switch (Size) { 1815 case 4: 1816 return &APFloat::IEEEsingle(); 1817 case 8: 1818 return &APFloat::IEEEdouble(); 1819 case 2: 1820 return &APFloat::IEEEhalf(); 1821 default: 1822 llvm_unreachable("unsupported fp type"); 1823 } 1824 } 1825 1826 static const fltSemantics *getFltSemantics(MVT VT) { 1827 return getFltSemantics(VT.getSizeInBits() / 8); 1828 } 1829 1830 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1831 switch (OperandType) { 1832 case AMDGPU::OPERAND_REG_IMM_INT32: 1833 case AMDGPU::OPERAND_REG_IMM_FP32: 1834 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1835 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1836 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1837 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1838 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1839 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1840 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1841 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1842 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1843 case AMDGPU::OPERAND_KIMM32: 1844 return &APFloat::IEEEsingle(); 1845 case AMDGPU::OPERAND_REG_IMM_INT64: 1846 case AMDGPU::OPERAND_REG_IMM_FP64: 1847 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1848 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1849 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1850 return &APFloat::IEEEdouble(); 1851 case AMDGPU::OPERAND_REG_IMM_INT16: 1852 case AMDGPU::OPERAND_REG_IMM_FP16: 1853 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1854 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1855 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1856 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1857 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1858 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1859 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1860 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1861 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1862 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1863 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1864 case AMDGPU::OPERAND_KIMM16: 1865 return &APFloat::IEEEhalf(); 1866 default: 1867 llvm_unreachable("unsupported fp type"); 1868 } 1869 } 1870 1871 //===----------------------------------------------------------------------===// 1872 // Operand 1873 //===----------------------------------------------------------------------===// 1874 1875 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1876 bool Lost; 1877 1878 // Convert literal to single precision 1879 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1880 APFloat::rmNearestTiesToEven, 1881 &Lost); 1882 // We allow precision lost but not overflow or underflow 1883 if (Status != APFloat::opOK && 1884 Lost && 1885 ((Status & APFloat::opOverflow) != 0 || 1886 (Status & APFloat::opUnderflow) != 0)) { 1887 return false; 1888 } 1889 1890 return true; 1891 } 1892 1893 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1894 return isUIntN(Size, Val) || isIntN(Size, Val); 1895 } 1896 1897 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1898 if (VT.getScalarType() == MVT::i16) { 1899 // FP immediate values are broken. 1900 return isInlinableIntLiteral(Val); 1901 } 1902 1903 // f16/v2f16 operands work correctly for all values. 1904 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1905 } 1906 1907 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1908 1909 // This is a hack to enable named inline values like 1910 // shared_base with both 32-bit and 64-bit operands. 1911 // Note that these values are defined as 1912 // 32-bit operands only. 
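// Illustrative case: an operand written as src_shared_base or src_scc is
// treated as inlinable here for both 32-bit and 64-bit operand types, because
// such named inline values are register-kind operands.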
1913 if (isInlineValue()) { 1914 return true; 1915 } 1916 1917 if (!isImmTy(ImmTyNone)) { 1918 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1919 return false; 1920 } 1921 // TODO: We should avoid using host float here. It would be better to 1922 // check the float bit values which is what a few other places do. 1923 // We've had bot failures before due to weird NaN support on mips hosts. 1924 1925 APInt Literal(64, Imm.Val); 1926 1927 if (Imm.IsFPImm) { // We got fp literal token 1928 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1929 return AMDGPU::isInlinableLiteral64(Imm.Val, 1930 AsmParser->hasInv2PiInlineImm()); 1931 } 1932 1933 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1934 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1935 return false; 1936 1937 if (type.getScalarSizeInBits() == 16) { 1938 return isInlineableLiteralOp16( 1939 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1940 type, AsmParser->hasInv2PiInlineImm()); 1941 } 1942 1943 // Check if single precision literal is inlinable 1944 return AMDGPU::isInlinableLiteral32( 1945 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1946 AsmParser->hasInv2PiInlineImm()); 1947 } 1948 1949 // We got int literal token. 1950 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1951 return AMDGPU::isInlinableLiteral64(Imm.Val, 1952 AsmParser->hasInv2PiInlineImm()); 1953 } 1954 1955 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1956 return false; 1957 } 1958 1959 if (type.getScalarSizeInBits() == 16) { 1960 return isInlineableLiteralOp16( 1961 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1962 type, AsmParser->hasInv2PiInlineImm()); 1963 } 1964 1965 return AMDGPU::isInlinableLiteral32( 1966 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1967 AsmParser->hasInv2PiInlineImm()); 1968 } 1969 1970 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1971 // Check that this immediate can be added as literal 1972 if (!isImmTy(ImmTyNone)) { 1973 return false; 1974 } 1975 1976 if (!Imm.IsFPImm) { 1977 // We got int literal token. 1978 1979 if (type == MVT::f64 && hasFPModifiers()) { 1980 // Cannot apply fp modifiers to int literals preserving the same semantics 1981 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1982 // disable these cases. 1983 return false; 1984 } 1985 1986 unsigned Size = type.getSizeInBits(); 1987 if (Size == 64) 1988 Size = 32; 1989 1990 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1991 // types. 1992 return isSafeTruncation(Imm.Val, Size); 1993 } 1994 1995 // We got fp literal token 1996 if (type == MVT::f64) { // Expected 64-bit fp operand 1997 // We would set low 64-bits of literal to zeroes but we accept this literals 1998 return true; 1999 } 2000 2001 if (type == MVT::i64) { // Expected 64-bit int operand 2002 // We don't allow fp literals in 64-bit integer instructions. It is 2003 // unclear how we should encode them. 2004 return false; 2005 } 2006 2007 // We allow fp literals with f16x2 operands assuming that the specified 2008 // literal goes into the lower half and the upper half is zero. We also 2009 // require that the literal may be losslessly converted to f16. 2010 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 2011 (type == MVT::v2i16)? MVT::i16 : 2012 (type == MVT::v2f32)? 
MVT::f32 : type; 2013 2014 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2015 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2016 } 2017 2018 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2019 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2020 } 2021 2022 bool AMDGPUOperand::isVRegWithInputMods() const { 2023 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2024 // GFX90A allows DPP on 64-bit operands. 2025 (isRegClass(AMDGPU::VReg_64RegClassID) && 2026 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 2027 } 2028 2029 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2030 if (AsmParser->isVI()) 2031 return isVReg32(); 2032 else if (AsmParser->isGFX9Plus()) 2033 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2034 else 2035 return false; 2036 } 2037 2038 bool AMDGPUOperand::isSDWAFP16Operand() const { 2039 return isSDWAOperand(MVT::f16); 2040 } 2041 2042 bool AMDGPUOperand::isSDWAFP32Operand() const { 2043 return isSDWAOperand(MVT::f32); 2044 } 2045 2046 bool AMDGPUOperand::isSDWAInt16Operand() const { 2047 return isSDWAOperand(MVT::i16); 2048 } 2049 2050 bool AMDGPUOperand::isSDWAInt32Operand() const { 2051 return isSDWAOperand(MVT::i32); 2052 } 2053 2054 bool AMDGPUOperand::isBoolReg() const { 2055 auto FB = AsmParser->getFeatureBits(); 2056 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2057 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2058 } 2059 2060 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2061 { 2062 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2063 assert(Size == 2 || Size == 4 || Size == 8); 2064 2065 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2066 2067 if (Imm.Mods.Abs) { 2068 Val &= ~FpSignMask; 2069 } 2070 if (Imm.Mods.Neg) { 2071 Val ^= FpSignMask; 2072 } 2073 2074 return Val; 2075 } 2076 2077 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2078 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2079 Inst.getNumOperands())) { 2080 addLiteralImmOperand(Inst, Imm.Val, 2081 ApplyModifiers & 2082 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2083 } else { 2084 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2085 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2086 setImmKindNone(); 2087 } 2088 } 2089 2090 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2091 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2092 auto OpNum = Inst.getNumOperands(); 2093 // Check that this operand accepts literals 2094 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2095 2096 if (ApplyModifiers) { 2097 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2098 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2099 Val = applyInputFPModifiers(Val, Size); 2100 } 2101 2102 APInt Literal(64, Val); 2103 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2104 2105 if (Imm.IsFPImm) { // We got fp literal token 2106 switch (OpTy) { 2107 case AMDGPU::OPERAND_REG_IMM_INT64: 2108 case AMDGPU::OPERAND_REG_IMM_FP64: 2109 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2110 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2111 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2112 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2113 AsmParser->hasInv2PiInlineImm())) { 2114 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2115 setImmKindConst(); 2116 return; 2117 } 2118 2119 // Non-inlineable 2120 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2121 // For fp operands we check if low 32 bits are zeros 2122 if (Literal.getLoBits(32) != 0) { 2123 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2124 "Can't encode literal as exact 64-bit floating-point operand. " 2125 "Low 32-bits will be set to zero"); 2126 } 2127 2128 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2129 setImmKindLiteral(); 2130 return; 2131 } 2132 2133 // We don't allow fp literals in 64-bit integer instructions. It is 2134 // unclear how we should encode them. This case should be checked earlier 2135 // in predicate methods (isLiteralImm()) 2136 llvm_unreachable("fp literal in 64-bit integer instruction."); 2137 2138 case AMDGPU::OPERAND_REG_IMM_INT32: 2139 case AMDGPU::OPERAND_REG_IMM_FP32: 2140 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2141 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2142 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2143 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2144 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2145 case AMDGPU::OPERAND_REG_IMM_INT16: 2146 case AMDGPU::OPERAND_REG_IMM_FP16: 2147 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2148 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2149 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2150 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2151 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2152 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2153 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2154 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2155 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2156 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2157 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2158 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2159 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2160 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2161 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2162 case AMDGPU::OPERAND_KIMM32: 2163 case AMDGPU::OPERAND_KIMM16: { 2164 bool lost; 2165 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2166 // Convert literal to single precision 2167 FPLiteral.convert(*getOpFltSemantics(OpTy), 2168 APFloat::rmNearestTiesToEven, &lost); 2169 // We allow precision lost but not overflow or underflow. This should be 2170 // checked earlier in isLiteralImm() 2171 2172 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2173 Inst.addOperand(MCOperand::createImm(ImmVal)); 2174 setImmKindLiteral(); 2175 return; 2176 } 2177 default: 2178 llvm_unreachable("invalid operand size"); 2179 } 2180 2181 return; 2182 } 2183 2184 // We got int literal token. 2185 // Only sign extend inline immediates. 
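// Rough examples of the 32-bit handling below (illustrative operands):
//   v_add_f32 v0, -1, v1         // -1 is an inline constant, emitted as-is
//   v_add_f32 v0, 0x11213141, v1 // not inlinable, emitted as a 32-bit literal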
2186 switch (OpTy) { 2187 case AMDGPU::OPERAND_REG_IMM_INT32: 2188 case AMDGPU::OPERAND_REG_IMM_FP32: 2189 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2190 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2191 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2192 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2193 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2194 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2195 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2196 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2197 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2198 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2199 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2200 if (isSafeTruncation(Val, 32) && 2201 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2202 AsmParser->hasInv2PiInlineImm())) { 2203 Inst.addOperand(MCOperand::createImm(Val)); 2204 setImmKindConst(); 2205 return; 2206 } 2207 2208 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2209 setImmKindLiteral(); 2210 return; 2211 2212 case AMDGPU::OPERAND_REG_IMM_INT64: 2213 case AMDGPU::OPERAND_REG_IMM_FP64: 2214 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2215 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2216 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2217 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2218 Inst.addOperand(MCOperand::createImm(Val)); 2219 setImmKindConst(); 2220 return; 2221 } 2222 2223 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2224 setImmKindLiteral(); 2225 return; 2226 2227 case AMDGPU::OPERAND_REG_IMM_INT16: 2228 case AMDGPU::OPERAND_REG_IMM_FP16: 2229 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2230 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2231 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2232 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2233 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2234 if (isSafeTruncation(Val, 16) && 2235 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2236 AsmParser->hasInv2PiInlineImm())) { 2237 Inst.addOperand(MCOperand::createImm(Val)); 2238 setImmKindConst(); 2239 return; 2240 } 2241 2242 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2243 setImmKindLiteral(); 2244 return; 2245 2246 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2247 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2248 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2249 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2250 assert(isSafeTruncation(Val, 16)); 2251 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2252 AsmParser->hasInv2PiInlineImm())); 2253 2254 Inst.addOperand(MCOperand::createImm(Val)); 2255 return; 2256 } 2257 case AMDGPU::OPERAND_KIMM32: 2258 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2259 setImmKindNone(); 2260 return; 2261 case AMDGPU::OPERAND_KIMM16: 2262 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2263 setImmKindNone(); 2264 return; 2265 default: 2266 llvm_unreachable("invalid operand size"); 2267 } 2268 } 2269 2270 template <unsigned Bitwidth> 2271 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2272 APInt Literal(64, Imm.Val); 2273 setImmKindNone(); 2274 2275 if (!Imm.IsFPImm) { 2276 // We got int literal token. 
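// e.g. with Bitwidth == 16, an integer token such as 0x3c00 is passed through
// as its low 16 bits with no float conversion (illustrative value).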
2277 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2278 return; 2279 } 2280 2281 bool Lost; 2282 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2283 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2284 APFloat::rmNearestTiesToEven, &Lost); 2285 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2286 } 2287 2288 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2289 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2290 } 2291 2292 static bool isInlineValue(unsigned Reg) { 2293 switch (Reg) { 2294 case AMDGPU::SRC_SHARED_BASE: 2295 case AMDGPU::SRC_SHARED_LIMIT: 2296 case AMDGPU::SRC_PRIVATE_BASE: 2297 case AMDGPU::SRC_PRIVATE_LIMIT: 2298 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2299 return true; 2300 case AMDGPU::SRC_VCCZ: 2301 case AMDGPU::SRC_EXECZ: 2302 case AMDGPU::SRC_SCC: 2303 return true; 2304 case AMDGPU::SGPR_NULL: 2305 return true; 2306 default: 2307 return false; 2308 } 2309 } 2310 2311 bool AMDGPUOperand::isInlineValue() const { 2312 return isRegKind() && ::isInlineValue(getReg()); 2313 } 2314 2315 //===----------------------------------------------------------------------===// 2316 // AsmParser 2317 //===----------------------------------------------------------------------===// 2318 2319 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2320 if (Is == IS_VGPR) { 2321 switch (RegWidth) { 2322 default: return -1; 2323 case 32: 2324 return AMDGPU::VGPR_32RegClassID; 2325 case 64: 2326 return AMDGPU::VReg_64RegClassID; 2327 case 96: 2328 return AMDGPU::VReg_96RegClassID; 2329 case 128: 2330 return AMDGPU::VReg_128RegClassID; 2331 case 160: 2332 return AMDGPU::VReg_160RegClassID; 2333 case 192: 2334 return AMDGPU::VReg_192RegClassID; 2335 case 224: 2336 return AMDGPU::VReg_224RegClassID; 2337 case 256: 2338 return AMDGPU::VReg_256RegClassID; 2339 case 512: 2340 return AMDGPU::VReg_512RegClassID; 2341 case 1024: 2342 return AMDGPU::VReg_1024RegClassID; 2343 } 2344 } else if (Is == IS_TTMP) { 2345 switch (RegWidth) { 2346 default: return -1; 2347 case 32: 2348 return AMDGPU::TTMP_32RegClassID; 2349 case 64: 2350 return AMDGPU::TTMP_64RegClassID; 2351 case 128: 2352 return AMDGPU::TTMP_128RegClassID; 2353 case 256: 2354 return AMDGPU::TTMP_256RegClassID; 2355 case 512: 2356 return AMDGPU::TTMP_512RegClassID; 2357 } 2358 } else if (Is == IS_SGPR) { 2359 switch (RegWidth) { 2360 default: return -1; 2361 case 32: 2362 return AMDGPU::SGPR_32RegClassID; 2363 case 64: 2364 return AMDGPU::SGPR_64RegClassID; 2365 case 96: 2366 return AMDGPU::SGPR_96RegClassID; 2367 case 128: 2368 return AMDGPU::SGPR_128RegClassID; 2369 case 160: 2370 return AMDGPU::SGPR_160RegClassID; 2371 case 192: 2372 return AMDGPU::SGPR_192RegClassID; 2373 case 224: 2374 return AMDGPU::SGPR_224RegClassID; 2375 case 256: 2376 return AMDGPU::SGPR_256RegClassID; 2377 case 512: 2378 return AMDGPU::SGPR_512RegClassID; 2379 } 2380 } else if (Is == IS_AGPR) { 2381 switch (RegWidth) { 2382 default: return -1; 2383 case 32: 2384 return AMDGPU::AGPR_32RegClassID; 2385 case 64: 2386 return AMDGPU::AReg_64RegClassID; 2387 case 96: 2388 return AMDGPU::AReg_96RegClassID; 2389 case 128: 2390 return AMDGPU::AReg_128RegClassID; 2391 case 160: 2392 return AMDGPU::AReg_160RegClassID; 2393 case 192: 2394 return AMDGPU::AReg_192RegClassID; 2395 case 224: 2396 return AMDGPU::AReg_224RegClassID; 2397 case 256: 2398 return AMDGPU::AReg_256RegClassID; 2399 case 512: 2400 return AMDGPU::AReg_512RegClassID; 
2401 case 1024: 2402 return AMDGPU::AReg_1024RegClassID; 2403 } 2404 } 2405 return -1; 2406 } 2407 2408 static unsigned getSpecialRegForName(StringRef RegName) { 2409 return StringSwitch<unsigned>(RegName) 2410 .Case("exec", AMDGPU::EXEC) 2411 .Case("vcc", AMDGPU::VCC) 2412 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2413 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2414 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2415 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2416 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2417 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2418 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2419 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2420 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2421 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2422 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2423 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2424 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2425 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2426 .Case("m0", AMDGPU::M0) 2427 .Case("vccz", AMDGPU::SRC_VCCZ) 2428 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2429 .Case("execz", AMDGPU::SRC_EXECZ) 2430 .Case("src_execz", AMDGPU::SRC_EXECZ) 2431 .Case("scc", AMDGPU::SRC_SCC) 2432 .Case("src_scc", AMDGPU::SRC_SCC) 2433 .Case("tba", AMDGPU::TBA) 2434 .Case("tma", AMDGPU::TMA) 2435 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2436 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2437 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2438 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2439 .Case("vcc_lo", AMDGPU::VCC_LO) 2440 .Case("vcc_hi", AMDGPU::VCC_HI) 2441 .Case("exec_lo", AMDGPU::EXEC_LO) 2442 .Case("exec_hi", AMDGPU::EXEC_HI) 2443 .Case("tma_lo", AMDGPU::TMA_LO) 2444 .Case("tma_hi", AMDGPU::TMA_HI) 2445 .Case("tba_lo", AMDGPU::TBA_LO) 2446 .Case("tba_hi", AMDGPU::TBA_HI) 2447 .Case("pc", AMDGPU::PC_REG) 2448 .Case("null", AMDGPU::SGPR_NULL) 2449 .Default(AMDGPU::NoRegister); 2450 } 2451 2452 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2453 SMLoc &EndLoc, bool RestoreOnFailure) { 2454 auto R = parseRegister(); 2455 if (!R) return true; 2456 assert(R->isReg()); 2457 RegNo = R->getReg(); 2458 StartLoc = R->getStartLoc(); 2459 EndLoc = R->getEndLoc(); 2460 return false; 2461 } 2462 2463 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2464 SMLoc &EndLoc) { 2465 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2466 } 2467 2468 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2469 SMLoc &StartLoc, 2470 SMLoc &EndLoc) { 2471 bool Result = 2472 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2473 bool PendingErrors = getParser().hasPendingError(); 2474 getParser().clearPendingErrors(); 2475 if (PendingErrors) 2476 return MatchOperand_ParseFail; 2477 if (Result) 2478 return MatchOperand_NoMatch; 2479 return MatchOperand_Success; 2480 } 2481 2482 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2483 RegisterKind RegKind, unsigned Reg1, 2484 SMLoc Loc) { 2485 switch (RegKind) { 2486 case IS_SPECIAL: 2487 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2488 Reg = AMDGPU::EXEC; 2489 RegWidth = 64; 2490 return true; 2491 } 2492 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2493 Reg = AMDGPU::FLAT_SCR; 2494 RegWidth = 64; 2495 return true; 2496 } 2497 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2498 Reg = AMDGPU::XNACK_MASK; 2499 RegWidth = 64; 
2500 return true; 2501 } 2502 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2503 Reg = AMDGPU::VCC; 2504 RegWidth = 64; 2505 return true; 2506 } 2507 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2508 Reg = AMDGPU::TBA; 2509 RegWidth = 64; 2510 return true; 2511 } 2512 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2513 Reg = AMDGPU::TMA; 2514 RegWidth = 64; 2515 return true; 2516 } 2517 Error(Loc, "register does not fit in the list"); 2518 return false; 2519 case IS_VGPR: 2520 case IS_SGPR: 2521 case IS_AGPR: 2522 case IS_TTMP: 2523 if (Reg1 != Reg + RegWidth / 32) { 2524 Error(Loc, "registers in a list must have consecutive indices"); 2525 return false; 2526 } 2527 RegWidth += 32; 2528 return true; 2529 default: 2530 llvm_unreachable("unexpected register kind"); 2531 } 2532 } 2533 2534 struct RegInfo { 2535 StringLiteral Name; 2536 RegisterKind Kind; 2537 }; 2538 2539 static constexpr RegInfo RegularRegisters[] = { 2540 {{"v"}, IS_VGPR}, 2541 {{"s"}, IS_SGPR}, 2542 {{"ttmp"}, IS_TTMP}, 2543 {{"acc"}, IS_AGPR}, 2544 {{"a"}, IS_AGPR}, 2545 }; 2546 2547 static bool isRegularReg(RegisterKind Kind) { 2548 return Kind == IS_VGPR || 2549 Kind == IS_SGPR || 2550 Kind == IS_TTMP || 2551 Kind == IS_AGPR; 2552 } 2553 2554 static const RegInfo* getRegularRegInfo(StringRef Str) { 2555 for (const RegInfo &Reg : RegularRegisters) 2556 if (Str.startswith(Reg.Name)) 2557 return &Reg; 2558 return nullptr; 2559 } 2560 2561 static bool getRegNum(StringRef Str, unsigned& Num) { 2562 return !Str.getAsInteger(10, Num); 2563 } 2564 2565 bool 2566 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2567 const AsmToken &NextToken) const { 2568 2569 // A list of consecutive registers: [s0,s1,s2,s3] 2570 if (Token.is(AsmToken::LBrac)) 2571 return true; 2572 2573 if (!Token.is(AsmToken::Identifier)) 2574 return false; 2575 2576 // A single register like s0 or a range of registers like s[0:1] 2577 2578 StringRef Str = Token.getString(); 2579 const RegInfo *Reg = getRegularRegInfo(Str); 2580 if (Reg) { 2581 StringRef RegName = Reg->Name; 2582 StringRef RegSuffix = Str.substr(RegName.size()); 2583 if (!RegSuffix.empty()) { 2584 unsigned Num; 2585 // A single register with an index: rXX 2586 if (getRegNum(RegSuffix, Num)) 2587 return true; 2588 } else { 2589 // A range of registers: r[XX:YY]. 2590 if (NextToken.is(AsmToken::LBrac)) 2591 return true; 2592 } 2593 } 2594 2595 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2596 } 2597 2598 bool 2599 AMDGPUAsmParser::isRegister() 2600 { 2601 return isRegister(getToken(), peekToken()); 2602 } 2603 2604 unsigned 2605 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2606 unsigned RegNum, 2607 unsigned RegWidth, 2608 SMLoc Loc) { 2609 2610 assert(isRegularReg(RegKind)); 2611 2612 unsigned AlignSize = 1; 2613 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2614 // SGPR and TTMP registers must be aligned. 2615 // Max required alignment is 4 dwords. 
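// Example (sketch): s[2:3] needs an even first index, so s[1:2] is rejected
// below with "invalid register alignment", while s[2:3] and s[4:7] are fine.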
2616 AlignSize = std::min(RegWidth / 32, 4u); 2617 } 2618 2619 if (RegNum % AlignSize != 0) { 2620 Error(Loc, "invalid register alignment"); 2621 return AMDGPU::NoRegister; 2622 } 2623 2624 unsigned RegIdx = RegNum / AlignSize; 2625 int RCID = getRegClass(RegKind, RegWidth); 2626 if (RCID == -1) { 2627 Error(Loc, "invalid or unsupported register size"); 2628 return AMDGPU::NoRegister; 2629 } 2630 2631 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2632 const MCRegisterClass RC = TRI->getRegClass(RCID); 2633 if (RegIdx >= RC.getNumRegs()) { 2634 Error(Loc, "register index is out of range"); 2635 return AMDGPU::NoRegister; 2636 } 2637 2638 return RC.getRegister(RegIdx); 2639 } 2640 2641 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2642 int64_t RegLo, RegHi; 2643 if (!skipToken(AsmToken::LBrac, "missing register index")) 2644 return false; 2645 2646 SMLoc FirstIdxLoc = getLoc(); 2647 SMLoc SecondIdxLoc; 2648 2649 if (!parseExpr(RegLo)) 2650 return false; 2651 2652 if (trySkipToken(AsmToken::Colon)) { 2653 SecondIdxLoc = getLoc(); 2654 if (!parseExpr(RegHi)) 2655 return false; 2656 } else { 2657 RegHi = RegLo; 2658 } 2659 2660 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2661 return false; 2662 2663 if (!isUInt<32>(RegLo)) { 2664 Error(FirstIdxLoc, "invalid register index"); 2665 return false; 2666 } 2667 2668 if (!isUInt<32>(RegHi)) { 2669 Error(SecondIdxLoc, "invalid register index"); 2670 return false; 2671 } 2672 2673 if (RegLo > RegHi) { 2674 Error(FirstIdxLoc, "first register index should not exceed second index"); 2675 return false; 2676 } 2677 2678 Num = static_cast<unsigned>(RegLo); 2679 RegWidth = 32 * ((RegHi - RegLo) + 1); 2680 return true; 2681 } 2682 2683 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2684 unsigned &RegNum, unsigned &RegWidth, 2685 SmallVectorImpl<AsmToken> &Tokens) { 2686 assert(isToken(AsmToken::Identifier)); 2687 unsigned Reg = getSpecialRegForName(getTokenStr()); 2688 if (Reg) { 2689 RegNum = 0; 2690 RegWidth = 32; 2691 RegKind = IS_SPECIAL; 2692 Tokens.push_back(getToken()); 2693 lex(); // skip register name 2694 } 2695 return Reg; 2696 } 2697 2698 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2699 unsigned &RegNum, unsigned &RegWidth, 2700 SmallVectorImpl<AsmToken> &Tokens) { 2701 assert(isToken(AsmToken::Identifier)); 2702 StringRef RegName = getTokenStr(); 2703 auto Loc = getLoc(); 2704 2705 const RegInfo *RI = getRegularRegInfo(RegName); 2706 if (!RI) { 2707 Error(Loc, "invalid register name"); 2708 return AMDGPU::NoRegister; 2709 } 2710 2711 Tokens.push_back(getToken()); 2712 lex(); // skip register name 2713 2714 RegKind = RI->Kind; 2715 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2716 if (!RegSuffix.empty()) { 2717 // Single 32-bit register: vXX. 2718 if (!getRegNum(RegSuffix, RegNum)) { 2719 Error(Loc, "invalid register index"); 2720 return AMDGPU::NoRegister; 2721 } 2722 RegWidth = 32; 2723 } else { 2724 // Range of registers: v[XX:YY]. ":YY" is optional. 
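// e.g. "v[4:7]" yields RegNum = 4 and RegWidth = 128, while "v[4]" yields
// RegWidth = 32 (illustrative; see ParseRegRange above).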
2725 if (!ParseRegRange(RegNum, RegWidth)) 2726 return AMDGPU::NoRegister; 2727 } 2728 2729 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2730 } 2731 2732 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2733 unsigned &RegWidth, 2734 SmallVectorImpl<AsmToken> &Tokens) { 2735 unsigned Reg = AMDGPU::NoRegister; 2736 auto ListLoc = getLoc(); 2737 2738 if (!skipToken(AsmToken::LBrac, 2739 "expected a register or a list of registers")) { 2740 return AMDGPU::NoRegister; 2741 } 2742 2743 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2744 2745 auto Loc = getLoc(); 2746 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2747 return AMDGPU::NoRegister; 2748 if (RegWidth != 32) { 2749 Error(Loc, "expected a single 32-bit register"); 2750 return AMDGPU::NoRegister; 2751 } 2752 2753 for (; trySkipToken(AsmToken::Comma); ) { 2754 RegisterKind NextRegKind; 2755 unsigned NextReg, NextRegNum, NextRegWidth; 2756 Loc = getLoc(); 2757 2758 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2759 NextRegNum, NextRegWidth, 2760 Tokens)) { 2761 return AMDGPU::NoRegister; 2762 } 2763 if (NextRegWidth != 32) { 2764 Error(Loc, "expected a single 32-bit register"); 2765 return AMDGPU::NoRegister; 2766 } 2767 if (NextRegKind != RegKind) { 2768 Error(Loc, "registers in a list must be of the same kind"); 2769 return AMDGPU::NoRegister; 2770 } 2771 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2772 return AMDGPU::NoRegister; 2773 } 2774 2775 if (!skipToken(AsmToken::RBrac, 2776 "expected a comma or a closing square bracket")) { 2777 return AMDGPU::NoRegister; 2778 } 2779 2780 if (isRegularReg(RegKind)) 2781 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2782 2783 return Reg; 2784 } 2785 2786 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2787 unsigned &RegNum, unsigned &RegWidth, 2788 SmallVectorImpl<AsmToken> &Tokens) { 2789 auto Loc = getLoc(); 2790 Reg = AMDGPU::NoRegister; 2791 2792 if (isToken(AsmToken::Identifier)) { 2793 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2794 if (Reg == AMDGPU::NoRegister) 2795 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2796 } else { 2797 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2798 } 2799 2800 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2801 if (Reg == AMDGPU::NoRegister) { 2802 assert(Parser.hasPendingError()); 2803 return false; 2804 } 2805 2806 if (!subtargetHasRegister(*TRI, Reg)) { 2807 if (Reg == AMDGPU::SGPR_NULL) { 2808 Error(Loc, "'null' operand is not supported on this GPU"); 2809 } else { 2810 Error(Loc, "register not available on this GPU"); 2811 } 2812 return false; 2813 } 2814 2815 return true; 2816 } 2817 2818 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2819 unsigned &RegNum, unsigned &RegWidth, 2820 bool RestoreOnFailure /*=false*/) { 2821 Reg = AMDGPU::NoRegister; 2822 2823 SmallVector<AsmToken, 1> Tokens; 2824 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) 2825 return true; 2826 // Parsing failed: put the consumed tokens back only if the caller asked for it. 2827 if (RestoreOnFailure) { 2828 while (!Tokens.empty()) { 2829 getLexer().UnLex(Tokens.pop_back_val()); 2830 } 2831 } 2832 return false; 2833 } 2834 2835 Optional<StringRef> 2836 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2837 switch (RegKind) { 2838 case IS_VGPR: 2839 return StringRef(".amdgcn.next_free_vgpr"); 2840 case IS_SGPR: 2841 return StringRef(".amdgcn.next_free_sgpr"); 2842 default: 2843 return None; 2844 } 2845 } 2846 2847 void
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2848 auto SymbolName = getGprCountSymbolName(RegKind); 2849 assert(SymbolName && "initializing invalid register kind"); 2850 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2851 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2852 } 2853 2854 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2855 unsigned DwordRegIndex, 2856 unsigned RegWidth) { 2857 // Symbols are only defined for GCN targets 2858 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2859 return true; 2860 2861 auto SymbolName = getGprCountSymbolName(RegKind); 2862 if (!SymbolName) 2863 return true; 2864 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2865 2866 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2867 int64_t OldCount; 2868 2869 if (!Sym->isVariable()) 2870 return !Error(getLoc(), 2871 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2872 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2873 return !Error( 2874 getLoc(), 2875 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2876 2877 if (OldCount <= NewMax) 2878 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2879 2880 return true; 2881 } 2882 2883 std::unique_ptr<AMDGPUOperand> 2884 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2885 const auto &Tok = getToken(); 2886 SMLoc StartLoc = Tok.getLoc(); 2887 SMLoc EndLoc = Tok.getEndLoc(); 2888 RegisterKind RegKind; 2889 unsigned Reg, RegNum, RegWidth; 2890 2891 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2892 return nullptr; 2893 } 2894 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2895 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2896 return nullptr; 2897 } else 2898 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2899 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2900 } 2901 2902 OperandMatchResultTy 2903 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2904 // TODO: add syntactic sugar for 1/(2*PI) 2905 2906 if (isRegister()) 2907 return MatchOperand_NoMatch; 2908 assert(!isModifier()); 2909 2910 const auto& Tok = getToken(); 2911 const auto& NextTok = peekToken(); 2912 bool IsReal = Tok.is(AsmToken::Real); 2913 SMLoc S = getLoc(); 2914 bool Negate = false; 2915 2916 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2917 lex(); 2918 IsReal = true; 2919 Negate = true; 2920 } 2921 2922 if (IsReal) { 2923 // Floating-point expressions are not supported. 2924 // Can only allow floating-point literals with an 2925 // optional sign. 
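// e.g. "1.5" and "-0.5" are accepted here, whereas something like "2.0+x"
// is not a valid floating-point operand (illustrative).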
2926 2927 StringRef Num = getTokenStr(); 2928 lex(); 2929 2930 APFloat RealVal(APFloat::IEEEdouble()); 2931 auto roundMode = APFloat::rmNearestTiesToEven; 2932 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2933 return MatchOperand_ParseFail; 2934 } 2935 if (Negate) 2936 RealVal.changeSign(); 2937 2938 Operands.push_back( 2939 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2940 AMDGPUOperand::ImmTyNone, true)); 2941 2942 return MatchOperand_Success; 2943 2944 } else { 2945 int64_t IntVal; 2946 const MCExpr *Expr; 2947 SMLoc S = getLoc(); 2948 2949 if (HasSP3AbsModifier) { 2950 // This is a workaround for handling expressions 2951 // as arguments of SP3 'abs' modifier, for example: 2952 // |1.0| 2953 // |-1| 2954 // |1+x| 2955 // This syntax is not compatible with syntax of standard 2956 // MC expressions (due to the trailing '|'). 2957 SMLoc EndLoc; 2958 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2959 return MatchOperand_ParseFail; 2960 } else { 2961 if (Parser.parseExpression(Expr)) 2962 return MatchOperand_ParseFail; 2963 } 2964 2965 if (Expr->evaluateAsAbsolute(IntVal)) { 2966 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2967 } else { 2968 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2969 } 2970 2971 return MatchOperand_Success; 2972 } 2973 2974 return MatchOperand_NoMatch; 2975 } 2976 2977 OperandMatchResultTy 2978 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2979 if (!isRegister()) 2980 return MatchOperand_NoMatch; 2981 2982 if (auto R = parseRegister()) { 2983 assert(R->isReg()); 2984 Operands.push_back(std::move(R)); 2985 return MatchOperand_Success; 2986 } 2987 return MatchOperand_ParseFail; 2988 } 2989 2990 OperandMatchResultTy 2991 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2992 auto res = parseReg(Operands); 2993 if (res != MatchOperand_NoMatch) { 2994 return res; 2995 } else if (isModifier()) { 2996 return MatchOperand_NoMatch; 2997 } else { 2998 return parseImm(Operands, HasSP3AbsMod); 2999 } 3000 } 3001 3002 bool 3003 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3004 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3005 const auto &str = Token.getString(); 3006 return str == "abs" || str == "neg" || str == "sext"; 3007 } 3008 return false; 3009 } 3010 3011 bool 3012 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3013 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3014 } 3015 3016 bool 3017 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3018 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3019 } 3020 3021 bool 3022 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3023 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3024 } 3025 3026 // Check if this is an operand modifier or an opcode modifier 3027 // which may look like an expression but it is not. We should 3028 // avoid parsing these modifiers as expressions. Currently 3029 // recognized sequences are: 3030 // |...| 3031 // abs(...) 3032 // neg(...) 3033 // sext(...) 3034 // -reg 3035 // -|...| 3036 // -abs(...) 3037 // name:... 3038 // Note that simple opcode modifiers like 'gds' may be parsed as 3039 // expressions; this is a special case. See getExpressionAsToken. 
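// Illustrative instances of the sequences above (assumed operand syntax):
//   |v0|   abs(v1)   neg(v2)   sext(v3)   -v4   -|v5|   -abs(v6)
//   offset:4095   row_mask:0xf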
3040 // 3041 bool 3042 AMDGPUAsmParser::isModifier() { 3043 3044 AsmToken Tok = getToken(); 3045 AsmToken NextToken[2]; 3046 peekTokens(NextToken); 3047 3048 return isOperandModifier(Tok, NextToken[0]) || 3049 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3050 isOpcodeModifierWithVal(Tok, NextToken[0]); 3051 } 3052 3053 // Check if the current token is an SP3 'neg' modifier. 3054 // Currently this modifier is allowed in the following context: 3055 // 3056 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3057 // 2. Before an 'abs' modifier: -abs(...) 3058 // 3. Before an SP3 'abs' modifier: -|...| 3059 // 3060 // In all other cases "-" is handled as a part 3061 // of an expression that follows the sign. 3062 // 3063 // Note: When "-" is followed by an integer literal, 3064 // this is interpreted as integer negation rather 3065 // than a floating-point NEG modifier applied to N. 3066 // Beside being contr-intuitive, such use of floating-point 3067 // NEG modifier would have resulted in different meaning 3068 // of integer literals used with VOP1/2/C and VOP3, 3069 // for example: 3070 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3071 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3072 // Negative fp literals with preceding "-" are 3073 // handled likewise for uniformity 3074 // 3075 bool 3076 AMDGPUAsmParser::parseSP3NegModifier() { 3077 3078 AsmToken NextToken[2]; 3079 peekTokens(NextToken); 3080 3081 if (isToken(AsmToken::Minus) && 3082 (isRegister(NextToken[0], NextToken[1]) || 3083 NextToken[0].is(AsmToken::Pipe) || 3084 isId(NextToken[0], "abs"))) { 3085 lex(); 3086 return true; 3087 } 3088 3089 return false; 3090 } 3091 3092 OperandMatchResultTy 3093 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3094 bool AllowImm) { 3095 bool Neg, SP3Neg; 3096 bool Abs, SP3Abs; 3097 SMLoc Loc; 3098 3099 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3100 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 3101 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3102 return MatchOperand_ParseFail; 3103 } 3104 3105 SP3Neg = parseSP3NegModifier(); 3106 3107 Loc = getLoc(); 3108 Neg = trySkipId("neg"); 3109 if (Neg && SP3Neg) { 3110 Error(Loc, "expected register or immediate"); 3111 return MatchOperand_ParseFail; 3112 } 3113 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3114 return MatchOperand_ParseFail; 3115 3116 Abs = trySkipId("abs"); 3117 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3118 return MatchOperand_ParseFail; 3119 3120 Loc = getLoc(); 3121 SP3Abs = trySkipToken(AsmToken::Pipe); 3122 if (Abs && SP3Abs) { 3123 Error(Loc, "expected register or immediate"); 3124 return MatchOperand_ParseFail; 3125 } 3126 3127 OperandMatchResultTy Res; 3128 if (AllowImm) { 3129 Res = parseRegOrImm(Operands, SP3Abs); 3130 } else { 3131 Res = parseReg(Operands); 3132 } 3133 if (Res != MatchOperand_Success) { 3134 return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; 3135 } 3136 3137 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3138 return MatchOperand_ParseFail; 3139 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3140 return MatchOperand_ParseFail; 3141 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3142 return MatchOperand_ParseFail; 3143 3144 AMDGPUOperand::Modifiers Mods; 3145 Mods.Abs = Abs || SP3Abs; 3146 Mods.Neg = Neg || SP3Neg; 3147 3148 if (Mods.hasFPModifiers()) { 3149 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3150 if (Op.isExpr()) { 3151 Error(Op.getStartLoc(), "expected an absolute expression"); 3152 return MatchOperand_ParseFail; 3153 } 3154 Op.setModifiers(Mods); 3155 } 3156 return MatchOperand_Success; 3157 } 3158 3159 OperandMatchResultTy 3160 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3161 bool AllowImm) { 3162 bool Sext = trySkipId("sext"); 3163 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3164 return MatchOperand_ParseFail; 3165 3166 OperandMatchResultTy Res; 3167 if (AllowImm) { 3168 Res = parseRegOrImm(Operands); 3169 } else { 3170 Res = parseReg(Operands); 3171 } 3172 if (Res != MatchOperand_Success) { 3173 return Sext? MatchOperand_ParseFail : Res; 3174 } 3175 3176 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3177 return MatchOperand_ParseFail; 3178 3179 AMDGPUOperand::Modifiers Mods; 3180 Mods.Sext = Sext; 3181 3182 if (Mods.hasIntModifiers()) { 3183 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3184 if (Op.isExpr()) { 3185 Error(Op.getStartLoc(), "expected an absolute expression"); 3186 return MatchOperand_ParseFail; 3187 } 3188 Op.setModifiers(Mods); 3189 } 3190 3191 return MatchOperand_Success; 3192 } 3193 3194 OperandMatchResultTy 3195 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3196 return parseRegOrImmWithFPInputMods(Operands, false); 3197 } 3198 3199 OperandMatchResultTy 3200 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3201 return parseRegOrImmWithIntInputMods(Operands, false); 3202 } 3203 3204 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3205 auto Loc = getLoc(); 3206 if (trySkipId("off")) { 3207 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3208 AMDGPUOperand::ImmTyOff, false)); 3209 return MatchOperand_Success; 3210 } 3211 3212 if (!isRegister()) 3213 return MatchOperand_NoMatch; 3214 3215 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3216 if (Reg) { 3217 Operands.push_back(std::move(Reg)); 3218 return MatchOperand_Success; 3219 } 3220 3221 return MatchOperand_ParseFail; 3222 3223 } 3224 3225 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3226 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3227 3228 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3229 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3230 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3231 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3232 return Match_InvalidOperand; 3233 3234 if ((TSFlags & SIInstrFlags::VOP3) && 3235 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3236 getForcedEncodingSize() != 64) 3237 return Match_PreferE32; 3238 3239 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3240 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3241 // v_mac_f32/16 allow only dst_sel == DWORD; 3242 auto OpNum = 3243 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3244 const auto &Op = Inst.getOperand(OpNum); 3245 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3246 return Match_InvalidOperand; 3247 } 3248 } 3249 3250 return Match_Success; 3251 } 3252 3253 static ArrayRef<unsigned> getAllVariants() { 3254 static const unsigned Variants[] = { 3255 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3256 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3257 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3258 }; 3259 3260 return makeArrayRef(Variants); 3261 } 3262 3263 // What asm variants we should check 3264 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3265 if (isForcedDPP() && isForcedVOP3()) { 3266 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3267 return makeArrayRef(Variants); 3268 } 3269 if (getForcedEncodingSize() == 32) { 3270 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3271 return makeArrayRef(Variants); 3272 } 3273 3274 if (isForcedVOP3()) { 3275 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3276 return makeArrayRef(Variants); 3277 } 3278 3279 if (isForcedSDWA()) { 3280 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3281 AMDGPUAsmVariants::SDWA9}; 3282 return makeArrayRef(Variants); 3283 } 3284 3285 if (isForcedDPP()) { 3286 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3287 return makeArrayRef(Variants); 3288 } 3289 3290 return getAllVariants(); 3291 } 3292 3293 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3294 if (isForcedDPP() && isForcedVOP3()) 3295 return "e64_dpp"; 3296 3297 if (getForcedEncodingSize() == 32) 3298 return "e32"; 3299 3300 if (isForcedVOP3()) 3301 return "e64"; 3302 3303 if (isForcedSDWA()) 3304 return "sdwa"; 3305 3306 if (isForcedDPP()) 3307 return "dpp"; 3308 3309 return ""; 3310 } 3311 3312 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3313 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3314 const unsigned Num = Desc.getNumImplicitUses(); 3315 for (unsigned i = 0; i < Num; ++i) { 3316 unsigned Reg = Desc.ImplicitUses[i]; 3317 switch (Reg) { 3318 case AMDGPU::FLAT_SCR: 3319 case AMDGPU::VCC: 3320 case AMDGPU::VCC_LO: 3321 case AMDGPU::VCC_HI: 3322 case AMDGPU::M0: 3323 return Reg; 3324 default: 3325 break; 3326 } 3327 } 3328 return AMDGPU::NoRegister; 3329 } 3330 3331 // NB: This code is correct only when used to check constant 3332 // bus limitations because GFX7 support no f16 inline constants. 3333 // Note that there are no cases when a GFX7 opcode violates 3334 // constant bus limitations due to the use of an f16 constant. 
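// As a rough reference for the checks below: the 32-bit inline constants are
// the integers -16..64, the floats +/-0.5, +/-1.0, +/-2.0, +/-4.0, and, when
// hasInv2PiInlineImm() is true, 1/(2*pi).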
3335 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3336 unsigned OpIdx) const { 3337 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3338 3339 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3340 return false; 3341 } 3342 3343 const MCOperand &MO = Inst.getOperand(OpIdx); 3344 3345 int64_t Val = MO.getImm(); 3346 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3347 3348 switch (OpSize) { // expected operand size 3349 case 8: 3350 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3351 case 4: 3352 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3353 case 2: { 3354 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3355 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3356 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3357 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3358 return AMDGPU::isInlinableIntLiteral(Val); 3359 3360 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3361 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3362 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3363 return AMDGPU::isInlinableIntLiteralV216(Val); 3364 3365 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3366 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3367 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3368 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3369 3370 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3371 } 3372 default: 3373 llvm_unreachable("invalid operand size"); 3374 } 3375 } 3376 3377 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3378 if (!isGFX10Plus()) 3379 return 1; 3380 3381 switch (Opcode) { 3382 // 64-bit shift instructions can use only one scalar value input 3383 case AMDGPU::V_LSHLREV_B64_e64: 3384 case AMDGPU::V_LSHLREV_B64_gfx10: 3385 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3386 case AMDGPU::V_LSHRREV_B64_e64: 3387 case AMDGPU::V_LSHRREV_B64_gfx10: 3388 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3389 case AMDGPU::V_ASHRREV_I64_e64: 3390 case AMDGPU::V_ASHRREV_I64_gfx10: 3391 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3392 case AMDGPU::V_LSHL_B64_e64: 3393 case AMDGPU::V_LSHR_B64_e64: 3394 case AMDGPU::V_ASHR_I64_e64: 3395 return 1; 3396 default: 3397 return 2; 3398 } 3399 } 3400 3401 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3402 const MCOperand &MO = Inst.getOperand(OpIdx); 3403 if (MO.isImm()) { 3404 return !isInlineConstant(Inst, OpIdx); 3405 } else if (MO.isReg()) { 3406 auto Reg = MO.getReg(); 3407 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3408 auto PReg = mc2PseudoReg(Reg); 3409 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3410 } else { 3411 return true; 3412 } 3413 } 3414 3415 bool 3416 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3417 const OperandVector &Operands) { 3418 const unsigned Opcode = Inst.getOpcode(); 3419 const MCInstrDesc &Desc = MII.get(Opcode); 3420 unsigned LastSGPR = AMDGPU::NoRegister; 3421 unsigned ConstantBusUseCount = 0; 3422 unsigned NumLiterals = 0; 3423 unsigned LiteralSize; 3424 3425 if (Desc.TSFlags & 3426 (SIInstrFlags::VOPC | 3427 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3428 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3429 SIInstrFlags::SDWA)) { 3430 // Check special imm operands (used by madmk, etc) 3431 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3432 ++NumLiterals; 3433 LiteralSize = 4; 3434 } 3435 3436 SmallDenseSet<unsigned> SGPRsUsed; 3437 unsigned SGPRUsed = 
findImplicitSGPRReadInVOP(Inst); 3438 if (SGPRUsed != AMDGPU::NoRegister) { 3439 SGPRsUsed.insert(SGPRUsed); 3440 ++ConstantBusUseCount; 3441 } 3442 3443 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3444 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3445 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3446 3447 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3448 3449 for (int OpIdx : OpIndices) { 3450 if (OpIdx == -1) break; 3451 3452 const MCOperand &MO = Inst.getOperand(OpIdx); 3453 if (usesConstantBus(Inst, OpIdx)) { 3454 if (MO.isReg()) { 3455 LastSGPR = mc2PseudoReg(MO.getReg()); 3456 // Pairs of registers with partial intersections like these 3457 // s0, s[0:1] 3458 // flat_scratch_lo, flat_scratch 3459 // flat_scratch_lo, flat_scratch_hi 3460 // are theoretically valid but they are disabled anyway. 3461 // Note that this code mimics SIInstrInfo::verifyInstruction 3462 if (SGPRsUsed.insert(LastSGPR).second) { 3463 ++ConstantBusUseCount; 3464 } 3465 } else { // Expression or a literal 3466 3467 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3468 continue; // special operand like VINTERP attr_chan 3469 3470 // An instruction may use only one literal. 3471 // This has been validated in a previous step. 3472 // See validateVOPLiteral. 3473 // This literal may be used as more than one operand. 3474 // If all these operands are of the same size, 3475 // this literal counts as one scalar value. 3476 // Otherwise it counts as 2 scalar values. 3477 // See "GFX10 Shader Programming", section 3.6.2.3. 3478 3479 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3480 if (Size < 4) Size = 4; 3481 3482 if (NumLiterals == 0) { 3483 NumLiterals = 1; 3484 LiteralSize = Size; 3485 } else if (LiteralSize != Size) { 3486 NumLiterals = 2; 3487 } 3488 } 3489 } 3490 } 3491 } 3492 ConstantBusUseCount += NumLiterals; 3493 3494 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3495 return true; 3496 3497 SMLoc LitLoc = getLitLoc(Operands); 3498 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3499 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3500 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3501 return false; 3502 } 3503 3504 bool 3505 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3506 const OperandVector &Operands) { 3507 const unsigned Opcode = Inst.getOpcode(); 3508 const MCInstrDesc &Desc = MII.get(Opcode); 3509 3510 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3511 if (DstIdx == -1 || 3512 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3513 return true; 3514 } 3515 3516 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3517 3518 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3519 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3520 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3521 3522 assert(DstIdx != -1); 3523 const MCOperand &Dst = Inst.getOperand(DstIdx); 3524 assert(Dst.isReg()); 3525 3526 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3527 3528 for (int SrcIdx : SrcIndices) { 3529 if (SrcIdx == -1) break; 3530 const MCOperand &Src = Inst.getOperand(SrcIdx); 3531 if (Src.isReg()) { 3532 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3533 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3534 Error(getRegLoc(SrcReg, Operands), 3535 "destination must be different than all sources"); 3536 return false; 3537 } 3538 } 3539 } 3540 3541 return true; 3542 } 3543 3544 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3545 3546 const unsigned Opc = Inst.getOpcode(); 3547 const MCInstrDesc &Desc = MII.get(Opc); 3548 3549 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3550 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3551 assert(ClampIdx != -1); 3552 return Inst.getOperand(ClampIdx).getImm() == 0; 3553 } 3554 3555 return true; 3556 } 3557 3558 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3559 3560 const unsigned Opc = Inst.getOpcode(); 3561 const MCInstrDesc &Desc = MII.get(Opc); 3562 3563 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3564 return None; 3565 3566 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3567 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3568 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3569 3570 assert(VDataIdx != -1); 3571 3572 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3573 return None; 3574 3575 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3576 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3577 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3578 if (DMask == 0) 3579 DMask = 1; 3580 3581 bool isPackedD16 = false; 3582 unsigned DataSize = 3583 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3584 if (hasPackedD16()) { 3585 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3586 isPackedD16 = D16Idx >= 0; 3587 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3588 DataSize = (DataSize + 1) / 2; 3589 } 3590 3591 if ((VDataSize / 4) == DataSize + TFESize) 3592 return None; 3593 3594 return StringRef(isPackedD16 3595 ? 
"image data size does not match dmask, d16 and tfe" 3596 : "image data size does not match dmask and tfe"); 3597 } 3598 3599 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3600 const unsigned Opc = Inst.getOpcode(); 3601 const MCInstrDesc &Desc = MII.get(Opc); 3602 3603 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3604 return true; 3605 3606 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3607 3608 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3609 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3610 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3611 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3612 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3613 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3614 3615 assert(VAddr0Idx != -1); 3616 assert(SrsrcIdx != -1); 3617 assert(SrsrcIdx > VAddr0Idx); 3618 3619 if (DimIdx == -1) 3620 return true; // intersect_ray 3621 3622 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3623 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3624 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3625 unsigned ActualAddrSize = 3626 IsNSA ? SrsrcIdx - VAddr0Idx 3627 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3628 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3629 3630 unsigned ExpectedAddrSize = 3631 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3632 3633 if (!IsNSA) { 3634 if (ExpectedAddrSize > 8) 3635 ExpectedAddrSize = 16; 3636 3637 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3638 // This provides backward compatibility for assembly created 3639 // before 160b/192b/224b types were directly supported. 3640 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3641 return true; 3642 } 3643 3644 return ActualAddrSize == ExpectedAddrSize; 3645 } 3646 3647 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3648 3649 const unsigned Opc = Inst.getOpcode(); 3650 const MCInstrDesc &Desc = MII.get(Opc); 3651 3652 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3653 return true; 3654 if (!Desc.mayLoad() || !Desc.mayStore()) 3655 return true; // Not atomic 3656 3657 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3658 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3659 3660 // This is an incomplete check because image_atomic_cmpswap 3661 // may only use 0x3 and 0xf while other atomic operations 3662 // may use 0x1 and 0x3. However these limitations are 3663 // verified when we check that dmask matches dst size. 3664 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3665 } 3666 3667 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3668 3669 const unsigned Opc = Inst.getOpcode(); 3670 const MCInstrDesc &Desc = MII.get(Opc); 3671 3672 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3673 return true; 3674 3675 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3676 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3677 3678 // GATHER4 instructions use dmask in a different fashion compared to 3679 // other MIMG instructions. The only useful DMASK values are 3680 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3681 // (red,red,red,red) etc.) The ISA document doesn't mention 3682 // this. 
3683 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3684 } 3685 3686 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3687 const unsigned Opc = Inst.getOpcode(); 3688 const MCInstrDesc &Desc = MII.get(Opc); 3689 3690 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3691 return true; 3692 3693 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3694 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3695 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3696 3697 if (!BaseOpcode->MSAA) 3698 return true; 3699 3700 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3701 assert(DimIdx != -1); 3702 3703 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3704 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3705 3706 return DimInfo->MSAA; 3707 } 3708 3709 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3710 { 3711 switch (Opcode) { 3712 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3713 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3714 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3715 return true; 3716 default: 3717 return false; 3718 } 3719 } 3720 3721 // movrels* opcodes should only allow VGPRS as src0. 3722 // This is specified in .td description for vop1/vop3, 3723 // but sdwa is handled differently. See isSDWAOperand. 3724 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3725 const OperandVector &Operands) { 3726 3727 const unsigned Opc = Inst.getOpcode(); 3728 const MCInstrDesc &Desc = MII.get(Opc); 3729 3730 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3731 return true; 3732 3733 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3734 assert(Src0Idx != -1); 3735 3736 SMLoc ErrLoc; 3737 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3738 if (Src0.isReg()) { 3739 auto Reg = mc2PseudoReg(Src0.getReg()); 3740 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3741 if (!isSGPR(Reg, TRI)) 3742 return true; 3743 ErrLoc = getRegLoc(Reg, Operands); 3744 } else { 3745 ErrLoc = getConstLoc(Operands); 3746 } 3747 3748 Error(ErrLoc, "source operand must be a VGPR"); 3749 return false; 3750 } 3751 3752 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3753 const OperandVector &Operands) { 3754 3755 const unsigned Opc = Inst.getOpcode(); 3756 3757 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3758 return true; 3759 3760 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3761 assert(Src0Idx != -1); 3762 3763 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3764 if (!Src0.isReg()) 3765 return true; 3766 3767 auto Reg = mc2PseudoReg(Src0.getReg()); 3768 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3769 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3770 Error(getRegLoc(Reg, Operands), 3771 "source operand must be either a VGPR or an inline constant"); 3772 return false; 3773 } 3774 3775 return true; 3776 } 3777 3778 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3779 const OperandVector &Operands) { 3780 const unsigned Opc = Inst.getOpcode(); 3781 const MCInstrDesc &Desc = MII.get(Opc); 3782 3783 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3784 return true; 3785 3786 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3787 if (Src2Idx == -1) 3788 return true; 3789 3790 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3791 if (!Src2.isReg()) 3792 return true; 3793 3794 MCRegister Src2Reg = Src2.getReg(); 3795 MCRegister DstReg = Inst.getOperand(0).getReg(); 3796 if (Src2Reg == DstReg) 3797 return 
true; 3798 3799 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3800 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3801 return true; 3802 3803 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3804 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3805 "source 2 operand must not partially overlap with dst"); 3806 return false; 3807 } 3808 3809 return true; 3810 } 3811 3812 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3813 switch (Inst.getOpcode()) { 3814 default: 3815 return true; 3816 case V_DIV_SCALE_F32_gfx6_gfx7: 3817 case V_DIV_SCALE_F32_vi: 3818 case V_DIV_SCALE_F32_gfx10: 3819 case V_DIV_SCALE_F64_gfx6_gfx7: 3820 case V_DIV_SCALE_F64_vi: 3821 case V_DIV_SCALE_F64_gfx10: 3822 break; 3823 } 3824 3825 // TODO: Check that src0 = src1 or src2. 3826 3827 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3828 AMDGPU::OpName::src1_modifiers, 3829 AMDGPU::OpName::src2_modifiers}) { 3830 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3831 .getImm() & 3832 SISrcMods::ABS) { 3833 return false; 3834 } 3835 } 3836 3837 return true; 3838 } 3839 3840 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3841 3842 const unsigned Opc = Inst.getOpcode(); 3843 const MCInstrDesc &Desc = MII.get(Opc); 3844 3845 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3846 return true; 3847 3848 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3849 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3850 if (isCI() || isSI()) 3851 return false; 3852 } 3853 3854 return true; 3855 } 3856 3857 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3858 const unsigned Opc = Inst.getOpcode(); 3859 const MCInstrDesc &Desc = MII.get(Opc); 3860 3861 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3862 return true; 3863 3864 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3865 if (DimIdx < 0) 3866 return true; 3867 3868 long Imm = Inst.getOperand(DimIdx).getImm(); 3869 if (Imm < 0 || Imm >= 8) 3870 return false; 3871 3872 return true; 3873 } 3874 3875 static bool IsRevOpcode(const unsigned Opcode) 3876 { 3877 switch (Opcode) { 3878 case AMDGPU::V_SUBREV_F32_e32: 3879 case AMDGPU::V_SUBREV_F32_e64: 3880 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3881 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3882 case AMDGPU::V_SUBREV_F32_e32_vi: 3883 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3884 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3885 case AMDGPU::V_SUBREV_F32_e64_vi: 3886 3887 case AMDGPU::V_SUBREV_CO_U32_e32: 3888 case AMDGPU::V_SUBREV_CO_U32_e64: 3889 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3890 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3891 3892 case AMDGPU::V_SUBBREV_U32_e32: 3893 case AMDGPU::V_SUBBREV_U32_e64: 3894 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3895 case AMDGPU::V_SUBBREV_U32_e32_vi: 3896 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3897 case AMDGPU::V_SUBBREV_U32_e64_vi: 3898 3899 case AMDGPU::V_SUBREV_U32_e32: 3900 case AMDGPU::V_SUBREV_U32_e64: 3901 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3902 case AMDGPU::V_SUBREV_U32_e32_vi: 3903 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3904 case AMDGPU::V_SUBREV_U32_e64_vi: 3905 3906 case AMDGPU::V_SUBREV_F16_e32: 3907 case AMDGPU::V_SUBREV_F16_e64: 3908 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3909 case AMDGPU::V_SUBREV_F16_e32_vi: 3910 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3911 case AMDGPU::V_SUBREV_F16_e64_vi: 3912 3913 case AMDGPU::V_SUBREV_U16_e32: 3914 case AMDGPU::V_SUBREV_U16_e64: 3915 case AMDGPU::V_SUBREV_U16_e32_vi: 3916 case AMDGPU::V_SUBREV_U16_e64_vi: 3917 3918 case 
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3919 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3920 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3921 3922 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3923 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3924 3925 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3926 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3927 3928 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3929 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3930 3931 case AMDGPU::V_LSHRREV_B32_e32: 3932 case AMDGPU::V_LSHRREV_B32_e64: 3933 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3934 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3935 case AMDGPU::V_LSHRREV_B32_e32_vi: 3936 case AMDGPU::V_LSHRREV_B32_e64_vi: 3937 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3938 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3939 3940 case AMDGPU::V_ASHRREV_I32_e32: 3941 case AMDGPU::V_ASHRREV_I32_e64: 3942 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3943 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3944 case AMDGPU::V_ASHRREV_I32_e32_vi: 3945 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3946 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3947 case AMDGPU::V_ASHRREV_I32_e64_vi: 3948 3949 case AMDGPU::V_LSHLREV_B32_e32: 3950 case AMDGPU::V_LSHLREV_B32_e64: 3951 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3952 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3953 case AMDGPU::V_LSHLREV_B32_e32_vi: 3954 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3955 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3956 case AMDGPU::V_LSHLREV_B32_e64_vi: 3957 3958 case AMDGPU::V_LSHLREV_B16_e32: 3959 case AMDGPU::V_LSHLREV_B16_e64: 3960 case AMDGPU::V_LSHLREV_B16_e32_vi: 3961 case AMDGPU::V_LSHLREV_B16_e64_vi: 3962 case AMDGPU::V_LSHLREV_B16_gfx10: 3963 3964 case AMDGPU::V_LSHRREV_B16_e32: 3965 case AMDGPU::V_LSHRREV_B16_e64: 3966 case AMDGPU::V_LSHRREV_B16_e32_vi: 3967 case AMDGPU::V_LSHRREV_B16_e64_vi: 3968 case AMDGPU::V_LSHRREV_B16_gfx10: 3969 3970 case AMDGPU::V_ASHRREV_I16_e32: 3971 case AMDGPU::V_ASHRREV_I16_e64: 3972 case AMDGPU::V_ASHRREV_I16_e32_vi: 3973 case AMDGPU::V_ASHRREV_I16_e64_vi: 3974 case AMDGPU::V_ASHRREV_I16_gfx10: 3975 3976 case AMDGPU::V_LSHLREV_B64_e64: 3977 case AMDGPU::V_LSHLREV_B64_gfx10: 3978 case AMDGPU::V_LSHLREV_B64_vi: 3979 3980 case AMDGPU::V_LSHRREV_B64_e64: 3981 case AMDGPU::V_LSHRREV_B64_gfx10: 3982 case AMDGPU::V_LSHRREV_B64_vi: 3983 3984 case AMDGPU::V_ASHRREV_I64_e64: 3985 case AMDGPU::V_ASHRREV_I64_gfx10: 3986 case AMDGPU::V_ASHRREV_I64_vi: 3987 3988 case AMDGPU::V_PK_LSHLREV_B16: 3989 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3990 case AMDGPU::V_PK_LSHLREV_B16_vi: 3991 3992 case AMDGPU::V_PK_LSHRREV_B16: 3993 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3994 case AMDGPU::V_PK_LSHRREV_B16_vi: 3995 case AMDGPU::V_PK_ASHRREV_I16: 3996 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3997 case AMDGPU::V_PK_ASHRREV_I16_vi: 3998 return true; 3999 default: 4000 return false; 4001 } 4002 } 4003 4004 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4005 4006 using namespace SIInstrFlags; 4007 const unsigned Opcode = Inst.getOpcode(); 4008 const MCInstrDesc &Desc = MII.get(Opcode); 4009 4010 // lds_direct register is defined so that it can be used 4011 // with 9-bit operands only. Ignore encodings which do not accept these. 
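  // A sketch of an accepted use is "v_mov_b32 v0, lds_direct" (src0 of a VOP
  // encoding); the checks below diagnose GFX90A/GFX11+, SDWA, the *rev
  // opcodes classified by IsRevOpcode above, and any use outside src0.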
4012 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4013 if ((Desc.TSFlags & Enc) == 0) 4014 return None; 4015 4016 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4017 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4018 if (SrcIdx == -1) 4019 break; 4020 const auto &Src = Inst.getOperand(SrcIdx); 4021 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4022 4023 if (isGFX90A() || isGFX11Plus()) 4024 return StringRef("lds_direct is not supported on this GPU"); 4025 4026 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4027 return StringRef("lds_direct cannot be used with this instruction"); 4028 4029 if (SrcName != OpName::src0) 4030 return StringRef("lds_direct may be used as src0 only"); 4031 } 4032 } 4033 4034 return None; 4035 } 4036 4037 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4038 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4039 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4040 if (Op.isFlatOffset()) 4041 return Op.getStartLoc(); 4042 } 4043 return getLoc(); 4044 } 4045 4046 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4047 const OperandVector &Operands) { 4048 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4049 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4050 return true; 4051 4052 auto Opcode = Inst.getOpcode(); 4053 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4054 assert(OpNum != -1); 4055 4056 const auto &Op = Inst.getOperand(OpNum); 4057 if (!hasFlatOffsets() && Op.getImm() != 0) { 4058 Error(getFlatOffsetLoc(Operands), 4059 "flat offset modifier is not supported on this GPU"); 4060 return false; 4061 } 4062 4063 // For FLAT segment the offset must be positive; 4064 // MSB is ignored and forced to zero. 4065 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4066 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4067 if (!isIntN(OffsetSize, Op.getImm())) { 4068 Error(getFlatOffsetLoc(Operands), 4069 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4070 return false; 4071 } 4072 } else { 4073 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4074 if (!isUIntN(OffsetSize, Op.getImm())) { 4075 Error(getFlatOffsetLoc(Operands), 4076 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4077 return false; 4078 } 4079 } 4080 4081 return true; 4082 } 4083 4084 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4085 // Start with second operand because SMEM Offset cannot be dst or src0. 
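  // (Operands[0] is the mnemonic token, so the scan below begins at index 2,
  //  i.e. at the second actual operand.)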
4086 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4087 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4088 if (Op.isSMEMOffset()) 4089 return Op.getStartLoc(); 4090 } 4091 return getLoc(); 4092 } 4093 4094 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4095 const OperandVector &Operands) { 4096 if (isCI() || isSI()) 4097 return true; 4098 4099 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4100 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4101 return true; 4102 4103 auto Opcode = Inst.getOpcode(); 4104 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4105 if (OpNum == -1) 4106 return true; 4107 4108 const auto &Op = Inst.getOperand(OpNum); 4109 if (!Op.isImm()) 4110 return true; 4111 4112 uint64_t Offset = Op.getImm(); 4113 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4114 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4115 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4116 return true; 4117 4118 Error(getSMEMOffsetLoc(Operands), 4119 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4120 "expected a 21-bit signed offset"); 4121 4122 return false; 4123 } 4124 4125 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4126 unsigned Opcode = Inst.getOpcode(); 4127 const MCInstrDesc &Desc = MII.get(Opcode); 4128 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4129 return true; 4130 4131 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4132 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4133 4134 const int OpIndices[] = { Src0Idx, Src1Idx }; 4135 4136 unsigned NumExprs = 0; 4137 unsigned NumLiterals = 0; 4138 uint32_t LiteralValue; 4139 4140 for (int OpIdx : OpIndices) { 4141 if (OpIdx == -1) break; 4142 4143 const MCOperand &MO = Inst.getOperand(OpIdx); 4144 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4145 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4146 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4147 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4148 if (NumLiterals == 0 || LiteralValue != Value) { 4149 LiteralValue = Value; 4150 ++NumLiterals; 4151 } 4152 } else if (MO.isExpr()) { 4153 ++NumExprs; 4154 } 4155 } 4156 } 4157 4158 return NumLiterals + NumExprs <= 1; 4159 } 4160 4161 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4162 const unsigned Opc = Inst.getOpcode(); 4163 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4164 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4165 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4166 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4167 4168 if (OpSel & ~3) 4169 return false; 4170 } 4171 4172 uint64_t TSFlags = MII.get(Opc).TSFlags; 4173 4174 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { 4175 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4176 if (OpSelIdx != -1) { 4177 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4178 return false; 4179 } 4180 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4181 if (OpSelHiIdx != -1) { 4182 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4183 return false; 4184 } 4185 } 4186 4187 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). 
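  // In other words, op_sel on src0 or src1 is rejected for these opcodes,
  // while the higher op_sel bits (src2 and dst) are left unconstrained by
  // this particular check.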
4188 if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) && 4189 !(TSFlags & SIInstrFlags::VOP3P)) { 4190 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4191 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4192 if (OpSel & 3) 4193 return false; 4194 } 4195 4196 return true; 4197 } 4198 4199 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4200 const OperandVector &Operands) { 4201 const unsigned Opc = Inst.getOpcode(); 4202 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4203 if (DppCtrlIdx < 0) 4204 return true; 4205 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4206 4207 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4208 // DPP64 is supported for row_newbcast only. 4209 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4210 if (Src0Idx >= 0 && 4211 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4212 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4213 Error(S, "64 bit dpp only supports row_newbcast"); 4214 return false; 4215 } 4216 } 4217 4218 return true; 4219 } 4220 4221 // Check if VCC register matches wavefront size 4222 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4223 auto FB = getFeatureBits(); 4224 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4225 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4226 } 4227 4228 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4229 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4230 const OperandVector &Operands) { 4231 unsigned Opcode = Inst.getOpcode(); 4232 const MCInstrDesc &Desc = MII.get(Opcode); 4233 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4234 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4235 ImmIdx == -1) 4236 return true; 4237 4238 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4239 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4240 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4241 4242 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4243 4244 unsigned NumExprs = 0; 4245 unsigned NumLiterals = 0; 4246 uint32_t LiteralValue; 4247 4248 for (int OpIdx : OpIndices) { 4249 if (OpIdx == -1) 4250 continue; 4251 4252 const MCOperand &MO = Inst.getOperand(OpIdx); 4253 if (!MO.isImm() && !MO.isExpr()) 4254 continue; 4255 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4256 continue; 4257 4258 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4259 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4260 Error(getConstLoc(Operands), 4261 "inline constants are not allowed for this operand"); 4262 return false; 4263 } 4264 4265 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4266 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4267 if (NumLiterals == 0 || LiteralValue != Value) { 4268 LiteralValue = Value; 4269 ++NumLiterals; 4270 } 4271 } else if (MO.isExpr()) { 4272 ++NumExprs; 4273 } 4274 } 4275 NumLiterals += NumExprs; 4276 4277 if (!NumLiterals) 4278 return true; 4279 4280 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4281 Error(getLitLoc(Operands), "literal operands are not supported"); 4282 return false; 4283 } 4284 4285 if (NumLiterals > 1) { 4286 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4287 return false; 4288 } 4289 4290 return true; 4291 } 4292 4293 // Returns -1 
if not a register, 0 if VGPR and 1 if AGPR. 4294 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4295 const MCRegisterInfo *MRI) { 4296 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4297 if (OpIdx < 0) 4298 return -1; 4299 4300 const MCOperand &Op = Inst.getOperand(OpIdx); 4301 if (!Op.isReg()) 4302 return -1; 4303 4304 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4305 auto Reg = Sub ? Sub : Op.getReg(); 4306 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4307 return AGPR32.contains(Reg) ? 1 : 0; 4308 } 4309 4310 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4311 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4312 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4313 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4314 SIInstrFlags::DS)) == 0) 4315 return true; 4316 4317 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4318 : AMDGPU::OpName::vdata; 4319 4320 const MCRegisterInfo *MRI = getMRI(); 4321 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4322 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4323 4324 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4325 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4326 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4327 return false; 4328 } 4329 4330 auto FB = getFeatureBits(); 4331 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4332 if (DataAreg < 0 || DstAreg < 0) 4333 return true; 4334 return DstAreg == DataAreg; 4335 } 4336 4337 return DstAreg < 1 && DataAreg < 1; 4338 } 4339 4340 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4341 auto FB = getFeatureBits(); 4342 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4343 return true; 4344 4345 const MCRegisterInfo *MRI = getMRI(); 4346 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4347 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4348 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4349 const MCOperand &Op = Inst.getOperand(I); 4350 if (!Op.isReg()) 4351 continue; 4352 4353 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4354 if (!Sub) 4355 continue; 4356 4357 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4358 return false; 4359 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4360 return false; 4361 } 4362 4363 return true; 4364 } 4365 4366 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4367 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4368 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4369 if (Op.isBLGP()) 4370 return Op.getStartLoc(); 4371 } 4372 return SMLoc(); 4373 } 4374 4375 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4376 const OperandVector &Operands) { 4377 unsigned Opc = Inst.getOpcode(); 4378 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4379 if (BlgpIdx == -1) 4380 return true; 4381 SMLoc BLGPLoc = getBLGPLoc(Operands); 4382 if (!BLGPLoc.isValid()) 4383 return true; 4384 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4385 auto FB = getFeatureBits(); 4386 bool UsesNeg = false; 4387 if (FB[AMDGPU::FeatureGFX940Insts]) { 4388 switch (Opc) { 4389 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4390 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4391 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4392 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4393 UsesNeg = true; 4394 } 4395 } 4396 4397 if (IsNeg == UsesNeg) 
4398 return true; 4399 4400 Error(BLGPLoc, 4401 UsesNeg ? "invalid modifier: blgp is not supported" 4402 : "invalid modifier: neg is not supported"); 4403 4404 return false; 4405 } 4406 4407 // gfx90a has an undocumented limitation: 4408 // DS_GWS opcodes must use even aligned registers. 4409 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4410 const OperandVector &Operands) { 4411 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4412 return true; 4413 4414 int Opc = Inst.getOpcode(); 4415 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4416 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4417 return true; 4418 4419 const MCRegisterInfo *MRI = getMRI(); 4420 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4421 int Data0Pos = 4422 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4423 assert(Data0Pos != -1); 4424 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4425 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4426 if (RegIdx & 1) { 4427 SMLoc RegLoc = getRegLoc(Reg, Operands); 4428 Error(RegLoc, "vgpr must be even aligned"); 4429 return false; 4430 } 4431 4432 return true; 4433 } 4434 4435 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4436 const OperandVector &Operands, 4437 const SMLoc &IDLoc) { 4438 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4439 AMDGPU::OpName::cpol); 4440 if (CPolPos == -1) 4441 return true; 4442 4443 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4444 4445 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4446 if (TSFlags & SIInstrFlags::SMRD) { 4447 if (CPol && (isSI() || isCI())) { 4448 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4449 Error(S, "cache policy is not supported for SMRD instructions"); 4450 return false; 4451 } 4452 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4453 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4454 return false; 4455 } 4456 } 4457 4458 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4459 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4460 StringRef CStr(S.getPointer()); 4461 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4462 Error(S, "scc is not supported on this GPU"); 4463 return false; 4464 } 4465 4466 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4467 return true; 4468 4469 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4470 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4471 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4472 : "instruction must use glc"); 4473 return false; 4474 } 4475 } else { 4476 if (CPol & CPol::GLC) { 4477 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4478 StringRef CStr(S.getPointer()); 4479 S = SMLoc::getFromPointer( 4480 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4481 Error(S, isGFX940() ? "instruction must not use sc0" 4482 : "instruction must not use glc"); 4483 return false; 4484 } 4485 } 4486 4487 return true; 4488 } 4489 4490 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4491 const OperandVector &Operands, 4492 const SMLoc &IDLoc) { 4493 if (isGFX940()) 4494 return true; 4495 4496 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4497 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4498 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4499 return true; 4500 // This is FLAT LDS DMA. 
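  // A VALU+FLAT opcode can only be one of the LDS-DMA forms, so reaching this
  // point means the matcher picked such an opcode; the check below makes sure
  // the source text really spelled out the "lds" modifier.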
4501 4502 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4503 StringRef CStr(S.getPointer()); 4504 if (!CStr.startswith("lds")) { 4505 // This is an incorrectly selected LDS DMA version of a FLAT load opcode. 4506 // The LDS version should have the 'lds' modifier, but it follows optional 4507 // operands, so its absence is ignored by the matcher. 4508 Error(IDLoc, "invalid operands for instruction"); 4509 return false; 4510 } 4511 4512 return true; 4513 } 4514 4515 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4516 if (!isGFX11Plus()) 4517 return true; 4518 for (auto &Operand : Operands) { 4519 if (!Operand->isReg()) 4520 continue; 4521 unsigned Reg = Operand->getReg(); 4522 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4523 Error(getRegLoc(Reg, Operands), 4524 "execz and vccz are not supported on this GPU"); 4525 return false; 4526 } 4527 } 4528 return true; 4529 } 4530 4531 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4532 const SMLoc &IDLoc, 4533 const OperandVector &Operands) { 4534 if (auto ErrMsg = validateLdsDirect(Inst)) { 4535 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4536 return false; 4537 } 4538 if (!validateSOPLiteral(Inst)) { 4539 Error(getLitLoc(Operands), 4540 "only one literal operand is allowed"); 4541 return false; 4542 } 4543 if (!validateVOPLiteral(Inst, Operands)) { 4544 return false; 4545 } 4546 if (!validateConstantBusLimitations(Inst, Operands)) { 4547 return false; 4548 } 4549 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4550 return false; 4551 } 4552 if (!validateIntClampSupported(Inst)) { 4553 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4554 "integer clamping is not supported on this GPU"); 4555 return false; 4556 } 4557 if (!validateOpSel(Inst)) { 4558 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4559 "invalid op_sel operand"); 4560 return false; 4561 } 4562 if (!validateDPP(Inst, Operands)) { 4563 return false; 4564 } 4565 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
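  // MIMG, by contrast, carries d16 as an explicit operand, so it is validated
  // here.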
4566 if (!validateMIMGD16(Inst)) { 4567 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4568 "d16 modifier is not supported on this GPU"); 4569 return false; 4570 } 4571 if (!validateMIMGDim(Inst)) { 4572 Error(IDLoc, "dim modifier is required on this GPU"); 4573 return false; 4574 } 4575 if (!validateMIMGMSAA(Inst)) { 4576 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4577 "invalid dim; must be MSAA type"); 4578 return false; 4579 } 4580 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4581 Error(IDLoc, *ErrMsg); 4582 return false; 4583 } 4584 if (!validateMIMGAddrSize(Inst)) { 4585 Error(IDLoc, 4586 "image address size does not match dim and a16"); 4587 return false; 4588 } 4589 if (!validateMIMGAtomicDMask(Inst)) { 4590 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4591 "invalid atomic image dmask"); 4592 return false; 4593 } 4594 if (!validateMIMGGatherDMask(Inst)) { 4595 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4596 "invalid image_gather dmask: only one bit must be set"); 4597 return false; 4598 } 4599 if (!validateMovrels(Inst, Operands)) { 4600 return false; 4601 } 4602 if (!validateFlatOffset(Inst, Operands)) { 4603 return false; 4604 } 4605 if (!validateSMEMOffset(Inst, Operands)) { 4606 return false; 4607 } 4608 if (!validateMAIAccWrite(Inst, Operands)) { 4609 return false; 4610 } 4611 if (!validateMFMA(Inst, Operands)) { 4612 return false; 4613 } 4614 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4615 return false; 4616 } 4617 4618 if (!validateAGPRLdSt(Inst)) { 4619 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4620 ? "invalid register class: data and dst should be all VGPR or AGPR" 4621 : "invalid register class: agpr loads and stores not supported on this GPU" 4622 ); 4623 return false; 4624 } 4625 if (!validateVGPRAlign(Inst)) { 4626 Error(IDLoc, 4627 "invalid register class: vgpr tuples must be 64 bit aligned"); 4628 return false; 4629 } 4630 if (!validateGWS(Inst, Operands)) { 4631 return false; 4632 } 4633 4634 if (!validateBLGP(Inst, Operands)) { 4635 return false; 4636 } 4637 4638 if (!validateDivScale(Inst)) { 4639 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4640 return false; 4641 } 4642 if (!validateExeczVcczOperands(Operands)) { 4643 return false; 4644 } 4645 4646 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4647 return false; 4648 } 4649 4650 return true; 4651 } 4652 4653 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4654 const FeatureBitset &FBS, 4655 unsigned VariantID = 0); 4656 4657 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4658 const FeatureBitset &AvailableFeatures, 4659 unsigned VariantID); 4660 4661 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4662 const FeatureBitset &FBS) { 4663 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4664 } 4665 4666 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4667 const FeatureBitset &FBS, 4668 ArrayRef<unsigned> Variants) { 4669 for (auto Variant : Variants) { 4670 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4671 return true; 4672 } 4673 4674 return false; 4675 } 4676 4677 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4678 const SMLoc &IDLoc) { 4679 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4680 4681 // Check if requested instruction variant is supported. 4682 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4683 return false; 4684 4685 // This instruction is not supported. 4686 // Clear any other pending errors because they are no longer relevant. 
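  // (They typically come from match attempts against other instruction
  //  variants; the diagnostics emitted below supersede them.)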
4687 getParser().clearPendingErrors(); 4688 4689 // Requested instruction variant is not supported. 4690 // Check if any other variants are supported. 4691 StringRef VariantName = getMatchedVariantName(); 4692 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4693 return Error(IDLoc, 4694 Twine(VariantName, 4695 " variant of this instruction is not supported")); 4696 } 4697 4698 // Finally check if this instruction is supported on any other GPU. 4699 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4700 return Error(IDLoc, "instruction not supported on this GPU"); 4701 } 4702 4703 // Instruction not supported on any GPU. Probably a typo. 4704 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4705 return Error(IDLoc, "invalid instruction" + Suggestion); 4706 } 4707 4708 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4709 OperandVector &Operands, 4710 MCStreamer &Out, 4711 uint64_t &ErrorInfo, 4712 bool MatchingInlineAsm) { 4713 MCInst Inst; 4714 unsigned Result = Match_Success; 4715 for (auto Variant : getMatchedVariants()) { 4716 uint64_t EI; 4717 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4718 Variant); 4719 // We order match statuses from least to most specific. We use most specific 4720 // status as resulting 4721 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4722 if ((R == Match_Success) || 4723 (R == Match_PreferE32) || 4724 (R == Match_MissingFeature && Result != Match_PreferE32) || 4725 (R == Match_InvalidOperand && Result != Match_MissingFeature 4726 && Result != Match_PreferE32) || 4727 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4728 && Result != Match_MissingFeature 4729 && Result != Match_PreferE32)) { 4730 Result = R; 4731 ErrorInfo = EI; 4732 } 4733 if (R == Match_Success) 4734 break; 4735 } 4736 4737 if (Result == Match_Success) { 4738 if (!validateInstruction(Inst, IDLoc, Operands)) { 4739 return true; 4740 } 4741 Inst.setLoc(IDLoc); 4742 Out.emitInstruction(Inst, getSTI()); 4743 return false; 4744 } 4745 4746 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4747 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4748 return true; 4749 } 4750 4751 switch (Result) { 4752 default: break; 4753 case Match_MissingFeature: 4754 // It has been verified that the specified instruction 4755 // mnemonic is valid. A match was found but it requires 4756 // features which are not supported on this GPU. 
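    // An illustrative case: an operand form that is only legal in the other
    // wavefront-size mode ends up here, because the mnemonic itself exists on
    // this target.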
4757 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4758 4759 case Match_InvalidOperand: { 4760 SMLoc ErrorLoc = IDLoc; 4761 if (ErrorInfo != ~0ULL) { 4762 if (ErrorInfo >= Operands.size()) { 4763 return Error(IDLoc, "too few operands for instruction"); 4764 } 4765 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4766 if (ErrorLoc == SMLoc()) 4767 ErrorLoc = IDLoc; 4768 } 4769 return Error(ErrorLoc, "invalid operand for instruction"); 4770 } 4771 4772 case Match_PreferE32: 4773 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4774 "should be encoded as e32"); 4775 case Match_MnemonicFail: 4776 llvm_unreachable("Invalid instructions should have been handled already"); 4777 } 4778 llvm_unreachable("Implement any new match types added!"); 4779 } 4780 4781 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4782 int64_t Tmp = -1; 4783 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4784 return true; 4785 } 4786 if (getParser().parseAbsoluteExpression(Tmp)) { 4787 return true; 4788 } 4789 Ret = static_cast<uint32_t>(Tmp); 4790 return false; 4791 } 4792 4793 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4794 uint32_t &Minor) { 4795 if (ParseAsAbsoluteExpression(Major)) 4796 return TokError("invalid major version"); 4797 4798 if (!trySkipToken(AsmToken::Comma)) 4799 return TokError("minor version number required, comma expected"); 4800 4801 if (ParseAsAbsoluteExpression(Minor)) 4802 return TokError("invalid minor version"); 4803 4804 return false; 4805 } 4806 4807 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4808 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4809 return TokError("directive only supported for amdgcn architecture"); 4810 4811 std::string TargetIDDirective; 4812 SMLoc TargetStart = getTok().getLoc(); 4813 if (getParser().parseEscapedString(TargetIDDirective)) 4814 return true; 4815 4816 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4817 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4818 return getParser().Error(TargetRange.Start, 4819 (Twine(".amdgcn_target directive's target id ") + 4820 Twine(TargetIDDirective) + 4821 Twine(" does not match the specified target id ") + 4822 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4823 4824 return false; 4825 } 4826 4827 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4828 return Error(Range.Start, "value out of range", Range); 4829 } 4830 4831 bool AMDGPUAsmParser::calculateGPRBlocks( 4832 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4833 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4834 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4835 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4836 // TODO(scott.linder): These calculations are duplicated from 4837 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
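  // A rough sketch of the arithmetic, assuming a VGPR allocation granule of 4:
  // NextFreeVGPR = 10 is rounded up to 12 and encoded by getNumVGPRBlocks()
  // as 12 / 4 - 1 = 2. The actual granule sizes are subtarget dependent.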
4838 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4839 4840 unsigned NumVGPRs = NextFreeVGPR; 4841 unsigned NumSGPRs = NextFreeSGPR; 4842 4843 if (Version.Major >= 10) 4844 NumSGPRs = 0; 4845 else { 4846 unsigned MaxAddressableNumSGPRs = 4847 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4848 4849 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4850 NumSGPRs > MaxAddressableNumSGPRs) 4851 return OutOfRangeError(SGPRRange); 4852 4853 NumSGPRs += 4854 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4855 4856 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4857 NumSGPRs > MaxAddressableNumSGPRs) 4858 return OutOfRangeError(SGPRRange); 4859 4860 if (Features.test(FeatureSGPRInitBug)) 4861 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4862 } 4863 4864 VGPRBlocks = 4865 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4866 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4867 4868 return false; 4869 } 4870 4871 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4872 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4873 return TokError("directive only supported for amdgcn architecture"); 4874 4875 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4876 return TokError("directive only supported for amdhsa OS"); 4877 4878 StringRef KernelName; 4879 if (getParser().parseIdentifier(KernelName)) 4880 return true; 4881 4882 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4883 4884 StringSet<> Seen; 4885 4886 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4887 4888 SMRange VGPRRange; 4889 uint64_t NextFreeVGPR = 0; 4890 uint64_t AccumOffset = 0; 4891 uint64_t SharedVGPRCount = 0; 4892 SMRange SGPRRange; 4893 uint64_t NextFreeSGPR = 0; 4894 4895 // Count the number of user SGPRs implied from the enabled feature bits. 4896 unsigned ImpliedUserSGPRCount = 0; 4897 4898 // Track if the asm explicitly contains the directive for the user SGPR 4899 // count. 
4900 Optional<unsigned> ExplicitUserSGPRCount; 4901 bool ReserveVCC = true; 4902 bool ReserveFlatScr = true; 4903 Optional<bool> EnableWavefrontSize32; 4904 4905 while (true) { 4906 while (trySkipToken(AsmToken::EndOfStatement)); 4907 4908 StringRef ID; 4909 SMRange IDRange = getTok().getLocRange(); 4910 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4911 return true; 4912 4913 if (ID == ".end_amdhsa_kernel") 4914 break; 4915 4916 if (!Seen.insert(ID).second) 4917 return TokError(".amdhsa_ directives cannot be repeated"); 4918 4919 SMLoc ValStart = getLoc(); 4920 int64_t IVal; 4921 if (getParser().parseAbsoluteExpression(IVal)) 4922 return true; 4923 SMLoc ValEnd = getLoc(); 4924 SMRange ValRange = SMRange(ValStart, ValEnd); 4925 4926 if (IVal < 0) 4927 return OutOfRangeError(ValRange); 4928 4929 uint64_t Val = IVal; 4930 4931 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4932 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4933 return OutOfRangeError(RANGE); \ 4934 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4935 4936 if (ID == ".amdhsa_group_segment_fixed_size") { 4937 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4938 return OutOfRangeError(ValRange); 4939 KD.group_segment_fixed_size = Val; 4940 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4941 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4942 return OutOfRangeError(ValRange); 4943 KD.private_segment_fixed_size = Val; 4944 } else if (ID == ".amdhsa_kernarg_size") { 4945 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4946 return OutOfRangeError(ValRange); 4947 KD.kernarg_size = Val; 4948 } else if (ID == ".amdhsa_user_sgpr_count") { 4949 ExplicitUserSGPRCount = Val; 4950 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4951 if (hasArchitectedFlatScratch()) 4952 return Error(IDRange.Start, 4953 "directive is not supported with architected flat scratch", 4954 IDRange); 4955 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4956 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4957 Val, ValRange); 4958 if (Val) 4959 ImpliedUserSGPRCount += 4; 4960 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4961 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4962 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4963 ValRange); 4964 if (Val) 4965 ImpliedUserSGPRCount += 2; 4966 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4967 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4968 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4969 ValRange); 4970 if (Val) 4971 ImpliedUserSGPRCount += 2; 4972 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4973 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4974 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4975 Val, ValRange); 4976 if (Val) 4977 ImpliedUserSGPRCount += 2; 4978 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4979 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4980 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4981 ValRange); 4982 if (Val) 4983 ImpliedUserSGPRCount += 2; 4984 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4985 if (hasArchitectedFlatScratch()) 4986 return Error(IDRange.Start, 4987 "directive is not supported with architected flat scratch", 4988 IDRange); 4989 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4990 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4991 ValRange); 4992 if (Val) 4993 ImpliedUserSGPRCount += 2; 4994 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4995 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4996 
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4997 Val, ValRange); 4998 if (Val) 4999 ImpliedUserSGPRCount += 1; 5000 } else if (ID == ".amdhsa_wavefront_size32") { 5001 if (IVersion.Major < 10) 5002 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5003 EnableWavefrontSize32 = Val; 5004 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5005 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5006 Val, ValRange); 5007 } else if (ID == ".amdhsa_uses_dynamic_stack") { 5008 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5009 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange); 5010 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5011 if (hasArchitectedFlatScratch()) 5012 return Error(IDRange.Start, 5013 "directive is not supported with architected flat scratch", 5014 IDRange); 5015 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5016 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5017 } else if (ID == ".amdhsa_enable_private_segment") { 5018 if (!hasArchitectedFlatScratch()) 5019 return Error( 5020 IDRange.Start, 5021 "directive is not supported without architected flat scratch", 5022 IDRange); 5023 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5024 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5025 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5026 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5027 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5028 ValRange); 5029 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5030 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5031 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5032 ValRange); 5033 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5034 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5035 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5036 ValRange); 5037 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5038 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5039 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5040 ValRange); 5041 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5042 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5043 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5044 ValRange); 5045 } else if (ID == ".amdhsa_next_free_vgpr") { 5046 VGPRRange = ValRange; 5047 NextFreeVGPR = Val; 5048 } else if (ID == ".amdhsa_next_free_sgpr") { 5049 SGPRRange = ValRange; 5050 NextFreeSGPR = Val; 5051 } else if (ID == ".amdhsa_accum_offset") { 5052 if (!isGFX90A()) 5053 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5054 AccumOffset = Val; 5055 } else if (ID == ".amdhsa_reserve_vcc") { 5056 if (!isUInt<1>(Val)) 5057 return OutOfRangeError(ValRange); 5058 ReserveVCC = Val; 5059 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5060 if (IVersion.Major < 7) 5061 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5062 if (hasArchitectedFlatScratch()) 5063 return Error(IDRange.Start, 5064 "directive is not supported with architected flat scratch", 5065 IDRange); 5066 if (!isUInt<1>(Val)) 5067 return OutOfRangeError(ValRange); 5068 ReserveFlatScr = Val; 5069 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5070 if (IVersion.Major < 8) 5071 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5072 if (!isUInt<1>(Val)) 5073 return OutOfRangeError(ValRange); 5074 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5075 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5076 IDRange); 5077 } else if (ID == ".amdhsa_float_round_mode_32") { 5078 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 
5079 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5080 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5081 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5082 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5083 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5084 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5085 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5086 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5087 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5088 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5089 ValRange); 5090 } else if (ID == ".amdhsa_dx10_clamp") { 5091 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5092 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5093 } else if (ID == ".amdhsa_ieee_mode") { 5094 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5095 Val, ValRange); 5096 } else if (ID == ".amdhsa_fp16_overflow") { 5097 if (IVersion.Major < 9) 5098 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5099 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5100 ValRange); 5101 } else if (ID == ".amdhsa_tg_split") { 5102 if (!isGFX90A()) 5103 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5104 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5105 ValRange); 5106 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5107 if (IVersion.Major < 10) 5108 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5109 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5110 ValRange); 5111 } else if (ID == ".amdhsa_memory_ordered") { 5112 if (IVersion.Major < 10) 5113 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5114 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5115 ValRange); 5116 } else if (ID == ".amdhsa_forward_progress") { 5117 if (IVersion.Major < 10) 5118 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5119 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5120 ValRange); 5121 } else if (ID == ".amdhsa_shared_vgpr_count") { 5122 if (IVersion.Major < 10) 5123 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5124 SharedVGPRCount = Val; 5125 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5126 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, 5127 ValRange); 5128 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5129 PARSE_BITS_ENTRY( 5130 KD.compute_pgm_rsrc2, 5131 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5132 ValRange); 5133 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5134 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5135 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5136 Val, ValRange); 5137 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5138 PARSE_BITS_ENTRY( 5139 KD.compute_pgm_rsrc2, 5140 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5141 ValRange); 5142 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5143 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5144 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5145 Val, ValRange); 5146 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5147 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5148 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5149 Val, ValRange); 5150 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5151 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5152 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5153 Val, ValRange); 5154 
} else if (ID == ".amdhsa_exception_int_div_zero") { 5155 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5156 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5157 Val, ValRange); 5158 } else { 5159 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5160 } 5161 5162 #undef PARSE_BITS_ENTRY 5163 } 5164 5165 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5166 return TokError(".amdhsa_next_free_vgpr directive is required"); 5167 5168 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5169 return TokError(".amdhsa_next_free_sgpr directive is required"); 5170 5171 unsigned VGPRBlocks; 5172 unsigned SGPRBlocks; 5173 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5174 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5175 EnableWavefrontSize32, NextFreeVGPR, 5176 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5177 SGPRBlocks)) 5178 return true; 5179 5180 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5181 VGPRBlocks)) 5182 return OutOfRangeError(VGPRRange); 5183 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5184 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5185 5186 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5187 SGPRBlocks)) 5188 return OutOfRangeError(SGPRRange); 5189 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5190 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5191 SGPRBlocks); 5192 5193 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5194 return TokError(".amdhsa_user_sgpr_count smaller than implied by " 5195 "enabled user SGPRs"); 5196 5197 unsigned UserSGPRCount = 5198 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5199 5200 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5201 return TokError("too many user SGPRs enabled"); 5202 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5203 UserSGPRCount); 5204 5205 if (isGFX90A()) { 5206 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5207 return TokError(".amdhsa_accum_offset directive is required"); 5208 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5209 return TokError("accum_offset should be in range [4..256] in " 5210 "increments of 4"); 5211 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5212 return TokError("accum_offset exceeds total VGPR allocation"); 5213 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5214 (AccumOffset / 4 - 1)); 5215 } 5216 5217 if (IVersion.Major == 10) { 5218 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5219 if (SharedVGPRCount && EnableWavefrontSize32) { 5220 return TokError("shared_vgpr_count directive not valid on " 5221 "wavefront size 32"); 5222 } 5223 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5224 return TokError("shared_vgpr_count*2 + " 5225 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5226 "exceed 63\n"); 5227 } 5228 } 5229 5230 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5231 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5232 ReserveFlatScr); 5233 return false; 5234 } 5235 5236 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5237 uint32_t Major; 5238 uint32_t Minor; 5239 5240 if (ParseDirectiveMajorMinor(Major, Minor)) 5241 return true; 5242 5243 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5244 return false; 5245 } 5246 5247 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5248 uint32_t Major; 5249 uint32_t Minor; 5250 uint32_t 
Stepping; 5251 StringRef VendorName; 5252 StringRef ArchName; 5253 5254 // If this directive has no arguments, then use the ISA version for the 5255 // targeted GPU. 5256 if (isToken(AsmToken::EndOfStatement)) { 5257 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5258 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5259 ISA.Stepping, 5260 "AMD", "AMDGPU"); 5261 return false; 5262 } 5263 5264 if (ParseDirectiveMajorMinor(Major, Minor)) 5265 return true; 5266 5267 if (!trySkipToken(AsmToken::Comma)) 5268 return TokError("stepping version number required, comma expected"); 5269 5270 if (ParseAsAbsoluteExpression(Stepping)) 5271 return TokError("invalid stepping version"); 5272 5273 if (!trySkipToken(AsmToken::Comma)) 5274 return TokError("vendor name required, comma expected"); 5275 5276 if (!parseString(VendorName, "invalid vendor name")) 5277 return true; 5278 5279 if (!trySkipToken(AsmToken::Comma)) 5280 return TokError("arch name required, comma expected"); 5281 5282 if (!parseString(ArchName, "invalid arch name")) 5283 return true; 5284 5285 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5286 VendorName, ArchName); 5287 return false; 5288 } 5289 5290 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5291 amd_kernel_code_t &Header) { 5292 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5293 // assembly for backwards compatibility. 5294 if (ID == "max_scratch_backing_memory_byte_size") { 5295 Parser.eatToEndOfStatement(); 5296 return false; 5297 } 5298 5299 SmallString<40> ErrStr; 5300 raw_svector_ostream Err(ErrStr); 5301 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5302 return TokError(Err.str()); 5303 } 5304 Lex(); 5305 5306 if (ID == "enable_wavefront_size32") { 5307 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5308 if (!isGFX10Plus()) 5309 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5310 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5311 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5312 } else { 5313 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5314 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5315 } 5316 } 5317 5318 if (ID == "wavefront_size") { 5319 if (Header.wavefront_size == 5) { 5320 if (!isGFX10Plus()) 5321 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5322 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5323 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5324 } else if (Header.wavefront_size == 6) { 5325 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5326 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5327 } 5328 } 5329 5330 if (ID == "enable_wgp_mode") { 5331 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5332 !isGFX10Plus()) 5333 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5334 } 5335 5336 if (ID == "enable_mem_ordered") { 5337 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5338 !isGFX10Plus()) 5339 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5340 } 5341 5342 if (ID == "enable_fwd_progress") { 5343 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5344 !isGFX10Plus()) 5345 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5346 } 5347 5348 return false; 5349 } 5350 5351 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5352 
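// An illustrative sketch of the directive block this routine accepts; the field
// names are examples drawn from the checks in ParseAMDKernelCodeTValue above, and
// the values shown are assumptions for the example, not defaults:
//
//   .amd_kernel_code_t
//     enable_wavefront_size32 = 1
//     wavefront_size = 5
//   .end_amd_kernel_code_t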
amd_kernel_code_t Header; 5353 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5354 5355 while (true) { 5356 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5357 // will set the current token to EndOfStatement. 5358 while(trySkipToken(AsmToken::EndOfStatement)); 5359 5360 StringRef ID; 5361 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5362 return true; 5363 5364 if (ID == ".end_amd_kernel_code_t") 5365 break; 5366 5367 if (ParseAMDKernelCodeTValue(ID, Header)) 5368 return true; 5369 } 5370 5371 getTargetStreamer().EmitAMDKernelCodeT(Header); 5372 5373 return false; 5374 } 5375 5376 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5377 StringRef KernelName; 5378 if (!parseId(KernelName, "expected symbol name")) 5379 return true; 5380 5381 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5382 ELF::STT_AMDGPU_HSA_KERNEL); 5383 5384 KernelScope.initialize(getContext()); 5385 return false; 5386 } 5387 5388 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5389 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5390 return Error(getLoc(), 5391 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5392 "architectures"); 5393 } 5394 5395 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5396 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5397 return Error(getParser().getTok().getLoc(), "target id must match options"); 5398 5399 getTargetStreamer().EmitISAVersion(); 5400 Lex(); 5401 5402 return false; 5403 } 5404 5405 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5406 const char *AssemblerDirectiveBegin; 5407 const char *AssemblerDirectiveEnd; 5408 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5409 isHsaAbiVersion3AndAbove(&getSTI()) 5410 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5411 HSAMD::V3::AssemblerDirectiveEnd) 5412 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5413 HSAMD::AssemblerDirectiveEnd); 5414 5415 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5416 return Error(getLoc(), 5417 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5418 "not available on non-amdhsa OSes")).str()); 5419 } 5420 5421 std::string HSAMetadataString; 5422 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5423 HSAMetadataString)) 5424 return true; 5425 5426 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5427 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5428 return Error(getLoc(), "invalid HSA metadata"); 5429 } else { 5430 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5431 return Error(getLoc(), "invalid HSA metadata"); 5432 } 5433 5434 return false; 5435 } 5436 5437 /// Common code to parse out a block of text (typically YAML) between start and 5438 /// end directives. 
5439 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5440 const char *AssemblerDirectiveEnd, 5441 std::string &CollectString) { 5442 5443 raw_string_ostream CollectStream(CollectString); 5444 5445 getLexer().setSkipSpace(false); 5446 5447 bool FoundEnd = false; 5448 while (!isToken(AsmToken::Eof)) { 5449 while (isToken(AsmToken::Space)) { 5450 CollectStream << getTokenStr(); 5451 Lex(); 5452 } 5453 5454 if (trySkipId(AssemblerDirectiveEnd)) { 5455 FoundEnd = true; 5456 break; 5457 } 5458 5459 CollectStream << Parser.parseStringToEndOfStatement() 5460 << getContext().getAsmInfo()->getSeparatorString(); 5461 5462 Parser.eatToEndOfStatement(); 5463 } 5464 5465 getLexer().setSkipSpace(true); 5466 5467 if (isToken(AsmToken::Eof) && !FoundEnd) { 5468 return TokError(Twine("expected directive ") + 5469 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5470 } 5471 5472 CollectStream.flush(); 5473 return false; 5474 } 5475 5476 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5477 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5478 std::string String; 5479 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5480 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5481 return true; 5482 5483 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5484 if (!PALMetadata->setFromString(String)) 5485 return Error(getLoc(), "invalid PAL metadata"); 5486 return false; 5487 } 5488 5489 /// Parse the assembler directive for old linear-format PAL metadata. 5490 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5491 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5492 return Error(getLoc(), 5493 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5494 "not available on non-amdpal OSes")).str()); 5495 } 5496 5497 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5498 PALMetadata->setLegacy(); 5499 for (;;) { 5500 uint32_t Key, Value; 5501 if (ParseAsAbsoluteExpression(Key)) { 5502 return TokError(Twine("invalid value in ") + 5503 Twine(PALMD::AssemblerDirective)); 5504 } 5505 if (!trySkipToken(AsmToken::Comma)) { 5506 return TokError(Twine("expected an even number of values in ") + 5507 Twine(PALMD::AssemblerDirective)); 5508 } 5509 if (ParseAsAbsoluteExpression(Value)) { 5510 return TokError(Twine("invalid value in ") + 5511 Twine(PALMD::AssemblerDirective)); 5512 } 5513 PALMetadata->setRegister(Key, Value); 5514 if (!trySkipToken(AsmToken::Comma)) 5515 break; 5516 } 5517 return false; 5518 } 5519 5520 /// ParseDirectiveAMDGPULDS 5521 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5522 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5523 if (getParser().checkForValidSection()) 5524 return true; 5525 5526 StringRef Name; 5527 SMLoc NameLoc = getLoc(); 5528 if (getParser().parseIdentifier(Name)) 5529 return TokError("expected identifier in directive"); 5530 5531 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5532 if (parseToken(AsmToken::Comma, "expected ','")) 5533 return true; 5534 5535 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5536 5537 int64_t Size; 5538 SMLoc SizeLoc = getLoc(); 5539 if (getParser().parseAbsoluteExpression(Size)) 5540 return true; 5541 if (Size < 0) 5542 return Error(SizeLoc, "size must be non-negative"); 5543 if (Size > LocalMemorySize) 5544 return Error(SizeLoc, "size is too large"); 5545 5546 int64_t Alignment = 4; 5547 if (trySkipToken(AsmToken::Comma)) { 5548 SMLoc AlignLoc = getLoc(); 5549 if 
(getParser().parseAbsoluteExpression(Alignment)) 5550 return true; 5551 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5552 return Error(AlignLoc, "alignment must be a power of two"); 5553 5554 // Alignment larger than the size of LDS is possible in theory, as long 5555 as the linker manages to place the symbol at address 0, but we do want 5556 to make sure the alignment fits nicely into a 32-bit integer. 5557 if (Alignment >= 1u << 31) 5558 return Error(AlignLoc, "alignment is too large"); 5559 } 5560 5561 if (parseEOL()) 5562 return true; 5563 5564 Symbol->redefineIfPossible(); 5565 if (!Symbol->isUndefined()) 5566 return Error(NameLoc, "invalid symbol redefinition"); 5567 5568 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5569 return false; 5570 } 5571 5572 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5573 StringRef IDVal = DirectiveID.getString(); 5574 5575 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5576 if (IDVal == ".amdhsa_kernel") 5577 return ParseDirectiveAMDHSAKernel(); 5578 5579 // TODO: Restructure/combine with PAL metadata directive. 5580 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5581 return ParseDirectiveHSAMetadata(); 5582 } else { 5583 if (IDVal == ".hsa_code_object_version") 5584 return ParseDirectiveHSACodeObjectVersion(); 5585 5586 if (IDVal == ".hsa_code_object_isa") 5587 return ParseDirectiveHSACodeObjectISA(); 5588 5589 if (IDVal == ".amd_kernel_code_t") 5590 return ParseDirectiveAMDKernelCodeT(); 5591 5592 if (IDVal == ".amdgpu_hsa_kernel") 5593 return ParseDirectiveAMDGPUHsaKernel(); 5594 5595 if (IDVal == ".amd_amdgpu_isa") 5596 return ParseDirectiveISAVersion(); 5597 5598 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5599 return ParseDirectiveHSAMetadata(); 5600 } 5601 5602 if (IDVal == ".amdgcn_target") 5603 return ParseDirectiveAMDGCNTarget(); 5604 5605 if (IDVal == ".amdgpu_lds") 5606 return ParseDirectiveAMDGPULDS(); 5607 5608 if (IDVal == PALMD::AssemblerDirectiveBegin) 5609 return ParseDirectivePALMetadataBegin(); 5610 5611 if (IDVal == PALMD::AssemblerDirective) 5612 return ParseDirectivePALMetadata(); 5613 5614 return true; 5615 } 5616 5617 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5618 unsigned RegNo) { 5619 5620 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5621 return isGFX9Plus(); 5622 5623 // GFX10+ has 2 more SGPRs 104 and 105. 5624 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5625 return hasSGPR104_SGPR105(); 5626 5627 switch (RegNo) { 5628 case AMDGPU::SRC_SHARED_BASE: 5629 case AMDGPU::SRC_SHARED_LIMIT: 5630 case AMDGPU::SRC_PRIVATE_BASE: 5631 case AMDGPU::SRC_PRIVATE_LIMIT: 5632 return isGFX9Plus(); 5633 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5634 return isGFX9Plus() && !isGFX11Plus(); 5635 case AMDGPU::TBA: 5636 case AMDGPU::TBA_LO: 5637 case AMDGPU::TBA_HI: 5638 case AMDGPU::TMA: 5639 case AMDGPU::TMA_LO: 5640 case AMDGPU::TMA_HI: 5641 return !isGFX9Plus(); 5642 case AMDGPU::XNACK_MASK: 5643 case AMDGPU::XNACK_MASK_LO: 5644 case AMDGPU::XNACK_MASK_HI: 5645 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5646 case AMDGPU::SGPR_NULL: 5647 return isGFX10Plus(); 5648 default: 5649 break; 5650 } 5651 5652 if (isCI()) 5653 return true; 5654 5655 if (isSI() || isGFX10Plus()) { 5656 // No flat_scr on SI. 5657 // On GFX10Plus flat scratch is not a valid register operand and can only be 5658 accessed with s_setreg/s_getreg.
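// For example (illustrative spellings, not taken from the tables here): on GFX10+
// an operand such as "flat_scratch_lo" is rejected by the switch below, while the
// value can still be read or written through s_getreg_b32/s_setreg_b32.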
5659 switch (RegNo) { 5660 case AMDGPU::FLAT_SCR: 5661 case AMDGPU::FLAT_SCR_LO: 5662 case AMDGPU::FLAT_SCR_HI: 5663 return false; 5664 default: 5665 return true; 5666 } 5667 } 5668 5669 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5670 // SI/CI have. 5671 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5672 return hasSGPR102_SGPR103(); 5673 5674 return true; 5675 } 5676 5677 OperandMatchResultTy 5678 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5679 OperandMode Mode) { 5680 OperandMatchResultTy ResTy = parseVOPD(Operands); 5681 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5682 isToken(AsmToken::EndOfStatement)) 5683 return ResTy; 5684 5685 // Try to parse with a custom parser 5686 ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5687 5688 // If we successfully parsed the operand or if there was an error parsing, 5689 we are done. 5690 // 5691 // If we are parsing after we reach EndOfStatement then this means we 5692 are appending default values to the Operands list. This is only done 5693 by a custom parser, so we shouldn't continue on to the generic parsing. 5694 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5695 isToken(AsmToken::EndOfStatement)) 5696 return ResTy; 5697 5698 SMLoc RBraceLoc; 5699 SMLoc LBraceLoc = getLoc(); 5700 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5701 unsigned Prefix = Operands.size(); 5702 5703 for (;;) { 5704 auto Loc = getLoc(); 5705 ResTy = parseReg(Operands); 5706 if (ResTy == MatchOperand_NoMatch) 5707 Error(Loc, "expected a register"); 5708 if (ResTy != MatchOperand_Success) 5709 return MatchOperand_ParseFail; 5710 5711 RBraceLoc = getLoc(); 5712 if (trySkipToken(AsmToken::RBrac)) 5713 break; 5714 5715 if (!skipToken(AsmToken::Comma, 5716 "expected a comma or a closing square bracket")) { 5717 return MatchOperand_ParseFail; 5718 } 5719 } 5720 5721 if (Operands.size() - Prefix > 1) { 5722 Operands.insert(Operands.begin() + Prefix, 5723 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5724 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5725 } 5726 5727 return MatchOperand_Success; 5728 } 5729 5730 return parseRegOrImm(Operands); 5731 } 5732 5733 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5734 // Clear any forced encodings from the previous instruction.
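// A sketch of the intended behavior (the mnemonics are illustrative assumptions,
// not taken from the instruction tables): "v_add_f32_e64" forces the 64-bit
// encoding and "v_add_f32" is returned below, while "v_mov_b32_sdwa" sets the
// SDWA flag and returns "v_mov_b32".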
5735 setForcedEncodingSize(0); 5736 setForcedDPP(false); 5737 setForcedSDWA(false); 5738 5739 if (Name.endswith("_e64_dpp")) { 5740 setForcedDPP(true); 5741 setForcedEncodingSize(64); 5742 return Name.substr(0, Name.size() - 8); 5743 } else if (Name.endswith("_e64")) { 5744 setForcedEncodingSize(64); 5745 return Name.substr(0, Name.size() - 4); 5746 } else if (Name.endswith("_e32")) { 5747 setForcedEncodingSize(32); 5748 return Name.substr(0, Name.size() - 4); 5749 } else if (Name.endswith("_dpp")) { 5750 setForcedDPP(true); 5751 return Name.substr(0, Name.size() - 4); 5752 } else if (Name.endswith("_sdwa")) { 5753 setForcedSDWA(true); 5754 return Name.substr(0, Name.size() - 5); 5755 } 5756 return Name; 5757 } 5758 5759 static void applyMnemonicAliases(StringRef &Mnemonic, 5760 const FeatureBitset &Features, 5761 unsigned VariantID); 5762 5763 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5764 StringRef Name, 5765 SMLoc NameLoc, OperandVector &Operands) { 5766 // Add the instruction mnemonic 5767 Name = parseMnemonicSuffix(Name); 5768 5769 // If the target architecture uses MnemonicAlias, call it here to parse 5770 // operands correctly. 5771 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5772 5773 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5774 5775 bool IsMIMG = Name.startswith("image_"); 5776 5777 while (!trySkipToken(AsmToken::EndOfStatement)) { 5778 OperandMode Mode = OperandMode_Default; 5779 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5780 Mode = OperandMode_NSA; 5781 CPolSeen = 0; 5782 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5783 5784 if (Res != MatchOperand_Success) { 5785 checkUnsupportedInstruction(Name, NameLoc); 5786 if (!Parser.hasPendingError()) { 5787 // FIXME: use real operand location rather than the current location. 5788 StringRef Msg = 5789 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5790 "not a valid operand."; 5791 Error(getLoc(), Msg); 5792 } 5793 while (!trySkipToken(AsmToken::EndOfStatement)) { 5794 lex(); 5795 } 5796 return true; 5797 } 5798 5799 // Eat the comma or space if there is one. 5800 trySkipToken(AsmToken::Comma); 5801 } 5802 5803 return false; 5804 } 5805 5806 //===----------------------------------------------------------------------===// 5807 // Utility functions 5808 //===----------------------------------------------------------------------===// 5809 5810 OperandMatchResultTy 5811 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5812 5813 if (!trySkipId(Prefix, AsmToken::Colon)) 5814 return MatchOperand_NoMatch; 5815 5816 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 5817 } 5818 5819 OperandMatchResultTy 5820 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5821 AMDGPUOperand::ImmTy ImmTy, 5822 bool (*ConvertResult)(int64_t&)) { 5823 SMLoc S = getLoc(); 5824 int64_t Value = 0; 5825 5826 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5827 if (Res != MatchOperand_Success) 5828 return Res; 5829 5830 if (ConvertResult && !ConvertResult(Value)) { 5831 Error(S, "invalid " + StringRef(Prefix) + " value."); 5832 } 5833 5834 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5835 return MatchOperand_Success; 5836 } 5837 5838 OperandMatchResultTy 5839 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5840 OperandVector &Operands, 5841 AMDGPUOperand::ImmTy ImmTy, 5842 bool (*ConvertResult)(int64_t&)) { 5843 SMLoc S = getLoc(); 5844 if (!trySkipId(Prefix, AsmToken::Colon)) 5845 return MatchOperand_NoMatch; 5846 5847 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5848 return MatchOperand_ParseFail; 5849 5850 unsigned Val = 0; 5851 const unsigned MaxSize = 4; 5852 5853 // FIXME: How to verify the number of elements matches the number of src 5854 // operands? 5855 for (int I = 0; ; ++I) { 5856 int64_t Op; 5857 SMLoc Loc = getLoc(); 5858 if (!parseExpr(Op)) 5859 return MatchOperand_ParseFail; 5860 5861 if (Op != 0 && Op != 1) { 5862 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5863 return MatchOperand_ParseFail; 5864 } 5865 5866 Val |= (Op << I); 5867 5868 if (trySkipToken(AsmToken::RBrac)) 5869 break; 5870 5871 if (I + 1 == MaxSize) { 5872 Error(getLoc(), "expected a closing square bracket"); 5873 return MatchOperand_ParseFail; 5874 } 5875 5876 if (!skipToken(AsmToken::Comma, "expected a comma")) 5877 return MatchOperand_ParseFail; 5878 } 5879 5880 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5881 return MatchOperand_Success; 5882 } 5883 5884 OperandMatchResultTy 5885 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5886 AMDGPUOperand::ImmTy ImmTy) { 5887 int64_t Bit; 5888 SMLoc S = getLoc(); 5889 5890 if (trySkipId(Name)) { 5891 Bit = 1; 5892 } else if (trySkipId("no", Name)) { 5893 Bit = 0; 5894 } else { 5895 return MatchOperand_NoMatch; 5896 } 5897 5898 if (Name == "r128" && !hasMIMG_R128()) { 5899 Error(S, "r128 modifier is not supported on this GPU"); 5900 return MatchOperand_ParseFail; 5901 } 5902 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5903 Error(S, "a16 modifier is not supported on this GPU"); 5904 return MatchOperand_ParseFail; 5905 } 5906 5907 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5908 ImmTy = AMDGPUOperand::ImmTyR128A16; 5909 5910 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5911 return MatchOperand_Success; 5912 } 5913 5914 OperandMatchResultTy 5915 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5916 unsigned CPolOn = 0; 5917 unsigned CPolOff = 0; 5918 SMLoc S = getLoc(); 5919 5920 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5921 if (isGFX940() && !Mnemo.startswith("s_")) { 5922 if (trySkipId("sc0")) 5923 CPolOn = AMDGPU::CPol::SC0; 5924 else if (trySkipId("nosc0")) 5925 CPolOff = AMDGPU::CPol::SC0; 5926 else if (trySkipId("nt")) 5927 CPolOn = AMDGPU::CPol::NT; 5928 else if (trySkipId("nont")) 5929 CPolOff = AMDGPU::CPol::NT; 5930 else if (trySkipId("sc1")) 5931 CPolOn = AMDGPU::CPol::SC1; 5932 else if (trySkipId("nosc1")) 5933 CPolOff = AMDGPU::CPol::SC1; 5934 else 5935 return 
MatchOperand_NoMatch; 5936 } 5937 else if (trySkipId("glc")) 5938 CPolOn = AMDGPU::CPol::GLC; 5939 else if (trySkipId("noglc")) 5940 CPolOff = AMDGPU::CPol::GLC; 5941 else if (trySkipId("slc")) 5942 CPolOn = AMDGPU::CPol::SLC; 5943 else if (trySkipId("noslc")) 5944 CPolOff = AMDGPU::CPol::SLC; 5945 else if (trySkipId("dlc")) 5946 CPolOn = AMDGPU::CPol::DLC; 5947 else if (trySkipId("nodlc")) 5948 CPolOff = AMDGPU::CPol::DLC; 5949 else if (trySkipId("scc")) 5950 CPolOn = AMDGPU::CPol::SCC; 5951 else if (trySkipId("noscc")) 5952 CPolOff = AMDGPU::CPol::SCC; 5953 else 5954 return MatchOperand_NoMatch; 5955 5956 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5957 Error(S, "dlc modifier is not supported on this GPU"); 5958 return MatchOperand_ParseFail; 5959 } 5960 5961 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5962 Error(S, "scc modifier is not supported on this GPU"); 5963 return MatchOperand_ParseFail; 5964 } 5965 5966 if (CPolSeen & (CPolOn | CPolOff)) { 5967 Error(S, "duplicate cache policy modifier"); 5968 return MatchOperand_ParseFail; 5969 } 5970 5971 CPolSeen |= (CPolOn | CPolOff); 5972 5973 for (unsigned I = 1; I != Operands.size(); ++I) { 5974 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5975 if (Op.isCPol()) { 5976 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5977 return MatchOperand_Success; 5978 } 5979 } 5980 5981 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5982 AMDGPUOperand::ImmTyCPol)); 5983 5984 return MatchOperand_Success; 5985 } 5986 5987 static void addOptionalImmOperand( 5988 MCInst& Inst, const OperandVector& Operands, 5989 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5990 AMDGPUOperand::ImmTy ImmT, 5991 int64_t Default = 0) { 5992 auto i = OptionalIdx.find(ImmT); 5993 if (i != OptionalIdx.end()) { 5994 unsigned Idx = i->second; 5995 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5996 } else { 5997 Inst.addOperand(MCOperand::createImm(Default)); 5998 } 5999 } 6000 6001 OperandMatchResultTy 6002 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 6003 StringRef &Value, 6004 SMLoc &StringLoc) { 6005 if (!trySkipId(Prefix, AsmToken::Colon)) 6006 return MatchOperand_NoMatch; 6007 6008 StringLoc = getLoc(); 6009 return parseId(Value, "expected an identifier") ? MatchOperand_Success 6010 : MatchOperand_ParseFail; 6011 } 6012 6013 //===----------------------------------------------------------------------===// 6014 // MTBUF format 6015 //===----------------------------------------------------------------------===// 6016 6017 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6018 int64_t MaxVal, 6019 int64_t &Fmt) { 6020 int64_t Val; 6021 SMLoc Loc = getLoc(); 6022 6023 auto Res = parseIntWithPrefix(Pref, Val); 6024 if (Res == MatchOperand_ParseFail) 6025 return false; 6026 if (Res == MatchOperand_NoMatch) 6027 return true; 6028 6029 if (Val < 0 || Val > MaxVal) { 6030 Error(Loc, Twine("out of range ", StringRef(Pref))); 6031 return false; 6032 } 6033 6034 Fmt = Val; 6035 return true; 6036 } 6037 6038 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6039 // values to live in a joint format operand in the MCInst encoding. 6040 OperandMatchResultTy 6041 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6042 using namespace llvm::AMDGPU::MTBUFFormat; 6043 6044 int64_t Dfmt = DFMT_UNDEF; 6045 int64_t Nfmt = NFMT_UNDEF; 6046 6047 // dfmt and nfmt can appear in either order, and each is optional. 
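// For example (illustrative operand text): "dfmt:1, nfmt:2", "nfmt:2, dfmt:1", and a
// lone "dfmt:1" are all accepted, which is why the loop below makes exactly two passes
// and treats the separating comma as optional.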
6048 for (int I = 0; I < 2; ++I) { 6049 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6050 return MatchOperand_ParseFail; 6051 6052 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 6053 return MatchOperand_ParseFail; 6054 } 6055 // Skip optional comma between dfmt/nfmt 6056 // but guard against 2 commas following each other. 6057 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6058 !peekToken().is(AsmToken::Comma)) { 6059 trySkipToken(AsmToken::Comma); 6060 } 6061 } 6062 6063 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6064 return MatchOperand_NoMatch; 6065 6066 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6067 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 6068 6069 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6070 return MatchOperand_Success; 6071 } 6072 6073 OperandMatchResultTy 6074 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6075 using namespace llvm::AMDGPU::MTBUFFormat; 6076 6077 int64_t Fmt = UFMT_UNDEF; 6078 6079 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6080 return MatchOperand_ParseFail; 6081 6082 if (Fmt == UFMT_UNDEF) 6083 return MatchOperand_NoMatch; 6084 6085 Format = Fmt; 6086 return MatchOperand_Success; 6087 } 6088 6089 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6090 int64_t &Nfmt, 6091 StringRef FormatStr, 6092 SMLoc Loc) { 6093 using namespace llvm::AMDGPU::MTBUFFormat; 6094 int64_t Format; 6095 6096 Format = getDfmt(FormatStr); 6097 if (Format != DFMT_UNDEF) { 6098 Dfmt = Format; 6099 return true; 6100 } 6101 6102 Format = getNfmt(FormatStr, getSTI()); 6103 if (Format != NFMT_UNDEF) { 6104 Nfmt = Format; 6105 return true; 6106 } 6107 6108 Error(Loc, "unsupported format"); 6109 return false; 6110 } 6111 6112 OperandMatchResultTy 6113 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6114 SMLoc FormatLoc, 6115 int64_t &Format) { 6116 using namespace llvm::AMDGPU::MTBUFFormat; 6117 6118 int64_t Dfmt = DFMT_UNDEF; 6119 int64_t Nfmt = NFMT_UNDEF; 6120 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6121 return MatchOperand_ParseFail; 6122 6123 if (trySkipToken(AsmToken::Comma)) { 6124 StringRef Str; 6125 SMLoc Loc = getLoc(); 6126 if (!parseId(Str, "expected a format string") || 6127 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6128 return MatchOperand_ParseFail; 6129 } 6130 if (Dfmt == DFMT_UNDEF) { 6131 Error(Loc, "duplicate numeric format"); 6132 return MatchOperand_ParseFail; 6133 } else if (Nfmt == NFMT_UNDEF) { 6134 Error(Loc, "duplicate data format"); 6135 return MatchOperand_ParseFail; 6136 } 6137 } 6138 6139 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6140 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6141 6142 if (isGFX10Plus()) { 6143 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6144 if (Ufmt == UFMT_UNDEF) { 6145 Error(FormatLoc, "unsupported format"); 6146 return MatchOperand_ParseFail; 6147 } 6148 Format = Ufmt; 6149 } else { 6150 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6151 } 6152 6153 return MatchOperand_Success; 6154 } 6155 6156 OperandMatchResultTy 6157 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6158 SMLoc Loc, 6159 int64_t &Format) { 6160 using namespace llvm::AMDGPU::MTBUFFormat; 6161 6162 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6163 if (Id == UFMT_UNDEF) 6164 return MatchOperand_NoMatch; 6165 6166 if (!isGFX10Plus()) { 6167 Error(Loc, "unified format is not supported on this GPU"); 6168 return MatchOperand_ParseFail; 6169 } 6170 6171 Format = Id; 6172 return MatchOperand_Success; 6173 } 6174 6175 OperandMatchResultTy 6176 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6177 using namespace llvm::AMDGPU::MTBUFFormat; 6178 SMLoc Loc = getLoc(); 6179 6180 if (!parseExpr(Format)) 6181 return MatchOperand_ParseFail; 6182 if (!isValidFormatEncoding(Format, getSTI())) { 6183 Error(Loc, "out of range format"); 6184 return MatchOperand_ParseFail; 6185 } 6186 6187 return MatchOperand_Success; 6188 } 6189 6190 OperandMatchResultTy 6191 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6192 using namespace llvm::AMDGPU::MTBUFFormat; 6193 6194 if (!trySkipId("format", AsmToken::Colon)) 6195 return MatchOperand_NoMatch; 6196 6197 if (trySkipToken(AsmToken::LBrac)) { 6198 StringRef FormatStr; 6199 SMLoc Loc = getLoc(); 6200 if (!parseId(FormatStr, "expected a format string")) 6201 return MatchOperand_ParseFail; 6202 6203 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6204 if (Res == MatchOperand_NoMatch) 6205 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6206 if (Res != MatchOperand_Success) 6207 return Res; 6208 6209 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6210 return MatchOperand_ParseFail; 6211 6212 return MatchOperand_Success; 6213 } 6214 6215 return parseNumericFormat(Format); 6216 } 6217 6218 OperandMatchResultTy 6219 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6220 using namespace llvm::AMDGPU::MTBUFFormat; 6221 6222 int64_t Format = getDefaultFormatEncoding(getSTI()); 6223 OperandMatchResultTy Res; 6224 SMLoc Loc = getLoc(); 6225 6226 // Parse legacy format syntax. 6227 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6228 if (Res == MatchOperand_ParseFail) 6229 return Res; 6230 6231 bool FormatFound = (Res == MatchOperand_Success); 6232 6233 Operands.push_back( 6234 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6235 6236 if (FormatFound) 6237 trySkipToken(AsmToken::Comma); 6238 6239 if (isToken(AsmToken::EndOfStatement)) { 6240 // We are expecting an soffset operand, 6241 // but let matcher handle the error. 6242 return MatchOperand_Success; 6243 } 6244 6245 // Parse soffset. 
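// The soffset may be a register or an immediate, e.g. "s4" or "0" (illustrative); if a
// symbolic format follows it, the format operand already pushed above is updated in place.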
6246 Res = parseRegOrImm(Operands); 6247 if (Res != MatchOperand_Success) 6248 return Res; 6249 6250 trySkipToken(AsmToken::Comma); 6251 6252 if (!FormatFound) { 6253 Res = parseSymbolicOrNumericFormat(Format); 6254 if (Res == MatchOperand_ParseFail) 6255 return Res; 6256 if (Res == MatchOperand_Success) { 6257 auto Size = Operands.size(); 6258 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6259 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6260 Op.setImm(Format); 6261 } 6262 return MatchOperand_Success; 6263 } 6264 6265 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6266 Error(getLoc(), "duplicate format"); 6267 return MatchOperand_ParseFail; 6268 } 6269 return MatchOperand_Success; 6270 } 6271 6272 //===----------------------------------------------------------------------===// 6273 // ds 6274 //===----------------------------------------------------------------------===// 6275 6276 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6277 const OperandVector &Operands) { 6278 OptionalImmIndexMap OptionalIdx; 6279 6280 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6281 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6282 6283 // Add the register arguments 6284 if (Op.isReg()) { 6285 Op.addRegOperands(Inst, 1); 6286 continue; 6287 } 6288 6289 // Handle optional arguments 6290 OptionalIdx[Op.getImmTy()] = i; 6291 } 6292 6293 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6294 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6295 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6296 6297 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6298 } 6299 6300 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6301 bool IsGdsHardcoded) { 6302 OptionalImmIndexMap OptionalIdx; 6303 AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset; 6304 6305 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6306 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6307 6308 // Add the register arguments 6309 if (Op.isReg()) { 6310 Op.addRegOperands(Inst, 1); 6311 continue; 6312 } 6313 6314 if (Op.isToken() && Op.getToken() == "gds") { 6315 IsGdsHardcoded = true; 6316 continue; 6317 } 6318 6319 // Handle optional arguments 6320 OptionalIdx[Op.getImmTy()] = i; 6321 6322 if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle) 6323 OffsetType = AMDGPUOperand::ImmTySwizzle; 6324 } 6325 6326 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6327 6328 if (!IsGdsHardcoded) { 6329 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6330 } 6331 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6332 } 6333 6334 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6335 OptionalImmIndexMap OptionalIdx; 6336 6337 unsigned OperandIdx[4]; 6338 unsigned EnMask = 0; 6339 int SrcIdx = 0; 6340 6341 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6342 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6343 6344 // Add the register arguments 6345 if (Op.isReg()) { 6346 assert(SrcIdx < 4); 6347 OperandIdx[SrcIdx] = Inst.size(); 6348 Op.addRegOperands(Inst, 1); 6349 ++SrcIdx; 6350 continue; 6351 } 6352 6353 if (Op.isOff()) { 6354 assert(SrcIdx < 4); 6355 OperandIdx[SrcIdx] = Inst.size(); 6356 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6357 ++SrcIdx; 6358 continue; 6359 } 6360 6361 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) 
{ 6362 Op.addImmOperands(Inst, 1); 6363 continue; 6364 } 6365 6366 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6367 continue; 6368 6369 // Handle optional arguments 6370 OptionalIdx[Op.getImmTy()] = i; 6371 } 6372 6373 assert(SrcIdx == 4); 6374 6375 bool Compr = false; 6376 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6377 Compr = true; 6378 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6379 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6380 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6381 } 6382 6383 for (auto i = 0; i < SrcIdx; ++i) { 6384 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6385 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); 6386 } 6387 } 6388 6389 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6390 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6391 6392 Inst.addOperand(MCOperand::createImm(EnMask)); 6393 } 6394 6395 //===----------------------------------------------------------------------===// 6396 // s_waitcnt 6397 //===----------------------------------------------------------------------===// 6398 6399 static bool 6400 encodeCnt( 6401 const AMDGPU::IsaVersion ISA, 6402 int64_t &IntVal, 6403 int64_t CntVal, 6404 bool Saturate, 6405 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6406 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6407 { 6408 bool Failed = false; 6409 6410 IntVal = encode(ISA, IntVal, CntVal); 6411 if (CntVal != decode(ISA, IntVal)) { 6412 if (Saturate) { 6413 IntVal = encode(ISA, IntVal, -1); 6414 } else { 6415 Failed = true; 6416 } 6417 } 6418 return Failed; 6419 } 6420 6421 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6422 6423 SMLoc CntLoc = getLoc(); 6424 StringRef CntName = getTokenStr(); 6425 6426 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6427 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6428 return false; 6429 6430 int64_t CntVal; 6431 SMLoc ValLoc = getLoc(); 6432 if (!parseExpr(CntVal)) 6433 return false; 6434 6435 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6436 6437 bool Failed = true; 6438 bool Sat = CntName.endswith("_sat"); 6439 6440 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6441 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6442 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6443 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6444 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6445 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6446 } else { 6447 Error(CntLoc, "invalid counter name " + CntName); 6448 return false; 6449 } 6450 6451 if (Failed) { 6452 Error(ValLoc, "too large value for " + CntName); 6453 return false; 6454 } 6455 6456 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6457 return false; 6458 6459 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6460 if (isToken(AsmToken::EndOfStatement)) { 6461 Error(getLoc(), "expected a counter name"); 6462 return false; 6463 } 6464 } 6465 6466 return true; 6467 } 6468 6469 OperandMatchResultTy 6470 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6471 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6472 int64_t Waitcnt = getWaitcntBitMask(ISA); 6473 SMLoc S = getLoc(); 6474 6475 if (isToken(AsmToken::Identifier) 
&& peekToken().is(AsmToken::LParen)) { 6476 while (!isToken(AsmToken::EndOfStatement)) { 6477 if (!parseCnt(Waitcnt)) 6478 return MatchOperand_ParseFail; 6479 } 6480 } else { 6481 if (!parseExpr(Waitcnt)) 6482 return MatchOperand_ParseFail; 6483 } 6484 6485 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6486 return MatchOperand_Success; 6487 } 6488 6489 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6490 SMLoc FieldLoc = getLoc(); 6491 StringRef FieldName = getTokenStr(); 6492 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6493 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6494 return false; 6495 6496 SMLoc ValueLoc = getLoc(); 6497 StringRef ValueName = getTokenStr(); 6498 if (!skipToken(AsmToken::Identifier, "expected a value name") || 6499 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6500 return false; 6501 6502 unsigned Shift; 6503 if (FieldName == "instid0") { 6504 Shift = 0; 6505 } else if (FieldName == "instskip") { 6506 Shift = 4; 6507 } else if (FieldName == "instid1") { 6508 Shift = 7; 6509 } else { 6510 Error(FieldLoc, "invalid field name " + FieldName); 6511 return false; 6512 } 6513 6514 int Value; 6515 if (Shift == 4) { 6516 // Parse values for instskip. 6517 Value = StringSwitch<int>(ValueName) 6518 .Case("SAME", 0) 6519 .Case("NEXT", 1) 6520 .Case("SKIP_1", 2) 6521 .Case("SKIP_2", 3) 6522 .Case("SKIP_3", 4) 6523 .Case("SKIP_4", 5) 6524 .Default(-1); 6525 } else { 6526 // Parse values for instid0 and instid1. 6527 Value = StringSwitch<int>(ValueName) 6528 .Case("NO_DEP", 0) 6529 .Case("VALU_DEP_1", 1) 6530 .Case("VALU_DEP_2", 2) 6531 .Case("VALU_DEP_3", 3) 6532 .Case("VALU_DEP_4", 4) 6533 .Case("TRANS32_DEP_1", 5) 6534 .Case("TRANS32_DEP_2", 6) 6535 .Case("TRANS32_DEP_3", 7) 6536 .Case("FMA_ACCUM_CYCLE_1", 8) 6537 .Case("SALU_CYCLE_1", 9) 6538 .Case("SALU_CYCLE_2", 10) 6539 .Case("SALU_CYCLE_3", 11) 6540 .Default(-1); 6541 } 6542 if (Value < 0) { 6543 Error(ValueLoc, "invalid value name " + ValueName); 6544 return false; 6545 } 6546 6547 Delay |= Value << Shift; 6548 return true; 6549 } 6550 6551 OperandMatchResultTy 6552 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6553 int64_t Delay = 0; 6554 SMLoc S = getLoc(); 6555 6556 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6557 do { 6558 if (!parseDelay(Delay)) 6559 return MatchOperand_ParseFail; 6560 } while (trySkipToken(AsmToken::Pipe)); 6561 } else { 6562 if (!parseExpr(Delay)) 6563 return MatchOperand_ParseFail; 6564 } 6565 6566 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6567 return MatchOperand_Success; 6568 } 6569 6570 bool 6571 AMDGPUOperand::isSWaitCnt() const { 6572 return isImm(); 6573 } 6574 6575 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6576 6577 //===----------------------------------------------------------------------===// 6578 // DepCtr 6579 //===----------------------------------------------------------------------===// 6580 6581 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6582 StringRef DepCtrName) { 6583 switch (ErrorId) { 6584 case OPR_ID_UNKNOWN: 6585 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6586 return; 6587 case OPR_ID_UNSUPPORTED: 6588 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6589 return; 6590 case OPR_ID_DUPLICATE: 6591 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6592 return; 6593 case OPR_VAL_INVALID: 6594 Error(Loc, Twine("invalid value for ", DepCtrName)); 6595 return; 6596 default: 
6597 assert(false); 6598 } 6599 } 6600 6601 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6602 6603 using namespace llvm::AMDGPU::DepCtr; 6604 6605 SMLoc DepCtrLoc = getLoc(); 6606 StringRef DepCtrName = getTokenStr(); 6607 6608 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6609 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6610 return false; 6611 6612 int64_t ExprVal; 6613 if (!parseExpr(ExprVal)) 6614 return false; 6615 6616 unsigned PrevOprMask = UsedOprMask; 6617 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6618 6619 if (CntVal < 0) { 6620 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6621 return false; 6622 } 6623 6624 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6625 return false; 6626 6627 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6628 if (isToken(AsmToken::EndOfStatement)) { 6629 Error(getLoc(), "expected a counter name"); 6630 return false; 6631 } 6632 } 6633 6634 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6635 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6636 return true; 6637 } 6638 6639 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6640 using namespace llvm::AMDGPU::DepCtr; 6641 6642 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6643 SMLoc Loc = getLoc(); 6644 6645 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6646 unsigned UsedOprMask = 0; 6647 while (!isToken(AsmToken::EndOfStatement)) { 6648 if (!parseDepCtr(DepCtr, UsedOprMask)) 6649 return MatchOperand_ParseFail; 6650 } 6651 } else { 6652 if (!parseExpr(DepCtr)) 6653 return MatchOperand_ParseFail; 6654 } 6655 6656 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6657 return MatchOperand_Success; 6658 } 6659 6660 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6661 6662 //===----------------------------------------------------------------------===// 6663 // hwreg 6664 //===----------------------------------------------------------------------===// 6665 6666 bool 6667 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6668 OperandInfoTy &Offset, 6669 OperandInfoTy &Width) { 6670 using namespace llvm::AMDGPU::Hwreg; 6671 6672 // The register may be specified by name or using a numeric code 6673 HwReg.Loc = getLoc(); 6674 if (isToken(AsmToken::Identifier) && 6675 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6676 HwReg.IsSymbolic = true; 6677 lex(); // skip register name 6678 } else if (!parseExpr(HwReg.Id, "a register name")) { 6679 return false; 6680 } 6681 6682 if (trySkipToken(AsmToken::RParen)) 6683 return true; 6684 6685 // parse optional params 6686 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6687 return false; 6688 6689 Offset.Loc = getLoc(); 6690 if (!parseExpr(Offset.Id)) 6691 return false; 6692 6693 if (!skipToken(AsmToken::Comma, "expected a comma")) 6694 return false; 6695 6696 Width.Loc = getLoc(); 6697 return parseExpr(Width.Id) && 6698 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6699 } 6700 6701 bool 6702 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6703 const OperandInfoTy &Offset, 6704 const OperandInfoTy &Width) { 6705 6706 using namespace llvm::AMDGPU::Hwreg; 6707 6708 if (HwReg.IsSymbolic) { 6709 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6710 Error(HwReg.Loc, 6711 "specified hardware register is not supported on this GPU"); 6712 return false; 6713 } 6714 } else { 6715 if 
(!isValidHwreg(HwReg.Id)) { 6716 Error(HwReg.Loc, 6717 "invalid code of hardware register: only 6-bit values are legal"); 6718 return false; 6719 } 6720 } 6721 if (!isValidHwregOffset(Offset.Id)) { 6722 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6723 return false; 6724 } 6725 if (!isValidHwregWidth(Width.Id)) { 6726 Error(Width.Loc, 6727 "invalid bitfield width: only values from 1 to 32 are legal"); 6728 return false; 6729 } 6730 return true; 6731 } 6732 6733 OperandMatchResultTy 6734 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6735 using namespace llvm::AMDGPU::Hwreg; 6736 6737 int64_t ImmVal = 0; 6738 SMLoc Loc = getLoc(); 6739 6740 if (trySkipId("hwreg", AsmToken::LParen)) { 6741 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6742 OperandInfoTy Offset(OFFSET_DEFAULT_); 6743 OperandInfoTy Width(WIDTH_DEFAULT_); 6744 if (parseHwregBody(HwReg, Offset, Width) && 6745 validateHwreg(HwReg, Offset, Width)) { 6746 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6747 } else { 6748 return MatchOperand_ParseFail; 6749 } 6750 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6751 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6752 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6753 return MatchOperand_ParseFail; 6754 } 6755 } else { 6756 return MatchOperand_ParseFail; 6757 } 6758 6759 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6760 return MatchOperand_Success; 6761 } 6762 6763 bool AMDGPUOperand::isHwreg() const { 6764 return isImmTy(ImmTyHwreg); 6765 } 6766 6767 //===----------------------------------------------------------------------===// 6768 // sendmsg 6769 //===----------------------------------------------------------------------===// 6770 6771 bool 6772 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6773 OperandInfoTy &Op, 6774 OperandInfoTy &Stream) { 6775 using namespace llvm::AMDGPU::SendMsg; 6776 6777 Msg.Loc = getLoc(); 6778 if (isToken(AsmToken::Identifier) && 6779 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6780 Msg.IsSymbolic = true; 6781 lex(); // skip message name 6782 } else if (!parseExpr(Msg.Id, "a message name")) { 6783 return false; 6784 } 6785 6786 if (trySkipToken(AsmToken::Comma)) { 6787 Op.IsDefined = true; 6788 Op.Loc = getLoc(); 6789 if (isToken(AsmToken::Identifier) && 6790 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6791 lex(); // skip operation name 6792 } else if (!parseExpr(Op.Id, "an operation name")) { 6793 return false; 6794 } 6795 6796 if (trySkipToken(AsmToken::Comma)) { 6797 Stream.IsDefined = true; 6798 Stream.Loc = getLoc(); 6799 if (!parseExpr(Stream.Id)) 6800 return false; 6801 } 6802 } 6803 6804 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6805 } 6806 6807 bool 6808 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6809 const OperandInfoTy &Op, 6810 const OperandInfoTy &Stream) { 6811 using namespace llvm::AMDGPU::SendMsg; 6812 6813 // Validation strictness depends on whether the message is specified 6814 // in a symbolic or in a numeric form. In the latter case 6815 // only whether the value can be encoded is checked.
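// Illustrative examples (the symbolic names are assumptions about the sendmsg syntax,
// not taken from the tables used here): a symbolic form like sendmsg(MSG_GS, GS_OP_EMIT, 0)
// is validated field by field below, while a numeric form like sendmsg(2, 1, 0) is only
// checked for encodability.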
6816 bool Strict = Msg.IsSymbolic; 6817 6818 if (Strict) { 6819 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6820 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6821 return false; 6822 } 6823 } else { 6824 if (!isValidMsgId(Msg.Id, getSTI())) { 6825 Error(Msg.Loc, "invalid message id"); 6826 return false; 6827 } 6828 } 6829 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6830 if (Op.IsDefined) { 6831 Error(Op.Loc, "message does not support operations"); 6832 } else { 6833 Error(Msg.Loc, "missing message operation"); 6834 } 6835 return false; 6836 } 6837 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6838 Error(Op.Loc, "invalid operation id"); 6839 return false; 6840 } 6841 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6842 Stream.IsDefined) { 6843 Error(Stream.Loc, "message operation does not support streams"); 6844 return false; 6845 } 6846 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6847 Error(Stream.Loc, "invalid message stream id"); 6848 return false; 6849 } 6850 return true; 6851 } 6852 6853 OperandMatchResultTy 6854 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6855 using namespace llvm::AMDGPU::SendMsg; 6856 6857 int64_t ImmVal = 0; 6858 SMLoc Loc = getLoc(); 6859 6860 if (trySkipId("sendmsg", AsmToken::LParen)) { 6861 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6862 OperandInfoTy Op(OP_NONE_); 6863 OperandInfoTy Stream(STREAM_ID_NONE_); 6864 if (parseSendMsgBody(Msg, Op, Stream) && 6865 validateSendMsg(Msg, Op, Stream)) { 6866 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6867 } else { 6868 return MatchOperand_ParseFail; 6869 } 6870 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6871 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6872 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6873 return MatchOperand_ParseFail; 6874 } 6875 } else { 6876 return MatchOperand_ParseFail; 6877 } 6878 6879 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6880 return MatchOperand_Success; 6881 } 6882 6883 bool AMDGPUOperand::isSendMsg() const { 6884 return isImmTy(ImmTySendMsg); 6885 } 6886 6887 //===----------------------------------------------------------------------===// 6888 // v_interp 6889 //===----------------------------------------------------------------------===// 6890 6891 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6892 StringRef Str; 6893 SMLoc S = getLoc(); 6894 6895 if (!parseId(Str)) 6896 return MatchOperand_NoMatch; 6897 6898 int Slot = StringSwitch<int>(Str) 6899 .Case("p10", 0) 6900 .Case("p20", 1) 6901 .Case("p0", 2) 6902 .Default(-1); 6903 6904 if (Slot == -1) { 6905 Error(S, "invalid interpolation slot"); 6906 return MatchOperand_ParseFail; 6907 } 6908 6909 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6910 AMDGPUOperand::ImmTyInterpSlot)); 6911 return MatchOperand_Success; 6912 } 6913 6914 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6915 StringRef Str; 6916 SMLoc S = getLoc(); 6917 6918 if (!parseId(Str)) 6919 return MatchOperand_NoMatch; 6920 6921 if (!Str.startswith("attr")) { 6922 Error(S, "invalid interpolation attribute"); 6923 return MatchOperand_ParseFail; 6924 } 6925 6926 StringRef Chan = Str.take_back(2); 6927 int AttrChan = StringSwitch<int>(Chan) 6928 .Case(".x", 0) 6929 .Case(".y", 1) 6930 .Case(".z", 2) 6931 .Case(".w", 3) 6932 .Default(-1); 6933 if (AttrChan == -1) { 6934 Error(S, "invalid or missing interpolation attribute channel"); 
6935 return MatchOperand_ParseFail; 6936 } 6937 6938 Str = Str.drop_back(2).drop_front(4); 6939 6940 uint8_t Attr; 6941 if (Str.getAsInteger(10, Attr)) { 6942 Error(S, "invalid or missing interpolation attribute number"); 6943 return MatchOperand_ParseFail; 6944 } 6945 6946 if (Attr > 63) { 6947 Error(S, "out of bounds interpolation attribute number"); 6948 return MatchOperand_ParseFail; 6949 } 6950 6951 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6952 6953 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6954 AMDGPUOperand::ImmTyInterpAttr)); 6955 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6956 AMDGPUOperand::ImmTyAttrChan)); 6957 return MatchOperand_Success; 6958 } 6959 6960 //===----------------------------------------------------------------------===// 6961 // exp 6962 //===----------------------------------------------------------------------===// 6963 6964 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6965 using namespace llvm::AMDGPU::Exp; 6966 6967 StringRef Str; 6968 SMLoc S = getLoc(); 6969 6970 if (!parseId(Str)) 6971 return MatchOperand_NoMatch; 6972 6973 unsigned Id = getTgtId(Str); 6974 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6975 Error(S, (Id == ET_INVALID) ? 6976 "invalid exp target" : 6977 "exp target is not supported on this GPU"); 6978 return MatchOperand_ParseFail; 6979 } 6980 6981 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6982 AMDGPUOperand::ImmTyExpTgt)); 6983 return MatchOperand_Success; 6984 } 6985 6986 //===----------------------------------------------------------------------===// 6987 // parser helpers 6988 //===----------------------------------------------------------------------===// 6989 6990 bool 6991 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6992 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6993 } 6994 6995 bool 6996 AMDGPUAsmParser::isId(const StringRef Id) const { 6997 return isId(getToken(), Id); 6998 } 6999 7000 bool 7001 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 7002 return getTokenKind() == Kind; 7003 } 7004 7005 bool 7006 AMDGPUAsmParser::trySkipId(const StringRef Id) { 7007 if (isId(Id)) { 7008 lex(); 7009 return true; 7010 } 7011 return false; 7012 } 7013 7014 bool 7015 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7016 if (isToken(AsmToken::Identifier)) { 7017 StringRef Tok = getTokenStr(); 7018 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 7019 lex(); 7020 return true; 7021 } 7022 } 7023 return false; 7024 } 7025 7026 bool 7027 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7028 if (isId(Id) && peekToken().is(Kind)) { 7029 lex(); 7030 lex(); 7031 return true; 7032 } 7033 return false; 7034 } 7035 7036 bool 7037 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7038 if (isToken(Kind)) { 7039 lex(); 7040 return true; 7041 } 7042 return false; 7043 } 7044 7045 bool 7046 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7047 const StringRef ErrMsg) { 7048 if (!trySkipToken(Kind)) { 7049 Error(getLoc(), ErrMsg); 7050 return false; 7051 } 7052 return true; 7053 } 7054 7055 bool 7056 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7057 SMLoc S = getLoc(); 7058 7059 const MCExpr *Expr; 7060 if (Parser.parseExpression(Expr)) 7061 return false; 7062 7063 if (Expr->evaluateAsAbsolute(Imm)) 7064 return true; 7065 7066 if (Expected.empty()) { 7067 Error(S, "expected 
absolute expression"); 7068 } else { 7069 Error(S, Twine("expected ", Expected) + 7070 Twine(" or an absolute expression")); 7071 } 7072 return false; 7073 } 7074 7075 bool 7076 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7077 SMLoc S = getLoc(); 7078 7079 const MCExpr *Expr; 7080 if (Parser.parseExpression(Expr)) 7081 return false; 7082 7083 int64_t IntVal; 7084 if (Expr->evaluateAsAbsolute(IntVal)) { 7085 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7086 } else { 7087 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7088 } 7089 return true; 7090 } 7091 7092 bool 7093 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7094 if (isToken(AsmToken::String)) { 7095 Val = getToken().getStringContents(); 7096 lex(); 7097 return true; 7098 } else { 7099 Error(getLoc(), ErrMsg); 7100 return false; 7101 } 7102 } 7103 7104 bool 7105 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7106 if (isToken(AsmToken::Identifier)) { 7107 Val = getTokenStr(); 7108 lex(); 7109 return true; 7110 } else { 7111 if (!ErrMsg.empty()) 7112 Error(getLoc(), ErrMsg); 7113 return false; 7114 } 7115 } 7116 7117 AsmToken 7118 AMDGPUAsmParser::getToken() const { 7119 return Parser.getTok(); 7120 } 7121 7122 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 7123 return isToken(AsmToken::EndOfStatement) 7124 ? getToken() 7125 : getLexer().peekTok(ShouldSkipSpace); 7126 } 7127 7128 void 7129 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7130 auto TokCount = getLexer().peekTokens(Tokens); 7131 7132 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7133 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7134 } 7135 7136 AsmToken::TokenKind 7137 AMDGPUAsmParser::getTokenKind() const { 7138 return getLexer().getKind(); 7139 } 7140 7141 SMLoc 7142 AMDGPUAsmParser::getLoc() const { 7143 return getToken().getLoc(); 7144 } 7145 7146 StringRef 7147 AMDGPUAsmParser::getTokenStr() const { 7148 return getToken().getString(); 7149 } 7150 7151 void 7152 AMDGPUAsmParser::lex() { 7153 Parser.Lex(); 7154 } 7155 7156 SMLoc 7157 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7158 const OperandVector &Operands) const { 7159 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7160 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7161 if (Test(Op)) 7162 return Op.getStartLoc(); 7163 } 7164 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7165 } 7166 7167 SMLoc 7168 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7169 const OperandVector &Operands) const { 7170 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7171 return getOperandLoc(Test, Operands); 7172 } 7173 7174 SMLoc 7175 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7176 const OperandVector &Operands) const { 7177 auto Test = [=](const AMDGPUOperand& Op) { 7178 return Op.isRegKind() && Op.getReg() == Reg; 7179 }; 7180 return getOperandLoc(Test, Operands); 7181 } 7182 7183 SMLoc 7184 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7185 auto Test = [](const AMDGPUOperand& Op) { 7186 return Op.IsImmKindLiteral() || Op.isExpr(); 7187 }; 7188 return getOperandLoc(Test, Operands); 7189 } 7190 7191 SMLoc 7192 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7193 auto Test = [](const AMDGPUOperand& Op) { 7194 return Op.isImmKindConst(); 7195 }; 7196 return getOperandLoc(Test, Operands); 7197 } 7198 7199 //===----------------------------------------------------------------------===// 7200 // 
swizzle 7201 //===----------------------------------------------------------------------===// 7202 7203 LLVM_READNONE 7204 static unsigned 7205 encodeBitmaskPerm(const unsigned AndMask, 7206 const unsigned OrMask, 7207 const unsigned XorMask) { 7208 using namespace llvm::AMDGPU::Swizzle; 7209 7210 return BITMASK_PERM_ENC | 7211 (AndMask << BITMASK_AND_SHIFT) | 7212 (OrMask << BITMASK_OR_SHIFT) | 7213 (XorMask << BITMASK_XOR_SHIFT); 7214 } 7215 7216 bool 7217 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7218 const unsigned MinVal, 7219 const unsigned MaxVal, 7220 const StringRef ErrMsg, 7221 SMLoc &Loc) { 7222 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7223 return false; 7224 } 7225 Loc = getLoc(); 7226 if (!parseExpr(Op)) { 7227 return false; 7228 } 7229 if (Op < MinVal || Op > MaxVal) { 7230 Error(Loc, ErrMsg); 7231 return false; 7232 } 7233 7234 return true; 7235 } 7236 7237 bool 7238 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7239 const unsigned MinVal, 7240 const unsigned MaxVal, 7241 const StringRef ErrMsg) { 7242 SMLoc Loc; 7243 for (unsigned i = 0; i < OpNum; ++i) { 7244 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7245 return false; 7246 } 7247 7248 return true; 7249 } 7250 7251 bool 7252 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7253 using namespace llvm::AMDGPU::Swizzle; 7254 7255 int64_t Lane[LANE_NUM]; 7256 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7257 "expected a 2-bit lane id")) { 7258 Imm = QUAD_PERM_ENC; 7259 for (unsigned I = 0; I < LANE_NUM; ++I) { 7260 Imm |= Lane[I] << (LANE_SHIFT * I); 7261 } 7262 return true; 7263 } 7264 return false; 7265 } 7266 7267 bool 7268 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7269 using namespace llvm::AMDGPU::Swizzle; 7270 7271 SMLoc Loc; 7272 int64_t GroupSize; 7273 int64_t LaneIdx; 7274 7275 if (!parseSwizzleOperand(GroupSize, 7276 2, 32, 7277 "group size must be in the interval [2,32]", 7278 Loc)) { 7279 return false; 7280 } 7281 if (!isPowerOf2_64(GroupSize)) { 7282 Error(Loc, "group size must be a power of two"); 7283 return false; 7284 } 7285 if (parseSwizzleOperand(LaneIdx, 7286 0, GroupSize - 1, 7287 "lane id must be in the interval [0,group size - 1]", 7288 Loc)) { 7289 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7290 return true; 7291 } 7292 return false; 7293 } 7294 7295 bool 7296 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7297 using namespace llvm::AMDGPU::Swizzle; 7298 7299 SMLoc Loc; 7300 int64_t GroupSize; 7301 7302 if (!parseSwizzleOperand(GroupSize, 7303 2, 32, 7304 "group size must be in the interval [2,32]", 7305 Loc)) { 7306 return false; 7307 } 7308 if (!isPowerOf2_64(GroupSize)) { 7309 Error(Loc, "group size must be a power of two"); 7310 return false; 7311 } 7312 7313 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7314 return true; 7315 } 7316 7317 bool 7318 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7319 using namespace llvm::AMDGPU::Swizzle; 7320 7321 SMLoc Loc; 7322 int64_t GroupSize; 7323 7324 if (!parseSwizzleOperand(GroupSize, 7325 1, 16, 7326 "group size must be in the interval [1,16]", 7327 Loc)) { 7328 return false; 7329 } 7330 if (!isPowerOf2_64(GroupSize)) { 7331 Error(Loc, "group size must be a power of two"); 7332 return false; 7333 } 7334 7335 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7336 return true; 7337 } 7338 7339 bool 7340 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7341 using namespace llvm::AMDGPU::Swizzle; 7342 7343 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7344 return false; 7345 } 7346 7347 StringRef Ctl; 7348 SMLoc StrLoc = getLoc(); 7349 if (!parseString(Ctl)) { 7350 return false; 7351 } 7352 if (Ctl.size() != BITMASK_WIDTH) { 7353 Error(StrLoc, "expected a 5-character mask"); 7354 return false; 7355 } 7356 7357 unsigned AndMask = 0; 7358 unsigned OrMask = 0; 7359 unsigned XorMask = 0; 7360 7361 for (size_t i = 0; i < Ctl.size(); ++i) { 7362 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7363 switch(Ctl[i]) { 7364 default: 7365 Error(StrLoc, "invalid mask"); 7366 return false; 7367 case '0': 7368 break; 7369 case '1': 7370 OrMask |= Mask; 7371 break; 7372 case 'p': 7373 AndMask |= Mask; 7374 break; 7375 case 'i': 7376 AndMask |= Mask; 7377 XorMask |= Mask; 7378 break; 7379 } 7380 } 7381 7382 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7383 return true; 7384 } 7385 7386 bool 7387 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7388 7389 SMLoc OffsetLoc = getLoc(); 7390 7391 if (!parseExpr(Imm, "a swizzle macro")) { 7392 return false; 7393 } 7394 if (!isUInt<16>(Imm)) { 7395 Error(OffsetLoc, "expected a 16-bit offset"); 7396 return false; 7397 } 7398 return true; 7399 } 7400 7401 bool 7402 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7403 using namespace llvm::AMDGPU::Swizzle; 7404 7405 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7406 7407 SMLoc ModeLoc = getLoc(); 7408 bool Ok = false; 7409 7410 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7411 Ok = parseSwizzleQuadPerm(Imm); 7412 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7413 Ok = parseSwizzleBitmaskPerm(Imm); 7414 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7415 Ok = parseSwizzleBroadcast(Imm); 7416 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7417 Ok = parseSwizzleSwap(Imm); 7418 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7419 Ok = parseSwizzleReverse(Imm); 7420 } else { 7421 Error(ModeLoc, "expected a swizzle mode"); 7422 } 7423 7424 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7425 } 7426 7427 return false; 7428 } 7429 7430 OperandMatchResultTy 7431 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7432 SMLoc S = getLoc(); 7433 int64_t Imm = 0; 7434 7435 if (trySkipId("offset")) { 7436 7437 bool Ok = false; 7438 if (skipToken(AsmToken::Colon, "expected a colon")) { 7439 if (trySkipId("swizzle")) { 7440 Ok = parseSwizzleMacro(Imm); 7441 } else { 7442 Ok = parseSwizzleOffset(Imm); 7443 } 7444 } 7445 7446 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7447 7448 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7449 } else { 7450 // Swizzle "offset" operand is optional. 7451 // If it is omitted, try parsing other optional operands. 
7452 return parseOptionalOpr(Operands); 7453 } 7454 } 7455 7456 bool 7457 AMDGPUOperand::isSwizzle() const { 7458 return isImmTy(ImmTySwizzle); 7459 } 7460 7461 //===----------------------------------------------------------------------===// 7462 // VGPR Index Mode 7463 //===----------------------------------------------------------------------===// 7464 7465 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7466 7467 using namespace llvm::AMDGPU::VGPRIndexMode; 7468 7469 if (trySkipToken(AsmToken::RParen)) { 7470 return OFF; 7471 } 7472 7473 int64_t Imm = 0; 7474 7475 while (true) { 7476 unsigned Mode = 0; 7477 SMLoc S = getLoc(); 7478 7479 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7480 if (trySkipId(IdSymbolic[ModeId])) { 7481 Mode = 1 << ModeId; 7482 break; 7483 } 7484 } 7485 7486 if (Mode == 0) { 7487 Error(S, (Imm == 0)? 7488 "expected a VGPR index mode or a closing parenthesis" : 7489 "expected a VGPR index mode"); 7490 return UNDEF; 7491 } 7492 7493 if (Imm & Mode) { 7494 Error(S, "duplicate VGPR index mode"); 7495 return UNDEF; 7496 } 7497 Imm |= Mode; 7498 7499 if (trySkipToken(AsmToken::RParen)) 7500 break; 7501 if (!skipToken(AsmToken::Comma, 7502 "expected a comma or a closing parenthesis")) 7503 return UNDEF; 7504 } 7505 7506 return Imm; 7507 } 7508 7509 OperandMatchResultTy 7510 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7511 7512 using namespace llvm::AMDGPU::VGPRIndexMode; 7513 7514 int64_t Imm = 0; 7515 SMLoc S = getLoc(); 7516 7517 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7518 Imm = parseGPRIdxMacro(); 7519 if (Imm == UNDEF) 7520 return MatchOperand_ParseFail; 7521 } else { 7522 if (getParser().parseAbsoluteExpression(Imm)) 7523 return MatchOperand_ParseFail; 7524 if (Imm < 0 || !isUInt<4>(Imm)) { 7525 Error(S, "invalid immediate: only 4-bit values are legal"); 7526 return MatchOperand_ParseFail; 7527 } 7528 } 7529 7530 Operands.push_back( 7531 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7532 return MatchOperand_Success; 7533 } 7534 7535 bool AMDGPUOperand::isGPRIdxMode() const { 7536 return isImmTy(ImmTyGprIdxMode); 7537 } 7538 7539 //===----------------------------------------------------------------------===// 7540 // sopp branch targets 7541 //===----------------------------------------------------------------------===// 7542 7543 OperandMatchResultTy 7544 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7545 7546 // Make sure we are not parsing something 7547 // that looks like a label or an expression but is not. 7548 // This will improve error messages. 7549 if (isRegister() || isModifier()) 7550 return MatchOperand_NoMatch; 7551 7552 if (!parseExpr(Operands)) 7553 return MatchOperand_ParseFail; 7554 7555 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7556 assert(Opr.isImm() || Opr.isExpr()); 7557 SMLoc Loc = Opr.getStartLoc(); 7558 7559 // Currently we do not support arbitrary expressions as branch targets. 7560 // Only labels and absolute expressions are accepted. 
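// For illustration (mnemonic usage assumed from the SOPP branch syntax):
// 's_branch loop_end' (a label) and 's_branch 8' (an absolute 16-bit offset)
// are accepted, while an expression such as 'loop_end+4' is rejected by the
// checks below.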
7561 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7562 Error(Loc, "expected an absolute expression or a label"); 7563 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7564 Error(Loc, "expected a 16-bit signed jump offset"); 7565 } 7566 7567 return MatchOperand_Success; 7568 } 7569 7570 //===----------------------------------------------------------------------===// 7571 // Boolean holding registers 7572 //===----------------------------------------------------------------------===// 7573 7574 OperandMatchResultTy 7575 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7576 return parseReg(Operands); 7577 } 7578 7579 //===----------------------------------------------------------------------===// 7580 // mubuf 7581 //===----------------------------------------------------------------------===// 7582 7583 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7584 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7585 } 7586 7587 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7588 const OperandVector &Operands, 7589 bool IsAtomic, 7590 bool IsLds) { 7591 OptionalImmIndexMap OptionalIdx; 7592 unsigned FirstOperandIdx = 1; 7593 bool IsAtomicReturn = false; 7594 7595 if (IsAtomic) { 7596 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7597 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7598 if (!Op.isCPol()) 7599 continue; 7600 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7601 break; 7602 } 7603 7604 if (!IsAtomicReturn) { 7605 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7606 if (NewOpc != -1) 7607 Inst.setOpcode(NewOpc); 7608 } 7609 7610 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7611 SIInstrFlags::IsAtomicRet; 7612 } 7613 7614 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7615 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7616 7617 // Add the register arguments 7618 if (Op.isReg()) { 7619 Op.addRegOperands(Inst, 1); 7620 // Insert a tied src for atomic return dst. 7621 // This cannot be postponed as subsequent calls to 7622 // addImmOperands rely on correct number of MC operands. 7623 if (IsAtomicReturn && i == FirstOperandIdx) 7624 Op.addRegOperands(Inst, 1); 7625 continue; 7626 } 7627 7628 // Handle the case where soffset is an immediate 7629 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7630 Op.addImmOperands(Inst, 1); 7631 continue; 7632 } 7633 7634 // Handle tokens like 'offen' which are sometimes hard-coded into the 7635 // asm string. There are no MCInst operands for these. 
7636 if (Op.isToken()) { 7637 continue; 7638 } 7639 assert(Op.isImm()); 7640 7641 // Handle optional arguments 7642 OptionalIdx[Op.getImmTy()] = i; 7643 } 7644 7645 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7646 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7647 7648 if (!IsLds) { // tfe is not legal with lds opcodes 7649 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7650 } 7651 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7652 } 7653 7654 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7655 OptionalImmIndexMap OptionalIdx; 7656 7657 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7658 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7659 7660 // Add the register arguments 7661 if (Op.isReg()) { 7662 Op.addRegOperands(Inst, 1); 7663 continue; 7664 } 7665 7666 // Handle the case where soffset is an immediate 7667 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7668 Op.addImmOperands(Inst, 1); 7669 continue; 7670 } 7671 7672 // Handle tokens like 'offen' which are sometimes hard-coded into the 7673 // asm string. There are no MCInst operands for these. 7674 if (Op.isToken()) { 7675 continue; 7676 } 7677 assert(Op.isImm()); 7678 7679 // Handle optional arguments 7680 OptionalIdx[Op.getImmTy()] = i; 7681 } 7682 7683 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7684 AMDGPUOperand::ImmTyOffset); 7685 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7687 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7688 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7689 } 7690 7691 //===----------------------------------------------------------------------===// 7692 // mimg 7693 //===----------------------------------------------------------------------===// 7694 7695 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7696 bool IsAtomic) { 7697 unsigned I = 1; 7698 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7699 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7700 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7701 } 7702 7703 if (IsAtomic) { 7704 // Add src, same as dst 7705 assert(Desc.getNumDefs() == 1); 7706 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7707 } 7708 7709 OptionalImmIndexMap OptionalIdx; 7710 7711 for (unsigned E = Operands.size(); I != E; ++I) { 7712 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7713 7714 // Add the register arguments 7715 if (Op.isReg()) { 7716 Op.addRegOperands(Inst, 1); 7717 } else if (Op.isImmModifier()) { 7718 OptionalIdx[Op.getImmTy()] = I; 7719 } else if (!Op.isToken()) { 7720 llvm_unreachable("unexpected operand type"); 7721 } 7722 } 7723 7724 bool IsGFX10Plus = isGFX10Plus(); 7725 7726 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7727 if (IsGFX10Plus) 7728 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7729 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7730 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7731 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7732 if (IsGFX10Plus) 7733 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7734 
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7735 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7736 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7737 if (!IsGFX10Plus) 7738 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7739 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7740 } 7741 7742 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7743 cvtMIMG(Inst, Operands, true); 7744 } 7745 7746 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7747 OptionalImmIndexMap OptionalIdx; 7748 bool IsAtomicReturn = false; 7749 7750 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7751 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7752 if (!Op.isCPol()) 7753 continue; 7754 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7755 break; 7756 } 7757 7758 if (!IsAtomicReturn) { 7759 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7760 if (NewOpc != -1) 7761 Inst.setOpcode(NewOpc); 7762 } 7763 7764 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7765 SIInstrFlags::IsAtomicRet; 7766 7767 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7768 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7769 7770 // Add the register arguments 7771 if (Op.isReg()) { 7772 Op.addRegOperands(Inst, 1); 7773 if (IsAtomicReturn && i == 1) 7774 Op.addRegOperands(Inst, 1); 7775 continue; 7776 } 7777 7778 // Handle the case where soffset is an immediate 7779 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7780 Op.addImmOperands(Inst, 1); 7781 continue; 7782 } 7783 7784 // Handle tokens like 'offen' which are sometimes hard-coded into the 7785 // asm string. There are no MCInst operands for these. 7786 if (Op.isToken()) { 7787 continue; 7788 } 7789 assert(Op.isImm()); 7790 7791 // Handle optional arguments 7792 OptionalIdx[Op.getImmTy()] = i; 7793 } 7794 7795 if ((int)Inst.getNumOperands() <= 7796 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7797 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7798 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7799 } 7800 7801 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7802 const OperandVector &Operands) { 7803 for (unsigned I = 1; I < Operands.size(); ++I) { 7804 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7805 if (Operand.isReg()) 7806 Operand.addRegOperands(Inst, 1); 7807 } 7808 7809 Inst.addOperand(MCOperand::createImm(1)); // a16 7810 } 7811 7812 //===----------------------------------------------------------------------===// 7813 // smrd 7814 //===----------------------------------------------------------------------===// 7815 7816 bool AMDGPUOperand::isSMRDOffset8() const { 7817 return isImm() && isUInt<8>(getImm()); 7818 } 7819 7820 bool AMDGPUOperand::isSMEMOffset() const { 7821 return isImmTy(ImmTyNone) || 7822 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7823 } 7824 7825 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7826 // 32-bit literals are only supported on CI and we only want to use them 7827 // when the offset is > 8-bits. 
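// For illustration (register numbers are arbitrary): on CI,
// 's_load_dword s5, s[2:3], 0x1234' would use the literal-offset form,
// since 0x1234 does not fit in 8 bits but does fit in 32 bits.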
7828 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7829 } 7830 7831 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7832 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7833 } 7834 7835 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7836 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7837 } 7838 7839 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7840 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7841 } 7842 7843 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7844 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7845 } 7846 7847 //===----------------------------------------------------------------------===// 7848 // vop3 7849 //===----------------------------------------------------------------------===// 7850 7851 static bool ConvertOmodMul(int64_t &Mul) { 7852 if (Mul != 1 && Mul != 2 && Mul != 4) 7853 return false; 7854 7855 Mul >>= 1; 7856 return true; 7857 } 7858 7859 static bool ConvertOmodDiv(int64_t &Div) { 7860 if (Div == 1) { 7861 Div = 0; 7862 return true; 7863 } 7864 7865 if (Div == 2) { 7866 Div = 3; 7867 return true; 7868 } 7869 7870 return false; 7871 } 7872 7873 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7874 // This is intentional and ensures compatibility with sp3. 7875 // See bug 35397 for details. 7876 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7877 if (BoundCtrl == 0 || BoundCtrl == 1) { 7878 BoundCtrl = 1; 7879 return true; 7880 } 7881 return false; 7882 } 7883 7884 // Note: the order in this table matches the order of operands in AsmString. 7885 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7886 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7887 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7888 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7889 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7890 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7891 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7892 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7893 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7894 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7895 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7896 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7897 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7898 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7899 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7900 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7901 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7902 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7903 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7904 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7905 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7906 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7907 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7908 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7909 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7910 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7911 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7912 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7913 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7914 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7915 {"vm", AMDGPUOperand::ImmTyExpVM, 
true, nullptr}, 7916 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7917 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7918 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7919 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7920 {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr}, 7921 {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr}, 7922 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7923 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7924 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7925 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7926 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7927 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7928 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, 7929 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, 7930 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} 7931 }; 7932 7933 void AMDGPUAsmParser::onBeginOfFile() { 7934 if (!getParser().getStreamer().getTargetStreamer() || 7935 getSTI().getTargetTriple().getArch() == Triple::r600) 7936 return; 7937 7938 if (!getTargetStreamer().getTargetID()) 7939 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7940 7941 if (isHsaAbiVersion3AndAbove(&getSTI())) 7942 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7943 } 7944 7945 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7946 7947 OperandMatchResultTy res = parseOptionalOpr(Operands); 7948 7949 // This is a hack to enable hardcoded mandatory operands which follow 7950 // optional operands. 7951 // 7952 // Current design assumes that all operands after the first optional operand 7953 // are also optional. However implementation of some instructions violates 7954 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7955 // 7956 // To alleviate this problem, we have to (implicitly) parse extra operands 7957 // to make sure autogenerated parser of custom operands never hit hardcoded 7958 // mandatory operands. 
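// Example of the problem described above (assumed syntax): in
// 'flat_atomic_add v0, v[1:2], v3 offset:16 glc' the 'glc' token is part of
// the asm string but follows the optional 'offset' operand, so the lookahead
// below must consume 'offset' before the matcher reaches 'glc'.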
7959 7960 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7961 if (res != MatchOperand_Success || 7962 isToken(AsmToken::EndOfStatement)) 7963 break; 7964 7965 trySkipToken(AsmToken::Comma); 7966 res = parseOptionalOpr(Operands); 7967 } 7968 7969 return res; 7970 } 7971 7972 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7973 OperandMatchResultTy res; 7974 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7975 // try to parse any optional operand here 7976 if (Op.IsBit) { 7977 res = parseNamedBit(Op.Name, Operands, Op.Type); 7978 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7979 res = parseOModOperand(Operands); 7980 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7981 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7982 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7983 res = parseSDWASel(Operands, Op.Name, Op.Type); 7984 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7985 res = parseSDWADstUnused(Operands); 7986 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7987 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7988 Op.Type == AMDGPUOperand::ImmTyNegLo || 7989 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7990 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7991 Op.ConvertResult); 7992 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7993 res = parseDim(Operands); 7994 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7995 res = parseCPol(Operands); 7996 } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) { 7997 res = parseDPP8(Operands); 7998 } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { 7999 res = parseDPPCtrl(Operands); 8000 } else { 8001 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 8002 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 8003 res = parseOperandArrayWithPrefix("neg", Operands, 8004 AMDGPUOperand::ImmTyBLGP, 8005 nullptr); 8006 } 8007 } 8008 if (res != MatchOperand_NoMatch) { 8009 return res; 8010 } 8011 } 8012 return MatchOperand_NoMatch; 8013 } 8014 8015 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 8016 StringRef Name = getTokenStr(); 8017 if (Name == "mul") { 8018 return parseIntWithPrefix("mul", Operands, 8019 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8020 } 8021 8022 if (Name == "div") { 8023 return parseIntWithPrefix("div", Operands, 8024 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8025 } 8026 8027 return MatchOperand_NoMatch; 8028 } 8029 8030 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to 8031 // the number of src operands present, then copies that bit into src0_modifiers. 
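// For instance, a 2-src opcode keeps DST_OP_SEL in op_sel bit 2 and a 3-src
// opcode keeps it in bit 3, mirroring the (1 << SrcNum) test below.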
8032 void cvtVOP3DstOpSelOnly(MCInst &Inst) { 8033 int Opc = Inst.getOpcode(); 8034 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8035 if (OpSelIdx == -1) 8036 return; 8037 8038 int SrcNum; 8039 const int Ops[] = { AMDGPU::OpName::src0, 8040 AMDGPU::OpName::src1, 8041 AMDGPU::OpName::src2 }; 8042 for (SrcNum = 0; 8043 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 8044 ++SrcNum); 8045 assert(SrcNum > 0); 8046 8047 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8048 8049 if ((OpSel & (1 << SrcNum)) != 0) { 8050 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8051 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8052 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8053 } 8054 } 8055 8056 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, 8057 const OperandVector &Operands) { 8058 cvtVOP3P(Inst, Operands); 8059 cvtVOP3DstOpSelOnly(Inst); 8060 } 8061 8062 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 8063 OptionalImmIndexMap &OptionalIdx) { 8064 cvtVOP3P(Inst, Operands, OptionalIdx); 8065 cvtVOP3DstOpSelOnly(Inst); 8066 } 8067 8068 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8069 // 1. This operand is input modifiers 8070 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8071 // 2. This is not last operand 8072 && Desc.NumOperands > (OpNum + 1) 8073 // 3. Next operand is register class 8074 && Desc.OpInfo[OpNum + 1].RegClass != -1 8075 // 4. Next register is not tied to any other operand 8076 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 8077 } 8078 8079 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8080 { 8081 OptionalImmIndexMap OptionalIdx; 8082 unsigned Opc = Inst.getOpcode(); 8083 8084 unsigned I = 1; 8085 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8086 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8087 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8088 } 8089 8090 for (unsigned E = Operands.size(); I != E; ++I) { 8091 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8092 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8093 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8094 } else if (Op.isInterpSlot() || 8095 Op.isInterpAttr() || 8096 Op.isAttrChan()) { 8097 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8098 } else if (Op.isImmModifier()) { 8099 OptionalIdx[Op.getImmTy()] = I; 8100 } else { 8101 llvm_unreachable("unhandled operand type"); 8102 } 8103 } 8104 8105 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8106 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8107 } 8108 8109 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8110 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8111 } 8112 8113 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8115 } 8116 } 8117 8118 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8119 { 8120 OptionalImmIndexMap OptionalIdx; 8121 unsigned Opc = Inst.getOpcode(); 8122 8123 unsigned I = 1; 8124 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8125 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8126 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8127 } 8128 8129 for (unsigned E = Operands.size(); I != E; 
++I) { 8130 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8131 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8132 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8133 } else if (Op.isImmModifier()) { 8134 OptionalIdx[Op.getImmTy()] = I; 8135 } else { 8136 llvm_unreachable("unhandled operand type"); 8137 } 8138 } 8139 8140 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8141 8142 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8143 if (OpSelIdx != -1) 8144 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8145 8146 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8147 8148 if (OpSelIdx == -1) 8149 return; 8150 8151 const int Ops[] = { AMDGPU::OpName::src0, 8152 AMDGPU::OpName::src1, 8153 AMDGPU::OpName::src2 }; 8154 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8155 AMDGPU::OpName::src1_modifiers, 8156 AMDGPU::OpName::src2_modifiers }; 8157 8158 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8159 8160 for (int J = 0; J < 3; ++J) { 8161 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8162 if (OpIdx == -1) 8163 break; 8164 8165 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8166 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8167 8168 if ((OpSel & (1 << J)) != 0) 8169 ModVal |= SISrcMods::OP_SEL_0; 8170 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8171 (OpSel & (1 << 3)) != 0) 8172 ModVal |= SISrcMods::DST_OP_SEL; 8173 8174 Inst.getOperand(ModIdx).setImm(ModVal); 8175 } 8176 } 8177 8178 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8179 OptionalImmIndexMap &OptionalIdx) { 8180 unsigned Opc = Inst.getOpcode(); 8181 8182 unsigned I = 1; 8183 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8184 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8185 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8186 } 8187 8188 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8189 // This instruction has src modifiers 8190 for (unsigned E = Operands.size(); I != E; ++I) { 8191 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8192 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8193 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8194 } else if (Op.isImmModifier()) { 8195 OptionalIdx[Op.getImmTy()] = I; 8196 } else if (Op.isRegOrImm()) { 8197 Op.addRegOrImmOperands(Inst, 1); 8198 } else { 8199 llvm_unreachable("unhandled operand type"); 8200 } 8201 } 8202 } else { 8203 // No src modifiers 8204 for (unsigned E = Operands.size(); I != E; ++I) { 8205 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8206 if (Op.isMod()) { 8207 OptionalIdx[Op.getImmTy()] = I; 8208 } else { 8209 Op.addRegOrImmOperands(Inst, 1); 8210 } 8211 } 8212 } 8213 8214 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8215 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8216 } 8217 8218 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8219 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8220 } 8221 8222 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8223 // it has src2 register operand that is tied to dst operand 8224 // we don't allow modifiers for this operand in assembler so src2_modifiers 8225 // should be 0. 
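// For example, 'v_mac_f32_e64 v1, v2, v3' gets an implicit src2 equal to the
// destination (v1) with zeroed src2_modifiers, as inserted below.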
8226 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8227 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8228 Opc == AMDGPU::V_MAC_F32_e64_vi || 8229 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8230 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8231 Opc == AMDGPU::V_MAC_F16_e64_vi || 8232 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8233 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8234 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || 8235 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8236 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8237 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || 8238 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || 8239 Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { 8240 auto it = Inst.begin(); 8241 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8242 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8243 ++it; 8244 // Copy the operand to ensure it's not invalidated when Inst grows. 8245 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8246 } 8247 } 8248 8249 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8250 OptionalImmIndexMap OptionalIdx; 8251 cvtVOP3(Inst, Operands, OptionalIdx); 8252 } 8253 8254 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8255 OptionalImmIndexMap &OptIdx) { 8256 const int Opc = Inst.getOpcode(); 8257 const MCInstrDesc &Desc = MII.get(Opc); 8258 8259 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8260 8261 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || 8262 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) { 8263 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods 8264 Inst.addOperand(Inst.getOperand(0)); 8265 } 8266 8267 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8268 assert(!IsPacked); 8269 Inst.addOperand(Inst.getOperand(0)); 8270 } 8271 8272 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8273 // instruction, and then figure out where to actually put the modifiers 8274 8275 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8276 if (OpSelIdx != -1) { 8277 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8278 } 8279 8280 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8281 if (OpSelHiIdx != -1) { 8282 int DefaultVal = IsPacked ? 
-1 : 0; 8283 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8284 DefaultVal); 8285 } 8286 8287 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8288 if (NegLoIdx != -1) { 8289 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8290 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8291 } 8292 8293 const int Ops[] = { AMDGPU::OpName::src0, 8294 AMDGPU::OpName::src1, 8295 AMDGPU::OpName::src2 }; 8296 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8297 AMDGPU::OpName::src1_modifiers, 8298 AMDGPU::OpName::src2_modifiers }; 8299 8300 unsigned OpSel = 0; 8301 unsigned OpSelHi = 0; 8302 unsigned NegLo = 0; 8303 unsigned NegHi = 0; 8304 8305 if (OpSelIdx != -1) 8306 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8307 8308 if (OpSelHiIdx != -1) 8309 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8310 8311 if (NegLoIdx != -1) { 8312 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8313 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8314 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8315 } 8316 8317 for (int J = 0; J < 3; ++J) { 8318 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8319 if (OpIdx == -1) 8320 break; 8321 8322 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8323 8324 if (ModIdx == -1) 8325 continue; 8326 8327 uint32_t ModVal = 0; 8328 8329 if ((OpSel & (1 << J)) != 0) 8330 ModVal |= SISrcMods::OP_SEL_0; 8331 8332 if ((OpSelHi & (1 << J)) != 0) 8333 ModVal |= SISrcMods::OP_SEL_1; 8334 8335 if ((NegLo & (1 << J)) != 0) 8336 ModVal |= SISrcMods::NEG; 8337 8338 if ((NegHi & (1 << J)) != 0) 8339 ModVal |= SISrcMods::NEG_HI; 8340 8341 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8342 } 8343 } 8344 8345 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8346 OptionalImmIndexMap OptIdx; 8347 cvtVOP3(Inst, Operands, OptIdx); 8348 cvtVOP3P(Inst, Operands, OptIdx); 8349 } 8350 8351 //===----------------------------------------------------------------------===// 8352 // VOPD 8353 //===----------------------------------------------------------------------===// 8354 8355 OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { 8356 if (!hasVOPD(getSTI())) 8357 return MatchOperand_NoMatch; 8358 8359 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { 8360 SMLoc S = getLoc(); 8361 lex(); 8362 lex(); 8363 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); 8364 const MCExpr *Expr; 8365 if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) { 8366 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 8367 return MatchOperand_Success; 8368 } 8369 Error(S, "invalid VOPD :: usage"); 8370 return MatchOperand_ParseFail; 8371 } 8372 return MatchOperand_NoMatch; 8373 } 8374 8375 // Create VOPD MCInst operands using parsed assembler operands. 
8376 // Parsed VOPD operands are ordered as follows: 8377 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' 8378 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] 8379 // If both OpX and OpY have an imm, the first imm has a different name: 8380 // OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::' 8381 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] 8382 // MCInst operands have the following order: 8383 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 8384 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { 8385 auto addOp = [&](uint16_t i) { // NOLINT:function pointer 8386 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8387 if (Op.isReg()) { 8388 Op.addRegOperands(Inst, 1); 8389 return; 8390 } 8391 if (Op.isImm()) { 8392 Op.addImmOperands(Inst, 1); 8393 return; 8394 } 8395 // Handle tokens like 'offen' which are sometimes hard-coded into the 8396 // asm string. There are no MCInst operands for these. 8397 if (Op.isToken()) { 8398 return; 8399 } 8400 llvm_unreachable("Unhandled operand type in cvtVOPD"); 8401 }; 8402 8403 // Indices into MCInst.Operands 8404 const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ... 8405 const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ... 8406 const auto MinOpYImmMCIndex = 4; // dstX, dstY, src0X, src0Y, imm, ... 8407 8408 unsigned Opc = Inst.getOpcode(); 8409 bool HasVsrc1X = 8410 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1; 8411 bool HasImmX = 8412 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || 8413 (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == 8414 FmamkOpXImmMCIndex || 8415 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == 8416 FmaakOpXImmMCIndex)); 8417 8418 bool HasVsrc1Y = 8419 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1; 8420 bool HasImmY = 8421 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || 8422 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >= 8423 MinOpYImmMCIndex + HasVsrc1X; 8424 8425 // Indices of parsed operands relative to dst 8426 const auto DstIdx = 0; 8427 const auto Src0Idx = 1; 8428 const auto Vsrc1OrImmIdx = 2; 8429 8430 const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X; 8431 const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo) 8432 8433 // Offsets into parsed operands 8434 const auto OpXFirstOperandOffset = 1; 8435 const auto OpYFirstOperandOffset = 8436 OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize; 8437 8438 // Order of addOp calls determines MC operand order 8439 addOp(OpXFirstOperandOffset + DstIdx); // vdstX 8440 addOp(OpYFirstOperandOffset + DstIdx); // vdstY 8441 8442 addOp(OpXFirstOperandOffset + Src0Idx); // src0X 8443 if (HasImmX) { 8444 // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak 8445 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); 8446 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1); 8447 } else { 8448 if (HasVsrc1X) // all except v_mov 8449 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X 8450 } 8451 8452 addOp(OpYFirstOperandOffset + Src0Idx); // src0Y 8453 if (HasImmY) { 8454 // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak 8455 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); 8456 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1); 8457 } else { 8458 if (HasVsrc1Y) // all except v_mov 8459 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y 8460 } 8461 } 8462 8463 
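// Worked example for cvtVOPD above (GFX11 dual-issue mnemonics, shown for
// illustration only): for
//   v_dual_fmamk_f32 v4, v0, 0x3f800000, v1 :: v_dual_mov_b32 v5, v2
// the MC operands are emitted as v4, v5, v0, 0x3f800000, v1, v2.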
//===----------------------------------------------------------------------===// 8464 // dpp 8465 //===----------------------------------------------------------------------===// 8466 8467 bool AMDGPUOperand::isDPP8() const { 8468 return isImmTy(ImmTyDPP8); 8469 } 8470 8471 bool AMDGPUOperand::isDPPCtrl() const { 8472 using namespace AMDGPU::DPP; 8473 8474 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8475 if (result) { 8476 int64_t Imm = getImm(); 8477 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8478 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8479 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8480 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8481 (Imm == DppCtrl::WAVE_SHL1) || 8482 (Imm == DppCtrl::WAVE_ROL1) || 8483 (Imm == DppCtrl::WAVE_SHR1) || 8484 (Imm == DppCtrl::WAVE_ROR1) || 8485 (Imm == DppCtrl::ROW_MIRROR) || 8486 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8487 (Imm == DppCtrl::BCAST15) || 8488 (Imm == DppCtrl::BCAST31) || 8489 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8490 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8491 } 8492 return false; 8493 } 8494 8495 //===----------------------------------------------------------------------===// 8496 // mAI 8497 //===----------------------------------------------------------------------===// 8498 8499 bool AMDGPUOperand::isBLGP() const { 8500 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8501 } 8502 8503 bool AMDGPUOperand::isCBSZ() const { 8504 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8505 } 8506 8507 bool AMDGPUOperand::isABID() const { 8508 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8509 } 8510 8511 bool AMDGPUOperand::isS16Imm() const { 8512 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8513 } 8514 8515 bool AMDGPUOperand::isU16Imm() const { 8516 return isImm() && isUInt<16>(getImm()); 8517 } 8518 8519 //===----------------------------------------------------------------------===// 8520 // dim 8521 //===----------------------------------------------------------------------===// 8522 8523 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8524 // We want to allow "dim:1D" etc., 8525 // but the initial 1 is tokenized as an integer. 
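// e.g. 'dim:2D_MSAA_ARRAY' (the leading '2' arrives as an integer token),
// or the full form 'dim:SQ_RSRC_IMG_2D_MSAA_ARRAY'.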
8526 std::string Token; 8527 if (isToken(AsmToken::Integer)) { 8528 SMLoc Loc = getToken().getEndLoc(); 8529 Token = std::string(getTokenStr()); 8530 lex(); 8531 if (getLoc() != Loc) 8532 return false; 8533 } 8534 8535 StringRef Suffix; 8536 if (!parseId(Suffix)) 8537 return false; 8538 Token += Suffix; 8539 8540 StringRef DimId = Token; 8541 if (DimId.startswith("SQ_RSRC_IMG_")) 8542 DimId = DimId.drop_front(12); 8543 8544 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8545 if (!DimInfo) 8546 return false; 8547 8548 Encoding = DimInfo->Encoding; 8549 return true; 8550 } 8551 8552 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8553 if (!isGFX10Plus()) 8554 return MatchOperand_NoMatch; 8555 8556 SMLoc S = getLoc(); 8557 8558 if (!trySkipId("dim", AsmToken::Colon)) 8559 return MatchOperand_NoMatch; 8560 8561 unsigned Encoding; 8562 SMLoc Loc = getLoc(); 8563 if (!parseDimId(Encoding)) { 8564 Error(Loc, "invalid dim value"); 8565 return MatchOperand_ParseFail; 8566 } 8567 8568 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8569 AMDGPUOperand::ImmTyDim)); 8570 return MatchOperand_Success; 8571 } 8572 8573 //===----------------------------------------------------------------------===// 8574 // dpp 8575 //===----------------------------------------------------------------------===// 8576 8577 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8578 SMLoc S = getLoc(); 8579 8580 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8581 return MatchOperand_NoMatch; 8582 8583 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8584 8585 int64_t Sels[8]; 8586 8587 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8588 return MatchOperand_ParseFail; 8589 8590 for (size_t i = 0; i < 8; ++i) { 8591 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8592 return MatchOperand_ParseFail; 8593 8594 SMLoc Loc = getLoc(); 8595 if (getParser().parseAbsoluteExpression(Sels[i])) 8596 return MatchOperand_ParseFail; 8597 if (0 > Sels[i] || 7 < Sels[i]) { 8598 Error(Loc, "expected a 3-bit value"); 8599 return MatchOperand_ParseFail; 8600 } 8601 } 8602 8603 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8604 return MatchOperand_ParseFail; 8605 8606 unsigned DPP8 = 0; 8607 for (size_t i = 0; i < 8; ++i) 8608 DPP8 |= (Sels[i] << (i * 3)); 8609 8610 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8611 return MatchOperand_Success; 8612 } 8613 8614 bool 8615 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8616 const OperandVector &Operands) { 8617 if (Ctrl == "row_newbcast") 8618 return isGFX90A(); 8619 8620 if (Ctrl == "row_share" || 8621 Ctrl == "row_xmask") 8622 return isGFX10Plus(); 8623 8624 if (Ctrl == "wave_shl" || 8625 Ctrl == "wave_shr" || 8626 Ctrl == "wave_rol" || 8627 Ctrl == "wave_ror" || 8628 Ctrl == "row_bcast") 8629 return isVI() || isGFX9(); 8630 8631 return Ctrl == "row_mirror" || 8632 Ctrl == "row_half_mirror" || 8633 Ctrl == "quad_perm" || 8634 Ctrl == "row_shl" || 8635 Ctrl == "row_shr" || 8636 Ctrl == "row_ror"; 8637 } 8638 8639 int64_t 8640 AMDGPUAsmParser::parseDPPCtrlPerm() { 8641 // quad_perm:[%d,%d,%d,%d] 8642 8643 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8644 return -1; 8645 8646 int64_t Val = 0; 8647 for (int i = 0; i < 4; ++i) { 8648 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8649 return -1; 8650 8651 int64_t Temp; 8652 SMLoc Loc = getLoc(); 8653 if 
(getParser().parseAbsoluteExpression(Temp)) 8654 return -1; 8655 if (Temp < 0 || Temp > 3) { 8656 Error(Loc, "expected a 2-bit value"); 8657 return -1; 8658 } 8659 8660 Val += (Temp << i * 2); 8661 } 8662 8663 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8664 return -1; 8665 8666 return Val; 8667 } 8668 8669 int64_t 8670 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8671 using namespace AMDGPU::DPP; 8672 8673 // sel:%d 8674 8675 int64_t Val; 8676 SMLoc Loc = getLoc(); 8677 8678 if (getParser().parseAbsoluteExpression(Val)) 8679 return -1; 8680 8681 struct DppCtrlCheck { 8682 int64_t Ctrl; 8683 int Lo; 8684 int Hi; 8685 }; 8686 8687 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8688 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8689 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8690 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8691 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8692 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8693 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8694 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8695 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8696 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8697 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8698 .Default({-1, 0, 0}); 8699 8700 bool Valid; 8701 if (Check.Ctrl == -1) { 8702 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8703 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8704 } else { 8705 Valid = Check.Lo <= Val && Val <= Check.Hi; 8706 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 8707 } 8708 8709 if (!Valid) { 8710 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8711 return -1; 8712 } 8713 8714 return Val; 8715 } 8716 8717 OperandMatchResultTy 8718 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8719 using namespace AMDGPU::DPP; 8720 8721 if (!isToken(AsmToken::Identifier) || 8722 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8723 return MatchOperand_NoMatch; 8724 8725 SMLoc S = getLoc(); 8726 int64_t Val = -1; 8727 StringRef Ctrl; 8728 8729 parseId(Ctrl); 8730 8731 if (Ctrl == "row_mirror") { 8732 Val = DppCtrl::ROW_MIRROR; 8733 } else if (Ctrl == "row_half_mirror") { 8734 Val = DppCtrl::ROW_HALF_MIRROR; 8735 } else { 8736 if (skipToken(AsmToken::Colon, "expected a colon")) { 8737 if (Ctrl == "quad_perm") { 8738 Val = parseDPPCtrlPerm(); 8739 } else { 8740 Val = parseDPPCtrlSel(Ctrl); 8741 } 8742 } 8743 } 8744 8745 if (Val == -1) 8746 return MatchOperand_ParseFail; 8747 8748 Operands.push_back( 8749 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8750 return MatchOperand_Success; 8751 } 8752 8753 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8754 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8755 } 8756 8757 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8758 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8759 } 8760 8761 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8762 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8763 } 8764 8765 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8766 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8767 } 8768 8769 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8770 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8771 } 8772 8773 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, 
const OperandVector &Operands, bool IsDPP8) { 8774 OptionalImmIndexMap OptionalIdx; 8775 unsigned Opc = Inst.getOpcode(); 8776 bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8777 unsigned I = 1; 8778 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8779 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8780 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8781 } 8782 8783 int Fi = 0; 8784 for (unsigned E = Operands.size(); I != E; ++I) { 8785 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8786 MCOI::TIED_TO); 8787 if (TiedTo != -1) { 8788 assert((unsigned)TiedTo < Inst.getNumOperands()); 8789 // handle tied old or src2 for MAC instructions 8790 Inst.addOperand(Inst.getOperand(TiedTo)); 8791 } 8792 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8793 // Add the register arguments 8794 if (IsDPP8 && Op.isFI()) { 8795 Fi = Op.getImm(); 8796 } else if (HasModifiers && 8797 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8798 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8799 } else if (Op.isReg()) { 8800 Op.addRegOperands(Inst, 1); 8801 } else if (Op.isImm() && 8802 Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) { 8803 assert(!HasModifiers && "Case should be unreachable with modifiers"); 8804 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8805 Op.addImmOperands(Inst, 1); 8806 } else if (Op.isImm()) { 8807 OptionalIdx[Op.getImmTy()] = I; 8808 } else { 8809 llvm_unreachable("unhandled operand type"); 8810 } 8811 } 8812 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8813 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8814 } 8815 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8816 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8817 } 8818 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8819 cvtVOP3P(Inst, Operands, OptionalIdx); 8820 else if (Desc.TSFlags & SIInstrFlags::VOP3) 8821 cvtVOP3OpSel(Inst, Operands, OptionalIdx); 8822 else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { 8823 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8824 } 8825 8826 if (IsDPP8) { 8827 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8828 using namespace llvm::AMDGPU::DPP; 8829 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8830 } else { 8831 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8832 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8833 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8834 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8835 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8836 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8837 } 8838 } 8839 } 8840 8841 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8842 OptionalImmIndexMap OptionalIdx; 8843 8844 unsigned Opc = Inst.getOpcode(); 8845 bool HasModifiers = 8846 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8847 unsigned I = 1; 8848 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8849 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8850 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8851 } 8852 8853 int Fi = 0; 8854 for (unsigned E = Operands.size(); I != E; ++I) { 8855 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8856 MCOI::TIED_TO); 8857 if (TiedTo != -1) { 8858 assert((unsigned)TiedTo < Inst.getNumOperands()); 8859 // handle tied old or src2 for MAC instructions 8860 Inst.addOperand(Inst.getOperand(TiedTo)); 8861 } 8862 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8863 // Add the register arguments 8864 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8865 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8866 // Skip it. 8867 continue; 8868 } 8869 8870 if (IsDPP8) { 8871 if (Op.isDPP8()) { 8872 Op.addImmOperands(Inst, 1); 8873 } else if (HasModifiers && 8874 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8875 Op.addRegWithFPInputModsOperands(Inst, 2); 8876 } else if (Op.isFI()) { 8877 Fi = Op.getImm(); 8878 } else if (Op.isReg()) { 8879 Op.addRegOperands(Inst, 1); 8880 } else { 8881 llvm_unreachable("Invalid operand type"); 8882 } 8883 } else { 8884 if (HasModifiers && 8885 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8886 Op.addRegWithFPInputModsOperands(Inst, 2); 8887 } else if (Op.isReg()) { 8888 Op.addRegOperands(Inst, 1); 8889 } else if (Op.isDPPCtrl()) { 8890 Op.addImmOperands(Inst, 1); 8891 } else if (Op.isImm()) { 8892 // Handle optional arguments 8893 OptionalIdx[Op.getImmTy()] = I; 8894 } else { 8895 llvm_unreachable("Invalid operand type"); 8896 } 8897 } 8898 } 8899 8900 if (IsDPP8) { 8901 using namespace llvm::AMDGPU::DPP; 8902 Inst.addOperand(MCOperand::createImm(Fi? 
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

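// Common worker for the cvtSdwa* converters above. BasicInstType selects the
// VOP1/VOP2/VOPC operand layout; SkipDstVcc/SkipSrcVcc skip the textual "vcc"
// operand carried by VOP2b/VOP2e forms, and any sdwa modifiers not written in
// the source are filled in with their default values.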
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::clamp) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClampSI, 0);
      }
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);
      }
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::dst_sel) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      }
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::dst_unused) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySdwaDstUnused,
                              DstUnused::UNUSED_PRESERVE);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

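// Default (zero) values for the MFMA modifiers blgp, cbsz and abid when they
// are not written in the source.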
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method handles the case where we were given an immediate
  // operand but the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
}

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
}

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}