//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr
  CreateToken(const AMDGPUAsmParser *AsmParser, StringRef Str, SMLoc Loc,
              bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with an integer type of equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
1795 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1796 } 1797 1798 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1799 1800 // This is a hack to enable named inline values like 1801 // shared_base with both 32-bit and 64-bit operands. 1802 // Note that these values are defined as 1803 // 32-bit operands only. 1804 if (isInlineValue()) { 1805 return true; 1806 } 1807 1808 if (!isImmTy(ImmTyNone)) { 1809 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1810 return false; 1811 } 1812 // TODO: We should avoid using host float here. It would be better to 1813 // check the float bit values which is what a few other places do. 1814 // We've had bot failures before due to weird NaN support on mips hosts. 1815 1816 APInt Literal(64, Imm.Val); 1817 1818 if (Imm.IsFPImm) { // We got fp literal token 1819 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1820 return AMDGPU::isInlinableLiteral64(Imm.Val, 1821 AsmParser->hasInv2PiInlineImm()); 1822 } 1823 1824 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1825 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1826 return false; 1827 1828 if (type.getScalarSizeInBits() == 16) { 1829 return isInlineableLiteralOp16( 1830 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1831 type, AsmParser->hasInv2PiInlineImm()); 1832 } 1833 1834 // Check if single precision literal is inlinable 1835 return AMDGPU::isInlinableLiteral32( 1836 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1837 AsmParser->hasInv2PiInlineImm()); 1838 } 1839 1840 // We got int literal token. 1841 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1842 return AMDGPU::isInlinableLiteral64(Imm.Val, 1843 AsmParser->hasInv2PiInlineImm()); 1844 } 1845 1846 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1847 return false; 1848 } 1849 1850 if (type.getScalarSizeInBits() == 16) { 1851 return isInlineableLiteralOp16( 1852 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1853 type, AsmParser->hasInv2PiInlineImm()); 1854 } 1855 1856 return AMDGPU::isInlinableLiteral32( 1857 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1858 AsmParser->hasInv2PiInlineImm()); 1859 } 1860 1861 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1862 // Check that this immediate can be added as literal 1863 if (!isImmTy(ImmTyNone)) { 1864 return false; 1865 } 1866 1867 if (!Imm.IsFPImm) { 1868 // We got int literal token. 1869 1870 if (type == MVT::f64 && hasFPModifiers()) { 1871 // Cannot apply fp modifiers to int literals preserving the same semantics 1872 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1873 // disable these cases. 1874 return false; 1875 } 1876 1877 unsigned Size = type.getSizeInBits(); 1878 if (Size == 64) 1879 Size = 32; 1880 1881 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1882 // types. 1883 return isSafeTruncation(Imm.Val, Size); 1884 } 1885 1886 // We got fp literal token 1887 if (type == MVT::f64) { // Expected 64-bit fp operand 1888 // We would set low 64-bits of literal to zeroes but we accept this literals 1889 return true; 1890 } 1891 1892 if (type == MVT::i64) { // Expected 64-bit int operand 1893 // We don't allow fp literals in 64-bit integer instructions. It is 1894 // unclear how we should encode them. 
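// For example (hypothetical input, not from a test): a 64-bit integer source
// such as the data operand of a 64-bit shift written as '2.0' reaches this
// path and simply fails to match, rather than being encoded with a guessed
// bit pattern.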
1895 return false; 1896 } 1897 1898 // We allow fp literals with f16x2 operands assuming that the specified 1899 // literal goes into the lower half and the upper half is zero. We also 1900 // require that the literal may be losslessly converted to f16. 1901 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1902 (type == MVT::v2i16)? MVT::i16 : 1903 (type == MVT::v2f32)? MVT::f32 : type; 1904 1905 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1906 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1907 } 1908 1909 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1910 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1911 } 1912 1913 bool AMDGPUOperand::isVRegWithInputMods() const { 1914 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1915 // GFX90A allows DPP on 64-bit operands. 1916 (isRegClass(AMDGPU::VReg_64RegClassID) && 1917 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1918 } 1919 1920 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1921 if (AsmParser->isVI()) 1922 return isVReg32(); 1923 else if (AsmParser->isGFX9Plus()) 1924 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1925 else 1926 return false; 1927 } 1928 1929 bool AMDGPUOperand::isSDWAFP16Operand() const { 1930 return isSDWAOperand(MVT::f16); 1931 } 1932 1933 bool AMDGPUOperand::isSDWAFP32Operand() const { 1934 return isSDWAOperand(MVT::f32); 1935 } 1936 1937 bool AMDGPUOperand::isSDWAInt16Operand() const { 1938 return isSDWAOperand(MVT::i16); 1939 } 1940 1941 bool AMDGPUOperand::isSDWAInt32Operand() const { 1942 return isSDWAOperand(MVT::i32); 1943 } 1944 1945 bool AMDGPUOperand::isBoolReg() const { 1946 auto FB = AsmParser->getFeatureBits(); 1947 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1948 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1949 } 1950 1951 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1952 { 1953 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1954 assert(Size == 2 || Size == 4 || Size == 8); 1955 1956 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1957 1958 if (Imm.Mods.Abs) { 1959 Val &= ~FpSignMask; 1960 } 1961 if (Imm.Mods.Neg) { 1962 Val ^= FpSignMask; 1963 } 1964 1965 return Val; 1966 } 1967 1968 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1969 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1970 Inst.getNumOperands())) { 1971 addLiteralImmOperand(Inst, Imm.Val, 1972 ApplyModifiers && 1973 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1974 } else { 1975 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1976 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1977 setImmKindNone(); 1978 } 1979 } 1980 1981 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1982 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1983 auto OpNum = Inst.getNumOperands(); 1984 // Check that this operand accepts literals 1985 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1986 1987 if (ApplyModifiers) { 1988 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1989 const unsigned Size = Imm.IsFPImm ?
sizeof(double) : getOperandSize(InstDesc, OpNum); 1990 Val = applyInputFPModifiers(Val, Size); 1991 } 1992 1993 APInt Literal(64, Val); 1994 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1995 1996 if (Imm.IsFPImm) { // We got fp literal token 1997 switch (OpTy) { 1998 case AMDGPU::OPERAND_REG_IMM_INT64: 1999 case AMDGPU::OPERAND_REG_IMM_FP64: 2000 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2001 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2002 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2003 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2004 AsmParser->hasInv2PiInlineImm())) { 2005 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2006 setImmKindConst(); 2007 return; 2008 } 2009 2010 // Non-inlineable 2011 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2012 // For fp operands we check if low 32 bits are zeros 2013 if (Literal.getLoBits(32) != 0) { 2014 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2015 "Can't encode literal as exact 64-bit floating-point operand. " 2016 "Low 32-bits will be set to zero"); 2017 } 2018 2019 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2020 setImmKindLiteral(); 2021 return; 2022 } 2023 2024 // We don't allow fp literals in 64-bit integer instructions. It is 2025 // unclear how we should encode them. This case should be checked earlier 2026 // in predicate methods (isLiteralImm()) 2027 llvm_unreachable("fp literal in 64-bit integer instruction."); 2028 2029 case AMDGPU::OPERAND_REG_IMM_INT32: 2030 case AMDGPU::OPERAND_REG_IMM_FP32: 2031 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2032 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2033 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2034 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2035 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2036 case AMDGPU::OPERAND_REG_IMM_INT16: 2037 case AMDGPU::OPERAND_REG_IMM_FP16: 2038 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2039 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2040 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2041 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2042 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2043 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2044 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2045 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2046 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2047 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2048 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2049 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2050 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2051 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2052 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2053 case AMDGPU::OPERAND_KIMM32: 2054 case AMDGPU::OPERAND_KIMM16: { 2055 bool lost; 2056 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2057 // Convert literal to single precision 2058 FPLiteral.convert(*getOpFltSemantics(OpTy), 2059 APFloat::rmNearestTiesToEven, &lost); 2060 // We allow precision lost but not overflow or underflow. This should be 2061 // checked earlier in isLiteralImm() 2062 2063 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2064 Inst.addOperand(MCOperand::createImm(ImmVal)); 2065 setImmKindLiteral(); 2066 return; 2067 } 2068 default: 2069 llvm_unreachable("invalid operand size"); 2070 } 2071 2072 return; 2073 } 2074 2075 // We got int literal token. 2076 // Only sign extend inline immediates. 
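// Worked example (illustrative): for a 32-bit operand, an integer token of -1
// passes the checks below and is emitted as the inline constant -1, while a
// token such as 0x12345678 is not inlinable and is emitted as a 32-bit
// literal via 'Val & 0xffffffff'.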
2077 switch (OpTy) { 2078 case AMDGPU::OPERAND_REG_IMM_INT32: 2079 case AMDGPU::OPERAND_REG_IMM_FP32: 2080 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2081 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2082 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2083 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2084 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2085 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2086 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2087 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2088 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2089 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2090 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2091 if (isSafeTruncation(Val, 32) && 2092 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2093 AsmParser->hasInv2PiInlineImm())) { 2094 Inst.addOperand(MCOperand::createImm(Val)); 2095 setImmKindConst(); 2096 return; 2097 } 2098 2099 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2100 setImmKindLiteral(); 2101 return; 2102 2103 case AMDGPU::OPERAND_REG_IMM_INT64: 2104 case AMDGPU::OPERAND_REG_IMM_FP64: 2105 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2106 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2107 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2108 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2109 Inst.addOperand(MCOperand::createImm(Val)); 2110 setImmKindConst(); 2111 return; 2112 } 2113 2114 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2115 setImmKindLiteral(); 2116 return; 2117 2118 case AMDGPU::OPERAND_REG_IMM_INT16: 2119 case AMDGPU::OPERAND_REG_IMM_FP16: 2120 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2121 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2122 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2123 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2124 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2125 if (isSafeTruncation(Val, 16) && 2126 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2127 AsmParser->hasInv2PiInlineImm())) { 2128 Inst.addOperand(MCOperand::createImm(Val)); 2129 setImmKindConst(); 2130 return; 2131 } 2132 2133 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2134 setImmKindLiteral(); 2135 return; 2136 2137 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2138 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2139 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2140 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2141 assert(isSafeTruncation(Val, 16)); 2142 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2143 AsmParser->hasInv2PiInlineImm())); 2144 2145 Inst.addOperand(MCOperand::createImm(Val)); 2146 return; 2147 } 2148 case AMDGPU::OPERAND_KIMM32: 2149 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2150 setImmKindNone(); 2151 return; 2152 case AMDGPU::OPERAND_KIMM16: 2153 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2154 setImmKindNone(); 2155 return; 2156 default: 2157 llvm_unreachable("invalid operand size"); 2158 } 2159 } 2160 2161 template <unsigned Bitwidth> 2162 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2163 APInt Literal(64, Imm.Val); 2164 setImmKindNone(); 2165 2166 if (!Imm.IsFPImm) { 2167 // We got int literal token. 
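// Illustrative sketch: for a 32-bit KImm operand (e.g. the constant of
// v_madmk_f32), an integer token such as 0x41200000 is passed through below
// via its low 32 bits, whereas an FP token such as 10.0 takes the conversion
// path further down and ends up encoded as the same 0x41200000.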
2168 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2169 return; 2170 } 2171 2172 bool Lost; 2173 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2174 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2175 APFloat::rmNearestTiesToEven, &Lost); 2176 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2177 } 2178 2179 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2180 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2181 } 2182 2183 static bool isInlineValue(unsigned Reg) { 2184 switch (Reg) { 2185 case AMDGPU::SRC_SHARED_BASE: 2186 case AMDGPU::SRC_SHARED_LIMIT: 2187 case AMDGPU::SRC_PRIVATE_BASE: 2188 case AMDGPU::SRC_PRIVATE_LIMIT: 2189 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2190 return true; 2191 case AMDGPU::SRC_VCCZ: 2192 case AMDGPU::SRC_EXECZ: 2193 case AMDGPU::SRC_SCC: 2194 return true; 2195 case AMDGPU::SGPR_NULL: 2196 return true; 2197 default: 2198 return false; 2199 } 2200 } 2201 2202 bool AMDGPUOperand::isInlineValue() const { 2203 return isRegKind() && ::isInlineValue(getReg()); 2204 } 2205 2206 //===----------------------------------------------------------------------===// 2207 // AsmParser 2208 //===----------------------------------------------------------------------===// 2209 2210 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2211 if (Is == IS_VGPR) { 2212 switch (RegWidth) { 2213 default: return -1; 2214 case 1: return AMDGPU::VGPR_32RegClassID; 2215 case 2: return AMDGPU::VReg_64RegClassID; 2216 case 3: return AMDGPU::VReg_96RegClassID; 2217 case 4: return AMDGPU::VReg_128RegClassID; 2218 case 5: return AMDGPU::VReg_160RegClassID; 2219 case 6: return AMDGPU::VReg_192RegClassID; 2220 case 7: return AMDGPU::VReg_224RegClassID; 2221 case 8: return AMDGPU::VReg_256RegClassID; 2222 case 16: return AMDGPU::VReg_512RegClassID; 2223 case 32: return AMDGPU::VReg_1024RegClassID; 2224 } 2225 } else if (Is == IS_TTMP) { 2226 switch (RegWidth) { 2227 default: return -1; 2228 case 1: return AMDGPU::TTMP_32RegClassID; 2229 case 2: return AMDGPU::TTMP_64RegClassID; 2230 case 4: return AMDGPU::TTMP_128RegClassID; 2231 case 8: return AMDGPU::TTMP_256RegClassID; 2232 case 16: return AMDGPU::TTMP_512RegClassID; 2233 } 2234 } else if (Is == IS_SGPR) { 2235 switch (RegWidth) { 2236 default: return -1; 2237 case 1: return AMDGPU::SGPR_32RegClassID; 2238 case 2: return AMDGPU::SGPR_64RegClassID; 2239 case 3: return AMDGPU::SGPR_96RegClassID; 2240 case 4: return AMDGPU::SGPR_128RegClassID; 2241 case 5: return AMDGPU::SGPR_160RegClassID; 2242 case 6: return AMDGPU::SGPR_192RegClassID; 2243 case 7: return AMDGPU::SGPR_224RegClassID; 2244 case 8: return AMDGPU::SGPR_256RegClassID; 2245 case 16: return AMDGPU::SGPR_512RegClassID; 2246 } 2247 } else if (Is == IS_AGPR) { 2248 switch (RegWidth) { 2249 default: return -1; 2250 case 1: return AMDGPU::AGPR_32RegClassID; 2251 case 2: return AMDGPU::AReg_64RegClassID; 2252 case 3: return AMDGPU::AReg_96RegClassID; 2253 case 4: return AMDGPU::AReg_128RegClassID; 2254 case 5: return AMDGPU::AReg_160RegClassID; 2255 case 6: return AMDGPU::AReg_192RegClassID; 2256 case 7: return AMDGPU::AReg_224RegClassID; 2257 case 8: return AMDGPU::AReg_256RegClassID; 2258 case 16: return AMDGPU::AReg_512RegClassID; 2259 case 32: return AMDGPU::AReg_1024RegClassID; 2260 } 2261 } 2262 return -1; 2263 } 2264 2265 static unsigned getSpecialRegForName(StringRef RegName) { 2266 return StringSwitch<unsigned>(RegName) 2267 .Case("exec", 
AMDGPU::EXEC) 2268 .Case("vcc", AMDGPU::VCC) 2269 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2270 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2271 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2272 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2273 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2274 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2275 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2276 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2277 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2278 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2279 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2280 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2281 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2282 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2283 .Case("m0", AMDGPU::M0) 2284 .Case("vccz", AMDGPU::SRC_VCCZ) 2285 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2286 .Case("execz", AMDGPU::SRC_EXECZ) 2287 .Case("src_execz", AMDGPU::SRC_EXECZ) 2288 .Case("scc", AMDGPU::SRC_SCC) 2289 .Case("src_scc", AMDGPU::SRC_SCC) 2290 .Case("tba", AMDGPU::TBA) 2291 .Case("tma", AMDGPU::TMA) 2292 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2293 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2294 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2295 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2296 .Case("vcc_lo", AMDGPU::VCC_LO) 2297 .Case("vcc_hi", AMDGPU::VCC_HI) 2298 .Case("exec_lo", AMDGPU::EXEC_LO) 2299 .Case("exec_hi", AMDGPU::EXEC_HI) 2300 .Case("tma_lo", AMDGPU::TMA_LO) 2301 .Case("tma_hi", AMDGPU::TMA_HI) 2302 .Case("tba_lo", AMDGPU::TBA_LO) 2303 .Case("tba_hi", AMDGPU::TBA_HI) 2304 .Case("pc", AMDGPU::PC_REG) 2305 .Case("null", AMDGPU::SGPR_NULL) 2306 .Default(AMDGPU::NoRegister); 2307 } 2308 2309 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2310 SMLoc &EndLoc, bool RestoreOnFailure) { 2311 auto R = parseRegister(); 2312 if (!R) return true; 2313 assert(R->isReg()); 2314 RegNo = R->getReg(); 2315 StartLoc = R->getStartLoc(); 2316 EndLoc = R->getEndLoc(); 2317 return false; 2318 } 2319 2320 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2321 SMLoc &EndLoc) { 2322 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2323 } 2324 2325 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2326 SMLoc &StartLoc, 2327 SMLoc &EndLoc) { 2328 bool Result = 2329 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2330 bool PendingErrors = getParser().hasPendingError(); 2331 getParser().clearPendingErrors(); 2332 if (PendingErrors) 2333 return MatchOperand_ParseFail; 2334 if (Result) 2335 return MatchOperand_NoMatch; 2336 return MatchOperand_Success; 2337 } 2338 2339 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2340 RegisterKind RegKind, unsigned Reg1, 2341 SMLoc Loc) { 2342 switch (RegKind) { 2343 case IS_SPECIAL: 2344 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2345 Reg = AMDGPU::EXEC; 2346 RegWidth = 2; 2347 return true; 2348 } 2349 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2350 Reg = AMDGPU::FLAT_SCR; 2351 RegWidth = 2; 2352 return true; 2353 } 2354 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2355 Reg = AMDGPU::XNACK_MASK; 2356 RegWidth = 2; 2357 return true; 2358 } 2359 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2360 Reg = AMDGPU::VCC; 2361 RegWidth = 2; 2362 return true; 2363 } 2364 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2365 Reg = 
AMDGPU::TBA; 2366 RegWidth = 2; 2367 return true; 2368 } 2369 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2370 Reg = AMDGPU::TMA; 2371 RegWidth = 2; 2372 return true; 2373 } 2374 Error(Loc, "register does not fit in the list"); 2375 return false; 2376 case IS_VGPR: 2377 case IS_SGPR: 2378 case IS_AGPR: 2379 case IS_TTMP: 2380 if (Reg1 != Reg + RegWidth) { 2381 Error(Loc, "registers in a list must have consecutive indices"); 2382 return false; 2383 } 2384 RegWidth++; 2385 return true; 2386 default: 2387 llvm_unreachable("unexpected register kind"); 2388 } 2389 } 2390 2391 struct RegInfo { 2392 StringLiteral Name; 2393 RegisterKind Kind; 2394 }; 2395 2396 static constexpr RegInfo RegularRegisters[] = { 2397 {{"v"}, IS_VGPR}, 2398 {{"s"}, IS_SGPR}, 2399 {{"ttmp"}, IS_TTMP}, 2400 {{"acc"}, IS_AGPR}, 2401 {{"a"}, IS_AGPR}, 2402 }; 2403 2404 static bool isRegularReg(RegisterKind Kind) { 2405 return Kind == IS_VGPR || 2406 Kind == IS_SGPR || 2407 Kind == IS_TTMP || 2408 Kind == IS_AGPR; 2409 } 2410 2411 static const RegInfo* getRegularRegInfo(StringRef Str) { 2412 for (const RegInfo &Reg : RegularRegisters) 2413 if (Str.startswith(Reg.Name)) 2414 return &Reg; 2415 return nullptr; 2416 } 2417 2418 static bool getRegNum(StringRef Str, unsigned& Num) { 2419 return !Str.getAsInteger(10, Num); 2420 } 2421 2422 bool 2423 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2424 const AsmToken &NextToken) const { 2425 2426 // A list of consecutive registers: [s0,s1,s2,s3] 2427 if (Token.is(AsmToken::LBrac)) 2428 return true; 2429 2430 if (!Token.is(AsmToken::Identifier)) 2431 return false; 2432 2433 // A single register like s0 or a range of registers like s[0:1] 2434 2435 StringRef Str = Token.getString(); 2436 const RegInfo *Reg = getRegularRegInfo(Str); 2437 if (Reg) { 2438 StringRef RegName = Reg->Name; 2439 StringRef RegSuffix = Str.substr(RegName.size()); 2440 if (!RegSuffix.empty()) { 2441 unsigned Num; 2442 // A single register with an index: rXX 2443 if (getRegNum(RegSuffix, Num)) 2444 return true; 2445 } else { 2446 // A range of registers: r[XX:YY]. 2447 if (NextToken.is(AsmToken::LBrac)) 2448 return true; 2449 } 2450 } 2451 2452 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2453 } 2454 2455 bool 2456 AMDGPUAsmParser::isRegister() 2457 { 2458 return isRegister(getToken(), peekToken()); 2459 } 2460 2461 unsigned 2462 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2463 unsigned RegNum, 2464 unsigned RegWidth, 2465 SMLoc Loc) { 2466 2467 assert(isRegularReg(RegKind)); 2468 2469 unsigned AlignSize = 1; 2470 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2471 // SGPR and TTMP registers must be aligned. 2472 // Max required alignment is 4 dwords. 
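// Illustrative examples: s[2:3] is accepted (a 64-bit pair must start at an
// even index), s[3:4] is rejected with 'invalid register alignment', and
// s[4:7] is accepted because groups of four or more dwords must start at a
// multiple of 4.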
2473 AlignSize = std::min(RegWidth, 4u); 2474 } 2475 2476 if (RegNum % AlignSize != 0) { 2477 Error(Loc, "invalid register alignment"); 2478 return AMDGPU::NoRegister; 2479 } 2480 2481 unsigned RegIdx = RegNum / AlignSize; 2482 int RCID = getRegClass(RegKind, RegWidth); 2483 if (RCID == -1) { 2484 Error(Loc, "invalid or unsupported register size"); 2485 return AMDGPU::NoRegister; 2486 } 2487 2488 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2489 const MCRegisterClass RC = TRI->getRegClass(RCID); 2490 if (RegIdx >= RC.getNumRegs()) { 2491 Error(Loc, "register index is out of range"); 2492 return AMDGPU::NoRegister; 2493 } 2494 2495 return RC.getRegister(RegIdx); 2496 } 2497 2498 bool 2499 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2500 int64_t RegLo, RegHi; 2501 if (!skipToken(AsmToken::LBrac, "missing register index")) 2502 return false; 2503 2504 SMLoc FirstIdxLoc = getLoc(); 2505 SMLoc SecondIdxLoc; 2506 2507 if (!parseExpr(RegLo)) 2508 return false; 2509 2510 if (trySkipToken(AsmToken::Colon)) { 2511 SecondIdxLoc = getLoc(); 2512 if (!parseExpr(RegHi)) 2513 return false; 2514 } else { 2515 RegHi = RegLo; 2516 } 2517 2518 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2519 return false; 2520 2521 if (!isUInt<32>(RegLo)) { 2522 Error(FirstIdxLoc, "invalid register index"); 2523 return false; 2524 } 2525 2526 if (!isUInt<32>(RegHi)) { 2527 Error(SecondIdxLoc, "invalid register index"); 2528 return false; 2529 } 2530 2531 if (RegLo > RegHi) { 2532 Error(FirstIdxLoc, "first register index should not exceed second index"); 2533 return false; 2534 } 2535 2536 Num = static_cast<unsigned>(RegLo); 2537 Width = (RegHi - RegLo) + 1; 2538 return true; 2539 } 2540 2541 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2542 unsigned &RegNum, unsigned &RegWidth, 2543 SmallVectorImpl<AsmToken> &Tokens) { 2544 assert(isToken(AsmToken::Identifier)); 2545 unsigned Reg = getSpecialRegForName(getTokenStr()); 2546 if (Reg) { 2547 RegNum = 0; 2548 RegWidth = 1; 2549 RegKind = IS_SPECIAL; 2550 Tokens.push_back(getToken()); 2551 lex(); // skip register name 2552 } 2553 return Reg; 2554 } 2555 2556 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2557 unsigned &RegNum, unsigned &RegWidth, 2558 SmallVectorImpl<AsmToken> &Tokens) { 2559 assert(isToken(AsmToken::Identifier)); 2560 StringRef RegName = getTokenStr(); 2561 auto Loc = getLoc(); 2562 2563 const RegInfo *RI = getRegularRegInfo(RegName); 2564 if (!RI) { 2565 Error(Loc, "invalid register name"); 2566 return AMDGPU::NoRegister; 2567 } 2568 2569 Tokens.push_back(getToken()); 2570 lex(); // skip register name 2571 2572 RegKind = RI->Kind; 2573 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2574 if (!RegSuffix.empty()) { 2575 // Single 32-bit register: vXX. 2576 if (!getRegNum(RegSuffix, RegNum)) { 2577 Error(Loc, "invalid register index"); 2578 return AMDGPU::NoRegister; 2579 } 2580 RegWidth = 1; 2581 } else { 2582 // Range of registers: v[XX:YY]. ":YY" is optional. 
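// For example, v[8:11] yields RegNum = 8 and RegWidth = 4, while v[5]
// (the ':YY' part omitted) yields RegNum = 5 and RegWidth = 1.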
2583 if (!ParseRegRange(RegNum, RegWidth)) 2584 return AMDGPU::NoRegister; 2585 } 2586 2587 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2588 } 2589 2590 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2591 unsigned &RegWidth, 2592 SmallVectorImpl<AsmToken> &Tokens) { 2593 unsigned Reg = AMDGPU::NoRegister; 2594 auto ListLoc = getLoc(); 2595 2596 if (!skipToken(AsmToken::LBrac, 2597 "expected a register or a list of registers")) { 2598 return AMDGPU::NoRegister; 2599 } 2600 2601 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2602 2603 auto Loc = getLoc(); 2604 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2605 return AMDGPU::NoRegister; 2606 if (RegWidth != 1) { 2607 Error(Loc, "expected a single 32-bit register"); 2608 return AMDGPU::NoRegister; 2609 } 2610 2611 for (; trySkipToken(AsmToken::Comma); ) { 2612 RegisterKind NextRegKind; 2613 unsigned NextReg, NextRegNum, NextRegWidth; 2614 Loc = getLoc(); 2615 2616 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2617 NextRegNum, NextRegWidth, 2618 Tokens)) { 2619 return AMDGPU::NoRegister; 2620 } 2621 if (NextRegWidth != 1) { 2622 Error(Loc, "expected a single 32-bit register"); 2623 return AMDGPU::NoRegister; 2624 } 2625 if (NextRegKind != RegKind) { 2626 Error(Loc, "registers in a list must be of the same kind"); 2627 return AMDGPU::NoRegister; 2628 } 2629 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2630 return AMDGPU::NoRegister; 2631 } 2632 2633 if (!skipToken(AsmToken::RBrac, 2634 "expected a comma or a closing square bracket")) { 2635 return AMDGPU::NoRegister; 2636 } 2637 2638 if (isRegularReg(RegKind)) 2639 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2640 2641 return Reg; 2642 } 2643 2644 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2645 unsigned &RegNum, unsigned &RegWidth, 2646 SmallVectorImpl<AsmToken> &Tokens) { 2647 auto Loc = getLoc(); 2648 Reg = AMDGPU::NoRegister; 2649 2650 if (isToken(AsmToken::Identifier)) { 2651 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2652 if (Reg == AMDGPU::NoRegister) 2653 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2654 } else { 2655 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2656 } 2657 2658 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2659 if (Reg == AMDGPU::NoRegister) { 2660 assert(Parser.hasPendingError()); 2661 return false; 2662 } 2663 2664 if (!subtargetHasRegister(*TRI, Reg)) { 2665 if (Reg == AMDGPU::SGPR_NULL) { 2666 Error(Loc, "'null' operand is not supported on this GPU"); 2667 } else { 2668 Error(Loc, "register not available on this GPU"); 2669 } 2670 return false; 2671 } 2672 2673 return true; 2674 } 2675 2676 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2677 unsigned &RegNum, unsigned &RegWidth, 2678 bool RestoreOnFailure /*=false*/) { 2679 Reg = AMDGPU::NoRegister; 2680 2681 SmallVector<AsmToken, 1> Tokens; 2682 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2683 if (RestoreOnFailure) { 2684 while (!Tokens.empty()) { 2685 getLexer().UnLex(Tokens.pop_back_val()); 2686 } 2687 } 2688 return true; 2689 } 2690 return false; 2691 } 2692 2693 Optional<StringRef> 2694 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2695 switch (RegKind) { 2696 case IS_VGPR: 2697 return StringRef(".amdgcn.next_free_vgpr"); 2698 case IS_SGPR: 2699 return StringRef(".amdgcn.next_free_sgpr"); 2700 default: 2701 return None; 2702 } 2703 } 2704 2705 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2706 auto SymbolName = getGprCountSymbolName(RegKind); 2707 assert(SymbolName && "initializing invalid register kind"); 2708 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2709 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2710 } 2711 2712 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2713 unsigned DwordRegIndex, 2714 unsigned RegWidth) { 2715 // Symbols are only defined for GCN targets 2716 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2717 return true; 2718 2719 auto SymbolName = getGprCountSymbolName(RegKind); 2720 if (!SymbolName) 2721 return true; 2722 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2723 2724 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2725 int64_t OldCount; 2726 2727 if (!Sym->isVariable()) 2728 return !Error(getLoc(), 2729 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2730 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2731 return !Error( 2732 getLoc(), 2733 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2734 2735 if (OldCount <= NewMax) 2736 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2737 2738 return true; 2739 } 2740 2741 std::unique_ptr<AMDGPUOperand> 2742 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2743 const auto &Tok = getToken(); 2744 SMLoc StartLoc = Tok.getLoc(); 2745 SMLoc EndLoc = Tok.getEndLoc(); 2746 RegisterKind RegKind; 2747 unsigned Reg, RegNum, RegWidth; 2748 2749 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2750 return nullptr; 2751 } 2752 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2753 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2754 return nullptr; 2755 } else 2756 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2757 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2758 } 2759 2760 OperandMatchResultTy 2761 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2762 // TODO: add syntactic sugar for 1/(2*PI) 2763 2764 assert(!isRegister()); 2765 assert(!isModifier()); 2766 2767 const auto& Tok = getToken(); 2768 const auto& NextTok = peekToken(); 2769 bool IsReal = Tok.is(AsmToken::Real); 2770 SMLoc S = getLoc(); 2771 bool Negate = false; 2772 2773 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2774 lex(); 2775 IsReal = true; 2776 Negate = true; 2777 } 2778 2779 if (IsReal) { 2780 // Floating-point expressions are not supported. 2781 // Can only allow floating-point literals with an 2782 // optional sign. 2783 2784 StringRef Num = getTokenStr(); 2785 lex(); 2786 2787 APFloat RealVal(APFloat::IEEEdouble()); 2788 auto roundMode = APFloat::rmNearestTiesToEven; 2789 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2790 return MatchOperand_ParseFail; 2791 } 2792 if (Negate) 2793 RealVal.changeSign(); 2794 2795 Operands.push_back( 2796 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2797 AMDGPUOperand::ImmTyNone, true)); 2798 2799 return MatchOperand_Success; 2800 2801 } else { 2802 int64_t IntVal; 2803 const MCExpr *Expr; 2804 SMLoc S = getLoc(); 2805 2806 if (HasSP3AbsModifier) { 2807 // This is a workaround for handling expressions 2808 // as arguments of SP3 'abs' modifier, for example: 2809 // |1.0| 2810 // |-1| 2811 // |1+x| 2812 // This syntax is not compatible with syntax of standard 2813 // MC expressions (due to the trailing '|'). 
2814 SMLoc EndLoc; 2815 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2816 return MatchOperand_ParseFail; 2817 } else { 2818 if (Parser.parseExpression(Expr)) 2819 return MatchOperand_ParseFail; 2820 } 2821 2822 if (Expr->evaluateAsAbsolute(IntVal)) { 2823 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2824 } else { 2825 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2826 } 2827 2828 return MatchOperand_Success; 2829 } 2830 2831 return MatchOperand_NoMatch; 2832 } 2833 2834 OperandMatchResultTy 2835 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2836 if (!isRegister()) 2837 return MatchOperand_NoMatch; 2838 2839 if (auto R = parseRegister()) { 2840 assert(R->isReg()); 2841 Operands.push_back(std::move(R)); 2842 return MatchOperand_Success; 2843 } 2844 return MatchOperand_ParseFail; 2845 } 2846 2847 OperandMatchResultTy 2848 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2849 auto res = parseReg(Operands); 2850 if (res != MatchOperand_NoMatch) { 2851 return res; 2852 } else if (isModifier()) { 2853 return MatchOperand_NoMatch; 2854 } else { 2855 return parseImm(Operands, HasSP3AbsMod); 2856 } 2857 } 2858 2859 bool 2860 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2861 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2862 const auto &str = Token.getString(); 2863 return str == "abs" || str == "neg" || str == "sext"; 2864 } 2865 return false; 2866 } 2867 2868 bool 2869 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2870 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2871 } 2872 2873 bool 2874 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2875 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2876 } 2877 2878 bool 2879 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2880 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2881 } 2882 2883 // Check if this is an operand modifier or an opcode modifier 2884 // which may look like an expression but it is not. We should 2885 // avoid parsing these modifiers as expressions. Currently 2886 // recognized sequences are: 2887 // |...| 2888 // abs(...) 2889 // neg(...) 2890 // sext(...) 2891 // -reg 2892 // -|...| 2893 // -abs(...) 2894 // name:... 2895 // Note that simple opcode modifiers like 'gds' may be parsed as 2896 // expressions; this is a special case. See getExpressionAsToken. 2897 // 2898 bool 2899 AMDGPUAsmParser::isModifier() { 2900 2901 AsmToken Tok = getToken(); 2902 AsmToken NextToken[2]; 2903 peekTokens(NextToken); 2904 2905 return isOperandModifier(Tok, NextToken[0]) || 2906 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2907 isOpcodeModifierWithVal(Tok, NextToken[0]); 2908 } 2909 2910 // Check if the current token is an SP3 'neg' modifier. 2911 // Currently this modifier is allowed in the following context: 2912 // 2913 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2914 // 2. Before an 'abs' modifier: -abs(...) 2915 // 3. Before an SP3 'abs' modifier: -|...| 2916 // 2917 // In all other cases "-" is handled as a part 2918 // of an expression that follows the sign. 
2919 // 2920 // Note: When "-" is followed by an integer literal, 2921 // this is interpreted as integer negation rather 2922 // than a floating-point NEG modifier applied to N. 2923 // Besides being counter-intuitive, such use of a floating-point 2924 // NEG modifier would have resulted in a different meaning 2925 // of integer literals used with VOP1/2/C and VOP3, 2926 // for example: 2927 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2928 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2929 // Negative fp literals with a preceding "-" are 2930 // handled likewise for uniformity. 2931 // 2932 bool 2933 AMDGPUAsmParser::parseSP3NegModifier() { 2934 2935 AsmToken NextToken[2]; 2936 peekTokens(NextToken); 2937 2938 if (isToken(AsmToken::Minus) && 2939 (isRegister(NextToken[0], NextToken[1]) || 2940 NextToken[0].is(AsmToken::Pipe) || 2941 isId(NextToken[0], "abs"))) { 2942 lex(); 2943 return true; 2944 } 2945 2946 return false; 2947 } 2948 2949 OperandMatchResultTy 2950 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2951 bool AllowImm) { 2952 bool Neg, SP3Neg; 2953 bool Abs, SP3Abs; 2954 SMLoc Loc; 2955 2956 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2957 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2958 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2959 return MatchOperand_ParseFail; 2960 } 2961 2962 SP3Neg = parseSP3NegModifier(); 2963 2964 Loc = getLoc(); 2965 Neg = trySkipId("neg"); 2966 if (Neg && SP3Neg) { 2967 Error(Loc, "expected register or immediate"); 2968 return MatchOperand_ParseFail; 2969 } 2970 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2971 return MatchOperand_ParseFail; 2972 2973 Abs = trySkipId("abs"); 2974 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2975 return MatchOperand_ParseFail; 2976 2977 Loc = getLoc(); 2978 SP3Abs = trySkipToken(AsmToken::Pipe); 2979 if (Abs && SP3Abs) { 2980 Error(Loc, "expected register or immediate"); 2981 return MatchOperand_ParseFail; 2982 } 2983 2984 OperandMatchResultTy Res; 2985 if (AllowImm) { 2986 Res = parseRegOrImm(Operands, SP3Abs); 2987 } else { 2988 Res = parseReg(Operands); 2989 } 2990 if (Res != MatchOperand_Success) { 2991 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2992 } 2993 2994 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2995 return MatchOperand_ParseFail; 2996 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2997 return MatchOperand_ParseFail; 2998 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2999 return MatchOperand_ParseFail; 3000 3001 AMDGPUOperand::Modifiers Mods; 3002 Mods.Abs = Abs || SP3Abs; 3003 Mods.Neg = Neg || SP3Neg; 3004 3005 if (Mods.hasFPModifiers()) { 3006 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3007 if (Op.isExpr()) { 3008 Error(Op.getStartLoc(), "expected an absolute expression"); 3009 return MatchOperand_ParseFail; 3010 } 3011 Op.setModifiers(Mods); 3012 } 3013 return MatchOperand_Success; 3014 } 3015 3016 OperandMatchResultTy 3017 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3018 bool AllowImm) { 3019 bool Sext = trySkipId("sext"); 3020 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3021 return MatchOperand_ParseFail; 3022 3023 OperandMatchResultTy Res; 3024 if (AllowImm) { 3025 Res = parseRegOrImm(Operands); 3026 } else { 3027 Res = parseReg(Operands); 3028 } 3029 if (Res != MatchOperand_Success) { 3030 return Sext? MatchOperand_ParseFail : Res; 3031 } 3032 3033 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3034 return MatchOperand_ParseFail; 3035 3036 AMDGPUOperand::Modifiers Mods; 3037 Mods.Sext = Sext; 3038 3039 if (Mods.hasIntModifiers()) { 3040 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3041 if (Op.isExpr()) { 3042 Error(Op.getStartLoc(), "expected an absolute expression"); 3043 return MatchOperand_ParseFail; 3044 } 3045 Op.setModifiers(Mods); 3046 } 3047 3048 return MatchOperand_Success; 3049 } 3050 3051 OperandMatchResultTy 3052 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3053 return parseRegOrImmWithFPInputMods(Operands, false); 3054 } 3055 3056 OperandMatchResultTy 3057 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3058 return parseRegOrImmWithIntInputMods(Operands, false); 3059 } 3060 3061 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3062 auto Loc = getLoc(); 3063 if (trySkipId("off")) { 3064 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3065 AMDGPUOperand::ImmTyOff, false)); 3066 return MatchOperand_Success; 3067 } 3068 3069 if (!isRegister()) 3070 return MatchOperand_NoMatch; 3071 3072 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3073 if (Reg) { 3074 Operands.push_back(std::move(Reg)); 3075 return MatchOperand_Success; 3076 } 3077 3078 return MatchOperand_ParseFail; 3079 3080 } 3081 3082 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3083 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3084 3085 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3086 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3087 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3088 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3089 return Match_InvalidOperand; 3090 3091 if ((TSFlags & SIInstrFlags::VOP3) && 3092 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3093 getForcedEncodingSize() != 64) 3094 return Match_PreferE32; 3095 3096 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3097 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3098 // v_mac_f32/16 allow only dst_sel == DWORD; 3099 auto OpNum = 3100 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3101 const auto &Op = Inst.getOperand(OpNum); 3102 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3103 return Match_InvalidOperand; 3104 } 3105 } 3106 3107 return Match_Success; 3108 } 3109 3110 static ArrayRef<unsigned> getAllVariants() { 3111 static const unsigned Variants[] = { 3112 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3113 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3114 }; 3115 3116 return makeArrayRef(Variants); 3117 } 3118 3119 // What asm variants we should check 3120 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3121 if (getForcedEncodingSize() == 32) { 3122 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3123 return makeArrayRef(Variants); 3124 } 3125 3126 if (isForcedVOP3()) { 3127 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3128 return makeArrayRef(Variants); 3129 } 3130 3131 if (isForcedSDWA()) { 3132 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3133 AMDGPUAsmVariants::SDWA9}; 3134 return makeArrayRef(Variants); 3135 } 3136 3137 if (isForcedDPP()) { 3138 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3139 return makeArrayRef(Variants); 3140 } 3141 3142 return getAllVariants(); 3143 } 3144 3145 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3146 if (getForcedEncodingSize() == 32) 3147 return "e32"; 3148 3149 if (isForcedVOP3()) 3150 return "e64"; 3151 3152 if (isForcedSDWA()) 3153 return "sdwa"; 3154 3155 if (isForcedDPP()) 3156 return "dpp"; 3157 3158 return ""; 3159 } 3160 3161 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3162 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3163 const unsigned Num = Desc.getNumImplicitUses(); 3164 for (unsigned i = 0; i < Num; ++i) { 3165 unsigned Reg = Desc.ImplicitUses[i]; 3166 switch (Reg) { 3167 case AMDGPU::FLAT_SCR: 3168 case AMDGPU::VCC: 3169 case AMDGPU::VCC_LO: 3170 case AMDGPU::VCC_HI: 3171 case AMDGPU::M0: 3172 return Reg; 3173 default: 3174 break; 3175 } 3176 } 3177 return AMDGPU::NoRegister; 3178 } 3179 3180 // NB: This code is correct only when used to check constant 3181 // bus limitations because GFX7 support no f16 inline constants. 3182 // Note that there are no cases when a GFX7 opcode violates 3183 // constant bus limitations due to the use of an f16 constant. 
3184 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3185 unsigned OpIdx) const { 3186 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3187 3188 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3189 return false; 3190 } 3191 3192 const MCOperand &MO = Inst.getOperand(OpIdx); 3193 3194 int64_t Val = MO.getImm(); 3195 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3196 3197 switch (OpSize) { // expected operand size 3198 case 8: 3199 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3200 case 4: 3201 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3202 case 2: { 3203 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3204 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3205 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3206 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3207 return AMDGPU::isInlinableIntLiteral(Val); 3208 3209 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3210 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3211 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3212 return AMDGPU::isInlinableIntLiteralV216(Val); 3213 3214 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3215 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3216 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3217 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3218 3219 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3220 } 3221 default: 3222 llvm_unreachable("invalid operand size"); 3223 } 3224 } 3225 3226 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3227 if (!isGFX10Plus()) 3228 return 1; 3229 3230 switch (Opcode) { 3231 // 64-bit shift instructions can use only one scalar value input 3232 case AMDGPU::V_LSHLREV_B64_e64: 3233 case AMDGPU::V_LSHLREV_B64_gfx10: 3234 case AMDGPU::V_LSHRREV_B64_e64: 3235 case AMDGPU::V_LSHRREV_B64_gfx10: 3236 case AMDGPU::V_ASHRREV_I64_e64: 3237 case AMDGPU::V_ASHRREV_I64_gfx10: 3238 case AMDGPU::V_LSHL_B64_e64: 3239 case AMDGPU::V_LSHR_B64_e64: 3240 case AMDGPU::V_ASHR_I64_e64: 3241 return 1; 3242 default: 3243 return 2; 3244 } 3245 } 3246 3247 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3248 const MCOperand &MO = Inst.getOperand(OpIdx); 3249 if (MO.isImm()) { 3250 return !isInlineConstant(Inst, OpIdx); 3251 } else if (MO.isReg()) { 3252 auto Reg = MO.getReg(); 3253 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3254 auto PReg = mc2PseudoReg(Reg); 3255 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3256 } else { 3257 return true; 3258 } 3259 } 3260 3261 bool 3262 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3263 const OperandVector &Operands) { 3264 const unsigned Opcode = Inst.getOpcode(); 3265 const MCInstrDesc &Desc = MII.get(Opcode); 3266 unsigned LastSGPR = AMDGPU::NoRegister; 3267 unsigned ConstantBusUseCount = 0; 3268 unsigned NumLiterals = 0; 3269 unsigned LiteralSize; 3270 3271 if (Desc.TSFlags & 3272 (SIInstrFlags::VOPC | 3273 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3274 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3275 SIInstrFlags::SDWA)) { 3276 // Check special imm operands (used by madmk, etc) 3277 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3278 ++NumLiterals; 3279 LiteralSize = 4; 3280 } 3281 3282 SmallDenseSet<unsigned> SGPRsUsed; 3283 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3284 if (SGPRUsed != AMDGPU::NoRegister) { 3285 SGPRsUsed.insert(SGPRUsed); 3286 ++ConstantBusUseCount; 3287 } 3288 3289 
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3290 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3291 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3292 3293 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3294 3295 for (int OpIdx : OpIndices) { 3296 if (OpIdx == -1) break; 3297 3298 const MCOperand &MO = Inst.getOperand(OpIdx); 3299 if (usesConstantBus(Inst, OpIdx)) { 3300 if (MO.isReg()) { 3301 LastSGPR = mc2PseudoReg(MO.getReg()); 3302 // Pairs of registers with partial intersections like these 3303 // s0, s[0:1] 3304 // flat_scratch_lo, flat_scratch 3305 // flat_scratch_lo, flat_scratch_hi 3306 // are theoretically valid but they are disabled anyway. 3307 // Note that this code mimics SIInstrInfo::verifyInstruction 3308 if (!SGPRsUsed.count(LastSGPR)) { 3309 SGPRsUsed.insert(LastSGPR); 3310 ++ConstantBusUseCount; 3311 } 3312 } else { // Expression or a literal 3313 3314 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3315 continue; // special operand like VINTERP attr_chan 3316 3317 // An instruction may use only one literal. 3318 // This has been validated in the previous step. 3319 // See validateVOPLiteral. 3320 // This literal may be used as more than one operand. 3321 // If all these operands are of the same size, 3322 // this literal counts as one scalar value. 3323 // Otherwise it counts as 2 scalar values. 3324 // See "GFX10 Shader Programming", section 3.6.2.3. 3325 3326 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3327 if (Size < 4) Size = 4; 3328 3329 if (NumLiterals == 0) { 3330 NumLiterals = 1; 3331 LiteralSize = Size; 3332 } else if (LiteralSize != Size) { 3333 NumLiterals = 2; 3334 } 3335 } 3336 } 3337 } 3338 } 3339 ConstantBusUseCount += NumLiterals; 3340 3341 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3342 return true; 3343 3344 SMLoc LitLoc = getLitLoc(Operands); 3345 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3346 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3347 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3348 return false; 3349 } 3350 3351 bool 3352 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3353 const OperandVector &Operands) { 3354 const unsigned Opcode = Inst.getOpcode(); 3355 const MCInstrDesc &Desc = MII.get(Opcode); 3356 3357 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3358 if (DstIdx == -1 || 3359 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3360 return true; 3361 } 3362 3363 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3364 3365 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3366 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3367 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3368 3369 assert(DstIdx != -1); 3370 const MCOperand &Dst = Inst.getOperand(DstIdx); 3371 assert(Dst.isReg()); 3372 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3373 3374 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3375 3376 for (int SrcIdx : SrcIndices) { 3377 if (SrcIdx == -1) break; 3378 const MCOperand &Src = Inst.getOperand(SrcIdx); 3379 if (Src.isReg()) { 3380 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3381 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3382 Error(getRegLoc(SrcReg, Operands), 3383 "destination must be different than all sources"); 3384 return false; 3385 } 3386 } 3387 } 3388 3389 return true; 3390 } 3391 3392 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3393 3394 const unsigned Opc = Inst.getOpcode(); 3395 const MCInstrDesc &Desc = MII.get(Opc); 3396 3397 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3398 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3399 assert(ClampIdx != -1); 3400 return Inst.getOperand(ClampIdx).getImm() == 0; 3401 } 3402 3403 return true; 3404 } 3405 3406 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3407 3408 const unsigned Opc = Inst.getOpcode(); 3409 const MCInstrDesc &Desc = MII.get(Opc); 3410 3411 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3412 return true; 3413 3414 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3415 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3416 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3417 3418 assert(VDataIdx != -1); 3419 3420 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3421 return true; 3422 3423 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3424 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3425 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3426 if (DMask == 0) 3427 DMask = 1; 3428 3429 unsigned DataSize = 3430 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3431 if (hasPackedD16()) { 3432 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3433 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3434 DataSize = (DataSize + 1) / 2; 3435 } 3436 3437 return (VDataSize / 4) == DataSize + TFESize; 3438 } 3439 3440 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3441 const unsigned Opc = Inst.getOpcode(); 3442 const MCInstrDesc &Desc = MII.get(Opc); 3443 3444 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3445 return true; 3446 3447 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3448 3449 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3450 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3451 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3452 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3453 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3454 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3455 3456 assert(VAddr0Idx != -1); 3457 assert(SrsrcIdx != -1); 3458 assert(SrsrcIdx > VAddr0Idx); 3459 3460 if (DimIdx == -1) 3461 return true; // intersect_ray 3462 3463 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3464 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3465 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3466 unsigned ActualAddrSize = 3467 IsNSA ? SrsrcIdx - VAddr0Idx 3468 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3469 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3470 3471 unsigned ExpectedAddrSize = 3472 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3473 3474 if (!IsNSA) { 3475 if (ExpectedAddrSize > 8) 3476 ExpectedAddrSize = 16; 3477 3478 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3479 // This provides backward compatibility for assembly created 3480 // before 160b/192b/224b types were directly supported. 3481 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3482 return true; 3483 } 3484 3485 return ActualAddrSize == ExpectedAddrSize; 3486 } 3487 3488 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3489 3490 const unsigned Opc = Inst.getOpcode(); 3491 const MCInstrDesc &Desc = MII.get(Opc); 3492 3493 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3494 return true; 3495 if (!Desc.mayLoad() || !Desc.mayStore()) 3496 return true; // Not atomic 3497 3498 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3499 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3500 3501 // This is an incomplete check because image_atomic_cmpswap 3502 // may only use 0x3 and 0xf while other atomic operations 3503 // may use 0x1 and 0x3. However these limitations are 3504 // verified when we check that dmask matches dst size. 3505 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3506 } 3507 3508 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3509 3510 const unsigned Opc = Inst.getOpcode(); 3511 const MCInstrDesc &Desc = MII.get(Opc); 3512 3513 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3514 return true; 3515 3516 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3517 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3518 3519 // GATHER4 instructions use dmask in a different fashion compared to 3520 // other MIMG instructions. The only useful DMASK values are 3521 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3522 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3523 // this. 3524 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3525 } 3526 3527 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3528 const unsigned Opc = Inst.getOpcode(); 3529 const MCInstrDesc &Desc = MII.get(Opc); 3530 3531 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3532 return true; 3533 3534 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3535 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3536 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3537 3538 if (!BaseOpcode->MSAA) 3539 return true; 3540 3541 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3542 assert(DimIdx != -1); 3543 3544 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3545 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3546 3547 return DimInfo->MSAA; 3548 } 3549 3550 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3551 { 3552 switch (Opcode) { 3553 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3554 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3555 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3556 return true; 3557 default: 3558 return false; 3559 } 3560 } 3561 3562 // movrels* opcodes should only allow VGPRS as src0. 3563 // This is specified in .td description for vop1/vop3, 3564 // but sdwa is handled differently. See isSDWAOperand. 3565 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3566 const OperandVector &Operands) { 3567 3568 const unsigned Opc = Inst.getOpcode(); 3569 const MCInstrDesc &Desc = MII.get(Opc); 3570 3571 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3572 return true; 3573 3574 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3575 assert(Src0Idx != -1); 3576 3577 SMLoc ErrLoc; 3578 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3579 if (Src0.isReg()) { 3580 auto Reg = mc2PseudoReg(Src0.getReg()); 3581 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3582 if (!isSGPR(Reg, TRI)) 3583 return true; 3584 ErrLoc = getRegLoc(Reg, Operands); 3585 } else { 3586 ErrLoc = getConstLoc(Operands); 3587 } 3588 3589 Error(ErrLoc, "source operand must be a VGPR"); 3590 return false; 3591 } 3592 3593 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3594 const OperandVector &Operands) { 3595 3596 const unsigned Opc = Inst.getOpcode(); 3597 3598 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3599 return true; 3600 3601 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3602 assert(Src0Idx != -1); 3603 3604 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3605 if (!Src0.isReg()) 3606 return true; 3607 3608 auto Reg = mc2PseudoReg(Src0.getReg()); 3609 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3610 if (isSGPR(Reg, TRI)) { 3611 Error(getRegLoc(Reg, Operands), 3612 "source operand must be either a VGPR or an inline constant"); 3613 return false; 3614 } 3615 3616 return true; 3617 } 3618 3619 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3620 const OperandVector &Operands) { 3621 const unsigned Opc = Inst.getOpcode(); 3622 const MCInstrDesc &Desc = MII.get(Opc); 3623 3624 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3625 return true; 3626 3627 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3628 if (Src2Idx == -1) 3629 return true; 3630 3631 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3632 if (!Src2.isReg()) 3633 return true; 3634 3635 MCRegister Src2Reg = Src2.getReg(); 3636 MCRegister DstReg = Inst.getOperand(0).getReg(); 3637 if 
(Src2Reg == DstReg) 3638 return true; 3639 3640 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3641 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3642 return true; 3643 3644 if (isRegIntersect(Src2Reg, DstReg, TRI)) { 3645 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3646 "source 2 operand must not partially overlap with dst"); 3647 return false; 3648 } 3649 3650 return true; 3651 } 3652 3653 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3654 switch (Inst.getOpcode()) { 3655 default: 3656 return true; 3657 case V_DIV_SCALE_F32_gfx6_gfx7: 3658 case V_DIV_SCALE_F32_vi: 3659 case V_DIV_SCALE_F32_gfx10: 3660 case V_DIV_SCALE_F64_gfx6_gfx7: 3661 case V_DIV_SCALE_F64_vi: 3662 case V_DIV_SCALE_F64_gfx10: 3663 break; 3664 } 3665 3666 // TODO: Check that src0 = src1 or src2. 3667 3668 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3669 AMDGPU::OpName::src2_modifiers, 3670 AMDGPU::OpName::src2_modifiers}) { 3671 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3672 .getImm() & 3673 SISrcMods::ABS) { 3674 return false; 3675 } 3676 } 3677 3678 return true; 3679 } 3680 3681 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3682 3683 const unsigned Opc = Inst.getOpcode(); 3684 const MCInstrDesc &Desc = MII.get(Opc); 3685 3686 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3687 return true; 3688 3689 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3690 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3691 if (isCI() || isSI()) 3692 return false; 3693 } 3694 3695 return true; 3696 } 3697 3698 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3699 const unsigned Opc = Inst.getOpcode(); 3700 const MCInstrDesc &Desc = MII.get(Opc); 3701 3702 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3703 return true; 3704 3705 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3706 if (DimIdx < 0) 3707 return true; 3708 3709 long Imm = Inst.getOperand(DimIdx).getImm(); 3710 if (Imm < 0 || Imm >= 8) 3711 return false; 3712 3713 return true; 3714 } 3715 3716 static bool IsRevOpcode(const unsigned Opcode) 3717 { 3718 switch (Opcode) { 3719 case AMDGPU::V_SUBREV_F32_e32: 3720 case AMDGPU::V_SUBREV_F32_e64: 3721 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3722 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3723 case AMDGPU::V_SUBREV_F32_e32_vi: 3724 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3725 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3726 case AMDGPU::V_SUBREV_F32_e64_vi: 3727 3728 case AMDGPU::V_SUBREV_CO_U32_e32: 3729 case AMDGPU::V_SUBREV_CO_U32_e64: 3730 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3731 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3732 3733 case AMDGPU::V_SUBBREV_U32_e32: 3734 case AMDGPU::V_SUBBREV_U32_e64: 3735 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3736 case AMDGPU::V_SUBBREV_U32_e32_vi: 3737 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3738 case AMDGPU::V_SUBBREV_U32_e64_vi: 3739 3740 case AMDGPU::V_SUBREV_U32_e32: 3741 case AMDGPU::V_SUBREV_U32_e64: 3742 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3743 case AMDGPU::V_SUBREV_U32_e32_vi: 3744 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3745 case AMDGPU::V_SUBREV_U32_e64_vi: 3746 3747 case AMDGPU::V_SUBREV_F16_e32: 3748 case AMDGPU::V_SUBREV_F16_e64: 3749 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3750 case AMDGPU::V_SUBREV_F16_e32_vi: 3751 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3752 case AMDGPU::V_SUBREV_F16_e64_vi: 3753 3754 case AMDGPU::V_SUBREV_U16_e32: 3755 case AMDGPU::V_SUBREV_U16_e64: 3756 case AMDGPU::V_SUBREV_U16_e32_vi: 3757 case 
AMDGPU::V_SUBREV_U16_e64_vi: 3758 3759 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3760 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3761 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3762 3763 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3764 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3765 3766 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3767 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3768 3769 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3770 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3771 3772 case AMDGPU::V_LSHRREV_B32_e32: 3773 case AMDGPU::V_LSHRREV_B32_e64: 3774 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3775 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3776 case AMDGPU::V_LSHRREV_B32_e32_vi: 3777 case AMDGPU::V_LSHRREV_B32_e64_vi: 3778 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3779 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3780 3781 case AMDGPU::V_ASHRREV_I32_e32: 3782 case AMDGPU::V_ASHRREV_I32_e64: 3783 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3784 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3785 case AMDGPU::V_ASHRREV_I32_e32_vi: 3786 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3787 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3788 case AMDGPU::V_ASHRREV_I32_e64_vi: 3789 3790 case AMDGPU::V_LSHLREV_B32_e32: 3791 case AMDGPU::V_LSHLREV_B32_e64: 3792 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3793 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3794 case AMDGPU::V_LSHLREV_B32_e32_vi: 3795 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3796 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3797 case AMDGPU::V_LSHLREV_B32_e64_vi: 3798 3799 case AMDGPU::V_LSHLREV_B16_e32: 3800 case AMDGPU::V_LSHLREV_B16_e64: 3801 case AMDGPU::V_LSHLREV_B16_e32_vi: 3802 case AMDGPU::V_LSHLREV_B16_e64_vi: 3803 case AMDGPU::V_LSHLREV_B16_gfx10: 3804 3805 case AMDGPU::V_LSHRREV_B16_e32: 3806 case AMDGPU::V_LSHRREV_B16_e64: 3807 case AMDGPU::V_LSHRREV_B16_e32_vi: 3808 case AMDGPU::V_LSHRREV_B16_e64_vi: 3809 case AMDGPU::V_LSHRREV_B16_gfx10: 3810 3811 case AMDGPU::V_ASHRREV_I16_e32: 3812 case AMDGPU::V_ASHRREV_I16_e64: 3813 case AMDGPU::V_ASHRREV_I16_e32_vi: 3814 case AMDGPU::V_ASHRREV_I16_e64_vi: 3815 case AMDGPU::V_ASHRREV_I16_gfx10: 3816 3817 case AMDGPU::V_LSHLREV_B64_e64: 3818 case AMDGPU::V_LSHLREV_B64_gfx10: 3819 case AMDGPU::V_LSHLREV_B64_vi: 3820 3821 case AMDGPU::V_LSHRREV_B64_e64: 3822 case AMDGPU::V_LSHRREV_B64_gfx10: 3823 case AMDGPU::V_LSHRREV_B64_vi: 3824 3825 case AMDGPU::V_ASHRREV_I64_e64: 3826 case AMDGPU::V_ASHRREV_I64_gfx10: 3827 case AMDGPU::V_ASHRREV_I64_vi: 3828 3829 case AMDGPU::V_PK_LSHLREV_B16: 3830 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3831 case AMDGPU::V_PK_LSHLREV_B16_vi: 3832 3833 case AMDGPU::V_PK_LSHRREV_B16: 3834 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3835 case AMDGPU::V_PK_LSHRREV_B16_vi: 3836 case AMDGPU::V_PK_ASHRREV_I16: 3837 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3838 case AMDGPU::V_PK_ASHRREV_I16_vi: 3839 return true; 3840 default: 3841 return false; 3842 } 3843 } 3844 3845 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3846 3847 using namespace SIInstrFlags; 3848 const unsigned Opcode = Inst.getOpcode(); 3849 const MCInstrDesc &Desc = MII.get(Opcode); 3850 3851 // lds_direct register is defined so that it can be used 3852 // with 9-bit operands only. Ignore encodings which do not accept these. 
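// For example (assembly syntax for illustration only), an instruction such as
// "v_add_f32 v0, lds_direct, v1" passes this check, while using lds_direct as
// src1/src2, in an SDWA variant, or with a *rev* opcode is diagnosed below.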
3853 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3854 if ((Desc.TSFlags & Enc) == 0) 3855 return None; 3856 3857 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3858 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3859 if (SrcIdx == -1) 3860 break; 3861 const auto &Src = Inst.getOperand(SrcIdx); 3862 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3863 3864 if (isGFX90A()) 3865 return StringRef("lds_direct is not supported on this GPU"); 3866 3867 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3868 return StringRef("lds_direct cannot be used with this instruction"); 3869 3870 if (SrcName != OpName::src0) 3871 return StringRef("lds_direct may be used as src0 only"); 3872 } 3873 } 3874 3875 return None; 3876 } 3877 3878 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3879 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3880 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3881 if (Op.isFlatOffset()) 3882 return Op.getStartLoc(); 3883 } 3884 return getLoc(); 3885 } 3886 3887 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3888 const OperandVector &Operands) { 3889 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3890 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3891 return true; 3892 3893 auto Opcode = Inst.getOpcode(); 3894 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3895 assert(OpNum != -1); 3896 3897 const auto &Op = Inst.getOperand(OpNum); 3898 if (!hasFlatOffsets() && Op.getImm() != 0) { 3899 Error(getFlatOffsetLoc(Operands), 3900 "flat offset modifier is not supported on this GPU"); 3901 return false; 3902 } 3903 3904 // For FLAT segment the offset must be positive; 3905 // MSB is ignored and forced to zero. 3906 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3907 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3908 if (!isIntN(OffsetSize, Op.getImm())) { 3909 Error(getFlatOffsetLoc(Operands), 3910 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3911 return false; 3912 } 3913 } else { 3914 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3915 if (!isUIntN(OffsetSize, Op.getImm())) { 3916 Error(getFlatOffsetLoc(Operands), 3917 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3918 return false; 3919 } 3920 } 3921 3922 return true; 3923 } 3924 3925 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3926 // Start with second operand because SMEM Offset cannot be dst or src0. 
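// Operands[0] is the mnemonic token and Operands[1] holds dst/src0, so the
// scan for the offset modifier can safely begin at index 2.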
3927 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3928 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3929 if (Op.isSMEMOffset()) 3930 return Op.getStartLoc(); 3931 } 3932 return getLoc(); 3933 } 3934 3935 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3936 const OperandVector &Operands) { 3937 if (isCI() || isSI()) 3938 return true; 3939 3940 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3941 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3942 return true; 3943 3944 auto Opcode = Inst.getOpcode(); 3945 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3946 if (OpNum == -1) 3947 return true; 3948 3949 const auto &Op = Inst.getOperand(OpNum); 3950 if (!Op.isImm()) 3951 return true; 3952 3953 uint64_t Offset = Op.getImm(); 3954 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3955 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3956 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3957 return true; 3958 3959 Error(getSMEMOffsetLoc(Operands), 3960 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3961 "expected a 21-bit signed offset"); 3962 3963 return false; 3964 } 3965 3966 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3967 unsigned Opcode = Inst.getOpcode(); 3968 const MCInstrDesc &Desc = MII.get(Opcode); 3969 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3970 return true; 3971 3972 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3973 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3974 3975 const int OpIndices[] = { Src0Idx, Src1Idx }; 3976 3977 unsigned NumExprs = 0; 3978 unsigned NumLiterals = 0; 3979 uint32_t LiteralValue; 3980 3981 for (int OpIdx : OpIndices) { 3982 if (OpIdx == -1) break; 3983 3984 const MCOperand &MO = Inst.getOperand(OpIdx); 3985 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3986 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3987 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3988 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3989 if (NumLiterals == 0 || LiteralValue != Value) { 3990 LiteralValue = Value; 3991 ++NumLiterals; 3992 } 3993 } else if (MO.isExpr()) { 3994 ++NumExprs; 3995 } 3996 } 3997 } 3998 3999 return NumLiterals + NumExprs <= 1; 4000 } 4001 4002 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4003 const unsigned Opc = Inst.getOpcode(); 4004 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4005 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4006 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4007 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4008 4009 if (OpSel & ~3) 4010 return false; 4011 } 4012 return true; 4013 } 4014 4015 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4016 const OperandVector &Operands) { 4017 const unsigned Opc = Inst.getOpcode(); 4018 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4019 if (DppCtrlIdx < 0) 4020 return true; 4021 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4022 4023 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4024 // DPP64 is supported for row_newbcast only. 
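// The check below treats src0 as 64-bit when it has a sub1 subregister; for
// such operands any dpp_ctrl outside the row_newbcast encodings is rejected.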
4025 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4026 if (Src0Idx >= 0 && 4027 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4028 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4029 Error(S, "64 bit dpp only supports row_newbcast"); 4030 return false; 4031 } 4032 } 4033 4034 return true; 4035 } 4036 4037 // Check if VCC register matches wavefront size 4038 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4039 auto FB = getFeatureBits(); 4040 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4041 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4042 } 4043 4044 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4045 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4046 const OperandVector &Operands) { 4047 unsigned Opcode = Inst.getOpcode(); 4048 const MCInstrDesc &Desc = MII.get(Opcode); 4049 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4050 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4051 ImmIdx == -1) 4052 return true; 4053 4054 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4055 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4056 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4057 4058 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4059 4060 unsigned NumExprs = 0; 4061 unsigned NumLiterals = 0; 4062 uint32_t LiteralValue; 4063 4064 for (int OpIdx : OpIndices) { 4065 if (OpIdx == -1) 4066 continue; 4067 4068 const MCOperand &MO = Inst.getOperand(OpIdx); 4069 if (!MO.isImm() && !MO.isExpr()) 4070 continue; 4071 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4072 continue; 4073 4074 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4075 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4076 Error(getConstLoc(Operands), 4077 "inline constants are not allowed for this operand"); 4078 return false; 4079 } 4080 4081 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4082 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4083 if (NumLiterals == 0 || LiteralValue != Value) { 4084 LiteralValue = Value; 4085 ++NumLiterals; 4086 } 4087 } else if (MO.isExpr()) { 4088 ++NumExprs; 4089 } 4090 } 4091 NumLiterals += NumExprs; 4092 4093 if (!NumLiterals) 4094 return true; 4095 4096 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4097 Error(getLitLoc(Operands), "literal operands are not supported"); 4098 return false; 4099 } 4100 4101 if (NumLiterals > 1) { 4102 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4103 return false; 4104 } 4105 4106 return true; 4107 } 4108 4109 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4110 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4111 const MCRegisterInfo *MRI) { 4112 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4113 if (OpIdx < 0) 4114 return -1; 4115 4116 const MCOperand &Op = Inst.getOperand(OpIdx); 4117 if (!Op.isReg()) 4118 return -1; 4119 4120 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4121 auto Reg = Sub ? Sub : Op.getReg(); 4122 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4123 return AGPR32.contains(Reg) ? 
1 : 0; 4124 } 4125 4126 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4127 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4128 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4129 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4130 SIInstrFlags::DS)) == 0) 4131 return true; 4132 4133 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4134 : AMDGPU::OpName::vdata; 4135 4136 const MCRegisterInfo *MRI = getMRI(); 4137 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4138 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4139 4140 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4141 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4142 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4143 return false; 4144 } 4145 4146 auto FB = getFeatureBits(); 4147 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4148 if (DataAreg < 0 || DstAreg < 0) 4149 return true; 4150 return DstAreg == DataAreg; 4151 } 4152 4153 return DstAreg < 1 && DataAreg < 1; 4154 } 4155 4156 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4157 auto FB = getFeatureBits(); 4158 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4159 return true; 4160 4161 const MCRegisterInfo *MRI = getMRI(); 4162 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4163 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4164 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4165 const MCOperand &Op = Inst.getOperand(I); 4166 if (!Op.isReg()) 4167 continue; 4168 4169 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4170 if (!Sub) 4171 continue; 4172 4173 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4174 return false; 4175 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4176 return false; 4177 } 4178 4179 return true; 4180 } 4181 4182 // gfx90a has an undocumented limitation: 4183 // DS_GWS opcodes must use even aligned registers. 4184 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4185 const OperandVector &Operands) { 4186 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4187 return true; 4188 4189 int Opc = Inst.getOpcode(); 4190 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4191 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4192 return true; 4193 4194 const MCRegisterInfo *MRI = getMRI(); 4195 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4196 int Data0Pos = 4197 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4198 assert(Data0Pos != -1); 4199 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4200 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4201 if (RegIdx & 1) { 4202 SMLoc RegLoc = getRegLoc(Reg, Operands); 4203 Error(RegLoc, "vgpr must be even aligned"); 4204 return false; 4205 } 4206 4207 return true; 4208 } 4209 4210 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4211 const OperandVector &Operands, 4212 const SMLoc &IDLoc) { 4213 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4214 AMDGPU::OpName::cpol); 4215 if (CPolPos == -1) 4216 return true; 4217 4218 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4219 4220 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4221 if ((TSFlags & (SIInstrFlags::SMRD)) && 4222 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4223 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4224 return false; 4225 } 4226 4227 if (isGFX90A() && (CPol & CPol::SCC)) { 4228 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4229 StringRef CStr(S.getPointer()); 4230 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4231 Error(S, "scc is not supported on this GPU"); 4232 return false; 4233 } 4234 4235 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4236 return true; 4237 4238 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4239 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4240 Error(IDLoc, "instruction must use glc"); 4241 return false; 4242 } 4243 } else { 4244 if (CPol & CPol::GLC) { 4245 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4246 StringRef CStr(S.getPointer()); 4247 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4248 Error(S, "instruction must not use glc"); 4249 return false; 4250 } 4251 } 4252 4253 return true; 4254 } 4255 4256 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4257 const SMLoc &IDLoc, 4258 const OperandVector &Operands) { 4259 if (auto ErrMsg = validateLdsDirect(Inst)) { 4260 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4261 return false; 4262 } 4263 if (!validateSOPLiteral(Inst)) { 4264 Error(getLitLoc(Operands), 4265 "only one literal operand is allowed"); 4266 return false; 4267 } 4268 if (!validateVOPLiteral(Inst, Operands)) { 4269 return false; 4270 } 4271 if (!validateConstantBusLimitations(Inst, Operands)) { 4272 return false; 4273 } 4274 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4275 return false; 4276 } 4277 if (!validateIntClampSupported(Inst)) { 4278 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4279 "integer clamping is not supported on this GPU"); 4280 return false; 4281 } 4282 if (!validateOpSel(Inst)) { 4283 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4284 "invalid op_sel operand"); 4285 return false; 4286 } 4287 if (!validateDPP(Inst, Operands)) { 4288 return false; 4289 } 4290 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
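// MIMG d16, by contrast, is an explicit operand and must be rejected below on
// targets (SI/CI) that do not support it.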
4291 if (!validateMIMGD16(Inst)) { 4292 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4293 "d16 modifier is not supported on this GPU"); 4294 return false; 4295 } 4296 if (!validateMIMGDim(Inst)) { 4297 Error(IDLoc, "dim modifier is required on this GPU"); 4298 return false; 4299 } 4300 if (!validateMIMGMSAA(Inst)) { 4301 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4302 "invalid dim; must be MSAA type"); 4303 return false; 4304 } 4305 if (!validateMIMGDataSize(Inst)) { 4306 Error(IDLoc, 4307 "image data size does not match dmask and tfe"); 4308 return false; 4309 } 4310 if (!validateMIMGAddrSize(Inst)) { 4311 Error(IDLoc, 4312 "image address size does not match dim and a16"); 4313 return false; 4314 } 4315 if (!validateMIMGAtomicDMask(Inst)) { 4316 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4317 "invalid atomic image dmask"); 4318 return false; 4319 } 4320 if (!validateMIMGGatherDMask(Inst)) { 4321 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4322 "invalid image_gather dmask: only one bit must be set"); 4323 return false; 4324 } 4325 if (!validateMovrels(Inst, Operands)) { 4326 return false; 4327 } 4328 if (!validateFlatOffset(Inst, Operands)) { 4329 return false; 4330 } 4331 if (!validateSMEMOffset(Inst, Operands)) { 4332 return false; 4333 } 4334 if (!validateMAIAccWrite(Inst, Operands)) { 4335 return false; 4336 } 4337 if (!validateMFMA(Inst, Operands)) { 4338 return false; 4339 } 4340 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4341 return false; 4342 } 4343 4344 if (!validateAGPRLdSt(Inst)) { 4345 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4346 ? "invalid register class: data and dst should be all VGPR or AGPR" 4347 : "invalid register class: agpr loads and stores not supported on this GPU" 4348 ); 4349 return false; 4350 } 4351 if (!validateVGPRAlign(Inst)) { 4352 Error(IDLoc, 4353 "invalid register class: vgpr tuples must be 64 bit aligned"); 4354 return false; 4355 } 4356 if (!validateGWS(Inst, Operands)) { 4357 return false; 4358 } 4359 4360 if (!validateDivScale(Inst)) { 4361 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4362 return false; 4363 } 4364 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4365 return false; 4366 } 4367 4368 return true; 4369 } 4370 4371 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4372 const FeatureBitset &FBS, 4373 unsigned VariantID = 0); 4374 4375 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4376 const FeatureBitset &AvailableFeatures, 4377 unsigned VariantID); 4378 4379 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4380 const FeatureBitset &FBS) { 4381 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4382 } 4383 4384 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4385 const FeatureBitset &FBS, 4386 ArrayRef<unsigned> Variants) { 4387 for (auto Variant : Variants) { 4388 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4389 return true; 4390 } 4391 4392 return false; 4393 } 4394 4395 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4396 const SMLoc &IDLoc) { 4397 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4398 4399 // Check if requested instruction variant is supported. 4400 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4401 return false; 4402 4403 // This instruction is not supported. 4404 // Clear any other pending errors because they are no longer relevant. 4405 getParser().clearPendingErrors(); 4406 4407 // Requested instruction variant is not supported. 
4408 // Check if any other variants are supported. 4409 StringRef VariantName = getMatchedVariantName(); 4410 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4411 return Error(IDLoc, 4412 Twine(VariantName, 4413 " variant of this instruction is not supported")); 4414 } 4415 4416 // Finally check if this instruction is supported on any other GPU. 4417 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4418 return Error(IDLoc, "instruction not supported on this GPU"); 4419 } 4420 4421 // Instruction not supported on any GPU. Probably a typo. 4422 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4423 return Error(IDLoc, "invalid instruction" + Suggestion); 4424 } 4425 4426 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4427 OperandVector &Operands, 4428 MCStreamer &Out, 4429 uint64_t &ErrorInfo, 4430 bool MatchingInlineAsm) { 4431 MCInst Inst; 4432 unsigned Result = Match_Success; 4433 for (auto Variant : getMatchedVariants()) { 4434 uint64_t EI; 4435 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4436 Variant); 4437 // We order match statuses from least to most specific. We use most specific 4438 // status as resulting 4439 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4440 if ((R == Match_Success) || 4441 (R == Match_PreferE32) || 4442 (R == Match_MissingFeature && Result != Match_PreferE32) || 4443 (R == Match_InvalidOperand && Result != Match_MissingFeature 4444 && Result != Match_PreferE32) || 4445 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4446 && Result != Match_MissingFeature 4447 && Result != Match_PreferE32)) { 4448 Result = R; 4449 ErrorInfo = EI; 4450 } 4451 if (R == Match_Success) 4452 break; 4453 } 4454 4455 if (Result == Match_Success) { 4456 if (!validateInstruction(Inst, IDLoc, Operands)) { 4457 return true; 4458 } 4459 Inst.setLoc(IDLoc); 4460 Out.emitInstruction(Inst, getSTI()); 4461 return false; 4462 } 4463 4464 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4465 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4466 return true; 4467 } 4468 4469 switch (Result) { 4470 default: break; 4471 case Match_MissingFeature: 4472 // It has been verified that the specified instruction 4473 // mnemonic is valid. A match was found but it requires 4474 // features which are not supported on this GPU. 
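// This is reached, for example, when the operands only match an instruction
// form that needs a feature (such as a different wavefront size) which the
// selected target does not provide.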
4475 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4476 4477 case Match_InvalidOperand: { 4478 SMLoc ErrorLoc = IDLoc; 4479 if (ErrorInfo != ~0ULL) { 4480 if (ErrorInfo >= Operands.size()) { 4481 return Error(IDLoc, "too few operands for instruction"); 4482 } 4483 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4484 if (ErrorLoc == SMLoc()) 4485 ErrorLoc = IDLoc; 4486 } 4487 return Error(ErrorLoc, "invalid operand for instruction"); 4488 } 4489 4490 case Match_PreferE32: 4491 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4492 "should be encoded as e32"); 4493 case Match_MnemonicFail: 4494 llvm_unreachable("Invalid instructions should have been handled already"); 4495 } 4496 llvm_unreachable("Implement any new match types added!"); 4497 } 4498 4499 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4500 int64_t Tmp = -1; 4501 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4502 return true; 4503 } 4504 if (getParser().parseAbsoluteExpression(Tmp)) { 4505 return true; 4506 } 4507 Ret = static_cast<uint32_t>(Tmp); 4508 return false; 4509 } 4510 4511 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4512 uint32_t &Minor) { 4513 if (ParseAsAbsoluteExpression(Major)) 4514 return TokError("invalid major version"); 4515 4516 if (!trySkipToken(AsmToken::Comma)) 4517 return TokError("minor version number required, comma expected"); 4518 4519 if (ParseAsAbsoluteExpression(Minor)) 4520 return TokError("invalid minor version"); 4521 4522 return false; 4523 } 4524 4525 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4526 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4527 return TokError("directive only supported for amdgcn architecture"); 4528 4529 std::string TargetIDDirective; 4530 SMLoc TargetStart = getTok().getLoc(); 4531 if (getParser().parseEscapedString(TargetIDDirective)) 4532 return true; 4533 4534 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4535 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4536 return getParser().Error(TargetRange.Start, 4537 (Twine(".amdgcn_target directive's target id ") + 4538 Twine(TargetIDDirective) + 4539 Twine(" does not match the specified target id ") + 4540 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4541 4542 return false; 4543 } 4544 4545 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4546 return Error(Range.Start, "value out of range", Range); 4547 } 4548 4549 bool AMDGPUAsmParser::calculateGPRBlocks( 4550 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4551 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4552 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4553 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4554 // TODO(scott.linder): These calculations are duplicated from 4555 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
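// "Blocks" are the granules in which the hardware allocates registers; the
// granulated counts are computed via IsaInfo at the end of this function.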
4556 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4557 4558 unsigned NumVGPRs = NextFreeVGPR; 4559 unsigned NumSGPRs = NextFreeSGPR; 4560 4561 if (Version.Major >= 10) 4562 NumSGPRs = 0; 4563 else { 4564 unsigned MaxAddressableNumSGPRs = 4565 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4566 4567 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4568 NumSGPRs > MaxAddressableNumSGPRs) 4569 return OutOfRangeError(SGPRRange); 4570 4571 NumSGPRs += 4572 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4573 4574 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4575 NumSGPRs > MaxAddressableNumSGPRs) 4576 return OutOfRangeError(SGPRRange); 4577 4578 if (Features.test(FeatureSGPRInitBug)) 4579 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4580 } 4581 4582 VGPRBlocks = 4583 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4584 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4585 4586 return false; 4587 } 4588 4589 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4590 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4591 return TokError("directive only supported for amdgcn architecture"); 4592 4593 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4594 return TokError("directive only supported for amdhsa OS"); 4595 4596 StringRef KernelName; 4597 if (getParser().parseIdentifier(KernelName)) 4598 return true; 4599 4600 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4601 4602 StringSet<> Seen; 4603 4604 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4605 4606 SMRange VGPRRange; 4607 uint64_t NextFreeVGPR = 0; 4608 uint64_t AccumOffset = 0; 4609 SMRange SGPRRange; 4610 uint64_t NextFreeSGPR = 0; 4611 4612 // Count the number of user SGPRs implied from the enabled feature bits. 4613 unsigned ImpliedUserSGPRCount = 0; 4614 4615 // Track if the asm explicitly contains the directive for the user SGPR 4616 // count. 
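// If the directive is given, the explicit count must not be smaller than the
// implied count; this is checked after all directives have been parsed.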
4617 Optional<unsigned> ExplicitUserSGPRCount; 4618 bool ReserveVCC = true; 4619 bool ReserveFlatScr = true; 4620 Optional<bool> EnableWavefrontSize32; 4621 4622 while (true) { 4623 while (trySkipToken(AsmToken::EndOfStatement)); 4624 4625 StringRef ID; 4626 SMRange IDRange = getTok().getLocRange(); 4627 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4628 return true; 4629 4630 if (ID == ".end_amdhsa_kernel") 4631 break; 4632 4633 if (Seen.find(ID) != Seen.end()) 4634 return TokError(".amdhsa_ directives cannot be repeated"); 4635 Seen.insert(ID); 4636 4637 SMLoc ValStart = getLoc(); 4638 int64_t IVal; 4639 if (getParser().parseAbsoluteExpression(IVal)) 4640 return true; 4641 SMLoc ValEnd = getLoc(); 4642 SMRange ValRange = SMRange(ValStart, ValEnd); 4643 4644 if (IVal < 0) 4645 return OutOfRangeError(ValRange); 4646 4647 uint64_t Val = IVal; 4648 4649 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4650 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4651 return OutOfRangeError(RANGE); \ 4652 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4653 4654 if (ID == ".amdhsa_group_segment_fixed_size") { 4655 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4656 return OutOfRangeError(ValRange); 4657 KD.group_segment_fixed_size = Val; 4658 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4659 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4660 return OutOfRangeError(ValRange); 4661 KD.private_segment_fixed_size = Val; 4662 } else if (ID == ".amdhsa_kernarg_size") { 4663 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4664 return OutOfRangeError(ValRange); 4665 KD.kernarg_size = Val; 4666 } else if (ID == ".amdhsa_user_sgpr_count") { 4667 ExplicitUserSGPRCount = Val; 4668 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4669 if (hasArchitectedFlatScratch()) 4670 return Error(IDRange.Start, 4671 "directive is not supported with architected flat scratch", 4672 IDRange); 4673 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4674 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4675 Val, ValRange); 4676 if (Val) 4677 ImpliedUserSGPRCount += 4; 4678 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4679 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4680 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4681 ValRange); 4682 if (Val) 4683 ImpliedUserSGPRCount += 2; 4684 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4685 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4686 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4687 ValRange); 4688 if (Val) 4689 ImpliedUserSGPRCount += 2; 4690 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4691 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4692 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4693 Val, ValRange); 4694 if (Val) 4695 ImpliedUserSGPRCount += 2; 4696 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4697 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4698 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4699 ValRange); 4700 if (Val) 4701 ImpliedUserSGPRCount += 2; 4702 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4703 if (hasArchitectedFlatScratch()) 4704 return Error(IDRange.Start, 4705 "directive is not supported with architected flat scratch", 4706 IDRange); 4707 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4708 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4709 ValRange); 4710 if (Val) 4711 ImpliedUserSGPRCount += 2; 4712 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4713 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4714 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4715 Val, ValRange); 4716 if (Val) 4717 ImpliedUserSGPRCount += 1; 4718 } else if (ID == ".amdhsa_wavefront_size32") { 4719 if (IVersion.Major < 10) 4720 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4721 EnableWavefrontSize32 = Val; 4722 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4723 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4724 Val, ValRange); 4725 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4726 if (hasArchitectedFlatScratch()) 4727 return Error(IDRange.Start, 4728 "directive is not supported with architected flat scratch", 4729 IDRange); 4730 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4731 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4732 } else if (ID == ".amdhsa_enable_private_segment") { 4733 if (!hasArchitectedFlatScratch()) 4734 return Error( 4735 IDRange.Start, 4736 "directive is not supported without architected flat scratch", 4737 IDRange); 4738 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4739 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4740 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4741 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4742 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4743 ValRange); 4744 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4745 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4746 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4747 ValRange); 4748 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4749 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4750 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4751 ValRange); 4752 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4753 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4754 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4755 ValRange); 4756 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4757 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4758 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4759 ValRange); 4760 } else if (ID == ".amdhsa_next_free_vgpr") { 4761 VGPRRange = ValRange; 4762 NextFreeVGPR = Val; 4763 } else if (ID == ".amdhsa_next_free_sgpr") { 4764 SGPRRange = ValRange; 4765 NextFreeSGPR = Val; 4766 } else if (ID == ".amdhsa_accum_offset") { 4767 if (!isGFX90A()) 4768 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4769 AccumOffset = Val; 4770 } else if (ID == ".amdhsa_reserve_vcc") { 4771 if (!isUInt<1>(Val)) 4772 return OutOfRangeError(ValRange); 4773 ReserveVCC = Val; 4774 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4775 if (IVersion.Major < 7) 4776 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4777 if (hasArchitectedFlatScratch()) 4778 return Error(IDRange.Start, 4779 "directive is not supported with architected flat scratch", 4780 IDRange); 4781 if (!isUInt<1>(Val)) 4782 return OutOfRangeError(ValRange); 4783 ReserveFlatScr = Val; 4784 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4785 if (IVersion.Major < 8) 4786 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4787 if (!isUInt<1>(Val)) 4788 return OutOfRangeError(ValRange); 4789 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4790 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4791 IDRange); 4792 } else if (ID == ".amdhsa_float_round_mode_32") { 4793 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4794 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4795 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4796 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4797 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4798 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4799 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4800 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4801 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4802 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4803 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4804 ValRange); 4805 } else if (ID == ".amdhsa_dx10_clamp") { 4806 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4807 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4808 } else if (ID == ".amdhsa_ieee_mode") { 4809 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4810 Val, ValRange); 4811 } else if (ID == ".amdhsa_fp16_overflow") { 4812 if (IVersion.Major < 9) 4813 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4814 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4815 ValRange); 4816 } else if (ID == ".amdhsa_tg_split") { 4817 if (!isGFX90A()) 4818 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4819 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4820 ValRange); 4821 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4822 if (IVersion.Major < 10) 4823 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4824 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4825 ValRange); 4826 } else if (ID == ".amdhsa_memory_ordered") { 4827 if (IVersion.Major < 10) 4828 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4829 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4830 ValRange); 4831 } else if (ID == ".amdhsa_forward_progress") { 4832 if (IVersion.Major < 10) 4833 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4834 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4835 ValRange); 4836 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4837 PARSE_BITS_ENTRY( 4838 KD.compute_pgm_rsrc2, 4839 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4840 ValRange); 4841 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4842 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4843 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4844 Val, ValRange); 4845 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4846 PARSE_BITS_ENTRY( 4847 KD.compute_pgm_rsrc2, 4848 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4849 ValRange); 4850 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4851 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4852 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4853 Val, ValRange); 4854 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4855 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4856 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4857 Val, ValRange); 4858 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4859 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4860 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4861 Val, ValRange); 4862 } else if (ID == ".amdhsa_exception_int_div_zero") { 4863 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4864 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4865 Val, ValRange); 4866 } else { 4867 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4868 } 4869 4870 #undef PARSE_BITS_ENTRY 4871 } 4872 4873 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4874 return 
TokError(".amdhsa_next_free_vgpr directive is required"); 4875 4876 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4877 return TokError(".amdhsa_next_free_sgpr directive is required"); 4878 4879 unsigned VGPRBlocks; 4880 unsigned SGPRBlocks; 4881 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4882 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4883 EnableWavefrontSize32, NextFreeVGPR, 4884 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4885 SGPRBlocks)) 4886 return true; 4887 4888 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4889 VGPRBlocks)) 4890 return OutOfRangeError(VGPRRange); 4891 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4892 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4893 4894 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4895 SGPRBlocks)) 4896 return OutOfRangeError(SGPRRange); 4897 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4898 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4899 SGPRBlocks); 4900 4901 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 4902 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 4903 "enabled user SGPRs"); 4904 4905 unsigned UserSGPRCount = 4906 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 4907 4908 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4909 return TokError("too many user SGPRs enabled"); 4910 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4911 UserSGPRCount); 4912 4913 if (isGFX90A()) { 4914 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4915 return TokError(".amdhsa_accum_offset directive is required"); 4916 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4917 return TokError("accum_offset should be in range [4..256] in " 4918 "increments of 4"); 4919 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4920 return TokError("accum_offset exceeds total VGPR allocation"); 4921 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4922 (AccumOffset / 4 - 1)); 4923 } 4924 4925 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4926 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4927 ReserveFlatScr); 4928 return false; 4929 } 4930 4931 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4932 uint32_t Major; 4933 uint32_t Minor; 4934 4935 if (ParseDirectiveMajorMinor(Major, Minor)) 4936 return true; 4937 4938 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4939 return false; 4940 } 4941 4942 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4943 uint32_t Major; 4944 uint32_t Minor; 4945 uint32_t Stepping; 4946 StringRef VendorName; 4947 StringRef ArchName; 4948 4949 // If this directive has no arguments, then use the ISA version for the 4950 // targeted GPU. 
4951 if (isToken(AsmToken::EndOfStatement)) { 4952 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4953 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4954 ISA.Stepping, 4955 "AMD", "AMDGPU"); 4956 return false; 4957 } 4958 4959 if (ParseDirectiveMajorMinor(Major, Minor)) 4960 return true; 4961 4962 if (!trySkipToken(AsmToken::Comma)) 4963 return TokError("stepping version number required, comma expected"); 4964 4965 if (ParseAsAbsoluteExpression(Stepping)) 4966 return TokError("invalid stepping version"); 4967 4968 if (!trySkipToken(AsmToken::Comma)) 4969 return TokError("vendor name required, comma expected"); 4970 4971 if (!parseString(VendorName, "invalid vendor name")) 4972 return true; 4973 4974 if (!trySkipToken(AsmToken::Comma)) 4975 return TokError("arch name required, comma expected"); 4976 4977 if (!parseString(ArchName, "invalid arch name")) 4978 return true; 4979 4980 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4981 VendorName, ArchName); 4982 return false; 4983 } 4984 4985 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4986 amd_kernel_code_t &Header) { 4987 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4988 // assembly for backwards compatibility. 4989 if (ID == "max_scratch_backing_memory_byte_size") { 4990 Parser.eatToEndOfStatement(); 4991 return false; 4992 } 4993 4994 SmallString<40> ErrStr; 4995 raw_svector_ostream Err(ErrStr); 4996 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4997 return TokError(Err.str()); 4998 } 4999 Lex(); 5000 5001 if (ID == "enable_wavefront_size32") { 5002 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5003 if (!isGFX10Plus()) 5004 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5005 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5006 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5007 } else { 5008 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5009 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5010 } 5011 } 5012 5013 if (ID == "wavefront_size") { 5014 if (Header.wavefront_size == 5) { 5015 if (!isGFX10Plus()) 5016 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5017 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5018 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5019 } else if (Header.wavefront_size == 6) { 5020 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5021 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5022 } 5023 } 5024 5025 if (ID == "enable_wgp_mode") { 5026 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5027 !isGFX10Plus()) 5028 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5029 } 5030 5031 if (ID == "enable_mem_ordered") { 5032 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5033 !isGFX10Plus()) 5034 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5035 } 5036 5037 if (ID == "enable_fwd_progress") { 5038 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5039 !isGFX10Plus()) 5040 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5041 } 5042 5043 return false; 5044 } 5045 5046 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5047 amd_kernel_code_t Header; 5048 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5049 5050 while (true) { 5051 // Lex EndOfStatement. 
This is in a while loop, because lexing a comment 5052 // will set the current token to EndOfStatement. 5053 while(trySkipToken(AsmToken::EndOfStatement)); 5054 5055 StringRef ID; 5056 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5057 return true; 5058 5059 if (ID == ".end_amd_kernel_code_t") 5060 break; 5061 5062 if (ParseAMDKernelCodeTValue(ID, Header)) 5063 return true; 5064 } 5065 5066 getTargetStreamer().EmitAMDKernelCodeT(Header); 5067 5068 return false; 5069 } 5070 5071 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5072 StringRef KernelName; 5073 if (!parseId(KernelName, "expected symbol name")) 5074 return true; 5075 5076 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5077 ELF::STT_AMDGPU_HSA_KERNEL); 5078 5079 KernelScope.initialize(getContext()); 5080 return false; 5081 } 5082 5083 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5084 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5085 return Error(getLoc(), 5086 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5087 "architectures"); 5088 } 5089 5090 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5091 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5092 return Error(getParser().getTok().getLoc(), "target id must match options"); 5093 5094 getTargetStreamer().EmitISAVersion(); 5095 Lex(); 5096 5097 return false; 5098 } 5099 5100 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5101 const char *AssemblerDirectiveBegin; 5102 const char *AssemblerDirectiveEnd; 5103 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5104 isHsaAbiVersion3AndAbove(&getSTI()) 5105 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5106 HSAMD::V3::AssemblerDirectiveEnd) 5107 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5108 HSAMD::AssemblerDirectiveEnd); 5109 5110 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5111 return Error(getLoc(), 5112 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5113 "not available on non-amdhsa OSes")).str()); 5114 } 5115 5116 std::string HSAMetadataString; 5117 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5118 HSAMetadataString)) 5119 return true; 5120 5121 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5122 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5123 return Error(getLoc(), "invalid HSA metadata"); 5124 } else { 5125 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5126 return Error(getLoc(), "invalid HSA metadata"); 5127 } 5128 5129 return false; 5130 } 5131 5132 /// Common code to parse out a block of text (typically YAML) between start and 5133 /// end directives. 
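/// Whitespace is preserved, and each statement is appended together with the
/// target's statement separator so the collected text can be handed to the
/// metadata parsers unmodified.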
5134 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5135 const char *AssemblerDirectiveEnd, 5136 std::string &CollectString) { 5137 5138 raw_string_ostream CollectStream(CollectString); 5139 5140 getLexer().setSkipSpace(false); 5141 5142 bool FoundEnd = false; 5143 while (!isToken(AsmToken::Eof)) { 5144 while (isToken(AsmToken::Space)) { 5145 CollectStream << getTokenStr(); 5146 Lex(); 5147 } 5148 5149 if (trySkipId(AssemblerDirectiveEnd)) { 5150 FoundEnd = true; 5151 break; 5152 } 5153 5154 CollectStream << Parser.parseStringToEndOfStatement() 5155 << getContext().getAsmInfo()->getSeparatorString(); 5156 5157 Parser.eatToEndOfStatement(); 5158 } 5159 5160 getLexer().setSkipSpace(true); 5161 5162 if (isToken(AsmToken::Eof) && !FoundEnd) { 5163 return TokError(Twine("expected directive ") + 5164 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5165 } 5166 5167 CollectStream.flush(); 5168 return false; 5169 } 5170 5171 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5172 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5173 std::string String; 5174 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5175 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5176 return true; 5177 5178 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5179 if (!PALMetadata->setFromString(String)) 5180 return Error(getLoc(), "invalid PAL metadata"); 5181 return false; 5182 } 5183 5184 /// Parse the assembler directive for old linear-format PAL metadata. 5185 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5186 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5187 return Error(getLoc(), 5188 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5189 "not available on non-amdpal OSes")).str()); 5190 } 5191 5192 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5193 PALMetadata->setLegacy(); 5194 for (;;) { 5195 uint32_t Key, Value; 5196 if (ParseAsAbsoluteExpression(Key)) { 5197 return TokError(Twine("invalid value in ") + 5198 Twine(PALMD::AssemblerDirective)); 5199 } 5200 if (!trySkipToken(AsmToken::Comma)) { 5201 return TokError(Twine("expected an even number of values in ") + 5202 Twine(PALMD::AssemblerDirective)); 5203 } 5204 if (ParseAsAbsoluteExpression(Value)) { 5205 return TokError(Twine("invalid value in ") + 5206 Twine(PALMD::AssemblerDirective)); 5207 } 5208 PALMetadata->setRegister(Key, Value); 5209 if (!trySkipToken(AsmToken::Comma)) 5210 break; 5211 } 5212 return false; 5213 } 5214 5215 /// ParseDirectiveAMDGPULDS 5216 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5217 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5218 if (getParser().checkForValidSection()) 5219 return true; 5220 5221 StringRef Name; 5222 SMLoc NameLoc = getLoc(); 5223 if (getParser().parseIdentifier(Name)) 5224 return TokError("expected identifier in directive"); 5225 5226 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5227 if (parseToken(AsmToken::Comma, "expected ','")) 5228 return true; 5229 5230 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5231 5232 int64_t Size; 5233 SMLoc SizeLoc = getLoc(); 5234 if (getParser().parseAbsoluteExpression(Size)) 5235 return true; 5236 if (Size < 0) 5237 return Error(SizeLoc, "size must be non-negative"); 5238 if (Size > LocalMemorySize) 5239 return Error(SizeLoc, "size is too large"); 5240 5241 int64_t Alignment = 4; 5242 if (trySkipToken(AsmToken::Comma)) { 5243 SMLoc AlignLoc = getLoc(); 5244 if 
(getParser().parseAbsoluteExpression(Alignment)) 5245 return true; 5246 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5247 return Error(AlignLoc, "alignment must be a power of two"); 5248 5249 // Alignment larger than the size of LDS is possible in theory, as long 5250 // as the linker manages to place to symbol at address 0, but we do want 5251 // to make sure the alignment fits nicely into a 32-bit integer. 5252 if (Alignment >= 1u << 31) 5253 return Error(AlignLoc, "alignment is too large"); 5254 } 5255 5256 if (parseToken(AsmToken::EndOfStatement, 5257 "unexpected token in '.amdgpu_lds' directive")) 5258 return true; 5259 5260 Symbol->redefineIfPossible(); 5261 if (!Symbol->isUndefined()) 5262 return Error(NameLoc, "invalid symbol redefinition"); 5263 5264 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5265 return false; 5266 } 5267 5268 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5269 StringRef IDVal = DirectiveID.getString(); 5270 5271 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5272 if (IDVal == ".amdhsa_kernel") 5273 return ParseDirectiveAMDHSAKernel(); 5274 5275 // TODO: Restructure/combine with PAL metadata directive. 5276 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5277 return ParseDirectiveHSAMetadata(); 5278 } else { 5279 if (IDVal == ".hsa_code_object_version") 5280 return ParseDirectiveHSACodeObjectVersion(); 5281 5282 if (IDVal == ".hsa_code_object_isa") 5283 return ParseDirectiveHSACodeObjectISA(); 5284 5285 if (IDVal == ".amd_kernel_code_t") 5286 return ParseDirectiveAMDKernelCodeT(); 5287 5288 if (IDVal == ".amdgpu_hsa_kernel") 5289 return ParseDirectiveAMDGPUHsaKernel(); 5290 5291 if (IDVal == ".amd_amdgpu_isa") 5292 return ParseDirectiveISAVersion(); 5293 5294 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5295 return ParseDirectiveHSAMetadata(); 5296 } 5297 5298 if (IDVal == ".amdgcn_target") 5299 return ParseDirectiveAMDGCNTarget(); 5300 5301 if (IDVal == ".amdgpu_lds") 5302 return ParseDirectiveAMDGPULDS(); 5303 5304 if (IDVal == PALMD::AssemblerDirectiveBegin) 5305 return ParseDirectivePALMetadataBegin(); 5306 5307 if (IDVal == PALMD::AssemblerDirective) 5308 return ParseDirectivePALMetadata(); 5309 5310 return true; 5311 } 5312 5313 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5314 unsigned RegNo) { 5315 5316 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5317 R.isValid(); ++R) { 5318 if (*R == RegNo) 5319 return isGFX9Plus(); 5320 } 5321 5322 // GFX10 has 2 more SGPRs 104 and 105. 5323 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5324 R.isValid(); ++R) { 5325 if (*R == RegNo) 5326 return hasSGPR104_SGPR105(); 5327 } 5328 5329 switch (RegNo) { 5330 case AMDGPU::SRC_SHARED_BASE: 5331 case AMDGPU::SRC_SHARED_LIMIT: 5332 case AMDGPU::SRC_PRIVATE_BASE: 5333 case AMDGPU::SRC_PRIVATE_LIMIT: 5334 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5335 return isGFX9Plus(); 5336 case AMDGPU::TBA: 5337 case AMDGPU::TBA_LO: 5338 case AMDGPU::TBA_HI: 5339 case AMDGPU::TMA: 5340 case AMDGPU::TMA_LO: 5341 case AMDGPU::TMA_HI: 5342 return !isGFX9Plus(); 5343 case AMDGPU::XNACK_MASK: 5344 case AMDGPU::XNACK_MASK_LO: 5345 case AMDGPU::XNACK_MASK_HI: 5346 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5347 case AMDGPU::SGPR_NULL: 5348 return isGFX10Plus(); 5349 default: 5350 break; 5351 } 5352 5353 if (isCI()) 5354 return true; 5355 5356 if (isSI() || isGFX10Plus()) { 5357 // No flat_scr on SI. 
5358 // On GFX10 flat scratch is not a valid register operand and can only be 5359 // accessed with s_setreg/s_getreg. 5360 switch (RegNo) { 5361 case AMDGPU::FLAT_SCR: 5362 case AMDGPU::FLAT_SCR_LO: 5363 case AMDGPU::FLAT_SCR_HI: 5364 return false; 5365 default: 5366 return true; 5367 } 5368 } 5369 5370 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5371 // SI/CI have. 5372 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5373 R.isValid(); ++R) { 5374 if (*R == RegNo) 5375 return hasSGPR102_SGPR103(); 5376 } 5377 5378 return true; 5379 } 5380 5381 OperandMatchResultTy 5382 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5383 OperandMode Mode) { 5384 // Try to parse with a custom parser 5385 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5386 5387 // If we successfully parsed the operand or if there was an error parsing, 5388 // we are done. 5389 // 5390 // If we are parsing after we reach EndOfStatement then this means we 5391 // are appending default values to the Operands list. This is only done 5392 // by a custom parser, so we shouldn't continue on to the generic parsing. 5393 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5394 isToken(AsmToken::EndOfStatement)) 5395 return ResTy; 5396 5397 SMLoc RBraceLoc; 5398 SMLoc LBraceLoc = getLoc(); 5399 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5400 unsigned Prefix = Operands.size(); 5401 5402 for (;;) { 5403 auto Loc = getLoc(); 5404 ResTy = parseReg(Operands); 5405 if (ResTy == MatchOperand_NoMatch) 5406 Error(Loc, "expected a register"); 5407 if (ResTy != MatchOperand_Success) 5408 return MatchOperand_ParseFail; 5409 5410 RBraceLoc = getLoc(); 5411 if (trySkipToken(AsmToken::RBrac)) 5412 break; 5413 5414 if (!skipToken(AsmToken::Comma, 5415 "expected a comma or a closing square bracket")) { 5416 return MatchOperand_ParseFail; 5417 } 5418 } 5419 5420 if (Operands.size() - Prefix > 1) { 5421 Operands.insert(Operands.begin() + Prefix, 5422 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5423 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5424 } 5425 5426 return MatchOperand_Success; 5427 } 5428 5429 return parseRegOrImm(Operands); 5430 } 5431 5432 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5433 // Clear any forced encodings from the previous instruction.
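// Illustrative note (the mnemonics named here are examples only, not an
// exhaustive list): a source mnemonic such as "v_add_f32_e64" forces the
// 64-bit encoding and is matched as "v_add_f32", while an "_sdwa" or "_dpp"
// suffix forces the corresponding encoding; the checks below strip the
// suffix and record the forced encoding for the matcher.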
5434 setForcedEncodingSize(0); 5435 setForcedDPP(false); 5436 setForcedSDWA(false); 5437 5438 if (Name.endswith("_e64")) { 5439 setForcedEncodingSize(64); 5440 return Name.substr(0, Name.size() - 4); 5441 } else if (Name.endswith("_e32")) { 5442 setForcedEncodingSize(32); 5443 return Name.substr(0, Name.size() - 4); 5444 } else if (Name.endswith("_dpp")) { 5445 setForcedDPP(true); 5446 return Name.substr(0, Name.size() - 4); 5447 } else if (Name.endswith("_sdwa")) { 5448 setForcedSDWA(true); 5449 return Name.substr(0, Name.size() - 5); 5450 } 5451 return Name; 5452 } 5453 5454 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5455 StringRef Name, 5456 SMLoc NameLoc, OperandVector &Operands) { 5457 // Add the instruction mnemonic 5458 Name = parseMnemonicSuffix(Name); 5459 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5460 5461 bool IsMIMG = Name.startswith("image_"); 5462 5463 while (!trySkipToken(AsmToken::EndOfStatement)) { 5464 OperandMode Mode = OperandMode_Default; 5465 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5466 Mode = OperandMode_NSA; 5467 CPolSeen = 0; 5468 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5469 5470 if (Res != MatchOperand_Success) { 5471 checkUnsupportedInstruction(Name, NameLoc); 5472 if (!Parser.hasPendingError()) { 5473 // FIXME: use real operand location rather than the current location. 5474 StringRef Msg = 5475 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5476 "not a valid operand."; 5477 Error(getLoc(), Msg); 5478 } 5479 while (!trySkipToken(AsmToken::EndOfStatement)) { 5480 lex(); 5481 } 5482 return true; 5483 } 5484 5485 // Eat the comma or space if there is one. 5486 trySkipToken(AsmToken::Comma); 5487 } 5488 5489 return false; 5490 } 5491 5492 //===----------------------------------------------------------------------===// 5493 // Utility functions 5494 //===----------------------------------------------------------------------===// 5495 5496 OperandMatchResultTy 5497 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5498 5499 if (!trySkipId(Prefix, AsmToken::Colon)) 5500 return MatchOperand_NoMatch; 5501 5502 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5503 } 5504 5505 OperandMatchResultTy 5506 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5507 AMDGPUOperand::ImmTy ImmTy, 5508 bool (*ConvertResult)(int64_t&)) { 5509 SMLoc S = getLoc(); 5510 int64_t Value = 0; 5511 5512 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5513 if (Res != MatchOperand_Success) 5514 return Res; 5515 5516 if (ConvertResult && !ConvertResult(Value)) { 5517 Error(S, "invalid " + StringRef(Prefix) + " value."); 5518 } 5519 5520 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5521 return MatchOperand_Success; 5522 } 5523 5524 OperandMatchResultTy 5525 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5526 OperandVector &Operands, 5527 AMDGPUOperand::ImmTy ImmTy, 5528 bool (*ConvertResult)(int64_t&)) { 5529 SMLoc S = getLoc(); 5530 if (!trySkipId(Prefix, AsmToken::Colon)) 5531 return MatchOperand_NoMatch; 5532 5533 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5534 return MatchOperand_ParseFail; 5535 5536 unsigned Val = 0; 5537 const unsigned MaxSize = 4; 5538 5539 // FIXME: How to verify the number of elements matches the number of src 5540 // operands? 
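// Sketch of the accepted syntax (the "op_sel" prefix is just one example of
// a user of this helper): an operand such as op_sel:[0,1,1,0] supplies up to
// MaxSize 0/1 elements; element I is OR'ed into Val at bit I below, so the
// example above yields Val == 0b0110.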
5541 for (int I = 0; ; ++I) { 5542 int64_t Op; 5543 SMLoc Loc = getLoc(); 5544 if (!parseExpr(Op)) 5545 return MatchOperand_ParseFail; 5546 5547 if (Op != 0 && Op != 1) { 5548 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5549 return MatchOperand_ParseFail; 5550 } 5551 5552 Val |= (Op << I); 5553 5554 if (trySkipToken(AsmToken::RBrac)) 5555 break; 5556 5557 if (I + 1 == MaxSize) { 5558 Error(getLoc(), "expected a closing square bracket"); 5559 return MatchOperand_ParseFail; 5560 } 5561 5562 if (!skipToken(AsmToken::Comma, "expected a comma")) 5563 return MatchOperand_ParseFail; 5564 } 5565 5566 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5567 return MatchOperand_Success; 5568 } 5569 5570 OperandMatchResultTy 5571 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5572 AMDGPUOperand::ImmTy ImmTy) { 5573 int64_t Bit; 5574 SMLoc S = getLoc(); 5575 5576 if (trySkipId(Name)) { 5577 Bit = 1; 5578 } else if (trySkipId("no", Name)) { 5579 Bit = 0; 5580 } else { 5581 return MatchOperand_NoMatch; 5582 } 5583 5584 if (Name == "r128" && !hasMIMG_R128()) { 5585 Error(S, "r128 modifier is not supported on this GPU"); 5586 return MatchOperand_ParseFail; 5587 } 5588 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5589 Error(S, "a16 modifier is not supported on this GPU"); 5590 return MatchOperand_ParseFail; 5591 } 5592 5593 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5594 ImmTy = AMDGPUOperand::ImmTyR128A16; 5595 5596 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5597 return MatchOperand_Success; 5598 } 5599 5600 OperandMatchResultTy 5601 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5602 unsigned CPolOn = 0; 5603 unsigned CPolOff = 0; 5604 SMLoc S = getLoc(); 5605 5606 if (trySkipId("glc")) 5607 CPolOn = AMDGPU::CPol::GLC; 5608 else if (trySkipId("noglc")) 5609 CPolOff = AMDGPU::CPol::GLC; 5610 else if (trySkipId("slc")) 5611 CPolOn = AMDGPU::CPol::SLC; 5612 else if (trySkipId("noslc")) 5613 CPolOff = AMDGPU::CPol::SLC; 5614 else if (trySkipId("dlc")) 5615 CPolOn = AMDGPU::CPol::DLC; 5616 else if (trySkipId("nodlc")) 5617 CPolOff = AMDGPU::CPol::DLC; 5618 else if (trySkipId("scc")) 5619 CPolOn = AMDGPU::CPol::SCC; 5620 else if (trySkipId("noscc")) 5621 CPolOff = AMDGPU::CPol::SCC; 5622 else 5623 return MatchOperand_NoMatch; 5624 5625 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5626 Error(S, "dlc modifier is not supported on this GPU"); 5627 return MatchOperand_ParseFail; 5628 } 5629 5630 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5631 Error(S, "scc modifier is not supported on this GPU"); 5632 return MatchOperand_ParseFail; 5633 } 5634 5635 if (CPolSeen & (CPolOn | CPolOff)) { 5636 Error(S, "duplicate cache policy modifier"); 5637 return MatchOperand_ParseFail; 5638 } 5639 5640 CPolSeen |= (CPolOn | CPolOff); 5641 5642 for (unsigned I = 1; I != Operands.size(); ++I) { 5643 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5644 if (Op.isCPol()) { 5645 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5646 return MatchOperand_Success; 5647 } 5648 } 5649 5650 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5651 AMDGPUOperand::ImmTyCPol)); 5652 5653 return MatchOperand_Success; 5654 } 5655 5656 static void addOptionalImmOperand( 5657 MCInst& Inst, const OperandVector& Operands, 5658 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5659 AMDGPUOperand::ImmTy ImmT, 5660 int64_t Default = 0) { 5661 auto i = OptionalIdx.find(ImmT); 5662 if (i != OptionalIdx.end()) { 
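// The optional operand was written explicitly in the source, so re-emit the
// immediate that was parsed for it; otherwise (below) a default value is
// synthesized instead.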
5663 unsigned Idx = i->second; 5664 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5665 } else { 5666 Inst.addOperand(MCOperand::createImm(Default)); 5667 } 5668 } 5669 5670 OperandMatchResultTy 5671 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5672 StringRef &Value, 5673 SMLoc &StringLoc) { 5674 if (!trySkipId(Prefix, AsmToken::Colon)) 5675 return MatchOperand_NoMatch; 5676 5677 StringLoc = getLoc(); 5678 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5679 : MatchOperand_ParseFail; 5680 } 5681 5682 //===----------------------------------------------------------------------===// 5683 // MTBUF format 5684 //===----------------------------------------------------------------------===// 5685 5686 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5687 int64_t MaxVal, 5688 int64_t &Fmt) { 5689 int64_t Val; 5690 SMLoc Loc = getLoc(); 5691 5692 auto Res = parseIntWithPrefix(Pref, Val); 5693 if (Res == MatchOperand_ParseFail) 5694 return false; 5695 if (Res == MatchOperand_NoMatch) 5696 return true; 5697 5698 if (Val < 0 || Val > MaxVal) { 5699 Error(Loc, Twine("out of range ", StringRef(Pref))); 5700 return false; 5701 } 5702 5703 Fmt = Val; 5704 return true; 5705 } 5706 5707 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5708 // values to live in a joint format operand in the MCInst encoding. 5709 OperandMatchResultTy 5710 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5711 using namespace llvm::AMDGPU::MTBUFFormat; 5712 5713 int64_t Dfmt = DFMT_UNDEF; 5714 int64_t Nfmt = NFMT_UNDEF; 5715 5716 // dfmt and nfmt can appear in either order, and each is optional. 5717 for (int I = 0; I < 2; ++I) { 5718 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5719 return MatchOperand_ParseFail; 5720 5721 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5722 return MatchOperand_ParseFail; 5723 } 5724 // Skip optional comma between dfmt/nfmt 5725 // but guard against 2 commas following each other. 5726 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5727 !peekToken().is(AsmToken::Comma)) { 5728 trySkipToken(AsmToken::Comma); 5729 } 5730 } 5731 5732 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5733 return MatchOperand_NoMatch; 5734 5735 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5736 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5737 5738 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5739 return MatchOperand_Success; 5740 } 5741 5742 OperandMatchResultTy 5743 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5744 using namespace llvm::AMDGPU::MTBUFFormat; 5745 5746 int64_t Fmt = UFMT_UNDEF; 5747 5748 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5749 return MatchOperand_ParseFail; 5750 5751 if (Fmt == UFMT_UNDEF) 5752 return MatchOperand_NoMatch; 5753 5754 Format = Fmt; 5755 return MatchOperand_Success; 5756 } 5757 5758 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5759 int64_t &Nfmt, 5760 StringRef FormatStr, 5761 SMLoc Loc) { 5762 using namespace llvm::AMDGPU::MTBUFFormat; 5763 int64_t Format; 5764 5765 Format = getDfmt(FormatStr); 5766 if (Format != DFMT_UNDEF) { 5767 Dfmt = Format; 5768 return true; 5769 } 5770 5771 Format = getNfmt(FormatStr, getSTI()); 5772 if (Format != NFMT_UNDEF) { 5773 Nfmt = Format; 5774 return true; 5775 } 5776 5777 Error(Loc, "unsupported format"); 5778 return false; 5779 } 5780 5781 OperandMatchResultTy 5782 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5783 SMLoc FormatLoc, 5784 int64_t &Format) { 5785 using namespace llvm::AMDGPU::MTBUFFormat; 5786 5787 int64_t Dfmt = DFMT_UNDEF; 5788 int64_t Nfmt = NFMT_UNDEF; 5789 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5790 return MatchOperand_ParseFail; 5791 5792 if (trySkipToken(AsmToken::Comma)) { 5793 StringRef Str; 5794 SMLoc Loc = getLoc(); 5795 if (!parseId(Str, "expected a format string") || 5796 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5797 return MatchOperand_ParseFail; 5798 } 5799 if (Dfmt == DFMT_UNDEF) { 5800 Error(Loc, "duplicate numeric format"); 5801 return MatchOperand_ParseFail; 5802 } else if (Nfmt == NFMT_UNDEF) { 5803 Error(Loc, "duplicate data format"); 5804 return MatchOperand_ParseFail; 5805 } 5806 } 5807 5808 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5809 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5810 5811 if (isGFX10Plus()) { 5812 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5813 if (Ufmt == UFMT_UNDEF) { 5814 Error(FormatLoc, "unsupported format"); 5815 return MatchOperand_ParseFail; 5816 } 5817 Format = Ufmt; 5818 } else { 5819 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5820 } 5821 5822 return MatchOperand_Success; 5823 } 5824 5825 OperandMatchResultTy 5826 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5827 SMLoc Loc, 5828 int64_t &Format) { 5829 using namespace llvm::AMDGPU::MTBUFFormat; 5830 5831 auto Id = getUnifiedFormat(FormatStr); 5832 if (Id == UFMT_UNDEF) 5833 return MatchOperand_NoMatch; 5834 5835 if (!isGFX10Plus()) { 5836 Error(Loc, "unified format is not supported on this GPU"); 5837 return MatchOperand_ParseFail; 5838 } 5839 5840 Format = Id; 5841 return MatchOperand_Success; 5842 } 5843 5844 OperandMatchResultTy 5845 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5846 using namespace llvm::AMDGPU::MTBUFFormat; 5847 SMLoc Loc = getLoc(); 5848 5849 if (!parseExpr(Format)) 5850 return MatchOperand_ParseFail; 5851 if (!isValidFormatEncoding(Format, getSTI())) { 5852 Error(Loc, "out of range format"); 5853 return MatchOperand_ParseFail; 5854 } 5855 5856 return MatchOperand_Success; 5857 } 5858 5859 OperandMatchResultTy 5860 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5861 using namespace llvm::AMDGPU::MTBUFFormat; 5862 5863 if (!trySkipId("format", AsmToken::Colon)) 5864 return MatchOperand_NoMatch; 5865 5866 if (trySkipToken(AsmToken::LBrac)) { 5867 StringRef FormatStr; 5868 SMLoc Loc = getLoc(); 5869 if (!parseId(FormatStr, "expected a format string")) 5870 return MatchOperand_ParseFail; 5871 5872 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5873 if (Res == MatchOperand_NoMatch) 5874 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5875 if (Res != MatchOperand_Success) 5876 return Res; 5877 5878 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5879 return MatchOperand_ParseFail; 5880 5881 return MatchOperand_Success; 5882 } 5883 5884 return parseNumericFormat(Format); 5885 } 5886 5887 OperandMatchResultTy 5888 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5889 using namespace llvm::AMDGPU::MTBUFFormat; 5890 5891 int64_t Format = getDefaultFormatEncoding(getSTI()); 5892 OperandMatchResultTy Res; 5893 SMLoc Loc = getLoc(); 5894 5895 // Parse legacy format syntax. 5896 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5897 if (Res == MatchOperand_ParseFail) 5898 return Res; 5899 5900 bool FormatFound = (Res == MatchOperand_Success); 5901 5902 Operands.push_back( 5903 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5904 5905 if (FormatFound) 5906 trySkipToken(AsmToken::Comma); 5907 5908 if (isToken(AsmToken::EndOfStatement)) { 5909 // We are expecting an soffset operand, 5910 // but let matcher handle the error. 5911 return MatchOperand_Success; 5912 } 5913 5914 // Parse soffset. 
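// If no format was found above, a symbolic or numeric format may still
// follow the soffset, e.g. (illustrative syntax; the symbolic name is an
// assumption) "..., s0, format:[BUF_FMT_32_FLOAT]". In that case the FORMAT
// operand that was already pushed is patched in place further below.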
5915 Res = parseRegOrImm(Operands); 5916 if (Res != MatchOperand_Success) 5917 return Res; 5918 5919 trySkipToken(AsmToken::Comma); 5920 5921 if (!FormatFound) { 5922 Res = parseSymbolicOrNumericFormat(Format); 5923 if (Res == MatchOperand_ParseFail) 5924 return Res; 5925 if (Res == MatchOperand_Success) { 5926 auto Size = Operands.size(); 5927 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5928 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5929 Op.setImm(Format); 5930 } 5931 return MatchOperand_Success; 5932 } 5933 5934 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5935 Error(getLoc(), "duplicate format"); 5936 return MatchOperand_ParseFail; 5937 } 5938 return MatchOperand_Success; 5939 } 5940 5941 //===----------------------------------------------------------------------===// 5942 // ds 5943 //===----------------------------------------------------------------------===// 5944 5945 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5946 const OperandVector &Operands) { 5947 OptionalImmIndexMap OptionalIdx; 5948 5949 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5950 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5951 5952 // Add the register arguments 5953 if (Op.isReg()) { 5954 Op.addRegOperands(Inst, 1); 5955 continue; 5956 } 5957 5958 // Handle optional arguments 5959 OptionalIdx[Op.getImmTy()] = i; 5960 } 5961 5962 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5963 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5964 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5965 5966 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5967 } 5968 5969 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5970 bool IsGdsHardcoded) { 5971 OptionalImmIndexMap OptionalIdx; 5972 5973 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5974 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5975 5976 // Add the register arguments 5977 if (Op.isReg()) { 5978 Op.addRegOperands(Inst, 1); 5979 continue; 5980 } 5981 5982 if (Op.isToken() && Op.getToken() == "gds") { 5983 IsGdsHardcoded = true; 5984 continue; 5985 } 5986 5987 // Handle optional arguments 5988 OptionalIdx[Op.getImmTy()] = i; 5989 } 5990 5991 AMDGPUOperand::ImmTy OffsetType = 5992 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5993 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5994 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5995 AMDGPUOperand::ImmTyOffset; 5996 5997 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5998 5999 if (!IsGdsHardcoded) { 6000 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6001 } 6002 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6003 } 6004 6005 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6006 OptionalImmIndexMap OptionalIdx; 6007 6008 unsigned OperandIdx[4]; 6009 unsigned EnMask = 0; 6010 int SrcIdx = 0; 6011 6012 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6013 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6014 6015 // Add the register arguments 6016 if (Op.isReg()) { 6017 assert(SrcIdx < 4); 6018 OperandIdx[SrcIdx] = Inst.size(); 6019 Op.addRegOperands(Inst, 1); 6020 ++SrcIdx; 6021 continue; 6022 } 6023 6024 if (Op.isOff()) { 6025 assert(SrcIdx < 4); 6026 OperandIdx[SrcIdx] = Inst.size(); 6027 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6028 ++SrcIdx; 6029 continue; 6030 } 6031 6032 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6033 Op.addImmOperands(Inst, 1); 6034 continue; 6035 } 6036 6037 if (Op.isToken() && Op.getToken() == "done") 6038 continue; 6039 6040 // Handle optional arguments 6041 OptionalIdx[Op.getImmTy()] = i; 6042 } 6043 6044 assert(SrcIdx == 4); 6045 6046 bool Compr = false; 6047 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6048 Compr = true; 6049 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6050 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6051 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6052 } 6053 6054 for (auto i = 0; i < SrcIdx; ++i) { 6055 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6056 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6057 } 6058 } 6059 6060 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6061 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6062 6063 Inst.addOperand(MCOperand::createImm(EnMask)); 6064 } 6065 6066 //===----------------------------------------------------------------------===// 6067 // s_waitcnt 6068 //===----------------------------------------------------------------------===// 6069 6070 static bool 6071 encodeCnt( 6072 const AMDGPU::IsaVersion ISA, 6073 int64_t &IntVal, 6074 int64_t CntVal, 6075 bool Saturate, 6076 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6077 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6078 { 6079 bool Failed = false; 6080 6081 IntVal = encode(ISA, IntVal, CntVal); 6082 if (CntVal != decode(ISA, IntVal)) { 6083 if (Saturate) { 6084 IntVal = encode(ISA, IntVal, -1); 6085 } else { 6086 Failed = true; 6087 } 6088 } 6089 return Failed; 6090 } 6091 6092 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6093 6094 SMLoc CntLoc = getLoc(); 6095 StringRef CntName = getTokenStr(); 6096 6097 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6098 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6099 return false; 6100 6101 int64_t CntVal; 6102 SMLoc ValLoc = getLoc(); 6103 if (!parseExpr(CntVal)) 6104 return false; 6105 6106 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6107 6108 bool Failed = true; 6109 bool Sat = CntName.endswith("_sat"); 6110 6111 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6112 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6113 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6114 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6115 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6116 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6117 } else { 6118 Error(CntLoc, "invalid counter name " + CntName); 6119 return false; 6120 } 6121 6122 if (Failed) { 6123 Error(ValLoc, "too large value for " + CntName); 6124 return false; 6125 } 6126 6127 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6128 return false; 6129 6130 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6131 if (isToken(AsmToken::EndOfStatement)) { 6132 Error(getLoc(), "expected a counter name"); 6133 return false; 6134 } 6135 } 6136 6137 return true; 6138 } 6139 6140 OperandMatchResultTy 6141 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6142 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6143 int64_t Waitcnt = getWaitcntBitMask(ISA); 6144 SMLoc S = getLoc(); 6145 6146 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6147 while (!isToken(AsmToken::EndOfStatement)) { 6148 if (!parseCnt(Waitcnt)) 6149 return MatchOperand_ParseFail; 6150 } 6151 } else { 6152 if (!parseExpr(Waitcnt)) 6153 return MatchOperand_ParseFail; 6154 } 6155 6156 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6157 return MatchOperand_Success; 6158 } 6159 6160 bool 6161 AMDGPUOperand::isSWaitCnt() const { 6162 return isImm(); 6163 } 6164 6165 //===----------------------------------------------------------------------===// 6166 // hwreg 6167 //===----------------------------------------------------------------------===// 6168 6169 bool 6170 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6171 OperandInfoTy &Offset, 6172 
OperandInfoTy &Width) { 6173 using namespace llvm::AMDGPU::Hwreg; 6174 6175 // The register may be specified by name or using a numeric code 6176 HwReg.Loc = getLoc(); 6177 if (isToken(AsmToken::Identifier) && 6178 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6179 HwReg.IsSymbolic = true; 6180 lex(); // skip register name 6181 } else if (!parseExpr(HwReg.Id, "a register name")) { 6182 return false; 6183 } 6184 6185 if (trySkipToken(AsmToken::RParen)) 6186 return true; 6187 6188 // parse optional params 6189 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6190 return false; 6191 6192 Offset.Loc = getLoc(); 6193 if (!parseExpr(Offset.Id)) 6194 return false; 6195 6196 if (!skipToken(AsmToken::Comma, "expected a comma")) 6197 return false; 6198 6199 Width.Loc = getLoc(); 6200 return parseExpr(Width.Id) && 6201 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6202 } 6203 6204 bool 6205 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6206 const OperandInfoTy &Offset, 6207 const OperandInfoTy &Width) { 6208 6209 using namespace llvm::AMDGPU::Hwreg; 6210 6211 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6212 Error(HwReg.Loc, 6213 "specified hardware register is not supported on this GPU"); 6214 return false; 6215 } 6216 if (!isValidHwreg(HwReg.Id)) { 6217 Error(HwReg.Loc, 6218 "invalid code of hardware register: only 6-bit values are legal"); 6219 return false; 6220 } 6221 if (!isValidHwregOffset(Offset.Id)) { 6222 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6223 return false; 6224 } 6225 if (!isValidHwregWidth(Width.Id)) { 6226 Error(Width.Loc, 6227 "invalid bitfield width: only values from 1 to 32 are legal"); 6228 return false; 6229 } 6230 return true; 6231 } 6232 6233 OperandMatchResultTy 6234 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6235 using namespace llvm::AMDGPU::Hwreg; 6236 6237 int64_t ImmVal = 0; 6238 SMLoc Loc = getLoc(); 6239 6240 if (trySkipId("hwreg", AsmToken::LParen)) { 6241 OperandInfoTy HwReg(ID_UNKNOWN_); 6242 OperandInfoTy Offset(OFFSET_DEFAULT_); 6243 OperandInfoTy Width(WIDTH_DEFAULT_); 6244 if (parseHwregBody(HwReg, Offset, Width) && 6245 validateHwreg(HwReg, Offset, Width)) { 6246 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6247 } else { 6248 return MatchOperand_ParseFail; 6249 } 6250 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6251 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6252 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6253 return MatchOperand_ParseFail; 6254 } 6255 } else { 6256 return MatchOperand_ParseFail; 6257 } 6258 6259 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6260 return MatchOperand_Success; 6261 } 6262 6263 bool AMDGPUOperand::isHwreg() const { 6264 return isImmTy(ImmTyHwreg); 6265 } 6266 6267 //===----------------------------------------------------------------------===// 6268 // sendmsg 6269 //===----------------------------------------------------------------------===// 6270 6271 bool 6272 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6273 OperandInfoTy &Op, 6274 OperandInfoTy &Stream) { 6275 using namespace llvm::AMDGPU::SendMsg; 6276 6277 Msg.Loc = getLoc(); 6278 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6279 Msg.IsSymbolic = true; 6280 lex(); // skip message name 6281 } else if (!parseExpr(Msg.Id, "a message name")) { 6282 return false; 6283 } 6284 6285 if (trySkipToken(AsmToken::Comma)) { 6286 Op.IsDefined = true; 
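// An operation name follows the comma; for illustration (the symbolic names
// are assumptions and are resolved by getMsgId/getMsgOpId), a full operand
// may look like sendmsg(MSG_GS, GS_OP_EMIT, 0), where GS_OP_EMIT is the
// operation parsed here and 0 is the optional stream id parsed below.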
6287 Op.Loc = getLoc(); 6288 if (isToken(AsmToken::Identifier) && 6289 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6290 lex(); // skip operation name 6291 } else if (!parseExpr(Op.Id, "an operation name")) { 6292 return false; 6293 } 6294 6295 if (trySkipToken(AsmToken::Comma)) { 6296 Stream.IsDefined = true; 6297 Stream.Loc = getLoc(); 6298 if (!parseExpr(Stream.Id)) 6299 return false; 6300 } 6301 } 6302 6303 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6304 } 6305 6306 bool 6307 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6308 const OperandInfoTy &Op, 6309 const OperandInfoTy &Stream) { 6310 using namespace llvm::AMDGPU::SendMsg; 6311 6312 // Validation strictness depends on whether the message is specified 6313 // in a symbolic or in a numeric form. In the latter case, 6314 // only whether the value can be encoded is checked. 6315 bool Strict = Msg.IsSymbolic; 6316 6317 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6318 Error(Msg.Loc, "invalid message id"); 6319 return false; 6320 } 6321 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6322 if (Op.IsDefined) { 6323 Error(Op.Loc, "message does not support operations"); 6324 } else { 6325 Error(Msg.Loc, "missing message operation"); 6326 } 6327 return false; 6328 } 6329 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6330 Error(Op.Loc, "invalid operation id"); 6331 return false; 6332 } 6333 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6334 Error(Stream.Loc, "message operation does not support streams"); 6335 return false; 6336 } 6337 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6338 Error(Stream.Loc, "invalid message stream id"); 6339 return false; 6340 } 6341 return true; 6342 } 6343 6344 OperandMatchResultTy 6345 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6346 using namespace llvm::AMDGPU::SendMsg; 6347 6348 int64_t ImmVal = 0; 6349 SMLoc Loc = getLoc(); 6350 6351 if (trySkipId("sendmsg", AsmToken::LParen)) { 6352 OperandInfoTy Msg(ID_UNKNOWN_); 6353 OperandInfoTy Op(OP_NONE_); 6354 OperandInfoTy Stream(STREAM_ID_NONE_); 6355 if (parseSendMsgBody(Msg, Op, Stream) && 6356 validateSendMsg(Msg, Op, Stream)) { 6357 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6358 } else { 6359 return MatchOperand_ParseFail; 6360 } 6361 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6362 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6363 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6364 return MatchOperand_ParseFail; 6365 } 6366 } else { 6367 return MatchOperand_ParseFail; 6368 } 6369 6370 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6371 return MatchOperand_Success; 6372 } 6373 6374 bool AMDGPUOperand::isSendMsg() const { 6375 return isImmTy(ImmTySendMsg); 6376 } 6377 6378 //===----------------------------------------------------------------------===// 6379 // v_interp 6380 //===----------------------------------------------------------------------===// 6381 6382 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6383 StringRef Str; 6384 SMLoc S = getLoc(); 6385 6386 if (!parseId(Str)) 6387 return MatchOperand_NoMatch; 6388 6389 int Slot = StringSwitch<int>(Str) 6390 .Case("p10", 0) 6391 .Case("p20", 1) 6392 .Case("p0", 2) 6393 .Default(-1); 6394 6395 if (Slot == -1) { 6396 Error(S, "invalid interpolation slot"); 6397 return MatchOperand_ParseFail; 6398 } 6399 6400 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6401
AMDGPUOperand::ImmTyInterpSlot)); 6402 return MatchOperand_Success; 6403 } 6404 6405 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6406 StringRef Str; 6407 SMLoc S = getLoc(); 6408 6409 if (!parseId(Str)) 6410 return MatchOperand_NoMatch; 6411 6412 if (!Str.startswith("attr")) { 6413 Error(S, "invalid interpolation attribute"); 6414 return MatchOperand_ParseFail; 6415 } 6416 6417 StringRef Chan = Str.take_back(2); 6418 int AttrChan = StringSwitch<int>(Chan) 6419 .Case(".x", 0) 6420 .Case(".y", 1) 6421 .Case(".z", 2) 6422 .Case(".w", 3) 6423 .Default(-1); 6424 if (AttrChan == -1) { 6425 Error(S, "invalid or missing interpolation attribute channel"); 6426 return MatchOperand_ParseFail; 6427 } 6428 6429 Str = Str.drop_back(2).drop_front(4); 6430 6431 uint8_t Attr; 6432 if (Str.getAsInteger(10, Attr)) { 6433 Error(S, "invalid or missing interpolation attribute number"); 6434 return MatchOperand_ParseFail; 6435 } 6436 6437 if (Attr > 63) { 6438 Error(S, "out of bounds interpolation attribute number"); 6439 return MatchOperand_ParseFail; 6440 } 6441 6442 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6443 6444 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6445 AMDGPUOperand::ImmTyInterpAttr)); 6446 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6447 AMDGPUOperand::ImmTyAttrChan)); 6448 return MatchOperand_Success; 6449 } 6450 6451 //===----------------------------------------------------------------------===// 6452 // exp 6453 //===----------------------------------------------------------------------===// 6454 6455 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6456 using namespace llvm::AMDGPU::Exp; 6457 6458 StringRef Str; 6459 SMLoc S = getLoc(); 6460 6461 if (!parseId(Str)) 6462 return MatchOperand_NoMatch; 6463 6464 unsigned Id = getTgtId(Str); 6465 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6466 Error(S, (Id == ET_INVALID) ? 
6467 "invalid exp target" : 6468 "exp target is not supported on this GPU"); 6469 return MatchOperand_ParseFail; 6470 } 6471 6472 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6473 AMDGPUOperand::ImmTyExpTgt)); 6474 return MatchOperand_Success; 6475 } 6476 6477 //===----------------------------------------------------------------------===// 6478 // parser helpers 6479 //===----------------------------------------------------------------------===// 6480 6481 bool 6482 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6483 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6484 } 6485 6486 bool 6487 AMDGPUAsmParser::isId(const StringRef Id) const { 6488 return isId(getToken(), Id); 6489 } 6490 6491 bool 6492 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6493 return getTokenKind() == Kind; 6494 } 6495 6496 bool 6497 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6498 if (isId(Id)) { 6499 lex(); 6500 return true; 6501 } 6502 return false; 6503 } 6504 6505 bool 6506 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6507 if (isToken(AsmToken::Identifier)) { 6508 StringRef Tok = getTokenStr(); 6509 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6510 lex(); 6511 return true; 6512 } 6513 } 6514 return false; 6515 } 6516 6517 bool 6518 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6519 if (isId(Id) && peekToken().is(Kind)) { 6520 lex(); 6521 lex(); 6522 return true; 6523 } 6524 return false; 6525 } 6526 6527 bool 6528 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6529 if (isToken(Kind)) { 6530 lex(); 6531 return true; 6532 } 6533 return false; 6534 } 6535 6536 bool 6537 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6538 const StringRef ErrMsg) { 6539 if (!trySkipToken(Kind)) { 6540 Error(getLoc(), ErrMsg); 6541 return false; 6542 } 6543 return true; 6544 } 6545 6546 bool 6547 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6548 SMLoc S = getLoc(); 6549 6550 const MCExpr *Expr; 6551 if (Parser.parseExpression(Expr)) 6552 return false; 6553 6554 if (Expr->evaluateAsAbsolute(Imm)) 6555 return true; 6556 6557 if (Expected.empty()) { 6558 Error(S, "expected absolute expression"); 6559 } else { 6560 Error(S, Twine("expected ", Expected) + 6561 Twine(" or an absolute expression")); 6562 } 6563 return false; 6564 } 6565 6566 bool 6567 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6568 SMLoc S = getLoc(); 6569 6570 const MCExpr *Expr; 6571 if (Parser.parseExpression(Expr)) 6572 return false; 6573 6574 int64_t IntVal; 6575 if (Expr->evaluateAsAbsolute(IntVal)) { 6576 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6577 } else { 6578 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6579 } 6580 return true; 6581 } 6582 6583 bool 6584 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6585 if (isToken(AsmToken::String)) { 6586 Val = getToken().getStringContents(); 6587 lex(); 6588 return true; 6589 } else { 6590 Error(getLoc(), ErrMsg); 6591 return false; 6592 } 6593 } 6594 6595 bool 6596 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6597 if (isToken(AsmToken::Identifier)) { 6598 Val = getTokenStr(); 6599 lex(); 6600 return true; 6601 } else { 6602 if (!ErrMsg.empty()) 6603 Error(getLoc(), ErrMsg); 6604 return false; 6605 } 6606 } 6607 6608 AsmToken 6609 AMDGPUAsmParser::getToken() const { 6610 return Parser.getTok(); 6611 } 6612 6613 AsmToken 6614 
AMDGPUAsmParser::peekToken() { 6615 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6616 } 6617 6618 void 6619 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6620 auto TokCount = getLexer().peekTokens(Tokens); 6621 6622 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6623 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6624 } 6625 6626 AsmToken::TokenKind 6627 AMDGPUAsmParser::getTokenKind() const { 6628 return getLexer().getKind(); 6629 } 6630 6631 SMLoc 6632 AMDGPUAsmParser::getLoc() const { 6633 return getToken().getLoc(); 6634 } 6635 6636 StringRef 6637 AMDGPUAsmParser::getTokenStr() const { 6638 return getToken().getString(); 6639 } 6640 6641 void 6642 AMDGPUAsmParser::lex() { 6643 Parser.Lex(); 6644 } 6645 6646 SMLoc 6647 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6648 const OperandVector &Operands) const { 6649 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6650 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6651 if (Test(Op)) 6652 return Op.getStartLoc(); 6653 } 6654 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6655 } 6656 6657 SMLoc 6658 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6659 const OperandVector &Operands) const { 6660 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6661 return getOperandLoc(Test, Operands); 6662 } 6663 6664 SMLoc 6665 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6666 const OperandVector &Operands) const { 6667 auto Test = [=](const AMDGPUOperand& Op) { 6668 return Op.isRegKind() && Op.getReg() == Reg; 6669 }; 6670 return getOperandLoc(Test, Operands); 6671 } 6672 6673 SMLoc 6674 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6675 auto Test = [](const AMDGPUOperand& Op) { 6676 return Op.IsImmKindLiteral() || Op.isExpr(); 6677 }; 6678 return getOperandLoc(Test, Operands); 6679 } 6680 6681 SMLoc 6682 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6683 auto Test = [](const AMDGPUOperand& Op) { 6684 return Op.isImmKindConst(); 6685 }; 6686 return getOperandLoc(Test, Operands); 6687 } 6688 6689 //===----------------------------------------------------------------------===// 6690 // swizzle 6691 //===----------------------------------------------------------------------===// 6692 6693 LLVM_READNONE 6694 static unsigned 6695 encodeBitmaskPerm(const unsigned AndMask, 6696 const unsigned OrMask, 6697 const unsigned XorMask) { 6698 using namespace llvm::AMDGPU::Swizzle; 6699 6700 return BITMASK_PERM_ENC | 6701 (AndMask << BITMASK_AND_SHIFT) | 6702 (OrMask << BITMASK_OR_SHIFT) | 6703 (XorMask << BITMASK_XOR_SHIFT); 6704 } 6705 6706 bool 6707 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6708 const unsigned MinVal, 6709 const unsigned MaxVal, 6710 const StringRef ErrMsg, 6711 SMLoc &Loc) { 6712 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6713 return false; 6714 } 6715 Loc = getLoc(); 6716 if (!parseExpr(Op)) { 6717 return false; 6718 } 6719 if (Op < MinVal || Op > MaxVal) { 6720 Error(Loc, ErrMsg); 6721 return false; 6722 } 6723 6724 return true; 6725 } 6726 6727 bool 6728 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6729 const unsigned MinVal, 6730 const unsigned MaxVal, 6731 const StringRef ErrMsg) { 6732 SMLoc Loc; 6733 for (unsigned i = 0; i < OpNum; ++i) { 6734 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6735 return false; 6736 } 6737 6738 return true; 6739 } 6740 6741 bool 6742 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6743 using namespace llvm::AMDGPU::Swizzle; 6744 6745 int64_t Lane[LANE_NUM]; 6746 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6747 "expected a 2-bit lane id")) { 6748 Imm = QUAD_PERM_ENC; 6749 for (unsigned I = 0; I < LANE_NUM; ++I) { 6750 Imm |= Lane[I] << (LANE_SHIFT * I); 6751 } 6752 return true; 6753 } 6754 return false; 6755 } 6756 6757 bool 6758 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6759 using namespace llvm::AMDGPU::Swizzle; 6760 6761 SMLoc Loc; 6762 int64_t GroupSize; 6763 int64_t LaneIdx; 6764 6765 if (!parseSwizzleOperand(GroupSize, 6766 2, 32, 6767 "group size must be in the interval [2,32]", 6768 Loc)) { 6769 return false; 6770 } 6771 if (!isPowerOf2_64(GroupSize)) { 6772 Error(Loc, "group size must be a power of two"); 6773 return false; 6774 } 6775 if (parseSwizzleOperand(LaneIdx, 6776 0, GroupSize - 1, 6777 "lane id must be in the interval [0,group size - 1]", 6778 Loc)) { 6779 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6780 return true; 6781 } 6782 return false; 6783 } 6784 6785 bool 6786 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6787 using namespace llvm::AMDGPU::Swizzle; 6788 6789 SMLoc Loc; 6790 int64_t GroupSize; 6791 6792 if (!parseSwizzleOperand(GroupSize, 6793 2, 32, 6794 "group size must be in the interval [2,32]", 6795 Loc)) { 6796 return false; 6797 } 6798 if (!isPowerOf2_64(GroupSize)) { 6799 Error(Loc, "group size must be a power of two"); 6800 return false; 6801 } 6802 6803 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6804 return true; 6805 } 6806 6807 bool 6808 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6809 using namespace llvm::AMDGPU::Swizzle; 6810 6811 SMLoc Loc; 6812 int64_t GroupSize; 6813 6814 if (!parseSwizzleOperand(GroupSize, 6815 1, 16, 6816 "group size must be in the interval [1,16]", 6817 Loc)) { 6818 return false; 6819 } 6820 if (!isPowerOf2_64(GroupSize)) { 6821 Error(Loc, "group size must be a power of two"); 6822 return false; 6823 } 6824 6825 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6826 return true; 6827 } 6828 6829 bool 6830 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6831 using namespace llvm::AMDGPU::Swizzle; 6832 6833 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6834 return false; 6835 } 6836 6837 StringRef Ctl; 6838 SMLoc StrLoc = getLoc(); 6839 if (!parseString(Ctl)) { 6840 return false; 6841 } 6842 if (Ctl.size() != BITMASK_WIDTH) { 6843 Error(StrLoc, "expected a 5-character mask"); 6844 return false; 6845 } 6846 6847 unsigned AndMask = 0; 6848 unsigned OrMask = 0; 6849 unsigned XorMask = 0; 6850 6851 for (size_t i = 0; i < Ctl.size(); ++i) { 6852 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6853 switch(Ctl[i]) { 6854 default: 6855 Error(StrLoc, "invalid mask"); 6856 return false; 6857 case '0': 6858 break; 6859 case '1': 6860 OrMask |= Mask; 6861 break; 6862 case 'p': 6863 AndMask |= Mask; 6864 break; 6865 case 'i': 6866 AndMask |= Mask; 6867 XorMask |= Mask; 6868 break; 6869 } 6870 } 6871 6872 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6873 return true; 6874 } 6875 6876 bool 6877 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6878 6879 SMLoc OffsetLoc = getLoc(); 6880 6881 if (!parseExpr(Imm, "a swizzle macro")) { 6882 return false; 6883 } 6884 if (!isUInt<16>(Imm)) { 6885 Error(OffsetLoc, "expected a 16-bit offset"); 6886 return false; 6887 } 6888 return true; 6889 } 6890 6891 bool 6892 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6893 using namespace llvm::AMDGPU::Swizzle; 6894 6895 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6896 6897 SMLoc ModeLoc = getLoc(); 6898 bool Ok = false; 6899 6900 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6901 Ok = parseSwizzleQuadPerm(Imm); 6902 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6903 Ok = parseSwizzleBitmaskPerm(Imm); 6904 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6905 Ok = parseSwizzleBroadcast(Imm); 6906 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6907 Ok = parseSwizzleSwap(Imm); 6908 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6909 Ok = parseSwizzleReverse(Imm); 6910 } else { 6911 Error(ModeLoc, "expected a swizzle mode"); 6912 } 6913 6914 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6915 } 6916 6917 return false; 6918 } 6919 6920 OperandMatchResultTy 6921 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6922 SMLoc S = getLoc(); 6923 int64_t Imm = 0; 6924 6925 if (trySkipId("offset")) { 6926 6927 bool Ok = false; 6928 if (skipToken(AsmToken::Colon, "expected a colon")) { 6929 if (trySkipId("swizzle")) { 6930 Ok = parseSwizzleMacro(Imm); 6931 } else { 6932 Ok = parseSwizzleOffset(Imm); 6933 } 6934 } 6935 6936 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6937 6938 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6939 } else { 6940 // Swizzle "offset" operand is optional. 6941 // If it is omitted, try parsing other optional operands. 6942 return parseOptionalOpr(Operands); 6943 } 6944 } 6945 6946 bool 6947 AMDGPUOperand::isSwizzle() const { 6948 return isImmTy(ImmTySwizzle); 6949 } 6950 6951 //===----------------------------------------------------------------------===// 6952 // VGPR Index Mode 6953 //===----------------------------------------------------------------------===// 6954 6955 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6956 6957 using namespace llvm::AMDGPU::VGPRIndexMode; 6958 6959 if (trySkipToken(AsmToken::RParen)) { 6960 return OFF; 6961 } 6962 6963 int64_t Imm = 0; 6964 6965 while (true) { 6966 unsigned Mode = 0; 6967 SMLoc S = getLoc(); 6968 6969 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6970 if (trySkipId(IdSymbolic[ModeId])) { 6971 Mode = 1 << ModeId; 6972 break; 6973 } 6974 } 6975 6976 if (Mode == 0) { 6977 Error(S, (Imm == 0)? 
6978 "expected a VGPR index mode or a closing parenthesis" : 6979 "expected a VGPR index mode"); 6980 return UNDEF; 6981 } 6982 6983 if (Imm & Mode) { 6984 Error(S, "duplicate VGPR index mode"); 6985 return UNDEF; 6986 } 6987 Imm |= Mode; 6988 6989 if (trySkipToken(AsmToken::RParen)) 6990 break; 6991 if (!skipToken(AsmToken::Comma, 6992 "expected a comma or a closing parenthesis")) 6993 return UNDEF; 6994 } 6995 6996 return Imm; 6997 } 6998 6999 OperandMatchResultTy 7000 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7001 7002 using namespace llvm::AMDGPU::VGPRIndexMode; 7003 7004 int64_t Imm = 0; 7005 SMLoc S = getLoc(); 7006 7007 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7008 Imm = parseGPRIdxMacro(); 7009 if (Imm == UNDEF) 7010 return MatchOperand_ParseFail; 7011 } else { 7012 if (getParser().parseAbsoluteExpression(Imm)) 7013 return MatchOperand_ParseFail; 7014 if (Imm < 0 || !isUInt<4>(Imm)) { 7015 Error(S, "invalid immediate: only 4-bit values are legal"); 7016 return MatchOperand_ParseFail; 7017 } 7018 } 7019 7020 Operands.push_back( 7021 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7022 return MatchOperand_Success; 7023 } 7024 7025 bool AMDGPUOperand::isGPRIdxMode() const { 7026 return isImmTy(ImmTyGprIdxMode); 7027 } 7028 7029 //===----------------------------------------------------------------------===// 7030 // sopp branch targets 7031 //===----------------------------------------------------------------------===// 7032 7033 OperandMatchResultTy 7034 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7035 7036 // Make sure we are not parsing something 7037 // that looks like a label or an expression but is not. 7038 // This will improve error messages. 7039 if (isRegister() || isModifier()) 7040 return MatchOperand_NoMatch; 7041 7042 if (!parseExpr(Operands)) 7043 return MatchOperand_ParseFail; 7044 7045 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7046 assert(Opr.isImm() || Opr.isExpr()); 7047 SMLoc Loc = Opr.getStartLoc(); 7048 7049 // Currently we do not support arbitrary expressions as branch targets. 7050 // Only labels and absolute expressions are accepted. 
7051 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7052 Error(Loc, "expected an absolute expression or a label"); 7053 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7054 Error(Loc, "expected a 16-bit signed jump offset"); 7055 } 7056 7057 return MatchOperand_Success; 7058 } 7059 7060 //===----------------------------------------------------------------------===// 7061 // Boolean holding registers 7062 //===----------------------------------------------------------------------===// 7063 7064 OperandMatchResultTy 7065 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7066 return parseReg(Operands); 7067 } 7068 7069 //===----------------------------------------------------------------------===// 7070 // mubuf 7071 //===----------------------------------------------------------------------===// 7072 7073 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7074 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7075 } 7076 7077 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7078 const OperandVector &Operands, 7079 bool IsAtomic, 7080 bool IsLds) { 7081 bool IsLdsOpcode = IsLds; 7082 bool HasLdsModifier = false; 7083 OptionalImmIndexMap OptionalIdx; 7084 unsigned FirstOperandIdx = 1; 7085 bool IsAtomicReturn = false; 7086 7087 if (IsAtomic) { 7088 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7089 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7090 if (!Op.isCPol()) 7091 continue; 7092 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7093 break; 7094 } 7095 7096 if (!IsAtomicReturn) { 7097 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7098 if (NewOpc != -1) 7099 Inst.setOpcode(NewOpc); 7100 } 7101 7102 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7103 SIInstrFlags::IsAtomicRet; 7104 } 7105 7106 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7107 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7108 7109 // Add the register arguments 7110 if (Op.isReg()) { 7111 Op.addRegOperands(Inst, 1); 7112 // Insert a tied src for atomic return dst. 7113 // This cannot be postponed as subsequent calls to 7114 // addImmOperands rely on correct number of MC operands. 7115 if (IsAtomicReturn && i == FirstOperandIdx) 7116 Op.addRegOperands(Inst, 1); 7117 continue; 7118 } 7119 7120 // Handle the case where soffset is an immediate 7121 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7122 Op.addImmOperands(Inst, 1); 7123 continue; 7124 } 7125 7126 HasLdsModifier |= Op.isLDS(); 7127 7128 // Handle tokens like 'offen' which are sometimes hard-coded into the 7129 // asm string. There are no MCInst operands for these. 7130 if (Op.isToken()) { 7131 continue; 7132 } 7133 assert(Op.isImm()); 7134 7135 // Handle optional arguments 7136 OptionalIdx[Op.getImmTy()] = i; 7137 } 7138 7139 // This is a workaround for an llvm quirk which may result in an 7140 // incorrect instruction selection. Lds and non-lds versions of 7141 // MUBUF instructions are identical except that lds versions 7142 // have mandatory 'lds' modifier. However this modifier follows 7143 // optional modifiers and llvm asm matcher regards this 'lds' 7144 // modifier as an optional one. As a result, an lds version 7145 // of opcode may be selected even if it has no 'lds' modifier. 7146 if (IsLdsOpcode && !HasLdsModifier) { 7147 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7148 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7149 Inst.setOpcode(NoLdsOpcode); 7150 IsLdsOpcode = false; 7151 } 7152 } 7153 7154 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7155 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7156 7157 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7158 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7159 } 7160 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7161 } 7162 7163 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7164 OptionalImmIndexMap OptionalIdx; 7165 7166 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7167 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7168 7169 // Add the register arguments 7170 if (Op.isReg()) { 7171 Op.addRegOperands(Inst, 1); 7172 continue; 7173 } 7174 7175 // Handle the case where soffset is an immediate 7176 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7177 Op.addImmOperands(Inst, 1); 7178 continue; 7179 } 7180 7181 // Handle tokens like 'offen' which are sometimes hard-coded into the 7182 // asm string. There are no MCInst operands for these. 7183 if (Op.isToken()) { 7184 continue; 7185 } 7186 assert(Op.isImm()); 7187 7188 // Handle optional arguments 7189 OptionalIdx[Op.getImmTy()] = i; 7190 } 7191 7192 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7193 AMDGPUOperand::ImmTyOffset); 7194 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7195 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7196 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7197 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7198 } 7199 7200 //===----------------------------------------------------------------------===// 7201 // mimg 7202 //===----------------------------------------------------------------------===// 7203 7204 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7205 bool IsAtomic) { 7206 unsigned I = 1; 7207 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7208 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7209 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7210 } 7211 7212 if (IsAtomic) { 7213 // Add src, same as dst 7214 assert(Desc.getNumDefs() == 1); 7215 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7216 } 7217 7218 OptionalImmIndexMap OptionalIdx; 7219 7220 for (unsigned E = Operands.size(); I != E; ++I) { 7221 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7222 7223 // Add the register arguments 7224 if (Op.isReg()) { 7225 Op.addRegOperands(Inst, 1); 7226 } else if (Op.isImmModifier()) { 7227 OptionalIdx[Op.getImmTy()] = I; 7228 } else if (!Op.isToken()) { 7229 llvm_unreachable("unexpected operand type"); 7230 } 7231 } 7232 7233 bool IsGFX10Plus = isGFX10Plus(); 7234 7235 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7236 if (IsGFX10Plus) 7237 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7238 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7239 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7240 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7241 if (IsGFX10Plus) 7242 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7243 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7244 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7245 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7246 if (!IsGFX10Plus) 7247 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7248 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7249 } 7250 7251 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7252 cvtMIMG(Inst, Operands, true); 7253 } 7254 7255 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7256 OptionalImmIndexMap OptionalIdx; 7257 bool IsAtomicReturn = false; 7258 7259 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7260 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7261 if (!Op.isCPol()) 7262 continue; 7263 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7264 break; 7265 } 7266 7267 if (!IsAtomicReturn) { 7268 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7269 if (NewOpc != -1) 7270 Inst.setOpcode(NewOpc); 7271 } 7272 7273 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7274 SIInstrFlags::IsAtomicRet; 7275 7276 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7277 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7278 7279 // Add the register arguments 7280 if (Op.isReg()) { 7281 Op.addRegOperands(Inst, 1); 7282 if (IsAtomicReturn && i == 1) 7283 Op.addRegOperands(Inst, 1); 7284 continue; 7285 } 7286 7287 // Handle the case where soffset is an immediate 7288 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7289 Op.addImmOperands(Inst, 1); 7290 continue; 7291 } 7292 7293 // Handle tokens like 'offen' which are sometimes hard-coded into the 7294 // asm string. There are no MCInst operands for these. 7295 if (Op.isToken()) { 7296 continue; 7297 } 7298 assert(Op.isImm()); 7299 7300 // Handle optional arguments 7301 OptionalIdx[Op.getImmTy()] = i; 7302 } 7303 7304 if ((int)Inst.getNumOperands() <= 7305 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7306 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7307 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7308 } 7309 7310 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7311 const OperandVector &Operands) { 7312 for (unsigned I = 1; I < Operands.size(); ++I) { 7313 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7314 if (Operand.isReg()) 7315 Operand.addRegOperands(Inst, 1); 7316 } 7317 7318 Inst.addOperand(MCOperand::createImm(1)); // a16 7319 } 7320 7321 //===----------------------------------------------------------------------===// 7322 // smrd 7323 //===----------------------------------------------------------------------===// 7324 7325 bool AMDGPUOperand::isSMRDOffset8() const { 7326 return isImm() && isUInt<8>(getImm()); 7327 } 7328 7329 bool AMDGPUOperand::isSMEMOffset() const { 7330 return isImm(); // Offset range is checked later by validator. 7331 } 7332 7333 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7334 // 32-bit literals are only supported on CI and we only want to use them 7335 // when the offset is > 8-bits. 
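// For instance, an immediate offset of 0x100 does not fit in 8 bits and is
// classified as a literal offset by the predicate below, while 0xFF would
// still match the 8-bit form above.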
7336 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7337 } 7338 7339 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7340 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7341 } 7342 7343 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7344 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7345 } 7346 7347 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7348 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7349 } 7350 7351 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7352 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7353 } 7354 7355 //===----------------------------------------------------------------------===// 7356 // vop3 7357 //===----------------------------------------------------------------------===// 7358 7359 static bool ConvertOmodMul(int64_t &Mul) { 7360 if (Mul != 1 && Mul != 2 && Mul != 4) 7361 return false; 7362 7363 Mul >>= 1; 7364 return true; 7365 } 7366 7367 static bool ConvertOmodDiv(int64_t &Div) { 7368 if (Div == 1) { 7369 Div = 0; 7370 return true; 7371 } 7372 7373 if (Div == 2) { 7374 Div = 3; 7375 return true; 7376 } 7377 7378 return false; 7379 } 7380 7381 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7382 // This is intentional and ensures compatibility with sp3. 7383 // See bug 35397 for details. 7384 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7385 if (BoundCtrl == 0 || BoundCtrl == 1) { 7386 BoundCtrl = 1; 7387 return true; 7388 } 7389 return false; 7390 } 7391 7392 // Note: the order in this table matches the order of operands in AsmString. 7393 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7394 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7395 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7396 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7397 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7398 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7399 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7400 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7401 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7402 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7403 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7404 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7405 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7406 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7407 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7408 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7409 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7410 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7411 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7412 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7413 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7414 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7415 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7416 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7417 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7418 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7419 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7420 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7421 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7422 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7423 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};

void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());

  if (isHsaAbiVersion3AndAbove(&getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}

OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomic instructions
  // which have a hardcoded 'glc' operand).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
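  //
  // For example (illustrative), 'flat_atomic_swap v0, v[1:2], v2 glc' carries
  // a hardcoded 'glc' after the optional offset; the lookahead loop below
  // keeps consuming up to MAX_OPR_LOOKAHEAD operands so the generated matcher
  // still sees that mandatory token.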
7463 7464 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7465 if (res != MatchOperand_Success || 7466 isToken(AsmToken::EndOfStatement)) 7467 break; 7468 7469 trySkipToken(AsmToken::Comma); 7470 res = parseOptionalOpr(Operands); 7471 } 7472 7473 return res; 7474 } 7475 7476 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7477 OperandMatchResultTy res; 7478 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7479 // try to parse any optional operand here 7480 if (Op.IsBit) { 7481 res = parseNamedBit(Op.Name, Operands, Op.Type); 7482 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7483 res = parseOModOperand(Operands); 7484 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7485 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7486 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7487 res = parseSDWASel(Operands, Op.Name, Op.Type); 7488 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7489 res = parseSDWADstUnused(Operands); 7490 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7491 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7492 Op.Type == AMDGPUOperand::ImmTyNegLo || 7493 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7494 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7495 Op.ConvertResult); 7496 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7497 res = parseDim(Operands); 7498 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7499 res = parseCPol(Operands); 7500 } else { 7501 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7502 } 7503 if (res != MatchOperand_NoMatch) { 7504 return res; 7505 } 7506 } 7507 return MatchOperand_NoMatch; 7508 } 7509 7510 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7511 StringRef Name = getTokenStr(); 7512 if (Name == "mul") { 7513 return parseIntWithPrefix("mul", Operands, 7514 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7515 } 7516 7517 if (Name == "div") { 7518 return parseIntWithPrefix("div", Operands, 7519 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7520 } 7521 7522 return MatchOperand_NoMatch; 7523 } 7524 7525 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7526 cvtVOP3P(Inst, Operands); 7527 7528 int Opc = Inst.getOpcode(); 7529 7530 int SrcNum; 7531 const int Ops[] = { AMDGPU::OpName::src0, 7532 AMDGPU::OpName::src1, 7533 AMDGPU::OpName::src2 }; 7534 for (SrcNum = 0; 7535 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7536 ++SrcNum); 7537 assert(SrcNum > 0); 7538 7539 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7540 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7541 7542 if ((OpSel & (1 << SrcNum)) != 0) { 7543 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7544 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7545 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7546 } 7547 } 7548 7549 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7550 // 1. This operand is input modifiers 7551 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7552 // 2. This is not last operand 7553 && Desc.NumOperands > (OpNum + 1) 7554 // 3. Next operand is register class 7555 && Desc.OpInfo[OpNum + 1].RegClass != -1 7556 // 4. 
Next register is not tied to any other operand 7557 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7558 } 7559 7560 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7561 { 7562 OptionalImmIndexMap OptionalIdx; 7563 unsigned Opc = Inst.getOpcode(); 7564 7565 unsigned I = 1; 7566 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7567 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7568 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7569 } 7570 7571 for (unsigned E = Operands.size(); I != E; ++I) { 7572 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7573 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7574 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7575 } else if (Op.isInterpSlot() || 7576 Op.isInterpAttr() || 7577 Op.isAttrChan()) { 7578 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7579 } else if (Op.isImmModifier()) { 7580 OptionalIdx[Op.getImmTy()] = I; 7581 } else { 7582 llvm_unreachable("unhandled operand type"); 7583 } 7584 } 7585 7586 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7587 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7588 } 7589 7590 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7591 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7592 } 7593 7594 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7595 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7596 } 7597 } 7598 7599 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7600 OptionalImmIndexMap &OptionalIdx) { 7601 unsigned Opc = Inst.getOpcode(); 7602 7603 unsigned I = 1; 7604 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7605 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7606 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7607 } 7608 7609 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7610 // This instruction has src modifiers 7611 for (unsigned E = Operands.size(); I != E; ++I) { 7612 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7613 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7614 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7615 } else if (Op.isImmModifier()) { 7616 OptionalIdx[Op.getImmTy()] = I; 7617 } else if (Op.isRegOrImm()) { 7618 Op.addRegOrImmOperands(Inst, 1); 7619 } else { 7620 llvm_unreachable("unhandled operand type"); 7621 } 7622 } 7623 } else { 7624 // No src modifiers 7625 for (unsigned E = Operands.size(); I != E; ++I) { 7626 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7627 if (Op.isMod()) { 7628 OptionalIdx[Op.getImmTy()] = I; 7629 } else { 7630 Op.addRegOrImmOperands(Inst, 1); 7631 } 7632 } 7633 } 7634 7635 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7636 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7637 } 7638 7639 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7640 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7641 } 7642 7643 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7644 // it has src2 register operand that is tied to dst operand 7645 // we don't allow modifiers for this operand in assembler so src2_modifiers 7646 // should be 0. 
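  // Illustrative example: for 'v_mac_f32_e64 v0, v1, v2', the block below
  // appends an src2_modifiers operand of 0 and then a copy of the dst
  // register (v0) as the tied src2 operand.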
7647 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7648 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7649 Opc == AMDGPU::V_MAC_F32_e64_vi || 7650 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7651 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7652 Opc == AMDGPU::V_MAC_F16_e64_vi || 7653 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7654 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7655 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7656 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7657 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7658 auto it = Inst.begin(); 7659 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7660 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7661 ++it; 7662 // Copy the operand to ensure it's not invalidated when Inst grows. 7663 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7664 } 7665 } 7666 7667 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7668 OptionalImmIndexMap OptionalIdx; 7669 cvtVOP3(Inst, Operands, OptionalIdx); 7670 } 7671 7672 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7673 OptionalImmIndexMap &OptIdx) { 7674 const int Opc = Inst.getOpcode(); 7675 const MCInstrDesc &Desc = MII.get(Opc); 7676 7677 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7678 7679 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7680 assert(!IsPacked); 7681 Inst.addOperand(Inst.getOperand(0)); 7682 } 7683 7684 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7685 // instruction, and then figure out where to actually put the modifiers 7686 7687 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7688 if (OpSelIdx != -1) { 7689 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7690 } 7691 7692 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7693 if (OpSelHiIdx != -1) { 7694 int DefaultVal = IsPacked ? 
-1 : 0; 7695 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7696 DefaultVal); 7697 } 7698 7699 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7700 if (NegLoIdx != -1) { 7701 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7702 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7703 } 7704 7705 const int Ops[] = { AMDGPU::OpName::src0, 7706 AMDGPU::OpName::src1, 7707 AMDGPU::OpName::src2 }; 7708 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7709 AMDGPU::OpName::src1_modifiers, 7710 AMDGPU::OpName::src2_modifiers }; 7711 7712 unsigned OpSel = 0; 7713 unsigned OpSelHi = 0; 7714 unsigned NegLo = 0; 7715 unsigned NegHi = 0; 7716 7717 if (OpSelIdx != -1) 7718 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7719 7720 if (OpSelHiIdx != -1) 7721 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7722 7723 if (NegLoIdx != -1) { 7724 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7725 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7726 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7727 } 7728 7729 for (int J = 0; J < 3; ++J) { 7730 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7731 if (OpIdx == -1) 7732 break; 7733 7734 uint32_t ModVal = 0; 7735 7736 if ((OpSel & (1 << J)) != 0) 7737 ModVal |= SISrcMods::OP_SEL_0; 7738 7739 if ((OpSelHi & (1 << J)) != 0) 7740 ModVal |= SISrcMods::OP_SEL_1; 7741 7742 if ((NegLo & (1 << J)) != 0) 7743 ModVal |= SISrcMods::NEG; 7744 7745 if ((NegHi & (1 << J)) != 0) 7746 ModVal |= SISrcMods::NEG_HI; 7747 7748 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7749 7750 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7751 } 7752 } 7753 7754 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7755 OptionalImmIndexMap OptIdx; 7756 cvtVOP3(Inst, Operands, OptIdx); 7757 cvtVOP3P(Inst, Operands, OptIdx); 7758 } 7759 7760 //===----------------------------------------------------------------------===// 7761 // dpp 7762 //===----------------------------------------------------------------------===// 7763 7764 bool AMDGPUOperand::isDPP8() const { 7765 return isImmTy(ImmTyDPP8); 7766 } 7767 7768 bool AMDGPUOperand::isDPPCtrl() const { 7769 using namespace AMDGPU::DPP; 7770 7771 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7772 if (result) { 7773 int64_t Imm = getImm(); 7774 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7775 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7776 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7777 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7778 (Imm == DppCtrl::WAVE_SHL1) || 7779 (Imm == DppCtrl::WAVE_ROL1) || 7780 (Imm == DppCtrl::WAVE_SHR1) || 7781 (Imm == DppCtrl::WAVE_ROR1) || 7782 (Imm == DppCtrl::ROW_MIRROR) || 7783 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7784 (Imm == DppCtrl::BCAST15) || 7785 (Imm == DppCtrl::BCAST31) || 7786 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7787 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7788 } 7789 return false; 7790 } 7791 7792 //===----------------------------------------------------------------------===// 7793 // mAI 7794 //===----------------------------------------------------------------------===// 7795 7796 bool AMDGPUOperand::isBLGP() const { 7797 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7798 } 7799 7800 bool 
AMDGPUOperand::isCBSZ() const { 7801 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7802 } 7803 7804 bool AMDGPUOperand::isABID() const { 7805 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7806 } 7807 7808 bool AMDGPUOperand::isS16Imm() const { 7809 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7810 } 7811 7812 bool AMDGPUOperand::isU16Imm() const { 7813 return isImm() && isUInt<16>(getImm()); 7814 } 7815 7816 //===----------------------------------------------------------------------===// 7817 // dim 7818 //===----------------------------------------------------------------------===// 7819 7820 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7821 // We want to allow "dim:1D" etc., 7822 // but the initial 1 is tokenized as an integer. 7823 std::string Token; 7824 if (isToken(AsmToken::Integer)) { 7825 SMLoc Loc = getToken().getEndLoc(); 7826 Token = std::string(getTokenStr()); 7827 lex(); 7828 if (getLoc() != Loc) 7829 return false; 7830 } 7831 7832 StringRef Suffix; 7833 if (!parseId(Suffix)) 7834 return false; 7835 Token += Suffix; 7836 7837 StringRef DimId = Token; 7838 if (DimId.startswith("SQ_RSRC_IMG_")) 7839 DimId = DimId.drop_front(12); 7840 7841 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7842 if (!DimInfo) 7843 return false; 7844 7845 Encoding = DimInfo->Encoding; 7846 return true; 7847 } 7848 7849 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7850 if (!isGFX10Plus()) 7851 return MatchOperand_NoMatch; 7852 7853 SMLoc S = getLoc(); 7854 7855 if (!trySkipId("dim", AsmToken::Colon)) 7856 return MatchOperand_NoMatch; 7857 7858 unsigned Encoding; 7859 SMLoc Loc = getLoc(); 7860 if (!parseDimId(Encoding)) { 7861 Error(Loc, "invalid dim value"); 7862 return MatchOperand_ParseFail; 7863 } 7864 7865 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7866 AMDGPUOperand::ImmTyDim)); 7867 return MatchOperand_Success; 7868 } 7869 7870 //===----------------------------------------------------------------------===// 7871 // dpp 7872 //===----------------------------------------------------------------------===// 7873 7874 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7875 SMLoc S = getLoc(); 7876 7877 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7878 return MatchOperand_NoMatch; 7879 7880 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7881 7882 int64_t Sels[8]; 7883 7884 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7885 return MatchOperand_ParseFail; 7886 7887 for (size_t i = 0; i < 8; ++i) { 7888 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7889 return MatchOperand_ParseFail; 7890 7891 SMLoc Loc = getLoc(); 7892 if (getParser().parseAbsoluteExpression(Sels[i])) 7893 return MatchOperand_ParseFail; 7894 if (0 > Sels[i] || 7 < Sels[i]) { 7895 Error(Loc, "expected a 3-bit value"); 7896 return MatchOperand_ParseFail; 7897 } 7898 } 7899 7900 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7901 return MatchOperand_ParseFail; 7902 7903 unsigned DPP8 = 0; 7904 for (size_t i = 0; i < 8; ++i) 7905 DPP8 |= (Sels[i] << (i * 3)); 7906 7907 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7908 return MatchOperand_Success; 7909 } 7910 7911 bool 7912 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7913 const OperandVector &Operands) { 7914 if (Ctrl == "row_newbcast") 7915 return isGFX90A(); 7916 7917 if (Ctrl == "row_share" || 7918 Ctrl 
== "row_xmask") 7919 return isGFX10Plus(); 7920 7921 if (Ctrl == "wave_shl" || 7922 Ctrl == "wave_shr" || 7923 Ctrl == "wave_rol" || 7924 Ctrl == "wave_ror" || 7925 Ctrl == "row_bcast") 7926 return isVI() || isGFX9(); 7927 7928 return Ctrl == "row_mirror" || 7929 Ctrl == "row_half_mirror" || 7930 Ctrl == "quad_perm" || 7931 Ctrl == "row_shl" || 7932 Ctrl == "row_shr" || 7933 Ctrl == "row_ror"; 7934 } 7935 7936 int64_t 7937 AMDGPUAsmParser::parseDPPCtrlPerm() { 7938 // quad_perm:[%d,%d,%d,%d] 7939 7940 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7941 return -1; 7942 7943 int64_t Val = 0; 7944 for (int i = 0; i < 4; ++i) { 7945 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7946 return -1; 7947 7948 int64_t Temp; 7949 SMLoc Loc = getLoc(); 7950 if (getParser().parseAbsoluteExpression(Temp)) 7951 return -1; 7952 if (Temp < 0 || Temp > 3) { 7953 Error(Loc, "expected a 2-bit value"); 7954 return -1; 7955 } 7956 7957 Val += (Temp << i * 2); 7958 } 7959 7960 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7961 return -1; 7962 7963 return Val; 7964 } 7965 7966 int64_t 7967 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7968 using namespace AMDGPU::DPP; 7969 7970 // sel:%d 7971 7972 int64_t Val; 7973 SMLoc Loc = getLoc(); 7974 7975 if (getParser().parseAbsoluteExpression(Val)) 7976 return -1; 7977 7978 struct DppCtrlCheck { 7979 int64_t Ctrl; 7980 int Lo; 7981 int Hi; 7982 }; 7983 7984 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7985 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7986 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7987 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7988 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7989 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7990 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7991 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7992 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7993 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7994 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7995 .Default({-1, 0, 0}); 7996 7997 bool Valid; 7998 if (Check.Ctrl == -1) { 7999 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8000 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8001 } else { 8002 Valid = Check.Lo <= Val && Val <= Check.Hi; 8003 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8004 } 8005 8006 if (!Valid) { 8007 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8008 return -1; 8009 } 8010 8011 return Val; 8012 } 8013 8014 OperandMatchResultTy 8015 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8016 using namespace AMDGPU::DPP; 8017 8018 if (!isToken(AsmToken::Identifier) || 8019 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8020 return MatchOperand_NoMatch; 8021 8022 SMLoc S = getLoc(); 8023 int64_t Val = -1; 8024 StringRef Ctrl; 8025 8026 parseId(Ctrl); 8027 8028 if (Ctrl == "row_mirror") { 8029 Val = DppCtrl::ROW_MIRROR; 8030 } else if (Ctrl == "row_half_mirror") { 8031 Val = DppCtrl::ROW_HALF_MIRROR; 8032 } else { 8033 if (skipToken(AsmToken::Colon, "expected a colon")) { 8034 if (Ctrl == "quad_perm") { 8035 Val = parseDPPCtrlPerm(); 8036 } else { 8037 Val = parseDPPCtrlSel(Ctrl); 8038 } 8039 } 8040 } 8041 8042 if (Val == -1) 8043 return MatchOperand_ParseFail; 8044 8045 Operands.push_back( 8046 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8047 return MatchOperand_Success; 8048 } 8049 8050 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8051 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8052 } 8053 8054 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8055 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8056 } 8057 8058 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8059 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8060 } 8061 8062 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8063 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8064 } 8065 8066 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8067 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8068 } 8069 8070 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8071 OptionalImmIndexMap OptionalIdx; 8072 8073 unsigned Opc = Inst.getOpcode(); 8074 bool HasModifiers = 8075 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8076 unsigned I = 1; 8077 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8078 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8079 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8080 } 8081 8082 int Fi = 0; 8083 for (unsigned E = Operands.size(); I != E; ++I) { 8084 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8085 MCOI::TIED_TO); 8086 if (TiedTo != -1) { 8087 assert((unsigned)TiedTo < Inst.getNumOperands()); 8088 // handle tied old or src2 for MAC instructions 8089 Inst.addOperand(Inst.getOperand(TiedTo)); 8090 } 8091 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8092 // Add the register arguments 8093 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8094 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8095 // Skip it. 
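      // E.g. (illustrative) in 'v_add_u32_dpp v1, vcc, v2, v3 quad_perm:[0,1,2,3]
      // row_mask:0xf bank_mask:0xf' the written 'vcc' is implied by the opcode
      // and has no corresponding explicit MCInst operand, so it is dropped here.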
8096 continue; 8097 } 8098 8099 if (IsDPP8) { 8100 if (Op.isDPP8()) { 8101 Op.addImmOperands(Inst, 1); 8102 } else if (HasModifiers && 8103 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8104 Op.addRegWithFPInputModsOperands(Inst, 2); 8105 } else if (Op.isFI()) { 8106 Fi = Op.getImm(); 8107 } else if (Op.isReg()) { 8108 Op.addRegOperands(Inst, 1); 8109 } else { 8110 llvm_unreachable("Invalid operand type"); 8111 } 8112 } else { 8113 if (HasModifiers && 8114 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8115 Op.addRegWithFPInputModsOperands(Inst, 2); 8116 } else if (Op.isReg()) { 8117 Op.addRegOperands(Inst, 1); 8118 } else if (Op.isDPPCtrl()) { 8119 Op.addImmOperands(Inst, 1); 8120 } else if (Op.isImm()) { 8121 // Handle optional arguments 8122 OptionalIdx[Op.getImmTy()] = I; 8123 } else { 8124 llvm_unreachable("Invalid operand type"); 8125 } 8126 } 8127 } 8128 8129 if (IsDPP8) { 8130 using namespace llvm::AMDGPU::DPP; 8131 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8132 } else { 8133 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8134 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8135 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8136 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8137 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8138 } 8139 } 8140 } 8141 8142 //===----------------------------------------------------------------------===// 8143 // sdwa 8144 //===----------------------------------------------------------------------===// 8145 8146 OperandMatchResultTy 8147 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8148 AMDGPUOperand::ImmTy Type) { 8149 using namespace llvm::AMDGPU::SDWA; 8150 8151 SMLoc S = getLoc(); 8152 StringRef Value; 8153 OperandMatchResultTy res; 8154 8155 SMLoc StringLoc; 8156 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8157 if (res != MatchOperand_Success) { 8158 return res; 8159 } 8160 8161 int64_t Int; 8162 Int = StringSwitch<int64_t>(Value) 8163 .Case("BYTE_0", SdwaSel::BYTE_0) 8164 .Case("BYTE_1", SdwaSel::BYTE_1) 8165 .Case("BYTE_2", SdwaSel::BYTE_2) 8166 .Case("BYTE_3", SdwaSel::BYTE_3) 8167 .Case("WORD_0", SdwaSel::WORD_0) 8168 .Case("WORD_1", SdwaSel::WORD_1) 8169 .Case("DWORD", SdwaSel::DWORD) 8170 .Default(0xffffffff); 8171 8172 if (Int == 0xffffffff) { 8173 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8174 return MatchOperand_ParseFail; 8175 } 8176 8177 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8178 return MatchOperand_Success; 8179 } 8180 8181 OperandMatchResultTy 8182 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8183 using namespace llvm::AMDGPU::SDWA; 8184 8185 SMLoc S = getLoc(); 8186 StringRef Value; 8187 OperandMatchResultTy res; 8188 8189 SMLoc StringLoc; 8190 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8191 if (res != MatchOperand_Success) { 8192 return res; 8193 } 8194 8195 int64_t Int; 8196 Int = StringSwitch<int64_t>(Value) 8197 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8198 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8199 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8200 .Default(0xffffffff); 8201 8202 if (Int == 0xffffffff) { 8203 Error(StringLoc, "invalid dst_unused value"); 8204 return MatchOperand_ParseFail; 8205 } 8206 8207 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8208 return MatchOperand_Success; 8209 } 8210 8211 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8212 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8213 } 8214 8215 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8216 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8217 } 8218 8219 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8220 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8221 } 8222 8223 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8224 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8225 } 8226 8227 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8228 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8229 } 8230 8231 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8232 uint64_t BasicInstType, 8233 bool SkipDstVcc, 8234 bool SkipSrcVcc) { 8235 using namespace llvm::AMDGPU::SDWA; 8236 8237 OptionalImmIndexMap OptionalIdx; 8238 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8239 bool SkippedVcc = false; 8240 8241 unsigned I = 1; 8242 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8243 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8244 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8245 } 8246 8247 for (unsigned E = Operands.size(); I != E; ++I) { 8248 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8249 if (SkipVcc && !SkippedVcc && Op.isReg() && 8250 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8251 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8252 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8253 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8254 // Skip VCC only if we didn't skip it on previous iteration. 8255 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
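      // Operand-count arithmetic behind the checks below: the dst takes one
      // MCInst slot, and src0/src1 take two slots each (modifiers + register),
      // so a 'vcc' seen when Inst has 1 operand is the carry-out dst and one
      // seen at 5 operands is the carry-in source.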
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token and fails to validate
  // the operand. This method checks if we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8391 case MCK_Attr: 8392 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8393 case MCK_AttrChan: 8394 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8395 case MCK_ImmSMEMOffset: 8396 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8397 case MCK_SReg_64: 8398 case MCK_SReg_64_XEXEC: 8399 // Null is defined as a 32-bit register but 8400 // it should also be enabled with 64-bit operands. 8401 // The following code enables it for SReg_64 operands 8402 // used as source and destination. Remaining source 8403 // operands are handled in isInlinableImm. 8404 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8405 default: 8406 return Match_InvalidOperand; 8407 } 8408 } 8409 8410 //===----------------------------------------------------------------------===// 8411 // endpgm 8412 //===----------------------------------------------------------------------===// 8413 8414 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8415 SMLoc S = getLoc(); 8416 int64_t Imm = 0; 8417 8418 if (!parseExpr(Imm)) { 8419 // The operand is optional, if not present default to 0 8420 Imm = 0; 8421 } 8422 8423 if (!isUInt<16>(Imm)) { 8424 Error(S, "expected a 16-bit value"); 8425 return MatchOperand_ParseFail; 8426 } 8427 8428 Operands.push_back( 8429 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8430 return MatchOperand_Success; 8431 } 8432 8433 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8434