//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
  // value of the GLC operand.
  bool isGLC_1() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

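  // Note on the naming of the source operand predicates in this class:
  // "SCSrc" accepts an SGPR or an inline constant, "SSrc" additionally accepts
  // a literal (and, for the 32-bit forms, an expression), "VCSrc" accepts a
  // VGPR/SGPR or an inline constant, "VSrc" additionally accepts a literal,
  // while "VISrc" and "AISrc" are limited to VGPRs/AGPRs plus inline
  // constants. The B/F suffix gives the operand width and its integer or
  // floating-point interpretation.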
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

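  // Like CreateImm and CreateToken above, the factories below allocate an
  // AMDGPUOperand of the corresponding KindTy; the SMLoc arguments record
  // where the operand appeared in the source and feed diagnostics.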
static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 937 unsigned RegNo, SMLoc S, 938 SMLoc E) { 939 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 940 Op->Reg.RegNo = RegNo; 941 Op->Reg.Mods = Modifiers(); 942 Op->StartLoc = S; 943 Op->EndLoc = E; 944 return Op; 945 } 946 947 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 948 const class MCExpr *Expr, SMLoc S) { 949 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 950 Op->Expr = Expr; 951 Op->StartLoc = S; 952 Op->EndLoc = S; 953 return Op; 954 } 955 }; 956 957 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 958 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 959 return OS; 960 } 961 962 //===----------------------------------------------------------------------===// 963 // AsmParser 964 //===----------------------------------------------------------------------===// 965 966 // Holds info related to the current kernel, e.g. count of SGPRs used. 967 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 968 // .amdgpu_hsa_kernel or at EOF. 969 class KernelScopeInfo { 970 int SgprIndexUnusedMin = -1; 971 int VgprIndexUnusedMin = -1; 972 MCContext *Ctx = nullptr; 973 974 void usesSgprAt(int i) { 975 if (i >= SgprIndexUnusedMin) { 976 SgprIndexUnusedMin = ++i; 977 if (Ctx) { 978 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 979 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 980 } 981 } 982 } 983 984 void usesVgprAt(int i) { 985 if (i >= VgprIndexUnusedMin) { 986 VgprIndexUnusedMin = ++i; 987 if (Ctx) { 988 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 989 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 990 } 991 } 992 } 993 994 public: 995 KernelScopeInfo() = default; 996 997 void initialize(MCContext &Context) { 998 Ctx = &Context; 999 usesSgprAt(SgprIndexUnusedMin = -1); 1000 usesVgprAt(VgprIndexUnusedMin = -1); 1001 } 1002 1003 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1004 switch (RegKind) { 1005 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1006 case IS_AGPR: // fall through 1007 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1008 default: break; 1009 } 1010 } 1011 }; 1012 1013 class AMDGPUAsmParser : public MCTargetAsmParser { 1014 MCAsmParser &Parser; 1015 1016 // Number of extra operands parsed after the first optional operand. 1017 // This may be necessary to skip hardcoded mandatory operands. 1018 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1019 1020 unsigned ForcedEncodingSize = 0; 1021 bool ForcedDPP = false; 1022 bool ForcedSDWA = false; 1023 KernelScopeInfo KernelScope; 1024 1025 /// @name Auto-generated Match Functions 1026 /// { 1027 1028 #define GET_ASSEMBLER_HEADER 1029 #include "AMDGPUGenAsmMatcher.inc" 1030 1031 /// } 1032 1033 private: 1034 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1035 bool OutOfRangeError(SMRange Range); 1036 /// Calculate VGPR/SGPR blocks required for given target, reserved 1037 /// registers, and user-specified NextFreeXGPR values. 1038 /// 1039 /// \param Features [in] Target features, used for bug corrections. 1040 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1041 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1042 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 
1043 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1044 /// descriptor field, if valid. 1045 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1046 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1047 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1048 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1049 /// \param VGPRBlocks [out] Result VGPR block count. 1050 /// \param SGPRBlocks [out] Result SGPR block count. 1051 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1052 bool FlatScrUsed, bool XNACKUsed, 1053 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1054 SMRange VGPRRange, unsigned NextFreeSGPR, 1055 SMRange SGPRRange, unsigned &VGPRBlocks, 1056 unsigned &SGPRBlocks); 1057 bool ParseDirectiveAMDGCNTarget(); 1058 bool ParseDirectiveAMDHSAKernel(); 1059 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1060 bool ParseDirectiveHSACodeObjectVersion(); 1061 bool ParseDirectiveHSACodeObjectISA(); 1062 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1063 bool ParseDirectiveAMDKernelCodeT(); 1064 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1065 bool ParseDirectiveAMDGPUHsaKernel(); 1066 1067 bool ParseDirectiveISAVersion(); 1068 bool ParseDirectiveHSAMetadata(); 1069 bool ParseDirectivePALMetadataBegin(); 1070 bool ParseDirectivePALMetadata(); 1071 bool ParseDirectiveAMDGPULDS(); 1072 1073 /// Common code to parse out a block of text (typically YAML) between start and 1074 /// end directives. 1075 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1076 const char *AssemblerDirectiveEnd, 1077 std::string &CollectString); 1078 1079 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1080 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1081 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1082 unsigned &RegNum, unsigned &RegWidth, 1083 bool RestoreOnFailure = false); 1084 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1085 unsigned &RegNum, unsigned &RegWidth, 1086 SmallVectorImpl<AsmToken> &Tokens); 1087 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1088 unsigned &RegWidth, 1089 SmallVectorImpl<AsmToken> &Tokens); 1090 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1091 unsigned &RegWidth, 1092 SmallVectorImpl<AsmToken> &Tokens); 1093 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1094 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1095 bool ParseRegRange(unsigned& Num, unsigned& Width); 1096 unsigned getRegularReg(RegisterKind RegKind, 1097 unsigned RegNum, 1098 unsigned RegWidth, 1099 SMLoc Loc); 1100 1101 bool isRegister(); 1102 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1103 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1104 void initializeGprCountSymbol(RegisterKind RegKind); 1105 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1106 unsigned RegWidth); 1107 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1108 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1109 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1110 bool IsGdsHardcoded); 1111 1112 public: 1113 enum AMDGPUMatchResultTy { 1114 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1115 }; 1116 enum OperandMode { 1117 OperandMode_Default, 1118 OperandMode_NSA, 1119 }; 1120 1121 using OptionalImmIndexMap = 
std::map<AMDGPUOperand::ImmTy, unsigned>; 1122 1123 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1124 const MCInstrInfo &MII, 1125 const MCTargetOptions &Options) 1126 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1127 MCAsmParserExtension::Initialize(Parser); 1128 1129 if (getFeatureBits().none()) { 1130 // Set default features. 1131 copySTI().ToggleFeature("southern-islands"); 1132 } 1133 1134 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1135 1136 { 1137 // TODO: make those pre-defined variables read-only. 1138 // Currently there is none suitable machinery in the core llvm-mc for this. 1139 // MCSymbol::isRedefinable is intended for another purpose, and 1140 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1141 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1142 MCContext &Ctx = getContext(); 1143 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1144 MCSymbol *Sym = 1145 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1146 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1147 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1148 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1149 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1150 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1151 } else { 1152 MCSymbol *Sym = 1153 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1154 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1155 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1156 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1157 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1158 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1159 } 1160 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1161 initializeGprCountSymbol(IS_VGPR); 1162 initializeGprCountSymbol(IS_SGPR); 1163 } else 1164 KernelScope.initialize(getContext()); 1165 } 1166 } 1167 1168 bool hasXNACK() const { 1169 return AMDGPU::hasXNACK(getSTI()); 1170 } 1171 1172 bool hasMIMG_R128() const { 1173 return AMDGPU::hasMIMG_R128(getSTI()); 1174 } 1175 1176 bool hasPackedD16() const { 1177 return AMDGPU::hasPackedD16(getSTI()); 1178 } 1179 1180 bool hasGFX10A16() const { 1181 return AMDGPU::hasGFX10A16(getSTI()); 1182 } 1183 1184 bool isSI() const { 1185 return AMDGPU::isSI(getSTI()); 1186 } 1187 1188 bool isCI() const { 1189 return AMDGPU::isCI(getSTI()); 1190 } 1191 1192 bool isVI() const { 1193 return AMDGPU::isVI(getSTI()); 1194 } 1195 1196 bool isGFX9() const { 1197 return AMDGPU::isGFX9(getSTI()); 1198 } 1199 1200 bool isGFX9Plus() const { 1201 return AMDGPU::isGFX9Plus(getSTI()); 1202 } 1203 1204 bool isGFX10() const { 1205 return AMDGPU::isGFX10(getSTI()); 1206 } 1207 1208 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1209 1210 bool isGFX10_BEncoding() const { 1211 return AMDGPU::isGFX10_BEncoding(getSTI()); 1212 } 1213 1214 bool hasInv2PiInlineImm() const { 1215 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1216 } 1217 1218 bool hasFlatOffsets() const { 1219 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1220 } 1221 1222 bool hasSGPR102_SGPR103() const { 1223 return !isVI() && !isGFX9(); 1224 } 1225 1226 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1227 1228 bool hasIntClamp() const { 1229 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1230 } 1231 1232 
AMDGPUTargetStreamer &getTargetStreamer() { 1233 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1234 return static_cast<AMDGPUTargetStreamer &>(TS); 1235 } 1236 1237 const MCRegisterInfo *getMRI() const { 1238 // We need this const_cast because for some reason getContext() is not const 1239 // in MCAsmParser. 1240 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1241 } 1242 1243 const MCInstrInfo *getMII() const { 1244 return &MII; 1245 } 1246 1247 const FeatureBitset &getFeatureBits() const { 1248 return getSTI().getFeatureBits(); 1249 } 1250 1251 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1252 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1253 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1254 1255 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1256 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1257 bool isForcedDPP() const { return ForcedDPP; } 1258 bool isForcedSDWA() const { return ForcedSDWA; } 1259 ArrayRef<unsigned> getMatchedVariants() const; 1260 StringRef getMatchedVariantName() const; 1261 1262 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1263 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1264 bool RestoreOnFailure); 1265 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1266 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1267 SMLoc &EndLoc) override; 1268 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1269 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1270 unsigned Kind) override; 1271 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1272 OperandVector &Operands, MCStreamer &Out, 1273 uint64_t &ErrorInfo, 1274 bool MatchingInlineAsm) override; 1275 bool ParseDirective(AsmToken DirectiveID) override; 1276 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1277 OperandMode Mode = OperandMode_Default); 1278 StringRef parseMnemonicSuffix(StringRef Name); 1279 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1280 SMLoc NameLoc, OperandVector &Operands) override; 1281 //bool ProcessInstruction(MCInst &Inst); 1282 1283 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1284 1285 OperandMatchResultTy 1286 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1287 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1288 bool (*ConvertResult)(int64_t &) = nullptr); 1289 1290 OperandMatchResultTy 1291 parseOperandArrayWithPrefix(const char *Prefix, 1292 OperandVector &Operands, 1293 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1294 bool (*ConvertResult)(int64_t&) = nullptr); 1295 1296 OperandMatchResultTy 1297 parseNamedBit(const char *Name, OperandVector &Operands, 1298 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1299 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1300 StringRef &Value, 1301 SMLoc &StringLoc); 1302 1303 bool isModifier(); 1304 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1305 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1306 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1307 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1308 bool parseSP3NegModifier(); 1309 OperandMatchResultTy parseImm(OperandVector &Operands, 
bool HasSP3AbsModifier = false); 1310 OperandMatchResultTy parseReg(OperandVector &Operands); 1311 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1312 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1313 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1314 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1315 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1316 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1317 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1318 OperandMatchResultTy parseUfmt(int64_t &Format); 1319 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1320 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1321 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1322 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1323 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1324 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1325 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1326 1327 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1328 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1329 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1330 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1331 1332 bool parseCnt(int64_t &IntVal); 1333 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1334 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1335 1336 private: 1337 struct OperandInfoTy { 1338 SMLoc Loc; 1339 int64_t Id; 1340 bool IsSymbolic = false; 1341 bool IsDefined = false; 1342 1343 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1344 }; 1345 1346 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1347 bool validateSendMsg(const OperandInfoTy &Msg, 1348 const OperandInfoTy &Op, 1349 const OperandInfoTy &Stream); 1350 1351 bool parseHwregBody(OperandInfoTy &HwReg, 1352 OperandInfoTy &Offset, 1353 OperandInfoTy &Width); 1354 bool validateHwreg(const OperandInfoTy &HwReg, 1355 const OperandInfoTy &Offset, 1356 const OperandInfoTy &Width); 1357 1358 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1359 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1360 1361 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1362 const OperandVector &Operands) const; 1363 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1364 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1365 SMLoc getLitLoc(const OperandVector &Operands) const; 1366 SMLoc getConstLoc(const OperandVector &Operands) const; 1367 1368 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1369 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1370 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1371 bool validateSOPLiteral(const MCInst &Inst) const; 1372 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1373 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1374 bool validateIntClampSupported(const MCInst &Inst); 1375 bool 
validateMIMGAtomicDMask(const MCInst &Inst); 1376 bool validateMIMGGatherDMask(const MCInst &Inst); 1377 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1378 bool validateMIMGDataSize(const MCInst &Inst); 1379 bool validateMIMGAddrSize(const MCInst &Inst); 1380 bool validateMIMGD16(const MCInst &Inst); 1381 bool validateMIMGDim(const MCInst &Inst); 1382 bool validateLdsDirect(const MCInst &Inst); 1383 bool validateOpSel(const MCInst &Inst); 1384 bool validateVccOperand(unsigned Reg) const; 1385 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1386 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1387 bool validateDivScale(const MCInst &Inst); 1388 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1389 const SMLoc &IDLoc); 1390 unsigned getConstantBusLimit(unsigned Opcode) const; 1391 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1392 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1393 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1394 1395 bool isSupportedMnemo(StringRef Mnemo, 1396 const FeatureBitset &FBS); 1397 bool isSupportedMnemo(StringRef Mnemo, 1398 const FeatureBitset &FBS, 1399 ArrayRef<unsigned> Variants); 1400 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1401 1402 bool isId(const StringRef Id) const; 1403 bool isId(const AsmToken &Token, const StringRef Id) const; 1404 bool isToken(const AsmToken::TokenKind Kind) const; 1405 bool trySkipId(const StringRef Id); 1406 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1407 bool trySkipToken(const AsmToken::TokenKind Kind); 1408 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1409 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1410 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1411 1412 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1413 AsmToken::TokenKind getTokenKind() const; 1414 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1415 bool parseExpr(OperandVector &Operands); 1416 StringRef getTokenStr() const; 1417 AsmToken peekToken(); 1418 AsmToken getToken() const; 1419 SMLoc getLoc() const; 1420 void lex(); 1421 1422 public: 1423 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1424 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1425 1426 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1427 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1428 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1429 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1430 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1431 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1432 1433 bool parseSwizzleOperand(int64_t &Op, 1434 const unsigned MinVal, 1435 const unsigned MaxVal, 1436 const StringRef ErrMsg, 1437 SMLoc &Loc); 1438 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1439 const unsigned MinVal, 1440 const unsigned MaxVal, 1441 const StringRef ErrMsg); 1442 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1443 bool parseSwizzleOffset(int64_t &Imm); 1444 bool parseSwizzleMacro(int64_t &Imm); 1445 bool parseSwizzleQuadPerm(int64_t &Imm); 1446 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1447 bool parseSwizzleBroadcast(int64_t &Imm); 1448 bool parseSwizzleSwap(int64_t &Imm); 1449 bool parseSwizzleReverse(int64_t &Imm); 1450 1451 
OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1452 int64_t parseGPRIdxMacro(); 1453 1454 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1455 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1456 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1457 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1458 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1459 1460 AMDGPUOperand::Ptr defaultDLC() const; 1461 AMDGPUOperand::Ptr defaultGLC() const; 1462 AMDGPUOperand::Ptr defaultGLC_1() const; 1463 AMDGPUOperand::Ptr defaultSLC() const; 1464 1465 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1466 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1467 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1468 AMDGPUOperand::Ptr defaultFlatOffset() const; 1469 1470 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1471 1472 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1473 OptionalImmIndexMap &OptionalIdx); 1474 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1475 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1476 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1477 1478 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1479 1480 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1481 bool IsAtomic = false); 1482 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1483 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1484 1485 OperandMatchResultTy parseDim(OperandVector &Operands); 1486 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1487 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1488 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1489 int64_t parseDPPCtrlSel(StringRef Ctrl); 1490 int64_t parseDPPCtrlPerm(); 1491 AMDGPUOperand::Ptr defaultRowMask() const; 1492 AMDGPUOperand::Ptr defaultBankMask() const; 1493 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1494 AMDGPUOperand::Ptr defaultFI() const; 1495 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1496 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1497 1498 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1499 AMDGPUOperand::ImmTy Type); 1500 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1501 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1502 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1503 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1504 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1505 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1506 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1507 uint64_t BasicInstType, 1508 bool SkipDstVcc = false, 1509 bool SkipSrcVcc = false); 1510 1511 AMDGPUOperand::Ptr defaultBLGP() const; 1512 AMDGPUOperand::Ptr defaultCBSZ() const; 1513 AMDGPUOperand::Ptr defaultABID() const; 1514 1515 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1516 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1517 }; 1518 1519 struct OptionalOperand { 1520 const char *Name; 1521 AMDGPUOperand::ImmTy Type; 1522 bool IsBit; 1523 bool 
(*ConvertResult)(int64_t&); 1524 }; 1525 1526 } // end anonymous namespace 1527 1528 // May be called with integer type with equivalent bitwidth. 1529 static const fltSemantics *getFltSemantics(unsigned Size) { 1530 switch (Size) { 1531 case 4: 1532 return &APFloat::IEEEsingle(); 1533 case 8: 1534 return &APFloat::IEEEdouble(); 1535 case 2: 1536 return &APFloat::IEEEhalf(); 1537 default: 1538 llvm_unreachable("unsupported fp type"); 1539 } 1540 } 1541 1542 static const fltSemantics *getFltSemantics(MVT VT) { 1543 return getFltSemantics(VT.getSizeInBits() / 8); 1544 } 1545 1546 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1547 switch (OperandType) { 1548 case AMDGPU::OPERAND_REG_IMM_INT32: 1549 case AMDGPU::OPERAND_REG_IMM_FP32: 1550 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1551 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1552 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1553 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1554 return &APFloat::IEEEsingle(); 1555 case AMDGPU::OPERAND_REG_IMM_INT64: 1556 case AMDGPU::OPERAND_REG_IMM_FP64: 1557 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1558 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1559 return &APFloat::IEEEdouble(); 1560 case AMDGPU::OPERAND_REG_IMM_INT16: 1561 case AMDGPU::OPERAND_REG_IMM_FP16: 1562 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1563 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1564 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1565 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1566 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1567 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1568 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1569 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1570 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1571 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1572 return &APFloat::IEEEhalf(); 1573 default: 1574 llvm_unreachable("unsupported fp type"); 1575 } 1576 } 1577 1578 //===----------------------------------------------------------------------===// 1579 // Operand 1580 //===----------------------------------------------------------------------===// 1581 1582 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1583 bool Lost; 1584 1585 // Convert literal to single precision 1586 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1587 APFloat::rmNearestTiesToEven, 1588 &Lost); 1589 // We allow precision lost but not overflow or underflow 1590 if (Status != APFloat::opOK && 1591 Lost && 1592 ((Status & APFloat::opOverflow) != 0 || 1593 (Status & APFloat::opUnderflow) != 0)) { 1594 return false; 1595 } 1596 1597 return true; 1598 } 1599 1600 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1601 return isUIntN(Size, Val) || isIntN(Size, Val); 1602 } 1603 1604 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1605 if (VT.getScalarType() == MVT::i16) { 1606 // FP immediate values are broken. 1607 return isInlinableIntLiteral(Val); 1608 } 1609 1610 // f16/v2f16 operands work correctly for all values. 1611 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1612 } 1613 1614 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1615 1616 // This is a hack to enable named inline values like 1617 // shared_base with both 32-bit and 64-bit operands. 1618 // Note that these values are defined as 1619 // 32-bit operands only. 1620 if (isInlineValue()) { 1621 return true; 1622 } 1623 1624 if (!isImmTy(ImmTyNone)) { 1625 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1626 return false; 1627 } 1628 // TODO: We should avoid using host float here. 
It would be better to 1629 // check the float bit values which is what a few other places do. 1630 // We've had bot failures before due to weird NaN support on mips hosts. 1631 1632 APInt Literal(64, Imm.Val); 1633 1634 if (Imm.IsFPImm) { // We got fp literal token 1635 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1636 return AMDGPU::isInlinableLiteral64(Imm.Val, 1637 AsmParser->hasInv2PiInlineImm()); 1638 } 1639 1640 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1641 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1642 return false; 1643 1644 if (type.getScalarSizeInBits() == 16) { 1645 return isInlineableLiteralOp16( 1646 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1647 type, AsmParser->hasInv2PiInlineImm()); 1648 } 1649 1650 // Check if single precision literal is inlinable 1651 return AMDGPU::isInlinableLiteral32( 1652 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1653 AsmParser->hasInv2PiInlineImm()); 1654 } 1655 1656 // We got int literal token. 1657 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1658 return AMDGPU::isInlinableLiteral64(Imm.Val, 1659 AsmParser->hasInv2PiInlineImm()); 1660 } 1661 1662 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1663 return false; 1664 } 1665 1666 if (type.getScalarSizeInBits() == 16) { 1667 return isInlineableLiteralOp16( 1668 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1669 type, AsmParser->hasInv2PiInlineImm()); 1670 } 1671 1672 return AMDGPU::isInlinableLiteral32( 1673 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1674 AsmParser->hasInv2PiInlineImm()); 1675 } 1676 1677 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1678 // Check that this immediate can be added as literal 1679 if (!isImmTy(ImmTyNone)) { 1680 return false; 1681 } 1682 1683 if (!Imm.IsFPImm) { 1684 // We got int literal token. 1685 1686 if (type == MVT::f64 && hasFPModifiers()) { 1687 // Cannot apply fp modifiers to int literals preserving the same semantics 1688 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1689 // disable these cases. 1690 return false; 1691 } 1692 1693 unsigned Size = type.getSizeInBits(); 1694 if (Size == 64) 1695 Size = 32; 1696 1697 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1698 // types. 1699 return isSafeTruncation(Imm.Val, Size); 1700 } 1701 1702 // We got fp literal token 1703 if (type == MVT::f64) { // Expected 64-bit fp operand 1704 // We would set low 64-bits of literal to zeroes but we accept this literals 1705 return true; 1706 } 1707 1708 if (type == MVT::i64) { // Expected 64-bit int operand 1709 // We don't allow fp literals in 64-bit integer instructions. It is 1710 // unclear how we should encode them. 1711 return false; 1712 } 1713 1714 // We allow fp literals with f16x2 operands assuming that the specified 1715 // literal goes into the lower half and the upper half is zero. We also 1716 // require that the literal may be losslesly converted to f16. 1717 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1718 (type == MVT::v2i16)? 
MVT::i16 : type; 1719 1720 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1721 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1722 } 1723 1724 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1725 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1726 } 1727 1728 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1729 if (AsmParser->isVI()) 1730 return isVReg32(); 1731 else if (AsmParser->isGFX9Plus()) 1732 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1733 else 1734 return false; 1735 } 1736 1737 bool AMDGPUOperand::isSDWAFP16Operand() const { 1738 return isSDWAOperand(MVT::f16); 1739 } 1740 1741 bool AMDGPUOperand::isSDWAFP32Operand() const { 1742 return isSDWAOperand(MVT::f32); 1743 } 1744 1745 bool AMDGPUOperand::isSDWAInt16Operand() const { 1746 return isSDWAOperand(MVT::i16); 1747 } 1748 1749 bool AMDGPUOperand::isSDWAInt32Operand() const { 1750 return isSDWAOperand(MVT::i32); 1751 } 1752 1753 bool AMDGPUOperand::isBoolReg() const { 1754 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1755 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1756 } 1757 1758 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1759 { 1760 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1761 assert(Size == 2 || Size == 4 || Size == 8); 1762 1763 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1764 1765 if (Imm.Mods.Abs) { 1766 Val &= ~FpSignMask; 1767 } 1768 if (Imm.Mods.Neg) { 1769 Val ^= FpSignMask; 1770 } 1771 1772 return Val; 1773 } 1774 1775 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1776 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1777 Inst.getNumOperands())) { 1778 addLiteralImmOperand(Inst, Imm.Val, 1779 ApplyModifiers & 1780 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1781 } else { 1782 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1783 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1784 setImmKindNone(); 1785 } 1786 } 1787 1788 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1789 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1790 auto OpNum = Inst.getNumOperands(); 1791 // Check that this operand accepts literals 1792 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1793 1794 if (ApplyModifiers) { 1795 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1796 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1797 Val = applyInputFPModifiers(Val, Size); 1798 } 1799 1800 APInt Literal(64, Val); 1801 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1802 1803 if (Imm.IsFPImm) { // We got fp literal token 1804 switch (OpTy) { 1805 case AMDGPU::OPERAND_REG_IMM_INT64: 1806 case AMDGPU::OPERAND_REG_IMM_FP64: 1807 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1808 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1809 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1810 AsmParser->hasInv2PiInlineImm())) { 1811 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1812 setImmKindConst(); 1813 return; 1814 } 1815 1816 // Non-inlineable 1817 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1818 // For fp operands we check if low 32 bits are zeros 1819 if (Literal.getLoBits(32) != 0) { 1820 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1821 "Can't encode literal as exact 64-bit floating-point operand. " 1822 "Low 32-bits will be set to zero"); 1823 } 1824 1825 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1826 setImmKindLiteral(); 1827 return; 1828 } 1829 1830 // We don't allow fp literals in 64-bit integer instructions. It is 1831 // unclear how we should encode them. This case should be checked earlier 1832 // in predicate methods (isLiteralImm()) 1833 llvm_unreachable("fp literal in 64-bit integer instruction."); 1834 1835 case AMDGPU::OPERAND_REG_IMM_INT32: 1836 case AMDGPU::OPERAND_REG_IMM_FP32: 1837 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1838 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1839 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1840 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1841 case AMDGPU::OPERAND_REG_IMM_INT16: 1842 case AMDGPU::OPERAND_REG_IMM_FP16: 1843 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1844 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1845 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1846 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1847 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1848 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1849 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1850 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1851 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1852 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1853 bool lost; 1854 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1855 // Convert literal to single precision 1856 FPLiteral.convert(*getOpFltSemantics(OpTy), 1857 APFloat::rmNearestTiesToEven, &lost); 1858 // We allow precision lost but not overflow or underflow. This should be 1859 // checked earlier in isLiteralImm() 1860 1861 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1862 Inst.addOperand(MCOperand::createImm(ImmVal)); 1863 setImmKindLiteral(); 1864 return; 1865 } 1866 default: 1867 llvm_unreachable("invalid operand size"); 1868 } 1869 1870 return; 1871 } 1872 1873 // We got int literal token. 1874 // Only sign extend inline immediates. 
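// Illustrative note (added, not in the original source): for a 16-bit operand
// an integer token such as -1 lies in the inline range and is emitted as an
// inline constant by the cases below, whereas a token such as 0x1234 is not
// inlinable and is emitted as a literal with the upper bits masked off
// (Val & 0xffff), i.e. without sign extension.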
1875 switch (OpTy) { 1876 case AMDGPU::OPERAND_REG_IMM_INT32: 1877 case AMDGPU::OPERAND_REG_IMM_FP32: 1878 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1879 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1880 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1881 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1882 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1883 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1884 if (isSafeTruncation(Val, 32) && 1885 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1886 AsmParser->hasInv2PiInlineImm())) { 1887 Inst.addOperand(MCOperand::createImm(Val)); 1888 setImmKindConst(); 1889 return; 1890 } 1891 1892 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1893 setImmKindLiteral(); 1894 return; 1895 1896 case AMDGPU::OPERAND_REG_IMM_INT64: 1897 case AMDGPU::OPERAND_REG_IMM_FP64: 1898 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1899 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1900 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1901 Inst.addOperand(MCOperand::createImm(Val)); 1902 setImmKindConst(); 1903 return; 1904 } 1905 1906 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1907 setImmKindLiteral(); 1908 return; 1909 1910 case AMDGPU::OPERAND_REG_IMM_INT16: 1911 case AMDGPU::OPERAND_REG_IMM_FP16: 1912 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1913 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1914 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1915 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1916 if (isSafeTruncation(Val, 16) && 1917 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1918 AsmParser->hasInv2PiInlineImm())) { 1919 Inst.addOperand(MCOperand::createImm(Val)); 1920 setImmKindConst(); 1921 return; 1922 } 1923 1924 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1925 setImmKindLiteral(); 1926 return; 1927 1928 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1929 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1930 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1931 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1932 assert(isSafeTruncation(Val, 16)); 1933 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1934 AsmParser->hasInv2PiInlineImm())); 1935 1936 Inst.addOperand(MCOperand::createImm(Val)); 1937 return; 1938 } 1939 default: 1940 llvm_unreachable("invalid operand size"); 1941 } 1942 } 1943 1944 template <unsigned Bitwidth> 1945 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1946 APInt Literal(64, Imm.Val); 1947 setImmKindNone(); 1948 1949 if (!Imm.IsFPImm) { 1950 // We got int literal token. 
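// Illustration with a hypothetical value (not from the original): for
// Bitwidth == 16 an integer token 0x12345 keeps only its low 16 bits (0x2345)
// here, while fp tokens are converted to the matching half/single format below.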
1951 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1952 return; 1953 } 1954 1955 bool Lost; 1956 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1957 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1958 APFloat::rmNearestTiesToEven, &Lost); 1959 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1960 } 1961 1962 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1963 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1964 } 1965 1966 static bool isInlineValue(unsigned Reg) { 1967 switch (Reg) { 1968 case AMDGPU::SRC_SHARED_BASE: 1969 case AMDGPU::SRC_SHARED_LIMIT: 1970 case AMDGPU::SRC_PRIVATE_BASE: 1971 case AMDGPU::SRC_PRIVATE_LIMIT: 1972 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1973 return true; 1974 case AMDGPU::SRC_VCCZ: 1975 case AMDGPU::SRC_EXECZ: 1976 case AMDGPU::SRC_SCC: 1977 return true; 1978 case AMDGPU::SGPR_NULL: 1979 return true; 1980 default: 1981 return false; 1982 } 1983 } 1984 1985 bool AMDGPUOperand::isInlineValue() const { 1986 return isRegKind() && ::isInlineValue(getReg()); 1987 } 1988 1989 //===----------------------------------------------------------------------===// 1990 // AsmParser 1991 //===----------------------------------------------------------------------===// 1992 1993 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1994 if (Is == IS_VGPR) { 1995 switch (RegWidth) { 1996 default: return -1; 1997 case 1: return AMDGPU::VGPR_32RegClassID; 1998 case 2: return AMDGPU::VReg_64RegClassID; 1999 case 3: return AMDGPU::VReg_96RegClassID; 2000 case 4: return AMDGPU::VReg_128RegClassID; 2001 case 5: return AMDGPU::VReg_160RegClassID; 2002 case 6: return AMDGPU::VReg_192RegClassID; 2003 case 8: return AMDGPU::VReg_256RegClassID; 2004 case 16: return AMDGPU::VReg_512RegClassID; 2005 case 32: return AMDGPU::VReg_1024RegClassID; 2006 } 2007 } else if (Is == IS_TTMP) { 2008 switch (RegWidth) { 2009 default: return -1; 2010 case 1: return AMDGPU::TTMP_32RegClassID; 2011 case 2: return AMDGPU::TTMP_64RegClassID; 2012 case 4: return AMDGPU::TTMP_128RegClassID; 2013 case 8: return AMDGPU::TTMP_256RegClassID; 2014 case 16: return AMDGPU::TTMP_512RegClassID; 2015 } 2016 } else if (Is == IS_SGPR) { 2017 switch (RegWidth) { 2018 default: return -1; 2019 case 1: return AMDGPU::SGPR_32RegClassID; 2020 case 2: return AMDGPU::SGPR_64RegClassID; 2021 case 3: return AMDGPU::SGPR_96RegClassID; 2022 case 4: return AMDGPU::SGPR_128RegClassID; 2023 case 5: return AMDGPU::SGPR_160RegClassID; 2024 case 6: return AMDGPU::SGPR_192RegClassID; 2025 case 8: return AMDGPU::SGPR_256RegClassID; 2026 case 16: return AMDGPU::SGPR_512RegClassID; 2027 } 2028 } else if (Is == IS_AGPR) { 2029 switch (RegWidth) { 2030 default: return -1; 2031 case 1: return AMDGPU::AGPR_32RegClassID; 2032 case 2: return AMDGPU::AReg_64RegClassID; 2033 case 3: return AMDGPU::AReg_96RegClassID; 2034 case 4: return AMDGPU::AReg_128RegClassID; 2035 case 5: return AMDGPU::AReg_160RegClassID; 2036 case 6: return AMDGPU::AReg_192RegClassID; 2037 case 8: return AMDGPU::AReg_256RegClassID; 2038 case 16: return AMDGPU::AReg_512RegClassID; 2039 case 32: return AMDGPU::AReg_1024RegClassID; 2040 } 2041 } 2042 return -1; 2043 } 2044 2045 static unsigned getSpecialRegForName(StringRef RegName) { 2046 return StringSwitch<unsigned>(RegName) 2047 .Case("exec", AMDGPU::EXEC) 2048 .Case("vcc", AMDGPU::VCC) 2049 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2050 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2051 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2052 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2053 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2054 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2055 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2056 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2057 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2058 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2059 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2060 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2061 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2062 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2063 .Case("m0", AMDGPU::M0) 2064 .Case("vccz", AMDGPU::SRC_VCCZ) 2065 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2066 .Case("execz", AMDGPU::SRC_EXECZ) 2067 .Case("src_execz", AMDGPU::SRC_EXECZ) 2068 .Case("scc", AMDGPU::SRC_SCC) 2069 .Case("src_scc", AMDGPU::SRC_SCC) 2070 .Case("tba", AMDGPU::TBA) 2071 .Case("tma", AMDGPU::TMA) 2072 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2073 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2074 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2075 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2076 .Case("vcc_lo", AMDGPU::VCC_LO) 2077 .Case("vcc_hi", AMDGPU::VCC_HI) 2078 .Case("exec_lo", AMDGPU::EXEC_LO) 2079 .Case("exec_hi", AMDGPU::EXEC_HI) 2080 .Case("tma_lo", AMDGPU::TMA_LO) 2081 .Case("tma_hi", AMDGPU::TMA_HI) 2082 .Case("tba_lo", AMDGPU::TBA_LO) 2083 .Case("tba_hi", AMDGPU::TBA_HI) 2084 .Case("pc", AMDGPU::PC_REG) 2085 .Case("null", AMDGPU::SGPR_NULL) 2086 .Default(AMDGPU::NoRegister); 2087 } 2088 2089 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2090 SMLoc &EndLoc, bool RestoreOnFailure) { 2091 auto R = parseRegister(); 2092 if (!R) return true; 2093 assert(R->isReg()); 2094 RegNo = R->getReg(); 2095 StartLoc = R->getStartLoc(); 2096 EndLoc = R->getEndLoc(); 2097 return false; 2098 } 2099 2100 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2101 SMLoc &EndLoc) { 2102 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2103 } 2104 2105 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2106 SMLoc &StartLoc, 2107 SMLoc &EndLoc) { 2108 bool Result = 2109 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2110 bool PendingErrors = getParser().hasPendingError(); 2111 getParser().clearPendingErrors(); 2112 if (PendingErrors) 2113 return MatchOperand_ParseFail; 2114 if (Result) 2115 return MatchOperand_NoMatch; 2116 return MatchOperand_Success; 2117 } 2118 2119 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2120 RegisterKind RegKind, unsigned Reg1, 2121 SMLoc Loc) { 2122 switch (RegKind) { 2123 case IS_SPECIAL: 2124 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2125 Reg = AMDGPU::EXEC; 2126 RegWidth = 2; 2127 return true; 2128 } 2129 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2130 Reg = AMDGPU::FLAT_SCR; 2131 RegWidth = 2; 2132 return true; 2133 } 2134 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2135 Reg = AMDGPU::XNACK_MASK; 2136 RegWidth = 2; 2137 return true; 2138 } 2139 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2140 Reg = AMDGPU::VCC; 2141 RegWidth = 2; 2142 return true; 2143 } 2144 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2145 Reg = AMDGPU::TBA; 2146 RegWidth = 2; 2147 return true; 2148 } 2149 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2150 Reg = AMDGPU::TMA; 2151 
RegWidth = 2; 2152 return true; 2153 } 2154 Error(Loc, "register does not fit in the list"); 2155 return false; 2156 case IS_VGPR: 2157 case IS_SGPR: 2158 case IS_AGPR: 2159 case IS_TTMP: 2160 if (Reg1 != Reg + RegWidth) { 2161 Error(Loc, "registers in a list must have consecutive indices"); 2162 return false; 2163 } 2164 RegWidth++; 2165 return true; 2166 default: 2167 llvm_unreachable("unexpected register kind"); 2168 } 2169 } 2170 2171 struct RegInfo { 2172 StringLiteral Name; 2173 RegisterKind Kind; 2174 }; 2175 2176 static constexpr RegInfo RegularRegisters[] = { 2177 {{"v"}, IS_VGPR}, 2178 {{"s"}, IS_SGPR}, 2179 {{"ttmp"}, IS_TTMP}, 2180 {{"acc"}, IS_AGPR}, 2181 {{"a"}, IS_AGPR}, 2182 }; 2183 2184 static bool isRegularReg(RegisterKind Kind) { 2185 return Kind == IS_VGPR || 2186 Kind == IS_SGPR || 2187 Kind == IS_TTMP || 2188 Kind == IS_AGPR; 2189 } 2190 2191 static const RegInfo* getRegularRegInfo(StringRef Str) { 2192 for (const RegInfo &Reg : RegularRegisters) 2193 if (Str.startswith(Reg.Name)) 2194 return &Reg; 2195 return nullptr; 2196 } 2197 2198 static bool getRegNum(StringRef Str, unsigned& Num) { 2199 return !Str.getAsInteger(10, Num); 2200 } 2201 2202 bool 2203 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2204 const AsmToken &NextToken) const { 2205 2206 // A list of consecutive registers: [s0,s1,s2,s3] 2207 if (Token.is(AsmToken::LBrac)) 2208 return true; 2209 2210 if (!Token.is(AsmToken::Identifier)) 2211 return false; 2212 2213 // A single register like s0 or a range of registers like s[0:1] 2214 2215 StringRef Str = Token.getString(); 2216 const RegInfo *Reg = getRegularRegInfo(Str); 2217 if (Reg) { 2218 StringRef RegName = Reg->Name; 2219 StringRef RegSuffix = Str.substr(RegName.size()); 2220 if (!RegSuffix.empty()) { 2221 unsigned Num; 2222 // A single register with an index: rXX 2223 if (getRegNum(RegSuffix, Num)) 2224 return true; 2225 } else { 2226 // A range of registers: r[XX:YY]. 2227 if (NextToken.is(AsmToken::LBrac)) 2228 return true; 2229 } 2230 } 2231 2232 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2233 } 2234 2235 bool 2236 AMDGPUAsmParser::isRegister() 2237 { 2238 return isRegister(getToken(), peekToken()); 2239 } 2240 2241 unsigned 2242 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2243 unsigned RegNum, 2244 unsigned RegWidth, 2245 SMLoc Loc) { 2246 2247 assert(isRegularReg(RegKind)); 2248 2249 unsigned AlignSize = 1; 2250 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2251 // SGPR and TTMP registers must be aligned. 2252 // Max required alignment is 4 dwords. 
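// Example for illustration (added note): a 64-bit range such as s[2:3] needs an
// even first index, and a 128-bit range such as s[4:7] needs a first index that
// is a multiple of 4; s[1:2] or s[2:5] would be rejected below with
// "invalid register alignment".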
2253 AlignSize = std::min(RegWidth, 4u); 2254 } 2255 2256 if (RegNum % AlignSize != 0) { 2257 Error(Loc, "invalid register alignment"); 2258 return AMDGPU::NoRegister; 2259 } 2260 2261 unsigned RegIdx = RegNum / AlignSize; 2262 int RCID = getRegClass(RegKind, RegWidth); 2263 if (RCID == -1) { 2264 Error(Loc, "invalid or unsupported register size"); 2265 return AMDGPU::NoRegister; 2266 } 2267 2268 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2269 const MCRegisterClass RC = TRI->getRegClass(RCID); 2270 if (RegIdx >= RC.getNumRegs()) { 2271 Error(Loc, "register index is out of range"); 2272 return AMDGPU::NoRegister; 2273 } 2274 2275 return RC.getRegister(RegIdx); 2276 } 2277 2278 bool 2279 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2280 int64_t RegLo, RegHi; 2281 if (!skipToken(AsmToken::LBrac, "missing register index")) 2282 return false; 2283 2284 SMLoc FirstIdxLoc = getLoc(); 2285 SMLoc SecondIdxLoc; 2286 2287 if (!parseExpr(RegLo)) 2288 return false; 2289 2290 if (trySkipToken(AsmToken::Colon)) { 2291 SecondIdxLoc = getLoc(); 2292 if (!parseExpr(RegHi)) 2293 return false; 2294 } else { 2295 RegHi = RegLo; 2296 } 2297 2298 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2299 return false; 2300 2301 if (!isUInt<32>(RegLo)) { 2302 Error(FirstIdxLoc, "invalid register index"); 2303 return false; 2304 } 2305 2306 if (!isUInt<32>(RegHi)) { 2307 Error(SecondIdxLoc, "invalid register index"); 2308 return false; 2309 } 2310 2311 if (RegLo > RegHi) { 2312 Error(FirstIdxLoc, "first register index should not exceed second index"); 2313 return false; 2314 } 2315 2316 Num = static_cast<unsigned>(RegLo); 2317 Width = (RegHi - RegLo) + 1; 2318 return true; 2319 } 2320 2321 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2322 unsigned &RegNum, unsigned &RegWidth, 2323 SmallVectorImpl<AsmToken> &Tokens) { 2324 assert(isToken(AsmToken::Identifier)); 2325 unsigned Reg = getSpecialRegForName(getTokenStr()); 2326 if (Reg) { 2327 RegNum = 0; 2328 RegWidth = 1; 2329 RegKind = IS_SPECIAL; 2330 Tokens.push_back(getToken()); 2331 lex(); // skip register name 2332 } 2333 return Reg; 2334 } 2335 2336 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2337 unsigned &RegNum, unsigned &RegWidth, 2338 SmallVectorImpl<AsmToken> &Tokens) { 2339 assert(isToken(AsmToken::Identifier)); 2340 StringRef RegName = getTokenStr(); 2341 auto Loc = getLoc(); 2342 2343 const RegInfo *RI = getRegularRegInfo(RegName); 2344 if (!RI) { 2345 Error(Loc, "invalid register name"); 2346 return AMDGPU::NoRegister; 2347 } 2348 2349 Tokens.push_back(getToken()); 2350 lex(); // skip register name 2351 2352 RegKind = RI->Kind; 2353 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2354 if (!RegSuffix.empty()) { 2355 // Single 32-bit register: vXX. 2356 if (!getRegNum(RegSuffix, RegNum)) { 2357 Error(Loc, "invalid register index"); 2358 return AMDGPU::NoRegister; 2359 } 2360 RegWidth = 1; 2361 } else { 2362 // Range of registers: v[XX:YY]. ":YY" is optional. 
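// For example (added note): "v[4:7]" yields RegNum = 4 and RegWidth = 4, while
// "v[4]" yields RegWidth = 1 because the ":YY" part defaults to XX.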
2363 if (!ParseRegRange(RegNum, RegWidth)) 2364 return AMDGPU::NoRegister; 2365 } 2366 2367 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2368 } 2369 2370 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2371 unsigned &RegWidth, 2372 SmallVectorImpl<AsmToken> &Tokens) { 2373 unsigned Reg = AMDGPU::NoRegister; 2374 auto ListLoc = getLoc(); 2375 2376 if (!skipToken(AsmToken::LBrac, 2377 "expected a register or a list of registers")) { 2378 return AMDGPU::NoRegister; 2379 } 2380 2381 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2382 2383 auto Loc = getLoc(); 2384 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2385 return AMDGPU::NoRegister; 2386 if (RegWidth != 1) { 2387 Error(Loc, "expected a single 32-bit register"); 2388 return AMDGPU::NoRegister; 2389 } 2390 2391 for (; trySkipToken(AsmToken::Comma); ) { 2392 RegisterKind NextRegKind; 2393 unsigned NextReg, NextRegNum, NextRegWidth; 2394 Loc = getLoc(); 2395 2396 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2397 NextRegNum, NextRegWidth, 2398 Tokens)) { 2399 return AMDGPU::NoRegister; 2400 } 2401 if (NextRegWidth != 1) { 2402 Error(Loc, "expected a single 32-bit register"); 2403 return AMDGPU::NoRegister; 2404 } 2405 if (NextRegKind != RegKind) { 2406 Error(Loc, "registers in a list must be of the same kind"); 2407 return AMDGPU::NoRegister; 2408 } 2409 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2410 return AMDGPU::NoRegister; 2411 } 2412 2413 if (!skipToken(AsmToken::RBrac, 2414 "expected a comma or a closing square bracket")) { 2415 return AMDGPU::NoRegister; 2416 } 2417 2418 if (isRegularReg(RegKind)) 2419 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2420 2421 return Reg; 2422 } 2423 2424 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2425 unsigned &RegNum, unsigned &RegWidth, 2426 SmallVectorImpl<AsmToken> &Tokens) { 2427 auto Loc = getLoc(); 2428 Reg = AMDGPU::NoRegister; 2429 2430 if (isToken(AsmToken::Identifier)) { 2431 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2432 if (Reg == AMDGPU::NoRegister) 2433 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2434 } else { 2435 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2436 } 2437 2438 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2439 if (Reg == AMDGPU::NoRegister) { 2440 assert(Parser.hasPendingError()); 2441 return false; 2442 } 2443 2444 if (!subtargetHasRegister(*TRI, Reg)) { 2445 if (Reg == AMDGPU::SGPR_NULL) { 2446 Error(Loc, "'null' operand is not supported on this GPU"); 2447 } else { 2448 Error(Loc, "register not available on this GPU"); 2449 } 2450 return false; 2451 } 2452 2453 return true; 2454 } 2455 2456 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2457 unsigned &RegNum, unsigned &RegWidth, 2458 bool RestoreOnFailure /*=false*/) { 2459 Reg = AMDGPU::NoRegister; 2460 2461 SmallVector<AsmToken, 1> Tokens; 2462 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2463 if (RestoreOnFailure) { 2464 while (!Tokens.empty()) { 2465 getLexer().UnLex(Tokens.pop_back_val()); 2466 } 2467 } 2468 return true; 2469 } 2470 return false; 2471 } 2472 2473 Optional<StringRef> 2474 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2475 switch (RegKind) { 2476 case IS_VGPR: 2477 return StringRef(".amdgcn.next_free_vgpr"); 2478 case IS_SGPR: 2479 return StringRef(".amdgcn.next_free_sgpr"); 2480 default: 2481 return None; 2482 } 2483 } 2484 2485 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2486 auto SymbolName = getGprCountSymbolName(RegKind); 2487 assert(SymbolName && "initializing invalid register kind"); 2488 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2489 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2490 } 2491 2492 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2493 unsigned DwordRegIndex, 2494 unsigned RegWidth) { 2495 // Symbols are only defined for GCN targets 2496 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2497 return true; 2498 2499 auto SymbolName = getGprCountSymbolName(RegKind); 2500 if (!SymbolName) 2501 return true; 2502 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2503 2504 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2505 int64_t OldCount; 2506 2507 if (!Sym->isVariable()) 2508 return !Error(getLoc(), 2509 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2510 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2511 return !Error( 2512 getLoc(), 2513 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2514 2515 if (OldCount <= NewMax) 2516 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2517 2518 return true; 2519 } 2520 2521 std::unique_ptr<AMDGPUOperand> 2522 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2523 const auto &Tok = getToken(); 2524 SMLoc StartLoc = Tok.getLoc(); 2525 SMLoc EndLoc = Tok.getEndLoc(); 2526 RegisterKind RegKind; 2527 unsigned Reg, RegNum, RegWidth; 2528 2529 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2530 return nullptr; 2531 } 2532 if (isHsaAbiVersion3(&getSTI())) { 2533 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2534 return nullptr; 2535 } else 2536 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2537 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2538 } 2539 2540 OperandMatchResultTy 2541 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2542 // TODO: add syntactic sugar for 1/(2*PI) 2543 2544 assert(!isRegister()); 2545 assert(!isModifier()); 2546 2547 const auto& Tok = getToken(); 2548 const auto& NextTok = peekToken(); 2549 bool IsReal = Tok.is(AsmToken::Real); 2550 SMLoc S = getLoc(); 2551 bool Negate = false; 2552 2553 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2554 lex(); 2555 IsReal = true; 2556 Negate = true; 2557 } 2558 2559 if (IsReal) { 2560 // Floating-point expressions are not supported. 2561 // Can only allow floating-point literals with an 2562 // optional sign. 2563 2564 StringRef Num = getTokenStr(); 2565 lex(); 2566 2567 APFloat RealVal(APFloat::IEEEdouble()); 2568 auto roundMode = APFloat::rmNearestTiesToEven; 2569 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2570 return MatchOperand_ParseFail; 2571 } 2572 if (Negate) 2573 RealVal.changeSign(); 2574 2575 Operands.push_back( 2576 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2577 AMDGPUOperand::ImmTyNone, true)); 2578 2579 return MatchOperand_Success; 2580 2581 } else { 2582 int64_t IntVal; 2583 const MCExpr *Expr; 2584 SMLoc S = getLoc(); 2585 2586 if (HasSP3AbsModifier) { 2587 // This is a workaround for handling expressions 2588 // as arguments of SP3 'abs' modifier, for example: 2589 // |1.0| 2590 // |-1| 2591 // |1+x| 2592 // This syntax is not compatible with syntax of standard 2593 // MC expressions (due to the trailing '|'). 
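// Sketch of the problem (added note): for an operand written as |x+1|, a full
// parseExpression() call would read the trailing '|' as a bitwise-or operator
// and keep consuming tokens, so only a primary expression is parsed here and
// the closing '|' is skipped by the caller.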
2594 SMLoc EndLoc; 2595 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2596 return MatchOperand_ParseFail; 2597 } else { 2598 if (Parser.parseExpression(Expr)) 2599 return MatchOperand_ParseFail; 2600 } 2601 2602 if (Expr->evaluateAsAbsolute(IntVal)) { 2603 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2604 } else { 2605 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2606 } 2607 2608 return MatchOperand_Success; 2609 } 2610 2611 return MatchOperand_NoMatch; 2612 } 2613 2614 OperandMatchResultTy 2615 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2616 if (!isRegister()) 2617 return MatchOperand_NoMatch; 2618 2619 if (auto R = parseRegister()) { 2620 assert(R->isReg()); 2621 Operands.push_back(std::move(R)); 2622 return MatchOperand_Success; 2623 } 2624 return MatchOperand_ParseFail; 2625 } 2626 2627 OperandMatchResultTy 2628 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2629 auto res = parseReg(Operands); 2630 if (res != MatchOperand_NoMatch) { 2631 return res; 2632 } else if (isModifier()) { 2633 return MatchOperand_NoMatch; 2634 } else { 2635 return parseImm(Operands, HasSP3AbsMod); 2636 } 2637 } 2638 2639 bool 2640 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2641 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2642 const auto &str = Token.getString(); 2643 return str == "abs" || str == "neg" || str == "sext"; 2644 } 2645 return false; 2646 } 2647 2648 bool 2649 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2650 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2651 } 2652 2653 bool 2654 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2655 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2656 } 2657 2658 bool 2659 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2660 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2661 } 2662 2663 // Check if this is an operand modifier or an opcode modifier 2664 // which may look like an expression but it is not. We should 2665 // avoid parsing these modifiers as expressions. Currently 2666 // recognized sequences are: 2667 // |...| 2668 // abs(...) 2669 // neg(...) 2670 // sext(...) 2671 // -reg 2672 // -|...| 2673 // -abs(...) 2674 // name:... 2675 // Note that simple opcode modifiers like 'gds' may be parsed as 2676 // expressions; this is a special case. See getExpressionAsToken. 2677 // 2678 bool 2679 AMDGPUAsmParser::isModifier() { 2680 2681 AsmToken Tok = getToken(); 2682 AsmToken NextToken[2]; 2683 peekTokens(NextToken); 2684 2685 return isOperandModifier(Tok, NextToken[0]) || 2686 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2687 isOpcodeModifierWithVal(Tok, NextToken[0]); 2688 } 2689 2690 // Check if the current token is an SP3 'neg' modifier. 2691 // Currently this modifier is allowed in the following context: 2692 // 2693 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2694 // 2. Before an 'abs' modifier: -abs(...) 2695 // 3. Before an SP3 'abs' modifier: -|...| 2696 // 2697 // In all other cases "-" is handled as a part 2698 // of an expression that follows the sign. 
2699 //
2700 // Note: When "-" is followed by an integer literal,
2701 // this is interpreted as integer negation rather
2702 // than a floating-point NEG modifier applied to N.
2703 // Besides being counter-intuitive, such use of a floating-point
2704 // NEG modifier would have resulted in different meanings
2705 // of integer literals used with VOP1/2/C and VOP3,
2706 // for example:
2707 //     v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2708 //     v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2709 // Negative fp literals with preceding "-" are
2710 // handled likewise for uniformity.
2711 //
2712 bool
2713 AMDGPUAsmParser::parseSP3NegModifier() {
2714 
2715   AsmToken NextToken[2];
2716   peekTokens(NextToken);
2717 
2718   if (isToken(AsmToken::Minus) &&
2719       (isRegister(NextToken[0], NextToken[1]) ||
2720        NextToken[0].is(AsmToken::Pipe) ||
2721        isId(NextToken[0], "abs"))) {
2722     lex();
2723     return true;
2724   }
2725 
2726   return false;
2727 }
2728 
2729 OperandMatchResultTy
2730 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2731                                               bool AllowImm) {
2732   bool Neg, SP3Neg;
2733   bool Abs, SP3Abs;
2734   SMLoc Loc;
2735 
2736   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2737   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2738     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2739     return MatchOperand_ParseFail;
2740   }
2741 
2742   SP3Neg = parseSP3NegModifier();
2743 
2744   Loc = getLoc();
2745   Neg = trySkipId("neg");
2746   if (Neg && SP3Neg) {
2747     Error(Loc, "expected register or immediate");
2748     return MatchOperand_ParseFail;
2749   }
2750   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2751     return MatchOperand_ParseFail;
2752 
2753   Abs = trySkipId("abs");
2754   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2755     return MatchOperand_ParseFail;
2756 
2757   Loc = getLoc();
2758   SP3Abs = trySkipToken(AsmToken::Pipe);
2759   if (Abs && SP3Abs) {
2760     Error(Loc, "expected register or immediate");
2761     return MatchOperand_ParseFail;
2762   }
2763 
2764   OperandMatchResultTy Res;
2765   if (AllowImm) {
2766     Res = parseRegOrImm(Operands, SP3Abs);
2767   } else {
2768     Res = parseReg(Operands);
2769   }
2770   if (Res != MatchOperand_Success) {
2771     return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2772 } 2773 2774 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2775 return MatchOperand_ParseFail; 2776 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2777 return MatchOperand_ParseFail; 2778 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2779 return MatchOperand_ParseFail; 2780 2781 AMDGPUOperand::Modifiers Mods; 2782 Mods.Abs = Abs || SP3Abs; 2783 Mods.Neg = Neg || SP3Neg; 2784 2785 if (Mods.hasFPModifiers()) { 2786 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2787 if (Op.isExpr()) { 2788 Error(Op.getStartLoc(), "expected an absolute expression"); 2789 return MatchOperand_ParseFail; 2790 } 2791 Op.setModifiers(Mods); 2792 } 2793 return MatchOperand_Success; 2794 } 2795 2796 OperandMatchResultTy 2797 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2798 bool AllowImm) { 2799 bool Sext = trySkipId("sext"); 2800 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2801 return MatchOperand_ParseFail; 2802 2803 OperandMatchResultTy Res; 2804 if (AllowImm) { 2805 Res = parseRegOrImm(Operands); 2806 } else { 2807 Res = parseReg(Operands); 2808 } 2809 if (Res != MatchOperand_Success) { 2810 return Sext? MatchOperand_ParseFail : Res; 2811 } 2812 2813 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2814 return MatchOperand_ParseFail; 2815 2816 AMDGPUOperand::Modifiers Mods; 2817 Mods.Sext = Sext; 2818 2819 if (Mods.hasIntModifiers()) { 2820 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2821 if (Op.isExpr()) { 2822 Error(Op.getStartLoc(), "expected an absolute expression"); 2823 return MatchOperand_ParseFail; 2824 } 2825 Op.setModifiers(Mods); 2826 } 2827 2828 return MatchOperand_Success; 2829 } 2830 2831 OperandMatchResultTy 2832 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2833 return parseRegOrImmWithFPInputMods(Operands, false); 2834 } 2835 2836 OperandMatchResultTy 2837 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2838 return parseRegOrImmWithIntInputMods(Operands, false); 2839 } 2840 2841 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2842 auto Loc = getLoc(); 2843 if (trySkipId("off")) { 2844 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2845 AMDGPUOperand::ImmTyOff, false)); 2846 return MatchOperand_Success; 2847 } 2848 2849 if (!isRegister()) 2850 return MatchOperand_NoMatch; 2851 2852 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2853 if (Reg) { 2854 Operands.push_back(std::move(Reg)); 2855 return MatchOperand_Success; 2856 } 2857 2858 return MatchOperand_ParseFail; 2859 2860 } 2861 2862 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2863 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2864 2865 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2866 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2867 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2868 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2869 return Match_InvalidOperand; 2870 2871 if ((TSFlags & SIInstrFlags::VOP3) && 2872 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2873 getForcedEncodingSize() != 64) 2874 return Match_PreferE32; 2875 2876 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2877 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2878 // v_mac_f32/16 allow only dst_sel == DWORD; 2879 auto OpNum = 2880 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2881 const auto &Op = Inst.getOperand(OpNum); 2882 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2883 return Match_InvalidOperand; 2884 } 2885 } 2886 2887 return Match_Success; 2888 } 2889 2890 static ArrayRef<unsigned> getAllVariants() { 2891 static const unsigned Variants[] = { 2892 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2893 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2894 }; 2895 2896 return makeArrayRef(Variants); 2897 } 2898 2899 // What asm variants we should check 2900 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2901 if (getForcedEncodingSize() == 32) { 2902 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2903 return makeArrayRef(Variants); 2904 } 2905 2906 if (isForcedVOP3()) { 2907 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2908 return makeArrayRef(Variants); 2909 } 2910 2911 if (isForcedSDWA()) { 2912 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2913 AMDGPUAsmVariants::SDWA9}; 2914 return makeArrayRef(Variants); 2915 } 2916 2917 if (isForcedDPP()) { 2918 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2919 return makeArrayRef(Variants); 2920 } 2921 2922 return getAllVariants(); 2923 } 2924 2925 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 2926 if (getForcedEncodingSize() == 32) 2927 return "e32"; 2928 2929 if (isForcedVOP3()) 2930 return "e64"; 2931 2932 if (isForcedSDWA()) 2933 return "sdwa"; 2934 2935 if (isForcedDPP()) 2936 return "dpp"; 2937 2938 return ""; 2939 } 2940 2941 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2942 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2943 const unsigned Num = Desc.getNumImplicitUses(); 2944 for (unsigned i = 0; i < Num; ++i) { 2945 unsigned Reg = Desc.ImplicitUses[i]; 2946 switch (Reg) { 2947 case AMDGPU::FLAT_SCR: 2948 case AMDGPU::VCC: 2949 case AMDGPU::VCC_LO: 2950 case AMDGPU::VCC_HI: 2951 case AMDGPU::M0: 2952 return Reg; 2953 default: 2954 break; 2955 } 2956 } 2957 return AMDGPU::NoRegister; 2958 } 2959 2960 // NB: This code is correct only when used to check constant 2961 // bus limitations because GFX7 support no f16 inline constants. 2962 // Note that there are no cases when a GFX7 opcode violates 2963 // constant bus limitations due to the use of an f16 constant. 
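// Rough summary for reference (added note, not from the original source):
// inline constants cover the integers -16..64 plus a small set of fp values
// (0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0, and 1/(2*pi) when
// hasInv2PiInlineImm() is true); any other immediate is a literal and
// therefore counts toward the constant bus limit checked below.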
2964 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2965 unsigned OpIdx) const { 2966 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2967 2968 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2969 return false; 2970 } 2971 2972 const MCOperand &MO = Inst.getOperand(OpIdx); 2973 2974 int64_t Val = MO.getImm(); 2975 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2976 2977 switch (OpSize) { // expected operand size 2978 case 8: 2979 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2980 case 4: 2981 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2982 case 2: { 2983 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2984 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2985 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2986 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2987 return AMDGPU::isInlinableIntLiteral(Val); 2988 2989 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2990 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2991 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2992 return AMDGPU::isInlinableIntLiteralV216(Val); 2993 2994 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2995 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2996 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2997 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2998 2999 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3000 } 3001 default: 3002 llvm_unreachable("invalid operand size"); 3003 } 3004 } 3005 3006 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3007 if (!isGFX10Plus()) 3008 return 1; 3009 3010 switch (Opcode) { 3011 // 64-bit shift instructions can use only one scalar value input 3012 case AMDGPU::V_LSHLREV_B64_e64: 3013 case AMDGPU::V_LSHLREV_B64_gfx10: 3014 case AMDGPU::V_LSHRREV_B64_e64: 3015 case AMDGPU::V_LSHRREV_B64_gfx10: 3016 case AMDGPU::V_ASHRREV_I64_e64: 3017 case AMDGPU::V_ASHRREV_I64_gfx10: 3018 case AMDGPU::V_LSHL_B64_e64: 3019 case AMDGPU::V_LSHR_B64_e64: 3020 case AMDGPU::V_ASHR_I64_e64: 3021 return 1; 3022 default: 3023 return 2; 3024 } 3025 } 3026 3027 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3028 const MCOperand &MO = Inst.getOperand(OpIdx); 3029 if (MO.isImm()) { 3030 return !isInlineConstant(Inst, OpIdx); 3031 } else if (MO.isReg()) { 3032 auto Reg = MO.getReg(); 3033 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3034 auto PReg = mc2PseudoReg(Reg); 3035 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3036 } else { 3037 return true; 3038 } 3039 } 3040 3041 bool 3042 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3043 const OperandVector &Operands) { 3044 const unsigned Opcode = Inst.getOpcode(); 3045 const MCInstrDesc &Desc = MII.get(Opcode); 3046 unsigned LastSGPR = AMDGPU::NoRegister; 3047 unsigned ConstantBusUseCount = 0; 3048 unsigned NumLiterals = 0; 3049 unsigned LiteralSize; 3050 3051 if (Desc.TSFlags & 3052 (SIInstrFlags::VOPC | 3053 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3054 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3055 SIInstrFlags::SDWA)) { 3056 // Check special imm operands (used by madmk, etc) 3057 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3058 ++ConstantBusUseCount; 3059 } 3060 3061 SmallDenseSet<unsigned> SGPRsUsed; 3062 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3063 if (SGPRUsed != AMDGPU::NoRegister) { 3064 SGPRsUsed.insert(SGPRUsed); 3065 ++ConstantBusUseCount; 3066 } 3067 3068 const int 
Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3069     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3070     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3071 
3072     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3073 
3074     for (int OpIdx : OpIndices) {
3075       if (OpIdx == -1) break;
3076 
3077       const MCOperand &MO = Inst.getOperand(OpIdx);
3078       if (usesConstantBus(Inst, OpIdx)) {
3079         if (MO.isReg()) {
3080           LastSGPR = mc2PseudoReg(MO.getReg());
3081           // Pairs of registers with partial intersection like these
3082           //   s0, s[0:1]
3083           //   flat_scratch_lo, flat_scratch
3084           //   flat_scratch_lo, flat_scratch_hi
3085           // are theoretically valid but they are disabled anyway.
3086           // Note that this code mimics SIInstrInfo::verifyInstruction
3087           if (!SGPRsUsed.count(LastSGPR)) {
3088             SGPRsUsed.insert(LastSGPR);
3089             ++ConstantBusUseCount;
3090           }
3091         } else { // Expression or a literal
3092 
3093           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3094             continue; // special operand like VINTERP attr_chan
3095 
3096           // An instruction may use only one literal.
3097           // This has been validated in the previous step.
3098           // See validateVOP3Literal.
3099           // This literal may be used as more than one operand.
3100           // If all these operands are of the same size,
3101           // this literal counts as one scalar value.
3102           // Otherwise it counts as 2 scalar values.
3103           // See "GFX10 Shader Programming", section 3.6.2.3.
3104 
3105           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3106           if (Size < 4) Size = 4;
3107 
3108           if (NumLiterals == 0) {
3109             NumLiterals = 1;
3110             LiteralSize = Size;
3111           } else if (LiteralSize != Size) {
3112             NumLiterals = 2;
3113           }
3114         }
3115       }
3116     }
3117   }
3118   ConstantBusUseCount += NumLiterals;
3119 
3120   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3121     return true;
3122 
3123   SMLoc LitLoc = getLitLoc(Operands);
3124   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3125   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3126 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3127 return false; 3128 } 3129 3130 bool 3131 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3132 const OperandVector &Operands) { 3133 const unsigned Opcode = Inst.getOpcode(); 3134 const MCInstrDesc &Desc = MII.get(Opcode); 3135 3136 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3137 if (DstIdx == -1 || 3138 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3139 return true; 3140 } 3141 3142 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3143 3144 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3145 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3146 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3147 3148 assert(DstIdx != -1); 3149 const MCOperand &Dst = Inst.getOperand(DstIdx); 3150 assert(Dst.isReg()); 3151 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3152 3153 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3154 3155 for (int SrcIdx : SrcIndices) { 3156 if (SrcIdx == -1) break; 3157 const MCOperand &Src = Inst.getOperand(SrcIdx); 3158 if (Src.isReg()) { 3159 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3160 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3161 Error(getRegLoc(SrcReg, Operands), 3162 "destination must be different than all sources"); 3163 return false; 3164 } 3165 } 3166 } 3167 3168 return true; 3169 } 3170 3171 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3172 3173 const unsigned Opc = Inst.getOpcode(); 3174 const MCInstrDesc &Desc = MII.get(Opc); 3175 3176 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3177 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3178 assert(ClampIdx != -1); 3179 return Inst.getOperand(ClampIdx).getImm() == 0; 3180 } 3181 3182 return true; 3183 } 3184 3185 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3186 3187 const unsigned Opc = Inst.getOpcode(); 3188 const MCInstrDesc &Desc = MII.get(Opc); 3189 3190 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3191 return true; 3192 3193 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3194 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3195 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3196 3197 assert(VDataIdx != -1); 3198 3199 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3200 return true; 3201 3202 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3203 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3204 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3205 if (DMask == 0) 3206 DMask = 1; 3207 3208 unsigned DataSize = 3209 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3210 if (hasPackedD16()) { 3211 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3212 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3213 DataSize = (DataSize + 1) / 2; 3214 } 3215 3216 return (VDataSize / 4) == DataSize + TFESize; 3217 } 3218 3219 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3220 const unsigned Opc = Inst.getOpcode(); 3221 const MCInstrDesc &Desc = MII.get(Opc); 3222 3223 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3224 return true; 3225 3226 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3227 3228 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3229 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3230 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3231 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3232 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3233 3234 assert(VAddr0Idx != -1); 3235 assert(SrsrcIdx != -1); 3236 assert(SrsrcIdx > VAddr0Idx); 3237 3238 if (DimIdx == -1) 3239 return true; // intersect_ray 3240 3241 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3242 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3243 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3244 unsigned VAddrSize = 3245 IsNSA ? SrsrcIdx - VAddr0Idx 3246 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3247 3248 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3249 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3250 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3251 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3252 if (!IsNSA) { 3253 if (AddrSize > 8) 3254 AddrSize = 16; 3255 else if (AddrSize > 4) 3256 AddrSize = 8; 3257 } 3258 3259 return VAddrSize == AddrSize; 3260 } 3261 3262 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3263 3264 const unsigned Opc = Inst.getOpcode(); 3265 const MCInstrDesc &Desc = MII.get(Opc); 3266 3267 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3268 return true; 3269 if (!Desc.mayLoad() || !Desc.mayStore()) 3270 return true; // Not atomic 3271 3272 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3273 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3274 3275 // This is an incomplete check because image_atomic_cmpswap 3276 // may only use 0x3 and 0xf while other atomic operations 3277 // may use 0x1 and 0x3. However these limitations are 3278 // verified when we check that dmask matches dst size. 3279 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3280 } 3281 3282 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3283 3284 const unsigned Opc = Inst.getOpcode(); 3285 const MCInstrDesc &Desc = MII.get(Opc); 3286 3287 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3288 return true; 3289 3290 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3291 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3292 3293 // GATHER4 instructions use dmask in a different fashion compared to 3294 // other MIMG instructions. The only useful DMASK values are 3295 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3296 // (red,red,red,red) etc.) The ISA document doesn't mention 3297 // this. 
3298 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3299 } 3300 3301 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3302 { 3303 switch (Opcode) { 3304 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3305 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3306 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3307 return true; 3308 default: 3309 return false; 3310 } 3311 } 3312 3313 // movrels* opcodes should only allow VGPRS as src0. 3314 // This is specified in .td description for vop1/vop3, 3315 // but sdwa is handled differently. See isSDWAOperand. 3316 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3317 const OperandVector &Operands) { 3318 3319 const unsigned Opc = Inst.getOpcode(); 3320 const MCInstrDesc &Desc = MII.get(Opc); 3321 3322 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3323 return true; 3324 3325 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3326 assert(Src0Idx != -1); 3327 3328 SMLoc ErrLoc; 3329 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3330 if (Src0.isReg()) { 3331 auto Reg = mc2PseudoReg(Src0.getReg()); 3332 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3333 if (!isSGPR(Reg, TRI)) 3334 return true; 3335 ErrLoc = getRegLoc(Reg, Operands); 3336 } else { 3337 ErrLoc = getConstLoc(Operands); 3338 } 3339 3340 Error(ErrLoc, "source operand must be a VGPR"); 3341 return false; 3342 } 3343 3344 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3345 const OperandVector &Operands) { 3346 3347 const unsigned Opc = Inst.getOpcode(); 3348 3349 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3350 return true; 3351 3352 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3353 assert(Src0Idx != -1); 3354 3355 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3356 if (!Src0.isReg()) 3357 return true; 3358 3359 auto Reg = mc2PseudoReg(Src0.getReg()); 3360 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3361 if (isSGPR(Reg, TRI)) { 3362 Error(getRegLoc(Reg, Operands), 3363 "source operand must be either a VGPR or an inline constant"); 3364 return false; 3365 } 3366 3367 return true; 3368 } 3369 3370 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3371 switch (Inst.getOpcode()) { 3372 default: 3373 return true; 3374 case V_DIV_SCALE_F32_gfx6_gfx7: 3375 case V_DIV_SCALE_F32_vi: 3376 case V_DIV_SCALE_F32_gfx10: 3377 case V_DIV_SCALE_F64_gfx6_gfx7: 3378 case V_DIV_SCALE_F64_vi: 3379 case V_DIV_SCALE_F64_gfx10: 3380 break; 3381 } 3382 3383 // TODO: Check that src0 = src1 or src2. 
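// Explanatory note (added, not in the original): the loop below rejects the
// 'abs' source modifier (SISrcMods::ABS bit) on the v_div_scale source
// operands; 'neg' remains allowed.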
3384 
3385   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3386                     AMDGPU::OpName::src1_modifiers,
3387                     AMDGPU::OpName::src2_modifiers}) {
3388     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3389             .getImm() &
3390         SISrcMods::ABS) {
3391       return false;
3392     }
3393   }
3394 
3395   return true;
3396 }
3397 
3398 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3399 
3400   const unsigned Opc = Inst.getOpcode();
3401   const MCInstrDesc &Desc = MII.get(Opc);
3402 
3403   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3404     return true;
3405 
3406   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3407   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3408     if (isCI() || isSI())
3409       return false;
3410   }
3411 
3412   return true;
3413 }
3414 
3415 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3416   const unsigned Opc = Inst.getOpcode();
3417   const MCInstrDesc &Desc = MII.get(Opc);
3418 
3419   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3420     return true;
3421 
3422   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3423   if (DimIdx < 0)
3424     return true;
3425 
3426   long Imm = Inst.getOperand(DimIdx).getImm();
3427   if (Imm < 0 || Imm >= 8)
3428     return false;
3429 
3430   return true;
3431 }
3432 
3433 static bool IsRevOpcode(const unsigned Opcode)
3434 {
3435   switch (Opcode) {
3436   case AMDGPU::V_SUBREV_F32_e32:
3437   case AMDGPU::V_SUBREV_F32_e64:
3438   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3439   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3440   case AMDGPU::V_SUBREV_F32_e32_vi:
3441   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3442   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3443   case AMDGPU::V_SUBREV_F32_e64_vi:
3444 
3445   case AMDGPU::V_SUBREV_CO_U32_e32:
3446   case AMDGPU::V_SUBREV_CO_U32_e64:
3447   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3448   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3449 
3450   case AMDGPU::V_SUBBREV_U32_e32:
3451   case AMDGPU::V_SUBBREV_U32_e64:
3452   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3453   case AMDGPU::V_SUBBREV_U32_e32_vi:
3454   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3455   case AMDGPU::V_SUBBREV_U32_e64_vi:
3456 
3457   case AMDGPU::V_SUBREV_U32_e32:
3458   case AMDGPU::V_SUBREV_U32_e64:
3459   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3460   case AMDGPU::V_SUBREV_U32_e32_vi:
3461   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3462   case AMDGPU::V_SUBREV_U32_e64_vi:
3463 
3464   case AMDGPU::V_SUBREV_F16_e32:
3465   case AMDGPU::V_SUBREV_F16_e64:
3466   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3467   case AMDGPU::V_SUBREV_F16_e32_vi:
3468   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3469   case AMDGPU::V_SUBREV_F16_e64_vi:
3470 
3471   case AMDGPU::V_SUBREV_U16_e32:
3472   case AMDGPU::V_SUBREV_U16_e64:
3473   case AMDGPU::V_SUBREV_U16_e32_vi:
3474   case AMDGPU::V_SUBREV_U16_e64_vi:
3475 
3476   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3477   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3478   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3479 
3480   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3481   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3482 
3483   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3484   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3485 
3486   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3487   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3488 
3489   case AMDGPU::V_LSHRREV_B32_e32:
3490   case AMDGPU::V_LSHRREV_B32_e64:
3491   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3492   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3493   case AMDGPU::V_LSHRREV_B32_e32_vi:
3494   case AMDGPU::V_LSHRREV_B32_e64_vi:
3495   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3496   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3497 
3498   case AMDGPU::V_ASHRREV_I32_e32:
3499   case 
AMDGPU::V_ASHRREV_I32_e64: 3500 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3501 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3502 case AMDGPU::V_ASHRREV_I32_e32_vi: 3503 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3504 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3505 case AMDGPU::V_ASHRREV_I32_e64_vi: 3506 3507 case AMDGPU::V_LSHLREV_B32_e32: 3508 case AMDGPU::V_LSHLREV_B32_e64: 3509 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3510 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3511 case AMDGPU::V_LSHLREV_B32_e32_vi: 3512 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3513 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3514 case AMDGPU::V_LSHLREV_B32_e64_vi: 3515 3516 case AMDGPU::V_LSHLREV_B16_e32: 3517 case AMDGPU::V_LSHLREV_B16_e64: 3518 case AMDGPU::V_LSHLREV_B16_e32_vi: 3519 case AMDGPU::V_LSHLREV_B16_e64_vi: 3520 case AMDGPU::V_LSHLREV_B16_gfx10: 3521 3522 case AMDGPU::V_LSHRREV_B16_e32: 3523 case AMDGPU::V_LSHRREV_B16_e64: 3524 case AMDGPU::V_LSHRREV_B16_e32_vi: 3525 case AMDGPU::V_LSHRREV_B16_e64_vi: 3526 case AMDGPU::V_LSHRREV_B16_gfx10: 3527 3528 case AMDGPU::V_ASHRREV_I16_e32: 3529 case AMDGPU::V_ASHRREV_I16_e64: 3530 case AMDGPU::V_ASHRREV_I16_e32_vi: 3531 case AMDGPU::V_ASHRREV_I16_e64_vi: 3532 case AMDGPU::V_ASHRREV_I16_gfx10: 3533 3534 case AMDGPU::V_LSHLREV_B64_e64: 3535 case AMDGPU::V_LSHLREV_B64_gfx10: 3536 case AMDGPU::V_LSHLREV_B64_vi: 3537 3538 case AMDGPU::V_LSHRREV_B64_e64: 3539 case AMDGPU::V_LSHRREV_B64_gfx10: 3540 case AMDGPU::V_LSHRREV_B64_vi: 3541 3542 case AMDGPU::V_ASHRREV_I64_e64: 3543 case AMDGPU::V_ASHRREV_I64_gfx10: 3544 case AMDGPU::V_ASHRREV_I64_vi: 3545 3546 case AMDGPU::V_PK_LSHLREV_B16: 3547 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3548 case AMDGPU::V_PK_LSHLREV_B16_vi: 3549 3550 case AMDGPU::V_PK_LSHRREV_B16: 3551 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3552 case AMDGPU::V_PK_LSHRREV_B16_vi: 3553 case AMDGPU::V_PK_ASHRREV_I16: 3554 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3555 case AMDGPU::V_PK_ASHRREV_I16_vi: 3556 return true; 3557 default: 3558 return false; 3559 } 3560 } 3561 3562 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3563 3564 using namespace SIInstrFlags; 3565 const unsigned Opcode = Inst.getOpcode(); 3566 const MCInstrDesc &Desc = MII.get(Opcode); 3567 3568 // lds_direct register is defined so that it can be used 3569 // with 9-bit operands only. Ignore encodings which do not accept these. 3570 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3571 return true; 3572 3573 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3574 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3575 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3576 3577 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3578 3579 // lds_direct cannot be specified as either src1 or src2. 3580 for (int SrcIdx : SrcIndices) { 3581 if (SrcIdx == -1) break; 3582 const MCOperand &Src = Inst.getOperand(SrcIdx); 3583 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3584 return false; 3585 } 3586 } 3587 3588 if (Src0Idx == -1) 3589 return true; 3590 3591 const MCOperand &Src = Inst.getOperand(Src0Idx); 3592 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3593 return true; 3594 3595 // lds_direct is specified as src0. Check additional limitations. 
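// Illustrative examples (assembly syntax assumed):
//   v_mov_b32 v0, lds_direct           ; accepted - plain VOP1 with lds_direct as src0
//   v_mov_b32_sdwa v0, lds_direct      ; rejected - SDWA forms cannot read lds_direct
//   v_subrev_f32 v0, lds_direct, v1    ; rejected - *rev opcodes cannot read lds_direct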
3596 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3597 } 3598 3599 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3600 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3601 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3602 if (Op.isFlatOffset()) 3603 return Op.getStartLoc(); 3604 } 3605 return getLoc(); 3606 } 3607 3608 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3609 const OperandVector &Operands) { 3610 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3611 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3612 return true; 3613 3614 auto Opcode = Inst.getOpcode(); 3615 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3616 assert(OpNum != -1); 3617 3618 const auto &Op = Inst.getOperand(OpNum); 3619 if (!hasFlatOffsets() && Op.getImm() != 0) { 3620 Error(getFlatOffsetLoc(Operands), 3621 "flat offset modifier is not supported on this GPU"); 3622 return false; 3623 } 3624 3625 // For FLAT segment the offset must be positive; 3626 // MSB is ignored and forced to zero. 3627 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3628 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3629 if (!isIntN(OffsetSize, Op.getImm())) { 3630 Error(getFlatOffsetLoc(Operands), 3631 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3632 return false; 3633 } 3634 } else { 3635 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3636 if (!isUIntN(OffsetSize, Op.getImm())) { 3637 Error(getFlatOffsetLoc(Operands), 3638 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3639 return false; 3640 } 3641 } 3642 3643 return true; 3644 } 3645 3646 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3647 // Start with second operand because SMEM Offset cannot be dst or src0. 3648 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3649 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3650 if (Op.isSMEMOffset()) 3651 return Op.getStartLoc(); 3652 } 3653 return getLoc(); 3654 } 3655 3656 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3657 const OperandVector &Operands) { 3658 if (isCI() || isSI()) 3659 return true; 3660 3661 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3662 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3663 return true; 3664 3665 auto Opcode = Inst.getOpcode(); 3666 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3667 if (OpNum == -1) 3668 return true; 3669 3670 const auto &Op = Inst.getOperand(OpNum); 3671 if (!Op.isImm()) 3672 return true; 3673 3674 uint64_t Offset = Op.getImm(); 3675 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3676 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3677 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3678 return true; 3679 3680 Error(getSMEMOffsetLoc(Operands), 3681 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3682 "expected a 21-bit signed offset"); 3683 3684 return false; 3685 } 3686 3687 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3688 unsigned Opcode = Inst.getOpcode(); 3689 const MCInstrDesc &Desc = MII.get(Opcode); 3690 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3691 return true; 3692 3693 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3694 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3695 3696 const int OpIndices[] = { Src0Idx, Src1Idx }; 3697 3698 unsigned NumExprs = 0; 3699 unsigned NumLiterals = 0; 3700 uint32_t LiteralValue; 3701 3702 for (int OpIdx : OpIndices) { 3703 if (OpIdx == -1) break; 3704 3705 const MCOperand &MO = Inst.getOperand(OpIdx); 3706 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3707 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3708 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3709 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3710 if (NumLiterals == 0 || LiteralValue != Value) { 3711 LiteralValue = Value; 3712 ++NumLiterals; 3713 } 3714 } else if (MO.isExpr()) { 3715 ++NumExprs; 3716 } 3717 } 3718 } 3719 3720 return NumLiterals + NumExprs <= 1; 3721 } 3722 3723 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3724 const unsigned Opc = Inst.getOpcode(); 3725 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3726 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3727 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3728 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3729 3730 if (OpSel & ~3) 3731 return false; 3732 } 3733 return true; 3734 } 3735 3736 // Check if VCC register matches wavefront size 3737 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3738 auto FB = getFeatureBits(); 3739 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3740 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3741 } 3742 3743 // VOP3 literal is only allowed in GFX10+ and only one can be used 3744 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3745 const OperandVector &Operands) { 3746 unsigned Opcode = Inst.getOpcode(); 3747 const MCInstrDesc &Desc = MII.get(Opcode); 3748 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3749 return true; 3750 3751 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3752 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3753 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3754 3755 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3756 3757 unsigned NumExprs = 0; 3758 unsigned NumLiterals = 0; 3759 uint32_t LiteralValue; 3760 3761 for (int OpIdx : OpIndices) { 3762 if (OpIdx == -1) break; 3763 3764 const MCOperand &MO = Inst.getOperand(OpIdx); 3765 if (!MO.isImm() && !MO.isExpr()) 3766 continue; 3767 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3768 continue; 3769 3770 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3771 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3772 Error(getConstLoc(Operands), 3773 "inline constants are not allowed for this operand"); 3774 return false; 3775 } 3776 3777 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3778 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3779 if (NumLiterals == 0 || LiteralValue != Value) { 3780 LiteralValue = Value; 3781 ++NumLiterals; 3782 } 3783 } else if (MO.isExpr()) { 3784 ++NumExprs; 3785 } 
3786 } 3787 NumLiterals += NumExprs; 3788 3789 if (!NumLiterals) 3790 return true; 3791 3792 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 3793 Error(getLitLoc(Operands), "literal operands are not supported"); 3794 return false; 3795 } 3796 3797 if (NumLiterals > 1) { 3798 Error(getLitLoc(Operands), "only one literal operand is allowed"); 3799 return false; 3800 } 3801 3802 return true; 3803 } 3804 3805 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 3806 const OperandVector &Operands, 3807 const SMLoc &IDLoc) { 3808 int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 3809 AMDGPU::OpName::glc1); 3810 if (GLCPos != -1) { 3811 // -1 is set by GLC_1 default operand. In all cases "glc" must be present 3812 // in the asm string, and the default value means it is not present. 3813 if (Inst.getOperand(GLCPos).getImm() == -1) { 3814 Error(IDLoc, "instruction must use glc"); 3815 return false; 3816 } 3817 } 3818 3819 return true; 3820 } 3821 3822 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3823 const SMLoc &IDLoc, 3824 const OperandVector &Operands) { 3825 if (!validateLdsDirect(Inst)) { 3826 Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands), 3827 "invalid use of lds_direct"); 3828 return false; 3829 } 3830 if (!validateSOPLiteral(Inst)) { 3831 Error(getLitLoc(Operands), 3832 "only one literal operand is allowed"); 3833 return false; 3834 } 3835 if (!validateVOP3Literal(Inst, Operands)) { 3836 return false; 3837 } 3838 if (!validateConstantBusLimitations(Inst, Operands)) { 3839 return false; 3840 } 3841 if (!validateEarlyClobberLimitations(Inst, Operands)) { 3842 return false; 3843 } 3844 if (!validateIntClampSupported(Inst)) { 3845 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 3846 "integer clamping is not supported on this GPU"); 3847 return false; 3848 } 3849 if (!validateOpSel(Inst)) { 3850 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 3851 "invalid op_sel operand"); 3852 return false; 3853 } 3854 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
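// Illustrative example (assembly syntax assumed): a d16 image load such as
//   image_load v[0:1], v[2:5], s[0:7] dmask:0x3 unorm d16
// is rejected below on SI/CI with "d16 modifier is not supported on this GPU".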
3855 if (!validateMIMGD16(Inst)) { 3856 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 3857 "d16 modifier is not supported on this GPU"); 3858 return false; 3859 } 3860 if (!validateMIMGDim(Inst)) { 3861 Error(IDLoc, "dim modifier is required on this GPU"); 3862 return false; 3863 } 3864 if (!validateMIMGDataSize(Inst)) { 3865 Error(IDLoc, 3866 "image data size does not match dmask and tfe"); 3867 return false; 3868 } 3869 if (!validateMIMGAddrSize(Inst)) { 3870 Error(IDLoc, 3871 "image address size does not match dim and a16"); 3872 return false; 3873 } 3874 if (!validateMIMGAtomicDMask(Inst)) { 3875 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 3876 "invalid atomic image dmask"); 3877 return false; 3878 } 3879 if (!validateMIMGGatherDMask(Inst)) { 3880 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 3881 "invalid image_gather dmask: only one bit must be set"); 3882 return false; 3883 } 3884 if (!validateMovrels(Inst, Operands)) { 3885 return false; 3886 } 3887 if (!validateFlatOffset(Inst, Operands)) { 3888 return false; 3889 } 3890 if (!validateSMEMOffset(Inst, Operands)) { 3891 return false; 3892 } 3893 if (!validateMAIAccWrite(Inst, Operands)) { 3894 return false; 3895 } 3896 if (!validateDivScale(Inst)) { 3897 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 3898 return false; 3899 } 3900 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 3901 return false; 3902 } 3903 3904 return true; 3905 } 3906 3907 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3908 const FeatureBitset &FBS, 3909 unsigned VariantID = 0); 3910 3911 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 3912 const FeatureBitset &AvailableFeatures, 3913 unsigned VariantID); 3914 3915 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3916 const FeatureBitset &FBS) { 3917 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 3918 } 3919 3920 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3921 const FeatureBitset &FBS, 3922 ArrayRef<unsigned> Variants) { 3923 for (auto Variant : Variants) { 3924 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 3925 return true; 3926 } 3927 3928 return false; 3929 } 3930 3931 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 3932 const SMLoc &IDLoc) { 3933 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3934 3935 // Check if requested instruction variant is supported. 3936 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 3937 return false; 3938 3939 // This instruction is not supported. 3940 // Clear any other pending errors because they are no longer relevant. 3941 getParser().clearPendingErrors(); 3942 3943 // Requested instruction variant is not supported. 3944 // Check if any other variants are supported. 3945 StringRef VariantName = getMatchedVariantName(); 3946 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 3947 return Error(IDLoc, 3948 Twine(VariantName, 3949 " variant of this instruction is not supported")); 3950 } 3951 3952 // Finally check if this instruction is supported on any other GPU. 3953 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 3954 return Error(IDLoc, "instruction not supported on this GPU"); 3955 } 3956 3957 // Instruction not supported on any GPU. Probably a typo. 
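// e.g. (illustrative) a misspelled mnemonic such as "v_ad_f32" is reported as
// "invalid instruction" plus a suggestion, typically of the form
// ", did you mean: v_add_f32?".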
3958 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 3959 return Error(IDLoc, "invalid instruction" + Suggestion); 3960 } 3961 3962 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3963 OperandVector &Operands, 3964 MCStreamer &Out, 3965 uint64_t &ErrorInfo, 3966 bool MatchingInlineAsm) { 3967 MCInst Inst; 3968 unsigned Result = Match_Success; 3969 for (auto Variant : getMatchedVariants()) { 3970 uint64_t EI; 3971 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3972 Variant); 3973 // We order match statuses from least to most specific. We use most specific 3974 // status as resulting 3975 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3976 if ((R == Match_Success) || 3977 (R == Match_PreferE32) || 3978 (R == Match_MissingFeature && Result != Match_PreferE32) || 3979 (R == Match_InvalidOperand && Result != Match_MissingFeature 3980 && Result != Match_PreferE32) || 3981 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3982 && Result != Match_MissingFeature 3983 && Result != Match_PreferE32)) { 3984 Result = R; 3985 ErrorInfo = EI; 3986 } 3987 if (R == Match_Success) 3988 break; 3989 } 3990 3991 if (Result == Match_Success) { 3992 if (!validateInstruction(Inst, IDLoc, Operands)) { 3993 return true; 3994 } 3995 Inst.setLoc(IDLoc); 3996 Out.emitInstruction(Inst, getSTI()); 3997 return false; 3998 } 3999 4000 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4001 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4002 return true; 4003 } 4004 4005 switch (Result) { 4006 default: break; 4007 case Match_MissingFeature: 4008 // It has been verified that the specified instruction 4009 // mnemonic is valid. A match was found but it requires 4010 // features which are not supported on this GPU. 
4011 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4012 4013 case Match_InvalidOperand: { 4014 SMLoc ErrorLoc = IDLoc; 4015 if (ErrorInfo != ~0ULL) { 4016 if (ErrorInfo >= Operands.size()) { 4017 return Error(IDLoc, "too few operands for instruction"); 4018 } 4019 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4020 if (ErrorLoc == SMLoc()) 4021 ErrorLoc = IDLoc; 4022 } 4023 return Error(ErrorLoc, "invalid operand for instruction"); 4024 } 4025 4026 case Match_PreferE32: 4027 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4028 "should be encoded as e32"); 4029 case Match_MnemonicFail: 4030 llvm_unreachable("Invalid instructions should have been handled already"); 4031 } 4032 llvm_unreachable("Implement any new match types added!"); 4033 } 4034 4035 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4036 int64_t Tmp = -1; 4037 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4038 return true; 4039 } 4040 if (getParser().parseAbsoluteExpression(Tmp)) { 4041 return true; 4042 } 4043 Ret = static_cast<uint32_t>(Tmp); 4044 return false; 4045 } 4046 4047 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4048 uint32_t &Minor) { 4049 if (ParseAsAbsoluteExpression(Major)) 4050 return TokError("invalid major version"); 4051 4052 if (!trySkipToken(AsmToken::Comma)) 4053 return TokError("minor version number required, comma expected"); 4054 4055 if (ParseAsAbsoluteExpression(Minor)) 4056 return TokError("invalid minor version"); 4057 4058 return false; 4059 } 4060 4061 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4062 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4063 return TokError("directive only supported for amdgcn architecture"); 4064 4065 std::string Target; 4066 4067 SMLoc TargetStart = getLoc(); 4068 if (getParser().parseEscapedString(Target)) 4069 return true; 4070 SMRange TargetRange = SMRange(TargetStart, getLoc()); 4071 4072 std::string ExpectedTarget; 4073 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 4074 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 4075 4076 if (Target != ExpectedTargetOS.str()) 4077 return Error(TargetRange.Start, "target must match options", TargetRange); 4078 4079 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 4080 return false; 4081 } 4082 4083 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4084 return Error(Range.Start, "value out of range", Range); 4085 } 4086 4087 bool AMDGPUAsmParser::calculateGPRBlocks( 4088 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4089 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4090 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4091 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4092 // TODO(scott.linder): These calculations are duplicated from 4093 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
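// Worked example (illustrative, assuming a VGPR allocation granule of 4):
// .amdhsa_next_free_vgpr 10 rounds up to 12 VGPRs and is encoded as
// GRANULATED_WORKITEM_VGPR_COUNT = 12/4 - 1 = 2. SGPR blocks are derived the
// same way from NextFreeSGPR plus the extra VCC/flat-scratch/XNACK SGPRs.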
4094 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4095 4096 unsigned NumVGPRs = NextFreeVGPR; 4097 unsigned NumSGPRs = NextFreeSGPR; 4098 4099 if (Version.Major >= 10) 4100 NumSGPRs = 0; 4101 else { 4102 unsigned MaxAddressableNumSGPRs = 4103 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4104 4105 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4106 NumSGPRs > MaxAddressableNumSGPRs) 4107 return OutOfRangeError(SGPRRange); 4108 4109 NumSGPRs += 4110 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4111 4112 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4113 NumSGPRs > MaxAddressableNumSGPRs) 4114 return OutOfRangeError(SGPRRange); 4115 4116 if (Features.test(FeatureSGPRInitBug)) 4117 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4118 } 4119 4120 VGPRBlocks = 4121 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4122 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4123 4124 return false; 4125 } 4126 4127 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4128 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4129 return TokError("directive only supported for amdgcn architecture"); 4130 4131 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4132 return TokError("directive only supported for amdhsa OS"); 4133 4134 StringRef KernelName; 4135 if (getParser().parseIdentifier(KernelName)) 4136 return true; 4137 4138 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4139 4140 StringSet<> Seen; 4141 4142 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4143 4144 SMRange VGPRRange; 4145 uint64_t NextFreeVGPR = 0; 4146 SMRange SGPRRange; 4147 uint64_t NextFreeSGPR = 0; 4148 unsigned UserSGPRCount = 0; 4149 bool ReserveVCC = true; 4150 bool ReserveFlatScr = true; 4151 bool ReserveXNACK = hasXNACK(); 4152 Optional<bool> EnableWavefrontSize32; 4153 4154 while (true) { 4155 while (trySkipToken(AsmToken::EndOfStatement)); 4156 4157 StringRef ID; 4158 SMRange IDRange = getTok().getLocRange(); 4159 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4160 return true; 4161 4162 if (ID == ".end_amdhsa_kernel") 4163 break; 4164 4165 if (Seen.find(ID) != Seen.end()) 4166 return TokError(".amdhsa_ directives cannot be repeated"); 4167 Seen.insert(ID); 4168 4169 SMLoc ValStart = getLoc(); 4170 int64_t IVal; 4171 if (getParser().parseAbsoluteExpression(IVal)) 4172 return true; 4173 SMLoc ValEnd = getLoc(); 4174 SMRange ValRange = SMRange(ValStart, ValEnd); 4175 4176 if (IVal < 0) 4177 return OutOfRangeError(ValRange); 4178 4179 uint64_t Val = IVal; 4180 4181 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4182 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4183 return OutOfRangeError(RANGE); \ 4184 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4185 4186 if (ID == ".amdhsa_group_segment_fixed_size") { 4187 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4188 return OutOfRangeError(ValRange); 4189 KD.group_segment_fixed_size = Val; 4190 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4191 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4192 return OutOfRangeError(ValRange); 4193 KD.private_segment_fixed_size = Val; 4194 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4195 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4196 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4197 Val, ValRange); 4198 if (Val) 4199 UserSGPRCount += 4; 4200 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4201 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4202 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4203 ValRange); 4204 if (Val) 4205 UserSGPRCount += 2; 4206 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4207 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4208 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4209 ValRange); 4210 if (Val) 4211 UserSGPRCount += 2; 4212 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4213 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4214 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4215 Val, ValRange); 4216 if (Val) 4217 UserSGPRCount += 2; 4218 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4219 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4220 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4221 ValRange); 4222 if (Val) 4223 UserSGPRCount += 2; 4224 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4225 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4226 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4227 ValRange); 4228 if (Val) 4229 UserSGPRCount += 2; 4230 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4231 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4232 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4233 Val, ValRange); 4234 if (Val) 4235 UserSGPRCount += 1; 4236 } else if (ID == ".amdhsa_wavefront_size32") { 4237 if (IVersion.Major < 10) 4238 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4239 EnableWavefrontSize32 = Val; 4240 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4241 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4242 Val, ValRange); 4243 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4244 PARSE_BITS_ENTRY( 4245 KD.compute_pgm_rsrc2, 4246 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, 4247 ValRange); 4248 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4249 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4250 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4251 ValRange); 4252 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4253 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4254 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4255 ValRange); 4256 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4257 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4258 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4259 ValRange); 4260 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4261 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4262 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4263 ValRange); 4264 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4265 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4266 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4267 ValRange); 4268 } else if (ID == ".amdhsa_next_free_vgpr") { 4269 VGPRRange = ValRange; 4270 NextFreeVGPR = Val; 4271 } else if (ID == ".amdhsa_next_free_sgpr") { 4272 SGPRRange = ValRange; 4273 NextFreeSGPR = Val; 4274 } else if (ID == ".amdhsa_reserve_vcc") { 4275 if (!isUInt<1>(Val)) 4276 return OutOfRangeError(ValRange); 4277 ReserveVCC = Val; 4278 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4279 if (IVersion.Major < 7) 4280 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4281 if (!isUInt<1>(Val)) 4282 return OutOfRangeError(ValRange); 4283 ReserveFlatScr = Val; 4284 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4285 if (IVersion.Major < 8) 4286 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4287 if (!isUInt<1>(Val)) 4288 return OutOfRangeError(ValRange); 4289 ReserveXNACK = Val; 4290 } else if (ID == 
".amdhsa_float_round_mode_32") { 4291 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4292 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4293 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4294 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4295 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4296 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4297 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4298 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4299 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4300 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4301 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4302 ValRange); 4303 } else if (ID == ".amdhsa_dx10_clamp") { 4304 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4305 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4306 } else if (ID == ".amdhsa_ieee_mode") { 4307 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4308 Val, ValRange); 4309 } else if (ID == ".amdhsa_fp16_overflow") { 4310 if (IVersion.Major < 9) 4311 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4312 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4313 ValRange); 4314 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4315 if (IVersion.Major < 10) 4316 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4317 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4318 ValRange); 4319 } else if (ID == ".amdhsa_memory_ordered") { 4320 if (IVersion.Major < 10) 4321 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4322 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4323 ValRange); 4324 } else if (ID == ".amdhsa_forward_progress") { 4325 if (IVersion.Major < 10) 4326 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4327 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4328 ValRange); 4329 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4330 PARSE_BITS_ENTRY( 4331 KD.compute_pgm_rsrc2, 4332 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4333 ValRange); 4334 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4335 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4336 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4337 Val, ValRange); 4338 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4339 PARSE_BITS_ENTRY( 4340 KD.compute_pgm_rsrc2, 4341 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4342 ValRange); 4343 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4344 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4345 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4346 Val, ValRange); 4347 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4348 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4349 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4350 Val, ValRange); 4351 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4352 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4353 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4354 Val, ValRange); 4355 } else if (ID == ".amdhsa_exception_int_div_zero") { 4356 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4357 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4358 Val, ValRange); 4359 } else { 4360 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4361 } 4362 4363 #undef PARSE_BITS_ENTRY 4364 } 4365 4366 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4367 return TokError(".amdhsa_next_free_vgpr directive is required"); 4368 4369 
if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4370 return TokError(".amdhsa_next_free_sgpr directive is required"); 4371 4372 unsigned VGPRBlocks; 4373 unsigned SGPRBlocks; 4374 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4375 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4376 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4377 SGPRBlocks)) 4378 return true; 4379 4380 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4381 VGPRBlocks)) 4382 return OutOfRangeError(VGPRRange); 4383 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4384 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4385 4386 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4387 SGPRBlocks)) 4388 return OutOfRangeError(SGPRRange); 4389 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4390 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4391 SGPRBlocks); 4392 4393 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4394 return TokError("too many user SGPRs enabled"); 4395 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4396 UserSGPRCount); 4397 4398 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4399 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4400 ReserveFlatScr, ReserveXNACK); 4401 return false; 4402 } 4403 4404 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4405 uint32_t Major; 4406 uint32_t Minor; 4407 4408 if (ParseDirectiveMajorMinor(Major, Minor)) 4409 return true; 4410 4411 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4412 return false; 4413 } 4414 4415 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4416 uint32_t Major; 4417 uint32_t Minor; 4418 uint32_t Stepping; 4419 StringRef VendorName; 4420 StringRef ArchName; 4421 4422 // If this directive has no arguments, then use the ISA version for the 4423 // targeted GPU. 4424 if (isToken(AsmToken::EndOfStatement)) { 4425 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4426 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4427 ISA.Stepping, 4428 "AMD", "AMDGPU"); 4429 return false; 4430 } 4431 4432 if (ParseDirectiveMajorMinor(Major, Minor)) 4433 return true; 4434 4435 if (!trySkipToken(AsmToken::Comma)) 4436 return TokError("stepping version number required, comma expected"); 4437 4438 if (ParseAsAbsoluteExpression(Stepping)) 4439 return TokError("invalid stepping version"); 4440 4441 if (!trySkipToken(AsmToken::Comma)) 4442 return TokError("vendor name required, comma expected"); 4443 4444 if (!parseString(VendorName, "invalid vendor name")) 4445 return true; 4446 4447 if (!trySkipToken(AsmToken::Comma)) 4448 return TokError("arch name required, comma expected"); 4449 4450 if (!parseString(ArchName, "invalid arch name")) 4451 return true; 4452 4453 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4454 VendorName, ArchName); 4455 return false; 4456 } 4457 4458 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4459 amd_kernel_code_t &Header) { 4460 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4461 // assembly for backwards compatibility. 
4462 if (ID == "max_scratch_backing_memory_byte_size") { 4463 Parser.eatToEndOfStatement(); 4464 return false; 4465 } 4466 4467 SmallString<40> ErrStr; 4468 raw_svector_ostream Err(ErrStr); 4469 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4470 return TokError(Err.str()); 4471 } 4472 Lex(); 4473 4474 if (ID == "enable_wavefront_size32") { 4475 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4476 if (!isGFX10Plus()) 4477 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4478 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4479 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4480 } else { 4481 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4482 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4483 } 4484 } 4485 4486 if (ID == "wavefront_size") { 4487 if (Header.wavefront_size == 5) { 4488 if (!isGFX10Plus()) 4489 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4490 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4491 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4492 } else if (Header.wavefront_size == 6) { 4493 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4494 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4495 } 4496 } 4497 4498 if (ID == "enable_wgp_mode") { 4499 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4500 !isGFX10Plus()) 4501 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4502 } 4503 4504 if (ID == "enable_mem_ordered") { 4505 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4506 !isGFX10Plus()) 4507 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4508 } 4509 4510 if (ID == "enable_fwd_progress") { 4511 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4512 !isGFX10Plus()) 4513 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4514 } 4515 4516 return false; 4517 } 4518 4519 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4520 amd_kernel_code_t Header; 4521 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4522 4523 while (true) { 4524 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4525 // will set the current token to EndOfStatement. 
4526 while(trySkipToken(AsmToken::EndOfStatement)); 4527 4528 StringRef ID; 4529 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4530 return true; 4531 4532 if (ID == ".end_amd_kernel_code_t") 4533 break; 4534 4535 if (ParseAMDKernelCodeTValue(ID, Header)) 4536 return true; 4537 } 4538 4539 getTargetStreamer().EmitAMDKernelCodeT(Header); 4540 4541 return false; 4542 } 4543 4544 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4545 StringRef KernelName; 4546 if (!parseId(KernelName, "expected symbol name")) 4547 return true; 4548 4549 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4550 ELF::STT_AMDGPU_HSA_KERNEL); 4551 4552 KernelScope.initialize(getContext()); 4553 return false; 4554 } 4555 4556 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4557 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4558 return Error(getLoc(), 4559 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4560 "architectures"); 4561 } 4562 4563 auto ISAVersionStringFromASM = getToken().getStringContents(); 4564 4565 std::string ISAVersionStringFromSTI; 4566 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4567 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4568 4569 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4570 return Error(getLoc(), 4571 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4572 "arguments specified through the command line"); 4573 } 4574 4575 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4576 Lex(); 4577 4578 return false; 4579 } 4580 4581 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4582 const char *AssemblerDirectiveBegin; 4583 const char *AssemblerDirectiveEnd; 4584 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4585 isHsaAbiVersion3(&getSTI()) 4586 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4587 HSAMD::V3::AssemblerDirectiveEnd) 4588 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4589 HSAMD::AssemblerDirectiveEnd); 4590 4591 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4592 return Error(getLoc(), 4593 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4594 "not available on non-amdhsa OSes")).str()); 4595 } 4596 4597 std::string HSAMetadataString; 4598 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4599 HSAMetadataString)) 4600 return true; 4601 4602 if (isHsaAbiVersion3(&getSTI())) { 4603 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4604 return Error(getLoc(), "invalid HSA metadata"); 4605 } else { 4606 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4607 return Error(getLoc(), "invalid HSA metadata"); 4608 } 4609 4610 return false; 4611 } 4612 4613 /// Common code to parse out a block of text (typically YAML) between start and 4614 /// end directives. 
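/// The HSA metadata and MsgPack PAL metadata parsers above and below use this
/// helper to collect the raw text between their begin/end marker directives
/// into a single string before handing it to the target streamer.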
4615 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4616 const char *AssemblerDirectiveEnd, 4617 std::string &CollectString) { 4618 4619 raw_string_ostream CollectStream(CollectString); 4620 4621 getLexer().setSkipSpace(false); 4622 4623 bool FoundEnd = false; 4624 while (!isToken(AsmToken::Eof)) { 4625 while (isToken(AsmToken::Space)) { 4626 CollectStream << getTokenStr(); 4627 Lex(); 4628 } 4629 4630 if (trySkipId(AssemblerDirectiveEnd)) { 4631 FoundEnd = true; 4632 break; 4633 } 4634 4635 CollectStream << Parser.parseStringToEndOfStatement() 4636 << getContext().getAsmInfo()->getSeparatorString(); 4637 4638 Parser.eatToEndOfStatement(); 4639 } 4640 4641 getLexer().setSkipSpace(true); 4642 4643 if (isToken(AsmToken::Eof) && !FoundEnd) { 4644 return TokError(Twine("expected directive ") + 4645 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4646 } 4647 4648 CollectStream.flush(); 4649 return false; 4650 } 4651 4652 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4653 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4654 std::string String; 4655 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4656 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4657 return true; 4658 4659 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4660 if (!PALMetadata->setFromString(String)) 4661 return Error(getLoc(), "invalid PAL metadata"); 4662 return false; 4663 } 4664 4665 /// Parse the assembler directive for old linear-format PAL metadata. 4666 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4667 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4668 return Error(getLoc(), 4669 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4670 "not available on non-amdpal OSes")).str()); 4671 } 4672 4673 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4674 PALMetadata->setLegacy(); 4675 for (;;) { 4676 uint32_t Key, Value; 4677 if (ParseAsAbsoluteExpression(Key)) { 4678 return TokError(Twine("invalid value in ") + 4679 Twine(PALMD::AssemblerDirective)); 4680 } 4681 if (!trySkipToken(AsmToken::Comma)) { 4682 return TokError(Twine("expected an even number of values in ") + 4683 Twine(PALMD::AssemblerDirective)); 4684 } 4685 if (ParseAsAbsoluteExpression(Value)) { 4686 return TokError(Twine("invalid value in ") + 4687 Twine(PALMD::AssemblerDirective)); 4688 } 4689 PALMetadata->setRegister(Key, Value); 4690 if (!trySkipToken(AsmToken::Comma)) 4691 break; 4692 } 4693 return false; 4694 } 4695 4696 /// ParseDirectiveAMDGPULDS 4697 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4698 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4699 if (getParser().checkForValidSection()) 4700 return true; 4701 4702 StringRef Name; 4703 SMLoc NameLoc = getLoc(); 4704 if (getParser().parseIdentifier(Name)) 4705 return TokError("expected identifier in directive"); 4706 4707 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4708 if (parseToken(AsmToken::Comma, "expected ','")) 4709 return true; 4710 4711 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4712 4713 int64_t Size; 4714 SMLoc SizeLoc = getLoc(); 4715 if (getParser().parseAbsoluteExpression(Size)) 4716 return true; 4717 if (Size < 0) 4718 return Error(SizeLoc, "size must be non-negative"); 4719 if (Size > LocalMemorySize) 4720 return Error(SizeLoc, "size is too large"); 4721 4722 int64_t Alignment = 4; 4723 if (trySkipToken(AsmToken::Comma)) { 4724 SMLoc AlignLoc = getLoc(); 4725 if 
(getParser().parseAbsoluteExpression(Alignment)) 4726 return true; 4727 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4728 return Error(AlignLoc, "alignment must be a power of two"); 4729 4730 // Alignment larger than the size of LDS is possible in theory, as long 4731 // as the linker manages to place to symbol at address 0, but we do want 4732 // to make sure the alignment fits nicely into a 32-bit integer. 4733 if (Alignment >= 1u << 31) 4734 return Error(AlignLoc, "alignment is too large"); 4735 } 4736 4737 if (parseToken(AsmToken::EndOfStatement, 4738 "unexpected token in '.amdgpu_lds' directive")) 4739 return true; 4740 4741 Symbol->redefineIfPossible(); 4742 if (!Symbol->isUndefined()) 4743 return Error(NameLoc, "invalid symbol redefinition"); 4744 4745 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4746 return false; 4747 } 4748 4749 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4750 StringRef IDVal = DirectiveID.getString(); 4751 4752 if (isHsaAbiVersion3(&getSTI())) { 4753 if (IDVal == ".amdgcn_target") 4754 return ParseDirectiveAMDGCNTarget(); 4755 4756 if (IDVal == ".amdhsa_kernel") 4757 return ParseDirectiveAMDHSAKernel(); 4758 4759 // TODO: Restructure/combine with PAL metadata directive. 4760 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4761 return ParseDirectiveHSAMetadata(); 4762 } else { 4763 if (IDVal == ".hsa_code_object_version") 4764 return ParseDirectiveHSACodeObjectVersion(); 4765 4766 if (IDVal == ".hsa_code_object_isa") 4767 return ParseDirectiveHSACodeObjectISA(); 4768 4769 if (IDVal == ".amd_kernel_code_t") 4770 return ParseDirectiveAMDKernelCodeT(); 4771 4772 if (IDVal == ".amdgpu_hsa_kernel") 4773 return ParseDirectiveAMDGPUHsaKernel(); 4774 4775 if (IDVal == ".amd_amdgpu_isa") 4776 return ParseDirectiveISAVersion(); 4777 4778 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4779 return ParseDirectiveHSAMetadata(); 4780 } 4781 4782 if (IDVal == ".amdgpu_lds") 4783 return ParseDirectiveAMDGPULDS(); 4784 4785 if (IDVal == PALMD::AssemblerDirectiveBegin) 4786 return ParseDirectivePALMetadataBegin(); 4787 4788 if (IDVal == PALMD::AssemblerDirective) 4789 return ParseDirectivePALMetadata(); 4790 4791 return true; 4792 } 4793 4794 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4795 unsigned RegNo) const { 4796 4797 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4798 R.isValid(); ++R) { 4799 if (*R == RegNo) 4800 return isGFX9Plus(); 4801 } 4802 4803 // GFX10 has 2 more SGPRs 104 and 105. 4804 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4805 R.isValid(); ++R) { 4806 if (*R == RegNo) 4807 return hasSGPR104_SGPR105(); 4808 } 4809 4810 switch (RegNo) { 4811 case AMDGPU::SRC_SHARED_BASE: 4812 case AMDGPU::SRC_SHARED_LIMIT: 4813 case AMDGPU::SRC_PRIVATE_BASE: 4814 case AMDGPU::SRC_PRIVATE_LIMIT: 4815 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4816 return isGFX9Plus(); 4817 case AMDGPU::TBA: 4818 case AMDGPU::TBA_LO: 4819 case AMDGPU::TBA_HI: 4820 case AMDGPU::TMA: 4821 case AMDGPU::TMA_LO: 4822 case AMDGPU::TMA_HI: 4823 return !isGFX9Plus(); 4824 case AMDGPU::XNACK_MASK: 4825 case AMDGPU::XNACK_MASK_LO: 4826 case AMDGPU::XNACK_MASK_HI: 4827 return (isVI() || isGFX9()) && hasXNACK(); 4828 case AMDGPU::SGPR_NULL: 4829 return isGFX10Plus(); 4830 default: 4831 break; 4832 } 4833 4834 if (isCI()) 4835 return true; 4836 4837 if (isSI() || isGFX10Plus()) { 4838 // No flat_scr on SI. 
4839 // On GFX10 flat scratch is not a valid register operand and can only be 4840 // accessed with s_setreg/s_getreg. 4841 switch (RegNo) { 4842 case AMDGPU::FLAT_SCR: 4843 case AMDGPU::FLAT_SCR_LO: 4844 case AMDGPU::FLAT_SCR_HI: 4845 return false; 4846 default: 4847 return true; 4848 } 4849 } 4850 4851 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4852 // SI/CI have. 4853 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4854 R.isValid(); ++R) { 4855 if (*R == RegNo) 4856 return hasSGPR102_SGPR103(); 4857 } 4858 4859 return true; 4860 } 4861 4862 OperandMatchResultTy 4863 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4864 OperandMode Mode) { 4865 // Try to parse with a custom parser 4866 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4867 4868 // If we successfully parsed the operand or if there as an error parsing, 4869 // we are done. 4870 // 4871 // If we are parsing after we reach EndOfStatement then this means we 4872 // are appending default values to the Operands list. This is only done 4873 // by custom parser, so we shouldn't continue on to the generic parsing. 4874 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4875 isToken(AsmToken::EndOfStatement)) 4876 return ResTy; 4877 4878 SMLoc RBraceLoc; 4879 SMLoc LBraceLoc = getLoc(); 4880 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 4881 unsigned Prefix = Operands.size(); 4882 4883 for (;;) { 4884 ResTy = parseReg(Operands); 4885 if (ResTy != MatchOperand_Success) 4886 return ResTy; 4887 4888 RBraceLoc = getLoc(); 4889 if (trySkipToken(AsmToken::RBrac)) 4890 break; 4891 4892 if (!trySkipToken(AsmToken::Comma)) 4893 return MatchOperand_ParseFail; 4894 } 4895 4896 if (Operands.size() - Prefix > 1) { 4897 Operands.insert(Operands.begin() + Prefix, 4898 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4899 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 4900 } 4901 4902 return MatchOperand_Success; 4903 } 4904 4905 return parseRegOrImm(Operands); 4906 } 4907 4908 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4909 // Clear any forced encodings from the previous instruction. 4910 setForcedEncodingSize(0); 4911 setForcedDPP(false); 4912 setForcedSDWA(false); 4913 4914 if (Name.endswith("_e64")) { 4915 setForcedEncodingSize(64); 4916 return Name.substr(0, Name.size() - 4); 4917 } else if (Name.endswith("_e32")) { 4918 setForcedEncodingSize(32); 4919 return Name.substr(0, Name.size() - 4); 4920 } else if (Name.endswith("_dpp")) { 4921 setForcedDPP(true); 4922 return Name.substr(0, Name.size() - 4); 4923 } else if (Name.endswith("_sdwa")) { 4924 setForcedSDWA(true); 4925 return Name.substr(0, Name.size() - 5); 4926 } 4927 return Name; 4928 } 4929 4930 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4931 StringRef Name, 4932 SMLoc NameLoc, OperandVector &Operands) { 4933 // Add the instruction mnemonic 4934 Name = parseMnemonicSuffix(Name); 4935 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4936 4937 bool IsMIMG = Name.startswith("image_"); 4938 4939 while (!trySkipToken(AsmToken::EndOfStatement)) { 4940 OperandMode Mode = OperandMode_Default; 4941 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 4942 Mode = OperandMode_NSA; 4943 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4944 4945 // Eat the comma or space if there is one. 
4946 trySkipToken(AsmToken::Comma); 4947 4948 if (Res != MatchOperand_Success) { 4949 checkUnsupportedInstruction(Name, NameLoc); 4950 if (!Parser.hasPendingError()) { 4951 // FIXME: use real operand location rather than the current location. 4952 StringRef Msg = 4953 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 4954 "not a valid operand."; 4955 Error(getLoc(), Msg); 4956 } 4957 while (!trySkipToken(AsmToken::EndOfStatement)) { 4958 lex(); 4959 } 4960 return true; 4961 } 4962 } 4963 4964 return false; 4965 } 4966 4967 //===----------------------------------------------------------------------===// 4968 // Utility functions 4969 //===----------------------------------------------------------------------===// 4970 4971 OperandMatchResultTy 4972 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4973 4974 if (!trySkipId(Prefix, AsmToken::Colon)) 4975 return MatchOperand_NoMatch; 4976 4977 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4978 } 4979 4980 OperandMatchResultTy 4981 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4982 AMDGPUOperand::ImmTy ImmTy, 4983 bool (*ConvertResult)(int64_t&)) { 4984 SMLoc S = getLoc(); 4985 int64_t Value = 0; 4986 4987 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4988 if (Res != MatchOperand_Success) 4989 return Res; 4990 4991 if (ConvertResult && !ConvertResult(Value)) { 4992 Error(S, "invalid " + StringRef(Prefix) + " value."); 4993 } 4994 4995 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4996 return MatchOperand_Success; 4997 } 4998 4999 OperandMatchResultTy 5000 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5001 OperandVector &Operands, 5002 AMDGPUOperand::ImmTy ImmTy, 5003 bool (*ConvertResult)(int64_t&)) { 5004 SMLoc S = getLoc(); 5005 if (!trySkipId(Prefix, AsmToken::Colon)) 5006 return MatchOperand_NoMatch; 5007 5008 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5009 return MatchOperand_ParseFail; 5010 5011 unsigned Val = 0; 5012 const unsigned MaxSize = 4; 5013 5014 // FIXME: How to verify the number of elements matches the number of src 5015 // operands? 5016 for (int I = 0; ; ++I) { 5017 int64_t Op; 5018 SMLoc Loc = getLoc(); 5019 if (!parseExpr(Op)) 5020 return MatchOperand_ParseFail; 5021 5022 if (Op != 0 && Op != 1) { 5023 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5024 return MatchOperand_ParseFail; 5025 } 5026 5027 Val |= (Op << I); 5028 5029 if (trySkipToken(AsmToken::RBrac)) 5030 break; 5031 5032 if (I + 1 == MaxSize) { 5033 Error(getLoc(), "expected a closing square bracket"); 5034 return MatchOperand_ParseFail; 5035 } 5036 5037 if (!skipToken(AsmToken::Comma, "expected a comma")) 5038 return MatchOperand_ParseFail; 5039 } 5040 5041 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5042 return MatchOperand_Success; 5043 } 5044 5045 OperandMatchResultTy 5046 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 5047 AMDGPUOperand::ImmTy ImmTy) { 5048 int64_t Bit = 0; 5049 SMLoc S = getLoc(); 5050 5051 // We are at the end of the statement, and this is a default argument, so 5052 // use a default value. 
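// (Illustrative) a named bit is set by spelling the name and cleared either by
// prefixing it with "no" or by omitting it entirely, e.g. "glc" vs. "noglc"
// vs. no token at all.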
5053 if (!isToken(AsmToken::EndOfStatement)) { 5054 switch(getTokenKind()) { 5055 case AsmToken::Identifier: { 5056 StringRef Tok = getTokenStr(); 5057 if (Tok == Name) { 5058 if (Tok == "r128" && !hasMIMG_R128()) 5059 Error(S, "r128 modifier is not supported on this GPU"); 5060 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 5061 Error(S, "a16 modifier is not supported on this GPU"); 5062 Bit = 1; 5063 Parser.Lex(); 5064 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 5065 Bit = 0; 5066 Parser.Lex(); 5067 } else { 5068 return MatchOperand_NoMatch; 5069 } 5070 break; 5071 } 5072 default: 5073 return MatchOperand_NoMatch; 5074 } 5075 } 5076 5077 if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) 5078 return MatchOperand_ParseFail; 5079 5080 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5081 ImmTy = AMDGPUOperand::ImmTyR128A16; 5082 5083 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5084 return MatchOperand_Success; 5085 } 5086 5087 static void addOptionalImmOperand( 5088 MCInst& Inst, const OperandVector& Operands, 5089 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5090 AMDGPUOperand::ImmTy ImmT, 5091 int64_t Default = 0) { 5092 auto i = OptionalIdx.find(ImmT); 5093 if (i != OptionalIdx.end()) { 5094 unsigned Idx = i->second; 5095 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5096 } else { 5097 Inst.addOperand(MCOperand::createImm(Default)); 5098 } 5099 } 5100 5101 OperandMatchResultTy 5102 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5103 StringRef &Value, 5104 SMLoc &StringLoc) { 5105 if (!trySkipId(Prefix, AsmToken::Colon)) 5106 return MatchOperand_NoMatch; 5107 5108 StringLoc = getLoc(); 5109 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5110 : MatchOperand_ParseFail; 5111 } 5112 5113 //===----------------------------------------------------------------------===// 5114 // MTBUF format 5115 //===----------------------------------------------------------------------===// 5116 5117 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5118 int64_t MaxVal, 5119 int64_t &Fmt) { 5120 int64_t Val; 5121 SMLoc Loc = getLoc(); 5122 5123 auto Res = parseIntWithPrefix(Pref, Val); 5124 if (Res == MatchOperand_ParseFail) 5125 return false; 5126 if (Res == MatchOperand_NoMatch) 5127 return true; 5128 5129 if (Val < 0 || Val > MaxVal) { 5130 Error(Loc, Twine("out of range ", StringRef(Pref))); 5131 return false; 5132 } 5133 5134 Fmt = Val; 5135 return true; 5136 } 5137 5138 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5139 // values to live in a joint format operand in the MCInst encoding. 5140 OperandMatchResultTy 5141 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5142 using namespace llvm::AMDGPU::MTBUFFormat; 5143 5144 int64_t Dfmt = DFMT_UNDEF; 5145 int64_t Nfmt = NFMT_UNDEF; 5146 5147 // dfmt and nfmt can appear in either order, and each is optional. 5148 for (int I = 0; I < 2; ++I) { 5149 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5150 return MatchOperand_ParseFail; 5151 5152 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5153 return MatchOperand_ParseFail; 5154 } 5155 // Skip optional comma between dfmt/nfmt 5156 // but guard against 2 commas following each other. 
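// (Illustrative) e.g. "dfmt:14, nfmt:2", "dfmt:14 nfmt:2" and "nfmt:2, dfmt:14"
// all parse to the same packed format value.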
5157 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5158 !peekToken().is(AsmToken::Comma)) { 5159 trySkipToken(AsmToken::Comma); 5160 } 5161 } 5162 5163 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5164 return MatchOperand_NoMatch; 5165 5166 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5167 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5168 5169 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5170 return MatchOperand_Success; 5171 } 5172 5173 OperandMatchResultTy 5174 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5175 using namespace llvm::AMDGPU::MTBUFFormat; 5176 5177 int64_t Fmt = UFMT_UNDEF; 5178 5179 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5180 return MatchOperand_ParseFail; 5181 5182 if (Fmt == UFMT_UNDEF) 5183 return MatchOperand_NoMatch; 5184 5185 Format = Fmt; 5186 return MatchOperand_Success; 5187 } 5188 5189 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5190 int64_t &Nfmt, 5191 StringRef FormatStr, 5192 SMLoc Loc) { 5193 using namespace llvm::AMDGPU::MTBUFFormat; 5194 int64_t Format; 5195 5196 Format = getDfmt(FormatStr); 5197 if (Format != DFMT_UNDEF) { 5198 Dfmt = Format; 5199 return true; 5200 } 5201 5202 Format = getNfmt(FormatStr, getSTI()); 5203 if (Format != NFMT_UNDEF) { 5204 Nfmt = Format; 5205 return true; 5206 } 5207 5208 Error(Loc, "unsupported format"); 5209 return false; 5210 } 5211 5212 OperandMatchResultTy 5213 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5214 SMLoc FormatLoc, 5215 int64_t &Format) { 5216 using namespace llvm::AMDGPU::MTBUFFormat; 5217 5218 int64_t Dfmt = DFMT_UNDEF; 5219 int64_t Nfmt = NFMT_UNDEF; 5220 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5221 return MatchOperand_ParseFail; 5222 5223 if (trySkipToken(AsmToken::Comma)) { 5224 StringRef Str; 5225 SMLoc Loc = getLoc(); 5226 if (!parseId(Str, "expected a format string") || 5227 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5228 return MatchOperand_ParseFail; 5229 } 5230 if (Dfmt == DFMT_UNDEF) { 5231 Error(Loc, "duplicate numeric format"); 5232 return MatchOperand_ParseFail; 5233 } else if (Nfmt == NFMT_UNDEF) { 5234 Error(Loc, "duplicate data format"); 5235 return MatchOperand_ParseFail; 5236 } 5237 } 5238 5239 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5240 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5241 5242 if (isGFX10Plus()) { 5243 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5244 if (Ufmt == UFMT_UNDEF) { 5245 Error(FormatLoc, "unsupported format"); 5246 return MatchOperand_ParseFail; 5247 } 5248 Format = Ufmt; 5249 } else { 5250 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5251 } 5252 5253 return MatchOperand_Success; 5254 } 5255 5256 OperandMatchResultTy 5257 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5258 SMLoc Loc, 5259 int64_t &Format) { 5260 using namespace llvm::AMDGPU::MTBUFFormat; 5261 5262 auto Id = getUnifiedFormat(FormatStr); 5263 if (Id == UFMT_UNDEF) 5264 return MatchOperand_NoMatch; 5265 5266 if (!isGFX10Plus()) { 5267 Error(Loc, "unified format is not supported on this GPU"); 5268 return MatchOperand_ParseFail; 5269 } 5270 5271 Format = Id; 5272 return MatchOperand_Success; 5273 } 5274 5275 OperandMatchResultTy 5276 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5277 using namespace llvm::AMDGPU::MTBUFFormat; 5278 SMLoc Loc = getLoc(); 5279 5280 if (!parseExpr(Format)) 5281 return MatchOperand_ParseFail; 5282 if (!isValidFormatEncoding(Format, getSTI())) { 5283 Error(Loc, "out of range format"); 5284 return MatchOperand_ParseFail; 5285 } 5286 5287 return MatchOperand_Success; 5288 } 5289 5290 OperandMatchResultTy 5291 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5292 using namespace llvm::AMDGPU::MTBUFFormat; 5293 5294 if (!trySkipId("format", AsmToken::Colon)) 5295 return MatchOperand_NoMatch; 5296 5297 if (trySkipToken(AsmToken::LBrac)) { 5298 StringRef FormatStr; 5299 SMLoc Loc = getLoc(); 5300 if (!parseId(FormatStr, "expected a format string")) 5301 return MatchOperand_ParseFail; 5302 5303 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5304 if (Res == MatchOperand_NoMatch) 5305 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5306 if (Res != MatchOperand_Success) 5307 return Res; 5308 5309 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5310 return MatchOperand_ParseFail; 5311 5312 return MatchOperand_Success; 5313 } 5314 5315 return parseNumericFormat(Format); 5316 } 5317 5318 OperandMatchResultTy 5319 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5320 using namespace llvm::AMDGPU::MTBUFFormat; 5321 5322 int64_t Format = getDefaultFormatEncoding(getSTI()); 5323 OperandMatchResultTy Res; 5324 SMLoc Loc = getLoc(); 5325 5326 // Parse legacy format syntax. 5327 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5328 if (Res == MatchOperand_ParseFail) 5329 return Res; 5330 5331 bool FormatFound = (Res == MatchOperand_Success); 5332 5333 Operands.push_back( 5334 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5335 5336 if (FormatFound) 5337 trySkipToken(AsmToken::Comma); 5338 5339 if (isToken(AsmToken::EndOfStatement)) { 5340 // We are expecting an soffset operand, 5341 // but let matcher handle the error. 5342 return MatchOperand_Success; 5343 } 5344 5345 // Parse soffset. 
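  // A FORMAT placeholder has already been pushed above; if a symbolic or
  // numeric "format:" specifier follows soffset, its value is patched into
  // that operand below. Illustrative syntax (register operands and format
  // names are examples only):
  //   tbuffer_load_format_x v0, off, s[0:3], dfmt:15, nfmt:2, s1
  //   tbuffer_load_format_x v0, off, s[0:3], s1 format:[BUF_DATA_FORMAT_32]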
5346 Res = parseRegOrImm(Operands); 5347 if (Res != MatchOperand_Success) 5348 return Res; 5349 5350 trySkipToken(AsmToken::Comma); 5351 5352 if (!FormatFound) { 5353 Res = parseSymbolicOrNumericFormat(Format); 5354 if (Res == MatchOperand_ParseFail) 5355 return Res; 5356 if (Res == MatchOperand_Success) { 5357 auto Size = Operands.size(); 5358 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5359 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5360 Op.setImm(Format); 5361 } 5362 return MatchOperand_Success; 5363 } 5364 5365 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5366 Error(getLoc(), "duplicate format"); 5367 return MatchOperand_ParseFail; 5368 } 5369 return MatchOperand_Success; 5370 } 5371 5372 //===----------------------------------------------------------------------===// 5373 // ds 5374 //===----------------------------------------------------------------------===// 5375 5376 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5377 const OperandVector &Operands) { 5378 OptionalImmIndexMap OptionalIdx; 5379 5380 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5381 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5382 5383 // Add the register arguments 5384 if (Op.isReg()) { 5385 Op.addRegOperands(Inst, 1); 5386 continue; 5387 } 5388 5389 // Handle optional arguments 5390 OptionalIdx[Op.getImmTy()] = i; 5391 } 5392 5393 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5394 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5395 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5396 5397 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5398 } 5399 5400 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5401 bool IsGdsHardcoded) { 5402 OptionalImmIndexMap OptionalIdx; 5403 5404 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5405 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5406 5407 // Add the register arguments 5408 if (Op.isReg()) { 5409 Op.addRegOperands(Inst, 1); 5410 continue; 5411 } 5412 5413 if (Op.isToken() && Op.getToken() == "gds") { 5414 IsGdsHardcoded = true; 5415 continue; 5416 } 5417 5418 // Handle optional arguments 5419 OptionalIdx[Op.getImmTy()] = i; 5420 } 5421 5422 AMDGPUOperand::ImmTy OffsetType = 5423 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5424 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5425 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5426 AMDGPUOperand::ImmTyOffset; 5427 5428 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5429 5430 if (!IsGdsHardcoded) { 5431 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5432 } 5433 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5434 } 5435 5436 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5437 OptionalImmIndexMap OptionalIdx; 5438 5439 unsigned OperandIdx[4]; 5440 unsigned EnMask = 0; 5441 int SrcIdx = 0; 5442 5443 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5444 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5445 5446 // Add the register arguments 5447 if (Op.isReg()) { 5448 assert(SrcIdx < 4); 5449 OperandIdx[SrcIdx] = Inst.size(); 5450 Op.addRegOperands(Inst, 1); 5451 ++SrcIdx; 5452 continue; 5453 } 5454 5455 if (Op.isOff()) { 5456 assert(SrcIdx < 4); 5457 OperandIdx[SrcIdx] = Inst.size(); 5458 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5459 ++SrcIdx; 5460 continue; 5461 } 5462 5463 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5464 Op.addImmOperands(Inst, 1); 5465 continue; 5466 } 5467 5468 if (Op.isToken() && Op.getToken() == "done") 5469 continue; 5470 5471 // Handle optional arguments 5472 OptionalIdx[Op.getImmTy()] = i; 5473 } 5474 5475 assert(SrcIdx == 4); 5476 5477 bool Compr = false; 5478 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5479 Compr = true; 5480 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5481 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5482 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5483 } 5484 5485 for (auto i = 0; i < SrcIdx; ++i) { 5486 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5487 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5488 } 5489 } 5490 5491 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5492 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5493 5494 Inst.addOperand(MCOperand::createImm(EnMask)); 5495 } 5496 5497 //===----------------------------------------------------------------------===// 5498 // s_waitcnt 5499 //===----------------------------------------------------------------------===// 5500 5501 static bool 5502 encodeCnt( 5503 const AMDGPU::IsaVersion ISA, 5504 int64_t &IntVal, 5505 int64_t CntVal, 5506 bool Saturate, 5507 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5508 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5509 { 5510 bool Failed = false; 5511 5512 IntVal = encode(ISA, IntVal, CntVal); 5513 if (CntVal != decode(ISA, IntVal)) { 5514 if (Saturate) { 5515 IntVal = encode(ISA, IntVal, -1); 5516 } else { 5517 Failed = true; 5518 } 5519 } 5520 return Failed; 5521 } 5522 5523 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5524 5525 SMLoc CntLoc = getLoc(); 5526 StringRef CntName = getTokenStr(); 5527 5528 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5529 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5530 return false; 5531 5532 int64_t CntVal; 5533 SMLoc ValLoc = getLoc(); 5534 if (!parseExpr(CntVal)) 5535 return false; 5536 5537 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5538 5539 bool Failed = true; 5540 bool Sat = CntName.endswith("_sat"); 5541 5542 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5543 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5544 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5545 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5546 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5547 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5548 } else { 5549 Error(CntLoc, "invalid counter name " + CntName); 5550 return false; 5551 } 5552 5553 if (Failed) { 5554 Error(ValLoc, "too large value for " + CntName); 5555 return false; 5556 } 5557 5558 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5559 return false; 5560 5561 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5562 if (isToken(AsmToken::EndOfStatement)) { 5563 Error(getLoc(), "expected a counter name"); 5564 return false; 5565 } 5566 } 5567 5568 return true; 5569 } 5570 5571 OperandMatchResultTy 5572 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5573 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5574 int64_t Waitcnt = getWaitcntBitMask(ISA); 5575 SMLoc S = getLoc(); 5576 5577 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5578 while (!isToken(AsmToken::EndOfStatement)) { 5579 if (!parseCnt(Waitcnt)) 5580 return MatchOperand_ParseFail; 5581 } 5582 } else { 5583 if (!parseExpr(Waitcnt)) 5584 return MatchOperand_ParseFail; 5585 } 5586 5587 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5588 return MatchOperand_Success; 5589 } 5590 5591 bool 5592 AMDGPUOperand::isSWaitCnt() const { 5593 return isImm(); 5594 } 5595 5596 //===----------------------------------------------------------------------===// 5597 // hwreg 5598 //===----------------------------------------------------------------------===// 5599 5600 bool 5601 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5602 OperandInfoTy &Offset, 5603 
OperandInfoTy &Width) { 5604 using namespace llvm::AMDGPU::Hwreg; 5605 5606 // The register may be specified by name or using a numeric code 5607 HwReg.Loc = getLoc(); 5608 if (isToken(AsmToken::Identifier) && 5609 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5610 HwReg.IsSymbolic = true; 5611 lex(); // skip register name 5612 } else if (!parseExpr(HwReg.Id, "a register name")) { 5613 return false; 5614 } 5615 5616 if (trySkipToken(AsmToken::RParen)) 5617 return true; 5618 5619 // parse optional params 5620 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 5621 return false; 5622 5623 Offset.Loc = getLoc(); 5624 if (!parseExpr(Offset.Id)) 5625 return false; 5626 5627 if (!skipToken(AsmToken::Comma, "expected a comma")) 5628 return false; 5629 5630 Width.Loc = getLoc(); 5631 return parseExpr(Width.Id) && 5632 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5633 } 5634 5635 bool 5636 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5637 const OperandInfoTy &Offset, 5638 const OperandInfoTy &Width) { 5639 5640 using namespace llvm::AMDGPU::Hwreg; 5641 5642 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5643 Error(HwReg.Loc, 5644 "specified hardware register is not supported on this GPU"); 5645 return false; 5646 } 5647 if (!isValidHwreg(HwReg.Id)) { 5648 Error(HwReg.Loc, 5649 "invalid code of hardware register: only 6-bit values are legal"); 5650 return false; 5651 } 5652 if (!isValidHwregOffset(Offset.Id)) { 5653 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 5654 return false; 5655 } 5656 if (!isValidHwregWidth(Width.Id)) { 5657 Error(Width.Loc, 5658 "invalid bitfield width: only values from 1 to 32 are legal"); 5659 return false; 5660 } 5661 return true; 5662 } 5663 5664 OperandMatchResultTy 5665 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5666 using namespace llvm::AMDGPU::Hwreg; 5667 5668 int64_t ImmVal = 0; 5669 SMLoc Loc = getLoc(); 5670 5671 if (trySkipId("hwreg", AsmToken::LParen)) { 5672 OperandInfoTy HwReg(ID_UNKNOWN_); 5673 OperandInfoTy Offset(OFFSET_DEFAULT_); 5674 OperandInfoTy Width(WIDTH_DEFAULT_); 5675 if (parseHwregBody(HwReg, Offset, Width) && 5676 validateHwreg(HwReg, Offset, Width)) { 5677 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 5678 } else { 5679 return MatchOperand_ParseFail; 5680 } 5681 } else if (parseExpr(ImmVal, "a hwreg macro")) { 5682 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5683 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5684 return MatchOperand_ParseFail; 5685 } 5686 } else { 5687 return MatchOperand_ParseFail; 5688 } 5689 5690 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5691 return MatchOperand_Success; 5692 } 5693 5694 bool AMDGPUOperand::isHwreg() const { 5695 return isImmTy(ImmTyHwreg); 5696 } 5697 5698 //===----------------------------------------------------------------------===// 5699 // sendmsg 5700 //===----------------------------------------------------------------------===// 5701 5702 bool 5703 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5704 OperandInfoTy &Op, 5705 OperandInfoTy &Stream) { 5706 using namespace llvm::AMDGPU::SendMsg; 5707 5708 Msg.Loc = getLoc(); 5709 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5710 Msg.IsSymbolic = true; 5711 lex(); // skip message name 5712 } else if (!parseExpr(Msg.Id, "a message name")) { 5713 return false; 5714 } 5715 5716 if (trySkipToken(AsmToken::Comma)) { 5717 Op.IsDefined = true; 
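    // The operation id follows the message id and may be symbolic or a numeric
    // expression, e.g. (illustrative): s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0).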
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case we only check
  // that the value can be encoded.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(Msg.Loc, "invalid message id");
    return false;
  }
  if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
  if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  if (Slot == -1) {
    Error(S, "invalid interpolation slot");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
return MatchOperand_Success; 5834 } 5835 5836 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5837 StringRef Str; 5838 SMLoc S = getLoc(); 5839 5840 if (!parseId(Str)) 5841 return MatchOperand_NoMatch; 5842 5843 if (!Str.startswith("attr")) { 5844 Error(S, "invalid interpolation attribute"); 5845 return MatchOperand_ParseFail; 5846 } 5847 5848 StringRef Chan = Str.take_back(2); 5849 int AttrChan = StringSwitch<int>(Chan) 5850 .Case(".x", 0) 5851 .Case(".y", 1) 5852 .Case(".z", 2) 5853 .Case(".w", 3) 5854 .Default(-1); 5855 if (AttrChan == -1) { 5856 Error(S, "invalid or missing interpolation attribute channel"); 5857 return MatchOperand_ParseFail; 5858 } 5859 5860 Str = Str.drop_back(2).drop_front(4); 5861 5862 uint8_t Attr; 5863 if (Str.getAsInteger(10, Attr)) { 5864 Error(S, "invalid or missing interpolation attribute number"); 5865 return MatchOperand_ParseFail; 5866 } 5867 5868 if (Attr > 63) { 5869 Error(S, "out of bounds interpolation attribute number"); 5870 return MatchOperand_ParseFail; 5871 } 5872 5873 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5874 5875 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5876 AMDGPUOperand::ImmTyInterpAttr)); 5877 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5878 AMDGPUOperand::ImmTyAttrChan)); 5879 return MatchOperand_Success; 5880 } 5881 5882 //===----------------------------------------------------------------------===// 5883 // exp 5884 //===----------------------------------------------------------------------===// 5885 5886 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5887 using namespace llvm::AMDGPU::Exp; 5888 5889 StringRef Str; 5890 SMLoc S = getLoc(); 5891 5892 if (!parseId(Str)) 5893 return MatchOperand_NoMatch; 5894 5895 unsigned Id = getTgtId(Str); 5896 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 5897 Error(S, (Id == ET_INVALID) ? 
5898 "invalid exp target" : 5899 "exp target is not supported on this GPU"); 5900 return MatchOperand_ParseFail; 5901 } 5902 5903 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 5904 AMDGPUOperand::ImmTyExpTgt)); 5905 return MatchOperand_Success; 5906 } 5907 5908 //===----------------------------------------------------------------------===// 5909 // parser helpers 5910 //===----------------------------------------------------------------------===// 5911 5912 bool 5913 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5914 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5915 } 5916 5917 bool 5918 AMDGPUAsmParser::isId(const StringRef Id) const { 5919 return isId(getToken(), Id); 5920 } 5921 5922 bool 5923 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5924 return getTokenKind() == Kind; 5925 } 5926 5927 bool 5928 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5929 if (isId(Id)) { 5930 lex(); 5931 return true; 5932 } 5933 return false; 5934 } 5935 5936 bool 5937 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5938 if (isId(Id) && peekToken().is(Kind)) { 5939 lex(); 5940 lex(); 5941 return true; 5942 } 5943 return false; 5944 } 5945 5946 bool 5947 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5948 if (isToken(Kind)) { 5949 lex(); 5950 return true; 5951 } 5952 return false; 5953 } 5954 5955 bool 5956 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5957 const StringRef ErrMsg) { 5958 if (!trySkipToken(Kind)) { 5959 Error(getLoc(), ErrMsg); 5960 return false; 5961 } 5962 return true; 5963 } 5964 5965 bool 5966 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 5967 SMLoc S = getLoc(); 5968 5969 const MCExpr *Expr; 5970 if (Parser.parseExpression(Expr)) 5971 return false; 5972 5973 if (Expr->evaluateAsAbsolute(Imm)) 5974 return true; 5975 5976 if (Expected.empty()) { 5977 Error(S, "expected absolute expression"); 5978 } else { 5979 Error(S, Twine("expected ", Expected) + 5980 Twine(" or an absolute expression")); 5981 } 5982 return false; 5983 } 5984 5985 bool 5986 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5987 SMLoc S = getLoc(); 5988 5989 const MCExpr *Expr; 5990 if (Parser.parseExpression(Expr)) 5991 return false; 5992 5993 int64_t IntVal; 5994 if (Expr->evaluateAsAbsolute(IntVal)) { 5995 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5996 } else { 5997 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5998 } 5999 return true; 6000 } 6001 6002 bool 6003 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6004 if (isToken(AsmToken::String)) { 6005 Val = getToken().getStringContents(); 6006 lex(); 6007 return true; 6008 } else { 6009 Error(getLoc(), ErrMsg); 6010 return false; 6011 } 6012 } 6013 6014 bool 6015 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6016 if (isToken(AsmToken::Identifier)) { 6017 Val = getTokenStr(); 6018 lex(); 6019 return true; 6020 } else { 6021 if (!ErrMsg.empty()) 6022 Error(getLoc(), ErrMsg); 6023 return false; 6024 } 6025 } 6026 6027 AsmToken 6028 AMDGPUAsmParser::getToken() const { 6029 return Parser.getTok(); 6030 } 6031 6032 AsmToken 6033 AMDGPUAsmParser::peekToken() { 6034 return isToken(AsmToken::EndOfStatement) ? 
getToken() : getLexer().peekTok(); 6035 } 6036 6037 void 6038 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6039 auto TokCount = getLexer().peekTokens(Tokens); 6040 6041 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6042 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6043 } 6044 6045 AsmToken::TokenKind 6046 AMDGPUAsmParser::getTokenKind() const { 6047 return getLexer().getKind(); 6048 } 6049 6050 SMLoc 6051 AMDGPUAsmParser::getLoc() const { 6052 return getToken().getLoc(); 6053 } 6054 6055 StringRef 6056 AMDGPUAsmParser::getTokenStr() const { 6057 return getToken().getString(); 6058 } 6059 6060 void 6061 AMDGPUAsmParser::lex() { 6062 Parser.Lex(); 6063 } 6064 6065 SMLoc 6066 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6067 const OperandVector &Operands) const { 6068 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6069 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6070 if (Test(Op)) 6071 return Op.getStartLoc(); 6072 } 6073 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6074 } 6075 6076 SMLoc 6077 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6078 const OperandVector &Operands) const { 6079 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6080 return getOperandLoc(Test, Operands); 6081 } 6082 6083 SMLoc 6084 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6085 const OperandVector &Operands) const { 6086 auto Test = [=](const AMDGPUOperand& Op) { 6087 return Op.isRegKind() && Op.getReg() == Reg; 6088 }; 6089 return getOperandLoc(Test, Operands); 6090 } 6091 6092 SMLoc 6093 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6094 auto Test = [](const AMDGPUOperand& Op) { 6095 return Op.IsImmKindLiteral() || Op.isExpr(); 6096 }; 6097 return getOperandLoc(Test, Operands); 6098 } 6099 6100 SMLoc 6101 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6102 auto Test = [](const AMDGPUOperand& Op) { 6103 return Op.isImmKindConst(); 6104 }; 6105 return getOperandLoc(Test, Operands); 6106 } 6107 6108 //===----------------------------------------------------------------------===// 6109 // swizzle 6110 //===----------------------------------------------------------------------===// 6111 6112 LLVM_READNONE 6113 static unsigned 6114 encodeBitmaskPerm(const unsigned AndMask, 6115 const unsigned OrMask, 6116 const unsigned XorMask) { 6117 using namespace llvm::AMDGPU::Swizzle; 6118 6119 return BITMASK_PERM_ENC | 6120 (AndMask << BITMASK_AND_SHIFT) | 6121 (OrMask << BITMASK_OR_SHIFT) | 6122 (XorMask << BITMASK_XOR_SHIFT); 6123 } 6124 6125 bool 6126 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6127 const unsigned MinVal, 6128 const unsigned MaxVal, 6129 const StringRef ErrMsg, 6130 SMLoc &Loc) { 6131 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6132 return false; 6133 } 6134 Loc = getLoc(); 6135 if (!parseExpr(Op)) { 6136 return false; 6137 } 6138 if (Op < MinVal || Op > MaxVal) { 6139 Error(Loc, ErrMsg); 6140 return false; 6141 } 6142 6143 return true; 6144 } 6145 6146 bool 6147 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6148 const unsigned MinVal, 6149 const unsigned MaxVal, 6150 const StringRef ErrMsg) { 6151 SMLoc Loc; 6152 for (unsigned i = 0; i < OpNum; ++i) { 6153 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6154 return false; 6155 } 6156 6157 return true; 6158 } 6159 6160 bool 6161 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 6162 using namespace llvm::AMDGPU::Swizzle; 6163 6164 int64_t 
Lane[LANE_NUM]; 6165 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6166 "expected a 2-bit lane id")) { 6167 Imm = QUAD_PERM_ENC; 6168 for (unsigned I = 0; I < LANE_NUM; ++I) { 6169 Imm |= Lane[I] << (LANE_SHIFT * I); 6170 } 6171 return true; 6172 } 6173 return false; 6174 } 6175 6176 bool 6177 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6178 using namespace llvm::AMDGPU::Swizzle; 6179 6180 SMLoc Loc; 6181 int64_t GroupSize; 6182 int64_t LaneIdx; 6183 6184 if (!parseSwizzleOperand(GroupSize, 6185 2, 32, 6186 "group size must be in the interval [2,32]", 6187 Loc)) { 6188 return false; 6189 } 6190 if (!isPowerOf2_64(GroupSize)) { 6191 Error(Loc, "group size must be a power of two"); 6192 return false; 6193 } 6194 if (parseSwizzleOperand(LaneIdx, 6195 0, GroupSize - 1, 6196 "lane id must be in the interval [0,group size - 1]", 6197 Loc)) { 6198 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6199 return true; 6200 } 6201 return false; 6202 } 6203 6204 bool 6205 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6206 using namespace llvm::AMDGPU::Swizzle; 6207 6208 SMLoc Loc; 6209 int64_t GroupSize; 6210 6211 if (!parseSwizzleOperand(GroupSize, 6212 2, 32, 6213 "group size must be in the interval [2,32]", 6214 Loc)) { 6215 return false; 6216 } 6217 if (!isPowerOf2_64(GroupSize)) { 6218 Error(Loc, "group size must be a power of two"); 6219 return false; 6220 } 6221 6222 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6223 return true; 6224 } 6225 6226 bool 6227 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6228 using namespace llvm::AMDGPU::Swizzle; 6229 6230 SMLoc Loc; 6231 int64_t GroupSize; 6232 6233 if (!parseSwizzleOperand(GroupSize, 6234 1, 16, 6235 "group size must be in the interval [1,16]", 6236 Loc)) { 6237 return false; 6238 } 6239 if (!isPowerOf2_64(GroupSize)) { 6240 Error(Loc, "group size must be a power of two"); 6241 return false; 6242 } 6243 6244 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6245 return true; 6246 } 6247 6248 bool 6249 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6250 using namespace llvm::AMDGPU::Swizzle; 6251 6252 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6253 return false; 6254 } 6255 6256 StringRef Ctl; 6257 SMLoc StrLoc = getLoc(); 6258 if (!parseString(Ctl)) { 6259 return false; 6260 } 6261 if (Ctl.size() != BITMASK_WIDTH) { 6262 Error(StrLoc, "expected a 5-character mask"); 6263 return false; 6264 } 6265 6266 unsigned AndMask = 0; 6267 unsigned OrMask = 0; 6268 unsigned XorMask = 0; 6269 6270 for (size_t i = 0; i < Ctl.size(); ++i) { 6271 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6272 switch(Ctl[i]) { 6273 default: 6274 Error(StrLoc, "invalid mask"); 6275 return false; 6276 case '0': 6277 break; 6278 case '1': 6279 OrMask |= Mask; 6280 break; 6281 case 'p': 6282 AndMask |= Mask; 6283 break; 6284 case 'i': 6285 AndMask |= Mask; 6286 XorMask |= Mask; 6287 break; 6288 } 6289 } 6290 6291 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6292 return true; 6293 } 6294 6295 bool 6296 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6297 6298 SMLoc OffsetLoc = getLoc(); 6299 6300 if (!parseExpr(Imm, "a swizzle macro")) { 6301 return false; 6302 } 6303 if (!isUInt<16>(Imm)) { 6304 Error(OffsetLoc, "expected a 16-bit offset"); 6305 return false; 6306 } 6307 return true; 6308 } 6309 6310 bool 6311 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6312 using namespace llvm::AMDGPU::Swizzle; 6313 6314 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 6315 6316 
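    // A swizzle macro names one of the modes tried below (QUAD_PERM,
    // BITMASK_PERM, BROADCAST, SWAP, REVERSE) followed by mode-specific
    // operands, e.g. (illustrative):
    //   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)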
SMLoc ModeLoc = getLoc(); 6317 bool Ok = false; 6318 6319 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6320 Ok = parseSwizzleQuadPerm(Imm); 6321 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6322 Ok = parseSwizzleBitmaskPerm(Imm); 6323 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6324 Ok = parseSwizzleBroadcast(Imm); 6325 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6326 Ok = parseSwizzleSwap(Imm); 6327 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6328 Ok = parseSwizzleReverse(Imm); 6329 } else { 6330 Error(ModeLoc, "expected a swizzle mode"); 6331 } 6332 6333 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6334 } 6335 6336 return false; 6337 } 6338 6339 OperandMatchResultTy 6340 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6341 SMLoc S = getLoc(); 6342 int64_t Imm = 0; 6343 6344 if (trySkipId("offset")) { 6345 6346 bool Ok = false; 6347 if (skipToken(AsmToken::Colon, "expected a colon")) { 6348 if (trySkipId("swizzle")) { 6349 Ok = parseSwizzleMacro(Imm); 6350 } else { 6351 Ok = parseSwizzleOffset(Imm); 6352 } 6353 } 6354 6355 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6356 6357 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6358 } else { 6359 // Swizzle "offset" operand is optional. 6360 // If it is omitted, try parsing other optional operands. 6361 return parseOptionalOpr(Operands); 6362 } 6363 } 6364 6365 bool 6366 AMDGPUOperand::isSwizzle() const { 6367 return isImmTy(ImmTySwizzle); 6368 } 6369 6370 //===----------------------------------------------------------------------===// 6371 // VGPR Index Mode 6372 //===----------------------------------------------------------------------===// 6373 6374 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6375 6376 using namespace llvm::AMDGPU::VGPRIndexMode; 6377 6378 if (trySkipToken(AsmToken::RParen)) { 6379 return OFF; 6380 } 6381 6382 int64_t Imm = 0; 6383 6384 while (true) { 6385 unsigned Mode = 0; 6386 SMLoc S = getLoc(); 6387 6388 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6389 if (trySkipId(IdSymbolic[ModeId])) { 6390 Mode = 1 << ModeId; 6391 break; 6392 } 6393 } 6394 6395 if (Mode == 0) { 6396 Error(S, (Imm == 0)? 
6397 "expected a VGPR index mode or a closing parenthesis" : 6398 "expected a VGPR index mode"); 6399 return UNDEF; 6400 } 6401 6402 if (Imm & Mode) { 6403 Error(S, "duplicate VGPR index mode"); 6404 return UNDEF; 6405 } 6406 Imm |= Mode; 6407 6408 if (trySkipToken(AsmToken::RParen)) 6409 break; 6410 if (!skipToken(AsmToken::Comma, 6411 "expected a comma or a closing parenthesis")) 6412 return UNDEF; 6413 } 6414 6415 return Imm; 6416 } 6417 6418 OperandMatchResultTy 6419 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6420 6421 using namespace llvm::AMDGPU::VGPRIndexMode; 6422 6423 int64_t Imm = 0; 6424 SMLoc S = getLoc(); 6425 6426 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6427 Imm = parseGPRIdxMacro(); 6428 if (Imm == UNDEF) 6429 return MatchOperand_ParseFail; 6430 } else { 6431 if (getParser().parseAbsoluteExpression(Imm)) 6432 return MatchOperand_ParseFail; 6433 if (Imm < 0 || !isUInt<4>(Imm)) { 6434 Error(S, "invalid immediate: only 4-bit values are legal"); 6435 return MatchOperand_ParseFail; 6436 } 6437 } 6438 6439 Operands.push_back( 6440 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6441 return MatchOperand_Success; 6442 } 6443 6444 bool AMDGPUOperand::isGPRIdxMode() const { 6445 return isImmTy(ImmTyGprIdxMode); 6446 } 6447 6448 //===----------------------------------------------------------------------===// 6449 // sopp branch targets 6450 //===----------------------------------------------------------------------===// 6451 6452 OperandMatchResultTy 6453 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6454 6455 // Make sure we are not parsing something 6456 // that looks like a label or an expression but is not. 6457 // This will improve error messages. 6458 if (isRegister() || isModifier()) 6459 return MatchOperand_NoMatch; 6460 6461 if (!parseExpr(Operands)) 6462 return MatchOperand_ParseFail; 6463 6464 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6465 assert(Opr.isImm() || Opr.isExpr()); 6466 SMLoc Loc = Opr.getStartLoc(); 6467 6468 // Currently we do not support arbitrary expressions as branch targets. 6469 // Only labels and absolute expressions are accepted. 
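  // For example (illustrative): "s_branch skip_block" resolves through a
  // label, while a literal target such as "s_branch 0x10" must fit into the
  // signed 16-bit offset field checked below.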
6470 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6471 Error(Loc, "expected an absolute expression or a label"); 6472 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6473 Error(Loc, "expected a 16-bit signed jump offset"); 6474 } 6475 6476 return MatchOperand_Success; 6477 } 6478 6479 //===----------------------------------------------------------------------===// 6480 // Boolean holding registers 6481 //===----------------------------------------------------------------------===// 6482 6483 OperandMatchResultTy 6484 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6485 return parseReg(Operands); 6486 } 6487 6488 //===----------------------------------------------------------------------===// 6489 // mubuf 6490 //===----------------------------------------------------------------------===// 6491 6492 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6493 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6494 } 6495 6496 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6497 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6498 } 6499 6500 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const { 6501 return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC); 6502 } 6503 6504 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6505 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6506 } 6507 6508 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6509 const OperandVector &Operands, 6510 bool IsAtomic, 6511 bool IsAtomicReturn, 6512 bool IsLds) { 6513 bool IsLdsOpcode = IsLds; 6514 bool HasLdsModifier = false; 6515 OptionalImmIndexMap OptionalIdx; 6516 assert(IsAtomicReturn ? IsAtomic : true); 6517 unsigned FirstOperandIdx = 1; 6518 6519 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6520 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6521 6522 // Add the register arguments 6523 if (Op.isReg()) { 6524 Op.addRegOperands(Inst, 1); 6525 // Insert a tied src for atomic return dst. 6526 // This cannot be postponed as subsequent calls to 6527 // addImmOperands rely on correct number of MC operands. 6528 if (IsAtomicReturn && i == FirstOperandIdx) 6529 Op.addRegOperands(Inst, 1); 6530 continue; 6531 } 6532 6533 // Handle the case where soffset is an immediate 6534 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6535 Op.addImmOperands(Inst, 1); 6536 continue; 6537 } 6538 6539 HasLdsModifier |= Op.isLDS(); 6540 6541 // Handle tokens like 'offen' which are sometimes hard-coded into the 6542 // asm string. There are no MCInst operands for these. 6543 if (Op.isToken()) { 6544 continue; 6545 } 6546 assert(Op.isImm()); 6547 6548 // Handle optional arguments 6549 OptionalIdx[Op.getImmTy()] = i; 6550 } 6551 6552 // This is a workaround for an llvm quirk which may result in an 6553 // incorrect instruction selection. Lds and non-lds versions of 6554 // MUBUF instructions are identical except that lds versions 6555 // have mandatory 'lds' modifier. However this modifier follows 6556 // optional modifiers and llvm asm matcher regards this 'lds' 6557 // modifier as an optional one. As a result, an lds version 6558 // of opcode may be selected even if it has no 'lds' modifier. 6559 if (IsLdsOpcode && !HasLdsModifier) { 6560 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6561 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
6562 Inst.setOpcode(NoLdsOpcode); 6563 IsLdsOpcode = false; 6564 } 6565 } 6566 6567 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6568 if (!IsAtomic || IsAtomicReturn) { 6569 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC, 6570 IsAtomicReturn ? -1 : 0); 6571 } 6572 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6573 6574 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6575 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6576 } 6577 6578 if (isGFX10Plus()) 6579 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6580 } 6581 6582 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6583 OptionalImmIndexMap OptionalIdx; 6584 6585 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6586 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6587 6588 // Add the register arguments 6589 if (Op.isReg()) { 6590 Op.addRegOperands(Inst, 1); 6591 continue; 6592 } 6593 6594 // Handle the case where soffset is an immediate 6595 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6596 Op.addImmOperands(Inst, 1); 6597 continue; 6598 } 6599 6600 // Handle tokens like 'offen' which are sometimes hard-coded into the 6601 // asm string. There are no MCInst operands for these. 6602 if (Op.isToken()) { 6603 continue; 6604 } 6605 assert(Op.isImm()); 6606 6607 // Handle optional arguments 6608 OptionalIdx[Op.getImmTy()] = i; 6609 } 6610 6611 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6612 AMDGPUOperand::ImmTyOffset); 6613 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6614 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6615 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6616 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6617 6618 if (isGFX10Plus()) 6619 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6620 } 6621 6622 //===----------------------------------------------------------------------===// 6623 // mimg 6624 //===----------------------------------------------------------------------===// 6625 6626 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6627 bool IsAtomic) { 6628 unsigned I = 1; 6629 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6630 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6631 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6632 } 6633 6634 if (IsAtomic) { 6635 // Add src, same as dst 6636 assert(Desc.getNumDefs() == 1); 6637 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6638 } 6639 6640 OptionalImmIndexMap OptionalIdx; 6641 6642 for (unsigned E = Operands.size(); I != E; ++I) { 6643 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6644 6645 // Add the register arguments 6646 if (Op.isReg()) { 6647 Op.addRegOperands(Inst, 1); 6648 } else if (Op.isImmModifier()) { 6649 OptionalIdx[Op.getImmTy()] = I; 6650 } else if (!Op.isToken()) { 6651 llvm_unreachable("unexpected operand type"); 6652 } 6653 } 6654 6655 bool IsGFX10Plus = isGFX10Plus(); 6656 6657 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6658 if (IsGFX10Plus) 6659 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6660 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6661 if (IsGFX10Plus) 6662 addOptionalImmOperand(Inst, 
Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6663 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6664 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6665 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6666 if (IsGFX10Plus) 6667 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6668 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6669 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6670 if (!IsGFX10Plus) 6671 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6672 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6673 } 6674 6675 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6676 cvtMIMG(Inst, Operands, true); 6677 } 6678 6679 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 6680 const OperandVector &Operands) { 6681 for (unsigned I = 1; I < Operands.size(); ++I) { 6682 auto &Operand = (AMDGPUOperand &)*Operands[I]; 6683 if (Operand.isReg()) 6684 Operand.addRegOperands(Inst, 1); 6685 } 6686 6687 Inst.addOperand(MCOperand::createImm(1)); // a16 6688 } 6689 6690 //===----------------------------------------------------------------------===// 6691 // smrd 6692 //===----------------------------------------------------------------------===// 6693 6694 bool AMDGPUOperand::isSMRDOffset8() const { 6695 return isImm() && isUInt<8>(getImm()); 6696 } 6697 6698 bool AMDGPUOperand::isSMEMOffset() const { 6699 return isImm(); // Offset range is checked later by validator. 6700 } 6701 6702 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6703 // 32-bit literals are only supported on CI and we only want to use them 6704 // when the offset is > 8-bits. 6705 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6706 } 6707 6708 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6709 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6710 } 6711 6712 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6713 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6714 } 6715 6716 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6717 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6718 } 6719 6720 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6721 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6722 } 6723 6724 //===----------------------------------------------------------------------===// 6725 // vop3 6726 //===----------------------------------------------------------------------===// 6727 6728 static bool ConvertOmodMul(int64_t &Mul) { 6729 if (Mul != 1 && Mul != 2 && Mul != 4) 6730 return false; 6731 6732 Mul >>= 1; 6733 return true; 6734 } 6735 6736 static bool ConvertOmodDiv(int64_t &Div) { 6737 if (Div == 1) { 6738 Div = 0; 6739 return true; 6740 } 6741 6742 if (Div == 2) { 6743 Div = 3; 6744 return true; 6745 } 6746 6747 return false; 6748 } 6749 6750 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6751 if (BoundCtrl == 0) { 6752 BoundCtrl = 1; 6753 return true; 6754 } 6755 6756 if (BoundCtrl == -1) { 6757 BoundCtrl = 0; 6758 return true; 6759 } 6760 6761 return false; 6762 } 6763 6764 // Note: the order in this table matches the order of operands in AsmString. 
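// Each entry is {asm name, immediate operand type, IsBit, ConvertResult}:
// IsBit selects parsing as a named bit (e.g. "glc" / "noglc") via
// parseNamedBit, and ConvertResult optionally rewrites the parsed value
// (see ConvertOmodMul / ConvertBoundCtrl above). A name may appear twice
// (e.g. "d16"), presumably because that operand occupies different
// positions in different instruction groups.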
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA, true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyA16, true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",     AMDGPUOperand::ImmTyDim, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};

OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which
  // have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
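  //
  // Illustrative case (per the note above): a flat/global atomic such as
  // "global_atomic_add v0, v[1:2], v2, off glc" hardcodes 'glc' after the
  // optional modifiers, so the loop below greedily consumes up to
  // MAX_OPR_LOOKAHEAD extra optional operands to keep the generated parser
  // away from that hardcoded operand.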
6825 6826 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6827 if (res != MatchOperand_Success || 6828 isToken(AsmToken::EndOfStatement)) 6829 break; 6830 6831 trySkipToken(AsmToken::Comma); 6832 res = parseOptionalOpr(Operands); 6833 } 6834 6835 return res; 6836 } 6837 6838 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6839 OperandMatchResultTy res; 6840 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6841 // try to parse any optional operand here 6842 if (Op.IsBit) { 6843 res = parseNamedBit(Op.Name, Operands, Op.Type); 6844 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6845 res = parseOModOperand(Operands); 6846 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6847 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6848 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6849 res = parseSDWASel(Operands, Op.Name, Op.Type); 6850 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6851 res = parseSDWADstUnused(Operands); 6852 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6853 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6854 Op.Type == AMDGPUOperand::ImmTyNegLo || 6855 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6856 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6857 Op.ConvertResult); 6858 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6859 res = parseDim(Operands); 6860 } else { 6861 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6862 } 6863 if (res != MatchOperand_NoMatch) { 6864 return res; 6865 } 6866 } 6867 return MatchOperand_NoMatch; 6868 } 6869 6870 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6871 StringRef Name = getTokenStr(); 6872 if (Name == "mul") { 6873 return parseIntWithPrefix("mul", Operands, 6874 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6875 } 6876 6877 if (Name == "div") { 6878 return parseIntWithPrefix("div", Operands, 6879 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6880 } 6881 6882 return MatchOperand_NoMatch; 6883 } 6884 6885 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6886 cvtVOP3P(Inst, Operands); 6887 6888 int Opc = Inst.getOpcode(); 6889 6890 int SrcNum; 6891 const int Ops[] = { AMDGPU::OpName::src0, 6892 AMDGPU::OpName::src1, 6893 AMDGPU::OpName::src2 }; 6894 for (SrcNum = 0; 6895 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6896 ++SrcNum); 6897 assert(SrcNum > 0); 6898 6899 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6900 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6901 6902 if ((OpSel & (1 << SrcNum)) != 0) { 6903 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6904 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6905 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6906 } 6907 } 6908 6909 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6910 // 1. This operand is input modifiers 6911 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6912 // 2. This is not last operand 6913 && Desc.NumOperands > (OpNum + 1) 6914 // 3. Next operand is register class 6915 && Desc.OpInfo[OpNum + 1].RegClass != -1 6916 // 4. 
Next register is not tied to any other operand 6917 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6918 } 6919 6920 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6921 { 6922 OptionalImmIndexMap OptionalIdx; 6923 unsigned Opc = Inst.getOpcode(); 6924 6925 unsigned I = 1; 6926 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6927 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6928 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6929 } 6930 6931 for (unsigned E = Operands.size(); I != E; ++I) { 6932 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6933 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6934 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6935 } else if (Op.isInterpSlot() || 6936 Op.isInterpAttr() || 6937 Op.isAttrChan()) { 6938 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6939 } else if (Op.isImmModifier()) { 6940 OptionalIdx[Op.getImmTy()] = I; 6941 } else { 6942 llvm_unreachable("unhandled operand type"); 6943 } 6944 } 6945 6946 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6947 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6948 } 6949 6950 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6951 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6952 } 6953 6954 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6955 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6956 } 6957 } 6958 6959 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6960 OptionalImmIndexMap &OptionalIdx) { 6961 unsigned Opc = Inst.getOpcode(); 6962 6963 unsigned I = 1; 6964 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6965 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6966 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6967 } 6968 6969 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6970 // This instruction has src modifiers 6971 for (unsigned E = Operands.size(); I != E; ++I) { 6972 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6973 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6974 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6975 } else if (Op.isImmModifier()) { 6976 OptionalIdx[Op.getImmTy()] = I; 6977 } else if (Op.isRegOrImm()) { 6978 Op.addRegOrImmOperands(Inst, 1); 6979 } else { 6980 llvm_unreachable("unhandled operand type"); 6981 } 6982 } 6983 } else { 6984 // No src modifiers 6985 for (unsigned E = Operands.size(); I != E; ++I) { 6986 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6987 if (Op.isMod()) { 6988 OptionalIdx[Op.getImmTy()] = I; 6989 } else { 6990 Op.addRegOrImmOperands(Inst, 1); 6991 } 6992 } 6993 } 6994 6995 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6996 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6997 } 6998 6999 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7000 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7001 } 7002 7003 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7004 // it has src2 register operand that is tied to dst operand 7005 // we don't allow modifiers for this operand in assembler so src2_modifiers 7006 // should be 0. 
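  // Concretely, the fixup below inserts an explicit zero src2_modifiers
  // immediate at the index reported by getNamedOperandIdx and then re-adds
  // operand 0 (the dst register) as the tied src2 operand.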
7007 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7008 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7009 Opc == AMDGPU::V_MAC_F32_e64_vi || 7010 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7011 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7012 Opc == AMDGPU::V_MAC_F16_e64_vi || 7013 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7014 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7015 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7016 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7017 auto it = Inst.begin(); 7018 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7019 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7020 ++it; 7021 // Copy the operand to ensure it's not invalidated when Inst grows. 7022 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7023 } 7024 } 7025 7026 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7027 OptionalImmIndexMap OptionalIdx; 7028 cvtVOP3(Inst, Operands, OptionalIdx); 7029 } 7030 7031 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 7032 const OperandVector &Operands) { 7033 OptionalImmIndexMap OptIdx; 7034 const int Opc = Inst.getOpcode(); 7035 const MCInstrDesc &Desc = MII.get(Opc); 7036 7037 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7038 7039 cvtVOP3(Inst, Operands, OptIdx); 7040 7041 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7042 assert(!IsPacked); 7043 Inst.addOperand(Inst.getOperand(0)); 7044 } 7045 7046 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7047 // instruction, and then figure out where to actually put the modifiers 7048 7049 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7050 7051 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7052 if (OpSelHiIdx != -1) { 7053 int DefaultVal = IsPacked ? 
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}
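// Note (summarizing the checks above, not adding new constraints): blgp and
// cbsz are 3-bit fields and abid is a 4-bit field; they act as MFMA (mAI)
// operand modifiers, and out-of-range values are rejected during matching.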
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    if (getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (!isToken(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getTokenStr();

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i]) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}
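// Worked examples for the selector encodings above, derived from the shift
// amounts in the parsing loops: "quad_perm:[0,1,2,3]" packs four 2-bit
// selects, element i into bits [2*i+1:2*i], giving 0xE4 (the identity
// permutation); "dpp8:[1,0,0,0,0,0,0,0]" packs eight 3-bit lane selectors,
// lane i into bits [3*i+2:3*i], giving the 24-bit value 1.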
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
    .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
    .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
    .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
    .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
    .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
    .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
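// The defaults above mirror the "no effect" DPP settings encoded in the
// CreateImm calls: row_mask and bank_mask default to 0xf (all rows and banks
// enabled) while bound_ctrl and fi default to 0. cvtDPP below applies the
// same defaults when the optional operands are omitted from the source.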
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied old or src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments.
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
          .Case("BYTE_0", SdwaSel::BYTE_0)
          .Case("BYTE_1", SdwaSel::BYTE_1)
          .Case("BYTE_2", SdwaSel::BYTE_2)
          .Case("BYTE_3", SdwaSel::BYTE_3)
          .Case("WORD_0", SdwaSel::WORD_0)
          .Case("WORD_1", SdwaSel::WORD_1)
          .Case("DWORD", SdwaSel::DWORD)
          .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}
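// Usage sketch (assumed operand spellings, not taken from this function):
// SDWA selectors are written as "dst_sel:BYTE_0", "src0_sel:WORD_1",
// "src1_sel:DWORD" and so on; parseSDWASel maps the keyword after the colon
// through the StringSwitch above to the corresponding SdwaSel value.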
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
          .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
          .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
          .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
          .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
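      // Concretely, the operand counts checked below follow from that layout:
      // with only the dst emitted, Inst has 1 operand, so this vcc is the
      // carry-out dst; after src0 and src1 (modifiers + register each) it has
      // 1 + 2 + 2 = 5 operands, so this vcc is the carry-in source.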
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // where the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }