//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ?
SISrcMods::SEXT : 0u; 93 return Operand; 94 } 95 96 int64_t getModifiersOperand() const { 97 assert(!(hasFPModifiers() && hasIntModifiers()) 98 && "fp and int modifiers should not be used simultaneously"); 99 if (hasFPModifiers()) { 100 return getFPModifiersOperand(); 101 } else if (hasIntModifiers()) { 102 return getIntModifiersOperand(); 103 } else { 104 return 0; 105 } 106 } 107 108 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 109 }; 110 111 enum ImmTy { 112 ImmTyNone, 113 ImmTyGDS, 114 ImmTyLDS, 115 ImmTyOffen, 116 ImmTyIdxen, 117 ImmTyAddr64, 118 ImmTyOffset, 119 ImmTyInstOffset, 120 ImmTyOffset0, 121 ImmTyOffset1, 122 ImmTySMEMOffsetMod, 123 ImmTyCPol, 124 ImmTyTFE, 125 ImmTyD16, 126 ImmTyClampSI, 127 ImmTyOModSI, 128 ImmTySDWADstSel, 129 ImmTySDWASrc0Sel, 130 ImmTySDWASrc1Sel, 131 ImmTySDWADstUnused, 132 ImmTyDMask, 133 ImmTyDim, 134 ImmTyUNorm, 135 ImmTyDA, 136 ImmTyR128A16, 137 ImmTyA16, 138 ImmTyLWE, 139 ImmTyExpTgt, 140 ImmTyExpCompr, 141 ImmTyExpVM, 142 ImmTyFORMAT, 143 ImmTyHwreg, 144 ImmTyOff, 145 ImmTySendMsg, 146 ImmTyInterpSlot, 147 ImmTyInterpAttr, 148 ImmTyInterpAttrChan, 149 ImmTyOpSel, 150 ImmTyOpSelHi, 151 ImmTyNegLo, 152 ImmTyNegHi, 153 ImmTyDPP8, 154 ImmTyDppCtrl, 155 ImmTyDppRowMask, 156 ImmTyDppBankMask, 157 ImmTyDppBoundCtrl, 158 ImmTyDppFI, 159 ImmTySwizzle, 160 ImmTyGprIdxMode, 161 ImmTyHigh, 162 ImmTyBLGP, 163 ImmTyCBSZ, 164 ImmTyABID, 165 ImmTyEndpgm, 166 ImmTyWaitVDST, 167 ImmTyWaitEXP, 168 }; 169 170 // Immediate operand kind. 171 // It helps to identify the location of an offending operand after an error. 172 // Note that regular literals and mandatory literals (KImm) must be handled 173 // differently. When looking for an offending operand, we should usually 174 // ignore mandatory literals because they are part of the instruction and 175 // cannot be changed. Report location of mandatory operands only for VOPD, 176 // when both OpX and OpY have a KImm and there are no other literals. 
177 enum ImmKindTy { 178 ImmKindTyNone, 179 ImmKindTyLiteral, 180 ImmKindTyMandatoryLiteral, 181 ImmKindTyConst, 182 }; 183 184 private: 185 struct TokOp { 186 const char *Data; 187 unsigned Length; 188 }; 189 190 struct ImmOp { 191 int64_t Val; 192 ImmTy Type; 193 bool IsFPImm; 194 mutable ImmKindTy Kind; 195 Modifiers Mods; 196 }; 197 198 struct RegOp { 199 unsigned RegNo; 200 Modifiers Mods; 201 }; 202 203 union { 204 TokOp Tok; 205 ImmOp Imm; 206 RegOp Reg; 207 const MCExpr *Expr; 208 }; 209 210 public: 211 bool isToken() const override { return Kind == Token; } 212 213 bool isSymbolRefExpr() const { 214 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 215 } 216 217 bool isImm() const override { 218 return Kind == Immediate; 219 } 220 221 void setImmKindNone() const { 222 assert(isImm()); 223 Imm.Kind = ImmKindTyNone; 224 } 225 226 void setImmKindLiteral() const { 227 assert(isImm()); 228 Imm.Kind = ImmKindTyLiteral; 229 } 230 231 void setImmKindMandatoryLiteral() const { 232 assert(isImm()); 233 Imm.Kind = ImmKindTyMandatoryLiteral; 234 } 235 236 void setImmKindConst() const { 237 assert(isImm()); 238 Imm.Kind = ImmKindTyConst; 239 } 240 241 bool IsImmKindLiteral() const { 242 return isImm() && Imm.Kind == ImmKindTyLiteral; 243 } 244 245 bool IsImmKindMandatoryLiteral() const { 246 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral; 247 } 248 249 bool isImmKindConst() const { 250 return isImm() && Imm.Kind == ImmKindTyConst; 251 } 252 253 bool isInlinableImm(MVT type) const; 254 bool isLiteralImm(MVT type) const; 255 256 bool isRegKind() const { 257 return Kind == Register; 258 } 259 260 bool isReg() const override { 261 return isRegKind() && !hasModifiers(); 262 } 263 264 bool isRegOrInline(unsigned RCID, MVT type) const { 265 return isRegClass(RCID) || isInlinableImm(type); 266 } 267 268 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 269 return isRegOrInline(RCID, type) || isLiteralImm(type); 270 } 271 272 bool isRegOrImmWithInt16InputMods() const { 273 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 274 } 275 276 bool isRegOrImmWithInt32InputMods() const { 277 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 278 } 279 280 bool isRegOrInlineImmWithInt16InputMods() const { 281 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); 282 } 283 284 bool isRegOrInlineImmWithInt32InputMods() const { 285 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); 286 } 287 288 bool isRegOrImmWithInt64InputMods() const { 289 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 290 } 291 292 bool isRegOrImmWithFP16InputMods() const { 293 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 294 } 295 296 bool isRegOrImmWithFP32InputMods() const { 297 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 298 } 299 300 bool isRegOrImmWithFP64InputMods() const { 301 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 302 } 303 304 bool isRegOrInlineImmWithFP16InputMods() const { 305 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); 306 } 307 308 bool isRegOrInlineImmWithFP32InputMods() const { 309 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); 310 } 311 312 313 bool isVReg() const { 314 return isRegClass(AMDGPU::VGPR_32RegClassID) || 315 isRegClass(AMDGPU::VReg_64RegClassID) || 316 isRegClass(AMDGPU::VReg_96RegClassID) || 317 isRegClass(AMDGPU::VReg_128RegClassID) || 318 isRegClass(AMDGPU::VReg_160RegClassID) || 319 isRegClass(AMDGPU::VReg_192RegClassID) || 320 
isRegClass(AMDGPU::VReg_256RegClassID) || 321 isRegClass(AMDGPU::VReg_512RegClassID) || 322 isRegClass(AMDGPU::VReg_1024RegClassID); 323 } 324 325 bool isVReg32() const { 326 return isRegClass(AMDGPU::VGPR_32RegClassID); 327 } 328 329 bool isVReg32OrOff() const { 330 return isOff() || isVReg32(); 331 } 332 333 bool isNull() const { 334 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 335 } 336 337 bool isVRegWithInputMods() const; 338 bool isT16VRegWithInputMods() const; 339 340 bool isSDWAOperand(MVT type) const; 341 bool isSDWAFP16Operand() const; 342 bool isSDWAFP32Operand() const; 343 bool isSDWAInt16Operand() const; 344 bool isSDWAInt32Operand() const; 345 346 bool isImmTy(ImmTy ImmT) const { 347 return isImm() && Imm.Type == ImmT; 348 } 349 350 bool isImmLiteral() const { return isImmTy(ImmTyNone); } 351 352 bool isImmModifier() const { 353 return isImm() && Imm.Type != ImmTyNone; 354 } 355 356 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 357 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 358 bool isDMask() const { return isImmTy(ImmTyDMask); } 359 bool isDim() const { return isImmTy(ImmTyDim); } 360 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 361 bool isDA() const { return isImmTy(ImmTyDA); } 362 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 363 bool isA16() const { return isImmTy(ImmTyA16); } 364 bool isLWE() const { return isImmTy(ImmTyLWE); } 365 bool isOff() const { return isImmTy(ImmTyOff); } 366 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 367 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 368 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 369 bool isOffen() const { return isImmTy(ImmTyOffen); } 370 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 371 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 372 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 373 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 374 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 375 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); } 376 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 377 bool isGDS() const { return isImmTy(ImmTyGDS); } 378 bool isLDS() const { return isImmTy(ImmTyLDS); } 379 bool isCPol() const { return isImmTy(ImmTyCPol); } 380 bool isTFE() const { return isImmTy(ImmTyTFE); } 381 bool isD16() const { return isImmTy(ImmTyD16); } 382 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 383 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); } 384 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); } 385 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 386 bool isDppFI() const { return isImmTy(ImmTyDppFI); } 387 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); } 388 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); } 389 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); } 390 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); } 391 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 392 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 393 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); } 394 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 395 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 396 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 397 bool isNegHi() const { return 
isImmTy(ImmTyNegHi); } 398 bool isHigh() const { return isImmTy(ImmTyHigh); } 399 400 bool isRegOrImm() const { 401 return isReg() || isImm(); 402 } 403 404 bool isRegClass(unsigned RCID) const; 405 406 bool isInlineValue() const; 407 408 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 409 return isRegOrInline(RCID, type) && !hasModifiers(); 410 } 411 412 bool isSCSrcB16() const { 413 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 414 } 415 416 bool isSCSrcV2B16() const { 417 return isSCSrcB16(); 418 } 419 420 bool isSCSrcB32() const { 421 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 422 } 423 424 bool isSCSrcB64() const { 425 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 426 } 427 428 bool isBoolReg() const; 429 430 bool isSCSrcF16() const { 431 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 432 } 433 434 bool isSCSrcV2F16() const { 435 return isSCSrcF16(); 436 } 437 438 bool isSCSrcF32() const { 439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 440 } 441 442 bool isSCSrcF64() const { 443 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 444 } 445 446 bool isSSrcB32() const { 447 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 448 } 449 450 bool isSSrcB16() const { 451 return isSCSrcB16() || isLiteralImm(MVT::i16); 452 } 453 454 bool isSSrcV2B16() const { 455 llvm_unreachable("cannot happen"); 456 return isSSrcB16(); 457 } 458 459 bool isSSrcB64() const { 460 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 461 // See isVSrc64(). 462 return isSCSrcB64() || isLiteralImm(MVT::i64); 463 } 464 465 bool isSSrcF32() const { 466 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 467 } 468 469 bool isSSrcF64() const { 470 return isSCSrcB64() || isLiteralImm(MVT::f64); 471 } 472 473 bool isSSrcF16() const { 474 return isSCSrcB16() || isLiteralImm(MVT::f16); 475 } 476 477 bool isSSrcV2F16() const { 478 llvm_unreachable("cannot happen"); 479 return isSSrcF16(); 480 } 481 482 bool isSSrcV2FP32() const { 483 llvm_unreachable("cannot happen"); 484 return isSSrcF32(); 485 } 486 487 bool isSCSrcV2FP32() const { 488 llvm_unreachable("cannot happen"); 489 return isSCSrcF32(); 490 } 491 492 bool isSSrcV2INT32() const { 493 llvm_unreachable("cannot happen"); 494 return isSSrcB32(); 495 } 496 497 bool isSCSrcV2INT32() const { 498 llvm_unreachable("cannot happen"); 499 return isSCSrcB32(); 500 } 501 502 bool isSSrcOrLdsB32() const { 503 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 504 isLiteralImm(MVT::i32) || isExpr(); 505 } 506 507 bool isVCSrcB32() const { 508 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 509 } 510 511 bool isVCSrcB64() const { 512 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 513 } 514 515 bool isVCSrcTB16_Lo128() const { 516 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16); 517 } 518 519 bool isVCSrcB16() const { 520 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 521 } 522 523 bool isVCSrcV2B16() const { 524 return isVCSrcB16(); 525 } 526 527 bool isVCSrcF32() const { 528 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 529 } 530 531 bool isVCSrcF64() const { 532 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 533 } 534 535 bool isVCSrcTF16_Lo128() const { 536 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16); 537 } 538 539 bool isVCSrcF16() const { 540 return 
isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 541 } 542 543 bool isVCSrcV2F16() const { 544 return isVCSrcF16(); 545 } 546 547 bool isVSrcB32() const { 548 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 549 } 550 551 bool isVSrcB64() const { 552 return isVCSrcF64() || isLiteralImm(MVT::i64); 553 } 554 555 bool isVSrcTB16_Lo128() const { 556 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16); 557 } 558 559 bool isVSrcB16() const { 560 return isVCSrcB16() || isLiteralImm(MVT::i16); 561 } 562 563 bool isVSrcV2B16() const { 564 return isVSrcB16() || isLiteralImm(MVT::v2i16); 565 } 566 567 bool isVCSrcV2FP32() const { 568 return isVCSrcF64(); 569 } 570 571 bool isVSrcV2FP32() const { 572 return isVSrcF64() || isLiteralImm(MVT::v2f32); 573 } 574 575 bool isVCSrcV2INT32() const { 576 return isVCSrcB64(); 577 } 578 579 bool isVSrcV2INT32() const { 580 return isVSrcB64() || isLiteralImm(MVT::v2i32); 581 } 582 583 bool isVSrcF32() const { 584 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 585 } 586 587 bool isVSrcF64() const { 588 return isVCSrcF64() || isLiteralImm(MVT::f64); 589 } 590 591 bool isVSrcTF16_Lo128() const { 592 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16); 593 } 594 595 bool isVSrcF16() const { 596 return isVCSrcF16() || isLiteralImm(MVT::f16); 597 } 598 599 bool isVSrcV2F16() const { 600 return isVSrcF16() || isLiteralImm(MVT::v2f16); 601 } 602 603 bool isVISrcB32() const { 604 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 605 } 606 607 bool isVISrcB16() const { 608 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 609 } 610 611 bool isVISrcV2B16() const { 612 return isVISrcB16(); 613 } 614 615 bool isVISrcF32() const { 616 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 617 } 618 619 bool isVISrcF16() const { 620 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 621 } 622 623 bool isVISrcV2F16() const { 624 return isVISrcF16() || isVISrcB32(); 625 } 626 627 bool isVISrc_64B64() const { 628 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 629 } 630 631 bool isVISrc_64F64() const { 632 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 633 } 634 635 bool isVISrc_64V2FP32() const { 636 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 637 } 638 639 bool isVISrc_64V2INT32() const { 640 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 641 } 642 643 bool isVISrc_256B64() const { 644 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 645 } 646 647 bool isVISrc_256F64() const { 648 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 649 } 650 651 bool isVISrc_128B16() const { 652 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 653 } 654 655 bool isVISrc_128V2B16() const { 656 return isVISrc_128B16(); 657 } 658 659 bool isVISrc_128B32() const { 660 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 661 } 662 663 bool isVISrc_128F32() const { 664 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 665 } 666 667 bool isVISrc_256V2FP32() const { 668 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 669 } 670 671 bool isVISrc_256V2INT32() const { 672 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 673 } 674 675 bool isVISrc_512B32() const { 676 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 677 } 678 679 bool isVISrc_512B16() const { 680 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, 
MVT::i16); 681 } 682 683 bool isVISrc_512V2B16() const { 684 return isVISrc_512B16(); 685 } 686 687 bool isVISrc_512F32() const { 688 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 689 } 690 691 bool isVISrc_512F16() const { 692 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 693 } 694 695 bool isVISrc_512V2F16() const { 696 return isVISrc_512F16() || isVISrc_512B32(); 697 } 698 699 bool isVISrc_1024B32() const { 700 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 701 } 702 703 bool isVISrc_1024B16() const { 704 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 705 } 706 707 bool isVISrc_1024V2B16() const { 708 return isVISrc_1024B16(); 709 } 710 711 bool isVISrc_1024F32() const { 712 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 713 } 714 715 bool isVISrc_1024F16() const { 716 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 717 } 718 719 bool isVISrc_1024V2F16() const { 720 return isVISrc_1024F16() || isVISrc_1024B32(); 721 } 722 723 bool isAISrcB32() const { 724 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 725 } 726 727 bool isAISrcB16() const { 728 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 729 } 730 731 bool isAISrcV2B16() const { 732 return isAISrcB16(); 733 } 734 735 bool isAISrcF32() const { 736 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 737 } 738 739 bool isAISrcF16() const { 740 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 741 } 742 743 bool isAISrcV2F16() const { 744 return isAISrcF16() || isAISrcB32(); 745 } 746 747 bool isAISrc_64B64() const { 748 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 749 } 750 751 bool isAISrc_64F64() const { 752 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 753 } 754 755 bool isAISrc_128B32() const { 756 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 757 } 758 759 bool isAISrc_128B16() const { 760 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 761 } 762 763 bool isAISrc_128V2B16() const { 764 return isAISrc_128B16(); 765 } 766 767 bool isAISrc_128F32() const { 768 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 769 } 770 771 bool isAISrc_128F16() const { 772 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 773 } 774 775 bool isAISrc_128V2F16() const { 776 return isAISrc_128F16() || isAISrc_128B32(); 777 } 778 779 bool isVISrc_128F16() const { 780 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 781 } 782 783 bool isVISrc_128V2F16() const { 784 return isVISrc_128F16() || isVISrc_128B32(); 785 } 786 787 bool isAISrc_256B64() const { 788 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 789 } 790 791 bool isAISrc_256F64() const { 792 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 793 } 794 795 bool isAISrc_512B32() const { 796 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 797 } 798 799 bool isAISrc_512B16() const { 800 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 801 } 802 803 bool isAISrc_512V2B16() const { 804 return isAISrc_512B16(); 805 } 806 807 bool isAISrc_512F32() const { 808 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 809 } 810 811 bool isAISrc_512F16() const { 812 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 813 } 814 815 bool isAISrc_512V2F16() const { 816 return isAISrc_512F16() || isAISrc_512B32(); 817 } 
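
  // Note: the isVISrc_* / isAISrc_* predicates above and below follow a common
  // pattern: is{V,A}ISrc_<TotalWidth><ElementType> accepts a VGPR/AGPR of the
  // given total bit width, or an inline constant of the given element type,
  // with no modifiers (see isRegOrInlineNoMods above).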
818 819 bool isAISrc_1024B32() const { 820 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 821 } 822 823 bool isAISrc_1024B16() const { 824 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 825 } 826 827 bool isAISrc_1024V2B16() const { 828 return isAISrc_1024B16(); 829 } 830 831 bool isAISrc_1024F32() const { 832 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 833 } 834 835 bool isAISrc_1024F16() const { 836 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 837 } 838 839 bool isAISrc_1024V2F16() const { 840 return isAISrc_1024F16() || isAISrc_1024B32(); 841 } 842 843 bool isKImmFP32() const { 844 return isLiteralImm(MVT::f32); 845 } 846 847 bool isKImmFP16() const { 848 return isLiteralImm(MVT::f16); 849 } 850 851 bool isMem() const override { 852 return false; 853 } 854 855 bool isExpr() const { 856 return Kind == Expression; 857 } 858 859 bool isSOPPBrTarget() const { return isExpr() || isImm(); } 860 861 bool isSWaitCnt() const; 862 bool isDepCtr() const; 863 bool isSDelayALU() const; 864 bool isHwreg() const; 865 bool isSendMsg() const; 866 bool isSwizzle() const; 867 bool isSMRDOffset8() const; 868 bool isSMEMOffset() const; 869 bool isSMRDLiteralOffset() const; 870 bool isDPP8() const; 871 bool isDPPCtrl() const; 872 bool isBLGP() const; 873 bool isCBSZ() const; 874 bool isABID() const; 875 bool isGPRIdxMode() const; 876 bool isS16Imm() const; 877 bool isU16Imm() const; 878 bool isEndpgm() const; 879 bool isWaitVDST() const; 880 bool isWaitEXP() const; 881 882 StringRef getToken() const { 883 assert(isToken()); 884 return StringRef(Tok.Data, Tok.Length); 885 } 886 887 int64_t getImm() const { 888 assert(isImm()); 889 return Imm.Val; 890 } 891 892 void setImm(int64_t Val) { 893 assert(isImm()); 894 Imm.Val = Val; 895 } 896 897 ImmTy getImmTy() const { 898 assert(isImm()); 899 return Imm.Type; 900 } 901 902 unsigned getReg() const override { 903 assert(isRegKind()); 904 return Reg.RegNo; 905 } 906 907 SMLoc getStartLoc() const override { 908 return StartLoc; 909 } 910 911 SMLoc getEndLoc() const override { 912 return EndLoc; 913 } 914 915 SMRange getLocRange() const { 916 return SMRange(StartLoc, EndLoc); 917 } 918 919 Modifiers getModifiers() const { 920 assert(isRegKind() || isImmTy(ImmTyNone)); 921 return isRegKind() ? 
Reg.Mods : Imm.Mods; 922 } 923 924 void setModifiers(Modifiers Mods) { 925 assert(isRegKind() || isImmTy(ImmTyNone)); 926 if (isRegKind()) 927 Reg.Mods = Mods; 928 else 929 Imm.Mods = Mods; 930 } 931 932 bool hasModifiers() const { 933 return getModifiers().hasModifiers(); 934 } 935 936 bool hasFPModifiers() const { 937 return getModifiers().hasFPModifiers(); 938 } 939 940 bool hasIntModifiers() const { 941 return getModifiers().hasIntModifiers(); 942 } 943 944 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 945 946 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 947 948 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 949 950 void addRegOperands(MCInst &Inst, unsigned N) const; 951 952 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 953 if (isRegKind()) 954 addRegOperands(Inst, N); 955 else 956 addImmOperands(Inst, N); 957 } 958 959 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 960 Modifiers Mods = getModifiers(); 961 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 962 if (isRegKind()) { 963 addRegOperands(Inst, N); 964 } else { 965 addImmOperands(Inst, N, false); 966 } 967 } 968 969 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 970 assert(!hasIntModifiers()); 971 addRegOrImmWithInputModsOperands(Inst, N); 972 } 973 974 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 975 assert(!hasFPModifiers()); 976 addRegOrImmWithInputModsOperands(Inst, N); 977 } 978 979 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 980 Modifiers Mods = getModifiers(); 981 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 982 assert(isRegKind()); 983 addRegOperands(Inst, N); 984 } 985 986 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 987 assert(!hasIntModifiers()); 988 addRegWithInputModsOperands(Inst, N); 989 } 990 991 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 992 assert(!hasFPModifiers()); 993 addRegWithInputModsOperands(Inst, N); 994 } 995 996 static void printImmTy(raw_ostream& OS, ImmTy Type) { 997 switch (Type) { 998 case ImmTyNone: OS << "None"; break; 999 case ImmTyGDS: OS << "GDS"; break; 1000 case ImmTyLDS: OS << "LDS"; break; 1001 case ImmTyOffen: OS << "Offen"; break; 1002 case ImmTyIdxen: OS << "Idxen"; break; 1003 case ImmTyAddr64: OS << "Addr64"; break; 1004 case ImmTyOffset: OS << "Offset"; break; 1005 case ImmTyInstOffset: OS << "InstOffset"; break; 1006 case ImmTyOffset0: OS << "Offset0"; break; 1007 case ImmTyOffset1: OS << "Offset1"; break; 1008 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break; 1009 case ImmTyCPol: OS << "CPol"; break; 1010 case ImmTyTFE: OS << "TFE"; break; 1011 case ImmTyD16: OS << "D16"; break; 1012 case ImmTyFORMAT: OS << "FORMAT"; break; 1013 case ImmTyClampSI: OS << "ClampSI"; break; 1014 case ImmTyOModSI: OS << "OModSI"; break; 1015 case ImmTyDPP8: OS << "DPP8"; break; 1016 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1017 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1018 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1019 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1020 case ImmTyDppFI: OS << "DppFI"; break; 1021 case ImmTySDWADstSel: OS << "SDWADstSel"; break; 1022 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break; 1023 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break; 1024 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break; 1025 case ImmTyDMask: OS << "DMask"; break; 
1026 case ImmTyDim: OS << "Dim"; break; 1027 case ImmTyUNorm: OS << "UNorm"; break; 1028 case ImmTyDA: OS << "DA"; break; 1029 case ImmTyR128A16: OS << "R128A16"; break; 1030 case ImmTyA16: OS << "A16"; break; 1031 case ImmTyLWE: OS << "LWE"; break; 1032 case ImmTyOff: OS << "Off"; break; 1033 case ImmTyExpTgt: OS << "ExpTgt"; break; 1034 case ImmTyExpCompr: OS << "ExpCompr"; break; 1035 case ImmTyExpVM: OS << "ExpVM"; break; 1036 case ImmTyHwreg: OS << "Hwreg"; break; 1037 case ImmTySendMsg: OS << "SendMsg"; break; 1038 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1039 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1040 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break; 1041 case ImmTyOpSel: OS << "OpSel"; break; 1042 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1043 case ImmTyNegLo: OS << "NegLo"; break; 1044 case ImmTyNegHi: OS << "NegHi"; break; 1045 case ImmTySwizzle: OS << "Swizzle"; break; 1046 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1047 case ImmTyHigh: OS << "High"; break; 1048 case ImmTyBLGP: OS << "BLGP"; break; 1049 case ImmTyCBSZ: OS << "CBSZ"; break; 1050 case ImmTyABID: OS << "ABID"; break; 1051 case ImmTyEndpgm: OS << "Endpgm"; break; 1052 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1053 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1054 } 1055 } 1056 1057 void print(raw_ostream &OS) const override { 1058 switch (Kind) { 1059 case Register: 1060 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1061 break; 1062 case Immediate: 1063 OS << '<' << getImm(); 1064 if (getImmTy() != ImmTyNone) { 1065 OS << " type: "; printImmTy(OS, getImmTy()); 1066 } 1067 OS << " mods: " << Imm.Mods << '>'; 1068 break; 1069 case Token: 1070 OS << '\'' << getToken() << '\''; 1071 break; 1072 case Expression: 1073 OS << "<expr " << *Expr << '>'; 1074 break; 1075 } 1076 } 1077 1078 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1079 int64_t Val, SMLoc Loc, 1080 ImmTy Type = ImmTyNone, 1081 bool IsFPImm = false) { 1082 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1083 Op->Imm.Val = Val; 1084 Op->Imm.IsFPImm = IsFPImm; 1085 Op->Imm.Kind = ImmKindTyNone; 1086 Op->Imm.Type = Type; 1087 Op->Imm.Mods = Modifiers(); 1088 Op->StartLoc = Loc; 1089 Op->EndLoc = Loc; 1090 return Op; 1091 } 1092 1093 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1094 StringRef Str, SMLoc Loc, 1095 bool HasExplicitEncodingSize = true) { 1096 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1097 Res->Tok.Data = Str.data(); 1098 Res->Tok.Length = Str.size(); 1099 Res->StartLoc = Loc; 1100 Res->EndLoc = Loc; 1101 return Res; 1102 } 1103 1104 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1105 unsigned RegNo, SMLoc S, 1106 SMLoc E) { 1107 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1108 Op->Reg.RegNo = RegNo; 1109 Op->Reg.Mods = Modifiers(); 1110 Op->StartLoc = S; 1111 Op->EndLoc = E; 1112 return Op; 1113 } 1114 1115 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1116 const class MCExpr *Expr, SMLoc S) { 1117 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1118 Op->Expr = Expr; 1119 Op->StartLoc = S; 1120 Op->EndLoc = S; 1121 return Op; 1122 } 1123 }; 1124 1125 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1126 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1127 return OS; 1128 } 1129 1130 //===----------------------------------------------------------------------===// 1131 // 
AsmParser 1132 //===----------------------------------------------------------------------===// 1133 1134 // Holds info related to the current kernel, e.g. count of SGPRs used. 1135 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1136 // .amdgpu_hsa_kernel or at EOF. 1137 class KernelScopeInfo { 1138 int SgprIndexUnusedMin = -1; 1139 int VgprIndexUnusedMin = -1; 1140 int AgprIndexUnusedMin = -1; 1141 MCContext *Ctx = nullptr; 1142 MCSubtargetInfo const *MSTI = nullptr; 1143 1144 void usesSgprAt(int i) { 1145 if (i >= SgprIndexUnusedMin) { 1146 SgprIndexUnusedMin = ++i; 1147 if (Ctx) { 1148 MCSymbol* const Sym = 1149 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1150 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1151 } 1152 } 1153 } 1154 1155 void usesVgprAt(int i) { 1156 if (i >= VgprIndexUnusedMin) { 1157 VgprIndexUnusedMin = ++i; 1158 if (Ctx) { 1159 MCSymbol* const Sym = 1160 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1161 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1162 VgprIndexUnusedMin); 1163 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1164 } 1165 } 1166 } 1167 1168 void usesAgprAt(int i) { 1169 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1170 if (!hasMAIInsts(*MSTI)) 1171 return; 1172 1173 if (i >= AgprIndexUnusedMin) { 1174 AgprIndexUnusedMin = ++i; 1175 if (Ctx) { 1176 MCSymbol* const Sym = 1177 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1178 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1179 1180 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1181 MCSymbol* const vSym = 1182 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1183 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1184 VgprIndexUnusedMin); 1185 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1186 } 1187 } 1188 } 1189 1190 public: 1191 KernelScopeInfo() = default; 1192 1193 void initialize(MCContext &Context) { 1194 Ctx = &Context; 1195 MSTI = Ctx->getSubtargetInfo(); 1196 1197 usesSgprAt(SgprIndexUnusedMin = -1); 1198 usesVgprAt(VgprIndexUnusedMin = -1); 1199 if (hasMAIInsts(*MSTI)) { 1200 usesAgprAt(AgprIndexUnusedMin = -1); 1201 } 1202 } 1203 1204 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1205 unsigned RegWidth) { 1206 switch (RegKind) { 1207 case IS_SGPR: 1208 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1209 break; 1210 case IS_AGPR: 1211 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1212 break; 1213 case IS_VGPR: 1214 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1215 break; 1216 default: 1217 break; 1218 } 1219 } 1220 }; 1221 1222 class AMDGPUAsmParser : public MCTargetAsmParser { 1223 MCAsmParser &Parser; 1224 1225 unsigned ForcedEncodingSize = 0; 1226 bool ForcedDPP = false; 1227 bool ForcedSDWA = false; 1228 KernelScopeInfo KernelScope; 1229 1230 /// @name Auto-generated Match Functions 1231 /// { 1232 1233 #define GET_ASSEMBLER_HEADER 1234 #include "AMDGPUGenAsmMatcher.inc" 1235 1236 /// } 1237 1238 private: 1239 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1240 bool OutOfRangeError(SMRange Range); 1241 /// Calculate VGPR/SGPR blocks required for given target, reserved 1242 /// registers, and user-specified NextFreeXGPR values. 1243 /// 1244 /// \param Features [in] Target features, used for bug corrections. 1245 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1345 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1346 MCContext &Ctx = getContext(); 1347 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1348 MCSymbol *Sym = 1349 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1350 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1351 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1352 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1353 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1354 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1355 } else { 1356 MCSymbol *Sym = 1357 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1358 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1359 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1360 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1361 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1362 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1363 } 1364 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1365 initializeGprCountSymbol(IS_VGPR); 1366 initializeGprCountSymbol(IS_SGPR); 1367 } else 1368 KernelScope.initialize(getContext()); 1369 } 1370 } 1371 1372 bool hasMIMG_R128() const { 1373 return AMDGPU::hasMIMG_R128(getSTI()); 1374 } 1375 1376 bool hasPackedD16() const { 1377 return AMDGPU::hasPackedD16(getSTI()); 1378 } 1379 1380 bool hasA16() const { return AMDGPU::hasA16(getSTI()); } 1381 1382 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1383 1384 bool isSI() const { 1385 return AMDGPU::isSI(getSTI()); 1386 } 1387 1388 bool isCI() const { 1389 return AMDGPU::isCI(getSTI()); 1390 } 1391 1392 bool isVI() const { 1393 return AMDGPU::isVI(getSTI()); 1394 } 1395 1396 bool isGFX9() const { 1397 return AMDGPU::isGFX9(getSTI()); 1398 } 1399 1400 // TODO: isGFX90A is also true for GFX940. We need to clean it. 
1401 bool isGFX90A() const { 1402 return AMDGPU::isGFX90A(getSTI()); 1403 } 1404 1405 bool isGFX940() const { 1406 return AMDGPU::isGFX940(getSTI()); 1407 } 1408 1409 bool isGFX9Plus() const { 1410 return AMDGPU::isGFX9Plus(getSTI()); 1411 } 1412 1413 bool isGFX10() const { 1414 return AMDGPU::isGFX10(getSTI()); 1415 } 1416 1417 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1418 1419 bool isGFX11() const { 1420 return AMDGPU::isGFX11(getSTI()); 1421 } 1422 1423 bool isGFX11Plus() const { 1424 return AMDGPU::isGFX11Plus(getSTI()); 1425 } 1426 1427 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); } 1428 1429 bool isGFX10_BEncoding() const { 1430 return AMDGPU::isGFX10_BEncoding(getSTI()); 1431 } 1432 1433 bool hasInv2PiInlineImm() const { 1434 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1435 } 1436 1437 bool hasFlatOffsets() const { 1438 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1439 } 1440 1441 bool hasArchitectedFlatScratch() const { 1442 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1443 } 1444 1445 bool hasSGPR102_SGPR103() const { 1446 return !isVI() && !isGFX9(); 1447 } 1448 1449 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1450 1451 bool hasIntClamp() const { 1452 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1453 } 1454 1455 bool hasPartialNSAEncoding() const { 1456 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding]; 1457 } 1458 1459 unsigned getNSAMaxSize() const { 1460 return AMDGPU::getNSAMaxSize(getSTI()); 1461 } 1462 1463 AMDGPUTargetStreamer &getTargetStreamer() { 1464 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1465 return static_cast<AMDGPUTargetStreamer &>(TS); 1466 } 1467 1468 const MCRegisterInfo *getMRI() const { 1469 // We need this const_cast because for some reason getContext() is not const 1470 // in MCAsmParser. 
1471 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1472 } 1473 1474 const MCInstrInfo *getMII() const { 1475 return &MII; 1476 } 1477 1478 const FeatureBitset &getFeatureBits() const { 1479 return getSTI().getFeatureBits(); 1480 } 1481 1482 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1483 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1484 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1485 1486 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1487 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1488 bool isForcedDPP() const { return ForcedDPP; } 1489 bool isForcedSDWA() const { return ForcedSDWA; } 1490 ArrayRef<unsigned> getMatchedVariants() const; 1491 StringRef getMatchedVariantName() const; 1492 1493 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1494 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1495 bool RestoreOnFailure); 1496 bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc, 1497 SMLoc &EndLoc) override; 1498 OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc, 1499 SMLoc &EndLoc) override; 1500 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1501 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1502 unsigned Kind) override; 1503 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1504 OperandVector &Operands, MCStreamer &Out, 1505 uint64_t &ErrorInfo, 1506 bool MatchingInlineAsm) override; 1507 bool ParseDirective(AsmToken DirectiveID) override; 1508 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic, 1509 OperandMode Mode = OperandMode_Default); 1510 StringRef parseMnemonicSuffix(StringRef Name); 1511 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1512 SMLoc NameLoc, OperandVector &Operands) override; 1513 //bool ProcessInstruction(MCInst &Inst); 1514 1515 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands); 1516 1517 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int); 1518 1519 ParseStatus 1520 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1521 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1522 std::function<bool(int64_t &)> ConvertResult = nullptr); 1523 1524 ParseStatus parseOperandArrayWithPrefix( 1525 const char *Prefix, OperandVector &Operands, 1526 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1527 bool (*ConvertResult)(int64_t &) = nullptr); 1528 1529 ParseStatus 1530 parseNamedBit(StringRef Name, OperandVector &Operands, 1531 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1532 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const; 1533 ParseStatus parseCPol(OperandVector &Operands); 1534 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value, 1535 SMLoc &StringLoc); 1536 1537 bool isModifier(); 1538 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1539 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1540 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1541 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1542 bool parseSP3NegModifier(); 1543 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1544 ParseStatus parseReg(OperandVector &Operands); 1545 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1546 
ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands, 1547 bool AllowImm = true); 1548 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands, 1549 bool AllowImm = true); 1550 ParseStatus parseRegWithFPInputMods(OperandVector &Operands); 1551 ParseStatus parseRegWithIntInputMods(OperandVector &Operands); 1552 ParseStatus parseVReg32OrOff(OperandVector &Operands); 1553 ParseStatus parseDfmtNfmt(int64_t &Format); 1554 ParseStatus parseUfmt(int64_t &Format); 1555 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, 1556 int64_t &Format); 1557 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, 1558 int64_t &Format); 1559 ParseStatus parseFORMAT(OperandVector &Operands); 1560 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format); 1561 ParseStatus parseNumericFormat(int64_t &Format); 1562 ParseStatus parseFlatOffset(OperandVector &Operands); 1563 ParseStatus parseR128A16(OperandVector &Operands); 1564 ParseStatus parseBLGP(OperandVector &Operands); 1565 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1566 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1567 1568 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1569 1570 bool parseCnt(int64_t &IntVal); 1571 ParseStatus parseSWaitCnt(OperandVector &Operands); 1572 1573 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1574 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1575 ParseStatus parseDepCtr(OperandVector &Operands); 1576 1577 bool parseDelay(int64_t &Delay); 1578 ParseStatus parseSDelayALU(OperandVector &Operands); 1579 1580 ParseStatus parseHwreg(OperandVector &Operands); 1581 1582 private: 1583 struct OperandInfoTy { 1584 SMLoc Loc; 1585 int64_t Id; 1586 bool IsSymbolic = false; 1587 bool IsDefined = false; 1588 1589 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1590 }; 1591 1592 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1593 bool validateSendMsg(const OperandInfoTy &Msg, 1594 const OperandInfoTy &Op, 1595 const OperandInfoTy &Stream); 1596 1597 bool parseHwregBody(OperandInfoTy &HwReg, 1598 OperandInfoTy &Offset, 1599 OperandInfoTy &Width); 1600 bool validateHwreg(const OperandInfoTy &HwReg, 1601 const OperandInfoTy &Offset, 1602 const OperandInfoTy &Width); 1603 1604 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1605 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1606 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1607 1608 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1609 const OperandVector &Operands) const; 1610 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1611 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1612 SMLoc getLitLoc(const OperandVector &Operands, 1613 bool SearchMandatoryLiterals = false) const; 1614 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const; 1615 SMLoc getConstLoc(const OperandVector &Operands) const; 1616 SMLoc getInstLoc(const OperandVector &Operands) const; 1617 1618 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1619 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1620 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1621 bool validateSOPLiteral(const MCInst &Inst) const; 1622 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1623 bool 
validateVOPDRegBankConstraints(const MCInst &Inst, 1624 const OperandVector &Operands); 1625 bool validateIntClampSupported(const MCInst &Inst); 1626 bool validateMIMGAtomicDMask(const MCInst &Inst); 1627 bool validateMIMGGatherDMask(const MCInst &Inst); 1628 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1629 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc); 1630 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc); 1631 bool validateMIMGD16(const MCInst &Inst); 1632 bool validateMIMGMSAA(const MCInst &Inst); 1633 bool validateOpSel(const MCInst &Inst); 1634 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1635 bool validateVccOperand(unsigned Reg) const; 1636 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1637 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1638 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands); 1639 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1640 bool validateAGPRLdSt(const MCInst &Inst) const; 1641 bool validateVGPRAlign(const MCInst &Inst) const; 1642 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1643 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1644 bool validateDivScale(const MCInst &Inst); 1645 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands); 1646 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1647 const SMLoc &IDLoc); 1648 bool validateExeczVcczOperands(const OperandVector &Operands); 1649 bool validateTFE(const MCInst &Inst, const OperandVector &Operands); 1650 std::optional<StringRef> validateLdsDirect(const MCInst &Inst); 1651 unsigned getConstantBusLimit(unsigned Opcode) const; 1652 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1653 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1654 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1655 1656 bool isSupportedMnemo(StringRef Mnemo, 1657 const FeatureBitset &FBS); 1658 bool isSupportedMnemo(StringRef Mnemo, 1659 const FeatureBitset &FBS, 1660 ArrayRef<unsigned> Variants); 1661 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1662 1663 bool isId(const StringRef Id) const; 1664 bool isId(const AsmToken &Token, const StringRef Id) const; 1665 bool isToken(const AsmToken::TokenKind Kind) const; 1666 StringRef getId() const; 1667 bool trySkipId(const StringRef Id); 1668 bool trySkipId(const StringRef Pref, const StringRef Id); 1669 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1670 bool trySkipToken(const AsmToken::TokenKind Kind); 1671 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1672 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1673 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1674 1675 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1676 AsmToken::TokenKind getTokenKind() const; 1677 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1678 bool parseExpr(OperandVector &Operands); 1679 StringRef getTokenStr() const; 1680 AsmToken peekToken(bool ShouldSkipSpace = true); 1681 AsmToken getToken() const; 1682 SMLoc getLoc() const; 1683 void lex(); 1684 1685 public: 1686 void onBeginOfFile() override; 1687 1688 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK); 1689 1690 ParseStatus parseExpTgt(OperandVector &Operands); 1691 ParseStatus 
parseSendMsg(OperandVector &Operands); 1692 ParseStatus parseInterpSlot(OperandVector &Operands); 1693 ParseStatus parseInterpAttr(OperandVector &Operands); 1694 ParseStatus parseSOPPBrTarget(OperandVector &Operands); 1695 ParseStatus parseBoolReg(OperandVector &Operands); 1696 1697 bool parseSwizzleOperand(int64_t &Op, 1698 const unsigned MinVal, 1699 const unsigned MaxVal, 1700 const StringRef ErrMsg, 1701 SMLoc &Loc); 1702 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1703 const unsigned MinVal, 1704 const unsigned MaxVal, 1705 const StringRef ErrMsg); 1706 ParseStatus parseSwizzle(OperandVector &Operands); 1707 bool parseSwizzleOffset(int64_t &Imm); 1708 bool parseSwizzleMacro(int64_t &Imm); 1709 bool parseSwizzleQuadPerm(int64_t &Imm); 1710 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1711 bool parseSwizzleBroadcast(int64_t &Imm); 1712 bool parseSwizzleSwap(int64_t &Imm); 1713 bool parseSwizzleReverse(int64_t &Imm); 1714 1715 ParseStatus parseGPRIdxMode(OperandVector &Operands); 1716 int64_t parseGPRIdxMacro(); 1717 1718 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1719 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1720 1721 ParseStatus parseOModSI(OperandVector &Operands); 1722 1723 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1724 OptionalImmIndexMap &OptionalIdx); 1725 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1726 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1727 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1728 void cvtVOPD(MCInst &Inst, const OperandVector &Operands); 1729 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 1730 OptionalImmIndexMap &OptionalIdx); 1731 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1732 OptionalImmIndexMap &OptionalIdx); 1733 1734 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1735 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1736 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1737 1738 bool parseDimId(unsigned &Encoding); 1739 ParseStatus parseDim(OperandVector &Operands); 1740 bool convertDppBoundCtrl(int64_t &BoundCtrl); 1741 ParseStatus parseDPP8(OperandVector &Operands); 1742 ParseStatus parseDPPCtrl(OperandVector &Operands); 1743 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1744 int64_t parseDPPCtrlSel(StringRef Ctrl); 1745 int64_t parseDPPCtrlPerm(); 1746 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1747 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1748 cvtDPP(Inst, Operands, true); 1749 } 1750 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1751 bool IsDPP8 = false); 1752 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1753 cvtVOP3DPP(Inst, Operands, true); 1754 } 1755 1756 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix, 1757 AMDGPUOperand::ImmTy Type); 1758 ParseStatus parseSDWADstUnused(OperandVector &Operands); 1759 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1760 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1761 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1762 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1763 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1764 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1765 uint64_t BasicInstType, 1766 bool 
SkipDstVcc = false, 1767 bool SkipSrcVcc = false); 1768 1769 ParseStatus parseEndpgm(OperandVector &Operands); 1770 1771 ParseStatus parseVOPD(OperandVector &Operands); 1772 }; 1773 1774 } // end anonymous namespace 1775 1776 // May be called with integer type with equivalent bitwidth. 1777 static const fltSemantics *getFltSemantics(unsigned Size) { 1778 switch (Size) { 1779 case 4: 1780 return &APFloat::IEEEsingle(); 1781 case 8: 1782 return &APFloat::IEEEdouble(); 1783 case 2: 1784 return &APFloat::IEEEhalf(); 1785 default: 1786 llvm_unreachable("unsupported fp type"); 1787 } 1788 } 1789 1790 static const fltSemantics *getFltSemantics(MVT VT) { 1791 return getFltSemantics(VT.getSizeInBits() / 8); 1792 } 1793 1794 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1795 switch (OperandType) { 1796 case AMDGPU::OPERAND_REG_IMM_INT32: 1797 case AMDGPU::OPERAND_REG_IMM_FP32: 1798 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1799 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1800 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1801 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1802 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1803 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1804 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1805 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1806 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1807 case AMDGPU::OPERAND_KIMM32: 1808 return &APFloat::IEEEsingle(); 1809 case AMDGPU::OPERAND_REG_IMM_INT64: 1810 case AMDGPU::OPERAND_REG_IMM_FP64: 1811 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1812 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1813 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1814 return &APFloat::IEEEdouble(); 1815 case AMDGPU::OPERAND_REG_IMM_INT16: 1816 case AMDGPU::OPERAND_REG_IMM_FP16: 1817 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1818 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1819 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1820 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1821 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1822 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1823 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1824 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1825 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1826 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1827 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1828 case AMDGPU::OPERAND_KIMM16: 1829 return &APFloat::IEEEhalf(); 1830 default: 1831 llvm_unreachable("unsupported fp type"); 1832 } 1833 } 1834 1835 //===----------------------------------------------------------------------===// 1836 // Operand 1837 //===----------------------------------------------------------------------===// 1838 1839 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1840 bool Lost; 1841 1842 // Convert the literal to the type's floating-point semantics 1843 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1844 APFloat::rmNearestTiesToEven, 1845 &Lost); 1846 // We allow precision loss but not overflow or underflow 1847 if (Status != APFloat::opOK && 1848 Lost && 1849 ((Status & APFloat::opOverflow) != 0 || 1850 (Status & APFloat::opUnderflow) != 0)) { 1851 return false; 1852 } 1853 1854 return true; 1855 } 1856 1857 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1858 return isUIntN(Size, Val) || isIntN(Size, Val); 1859 } 1860 1861 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1862 if (VT.getScalarType() == MVT::i16) { 1863 // FP immediate values are broken. 1864 return isInlinableIntLiteral(Val); 1865 } 1866 1867 // f16/v2f16 operands work correctly for all values.
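  // Rough illustration only (the authoritative checks live in AMDGPUBaseInfo):
  // integer 16-bit operands accept just the integer inline range -16..64,
  // while f16/v2f16 operands additionally accept the FP inline constants
  // (0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0 and, when HasInv2Pi is set,
  // 1/(2*pi)).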
1868 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1869 } 1870 1871 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1872 1873 // This is a hack to enable named inline values like 1874 // shared_base with both 32-bit and 64-bit operands. 1875 // Note that these values are defined as 1876 // 32-bit operands only. 1877 if (isInlineValue()) { 1878 return true; 1879 } 1880 1881 if (!isImmTy(ImmTyNone)) { 1882 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1883 return false; 1884 } 1885 // TODO: We should avoid using host float here. It would be better to 1886 // check the float bit values which is what a few other places do. 1887 // We've had bot failures before due to weird NaN support on mips hosts. 1888 1889 APInt Literal(64, Imm.Val); 1890 1891 if (Imm.IsFPImm) { // We got fp literal token 1892 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1893 return AMDGPU::isInlinableLiteral64(Imm.Val, 1894 AsmParser->hasInv2PiInlineImm()); 1895 } 1896 1897 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1898 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1899 return false; 1900 1901 if (type.getScalarSizeInBits() == 16) { 1902 return isInlineableLiteralOp16( 1903 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1904 type, AsmParser->hasInv2PiInlineImm()); 1905 } 1906 1907 // Check if single precision literal is inlinable 1908 return AMDGPU::isInlinableLiteral32( 1909 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1910 AsmParser->hasInv2PiInlineImm()); 1911 } 1912 1913 // We got int literal token. 1914 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1915 return AMDGPU::isInlinableLiteral64(Imm.Val, 1916 AsmParser->hasInv2PiInlineImm()); 1917 } 1918 1919 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1920 return false; 1921 } 1922 1923 if (type.getScalarSizeInBits() == 16) { 1924 return isInlineableLiteralOp16( 1925 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1926 type, AsmParser->hasInv2PiInlineImm()); 1927 } 1928 1929 return AMDGPU::isInlinableLiteral32( 1930 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1931 AsmParser->hasInv2PiInlineImm()); 1932 } 1933 1934 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1935 // Check that this immediate can be added as literal 1936 if (!isImmTy(ImmTyNone)) { 1937 return false; 1938 } 1939 1940 if (!Imm.IsFPImm) { 1941 // We got int literal token. 1942 1943 if (type == MVT::f64 && hasFPModifiers()) { 1944 // Cannot apply fp modifiers to int literals preserving the same semantics 1945 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1946 // disable these cases. 1947 return false; 1948 } 1949 1950 unsigned Size = type.getSizeInBits(); 1951 if (Size == 64) 1952 Size = 32; 1953 1954 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1955 // types. 1956 return isSafeTruncation(Imm.Val, Size); 1957 } 1958 1959 // We got fp literal token 1960 if (type == MVT::f64) { // Expected 64-bit fp operand 1961 // We would set the low 32 bits of the literal to zeroes, but we accept such literals 1962 return true; 1963 } 1964 1965 if (type == MVT::i64) { // Expected 64-bit int operand 1966 // We don't allow fp literals in 64-bit integer instructions. It is 1967 // unclear how we should encode them.
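    // For example, an FP literal such as 1.5 used where a 64-bit integer
    // source is expected is rejected here instead of being silently bit-cast
    // to an integer encoding.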
1968 return false; 1969 } 1970 1971 // We allow fp literals with f16x2 operands assuming that the specified 1972 // literal goes into the lower half and the upper half is zero. We also 1973 // require that the literal may be losslessly converted to f16. 1974 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1975 (type == MVT::v2i16)? MVT::i16 : 1976 (type == MVT::v2f32)? MVT::f32 : type; 1977 1978 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1979 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1980 } 1981 1982 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1983 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1984 } 1985 1986 bool AMDGPUOperand::isVRegWithInputMods() const { 1987 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1988 // GFX90A allows DPP on 64-bit operands. 1989 (isRegClass(AMDGPU::VReg_64RegClassID) && 1990 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1991 } 1992 1993 bool AMDGPUOperand::isT16VRegWithInputMods() const { 1994 return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID); 1995 } 1996 1997 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1998 if (AsmParser->isVI()) 1999 return isVReg32(); 2000 else if (AsmParser->isGFX9Plus()) 2001 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2002 else 2003 return false; 2004 } 2005 2006 bool AMDGPUOperand::isSDWAFP16Operand() const { 2007 return isSDWAOperand(MVT::f16); 2008 } 2009 2010 bool AMDGPUOperand::isSDWAFP32Operand() const { 2011 return isSDWAOperand(MVT::f32); 2012 } 2013 2014 bool AMDGPUOperand::isSDWAInt16Operand() const { 2015 return isSDWAOperand(MVT::i16); 2016 } 2017 2018 bool AMDGPUOperand::isSDWAInt32Operand() const { 2019 return isSDWAOperand(MVT::i32); 2020 } 2021 2022 bool AMDGPUOperand::isBoolReg() const { 2023 auto FB = AsmParser->getFeatureBits(); 2024 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2025 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2026 } 2027 2028 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2029 { 2030 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2031 assert(Size == 2 || Size == 4 || Size == 8); 2032 2033 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2034 2035 if (Imm.Mods.Abs) { 2036 Val &= ~FpSignMask; 2037 } 2038 if (Imm.Mods.Neg) { 2039 Val ^= FpSignMask; 2040 } 2041 2042 return Val; 2043 } 2044 2045 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2046 if (isExpr()) { 2047 Inst.addOperand(MCOperand::createExpr(Expr)); 2048 return; 2049 } 2050 2051 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2052 Inst.getNumOperands())) { 2053 addLiteralImmOperand(Inst, Imm.Val, 2054 ApplyModifiers & 2055 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2056 } else { 2057 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2058 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2059 setImmKindNone(); 2060 } 2061 } 2062 2063 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2064 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2065 auto OpNum = Inst.getNumOperands(); 2066 // Check that this operand accepts literals 2067 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2068 2069 if (ApplyModifiers) { 2070 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2071 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2072 Val = applyInputFPModifiers(Val, Size); 2073 } 2074 2075 APInt Literal(64, Val); 2076 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType; 2077 2078 if (Imm.IsFPImm) { // We got fp literal token 2079 switch (OpTy) { 2080 case AMDGPU::OPERAND_REG_IMM_INT64: 2081 case AMDGPU::OPERAND_REG_IMM_FP64: 2082 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2083 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2084 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2085 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2086 AsmParser->hasInv2PiInlineImm())) { 2087 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2088 setImmKindConst(); 2089 return; 2090 } 2091 2092 // Non-inlineable 2093 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2094 // For fp operands we check if low 32 bits are zeros 2095 if (Literal.getLoBits(32) != 0) { 2096 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2097 "Can't encode literal as exact 64-bit floating-point operand. " 2098 "Low 32-bits will be set to zero"); 2099 } 2100 2101 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2102 setImmKindLiteral(); 2103 return; 2104 } 2105 2106 // We don't allow fp literals in 64-bit integer instructions. It is 2107 // unclear how we should encode them. This case should be checked earlier 2108 // in predicate methods (isLiteralImm()) 2109 llvm_unreachable("fp literal in 64-bit integer instruction."); 2110 2111 case AMDGPU::OPERAND_REG_IMM_INT32: 2112 case AMDGPU::OPERAND_REG_IMM_FP32: 2113 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2114 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2115 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2116 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2117 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2118 case AMDGPU::OPERAND_REG_IMM_INT16: 2119 case AMDGPU::OPERAND_REG_IMM_FP16: 2120 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2121 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2122 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2123 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2124 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2125 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2126 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2127 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2128 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2129 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2130 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2131 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2132 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2133 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2134 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2135 case AMDGPU::OPERAND_KIMM32: 2136 case AMDGPU::OPERAND_KIMM16: { 2137 bool lost; 2138 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2139 // Convert the literal to the operand's floating-point semantics 2140 FPLiteral.convert(*getOpFltSemantics(OpTy), 2141 APFloat::rmNearestTiesToEven, &lost); 2142 // We allow precision loss but not overflow or underflow. This should be 2143 // checked earlier in isLiteralImm() 2144 2145 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2146 Inst.addOperand(MCOperand::createImm(ImmVal)); 2147 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) { 2148 setImmKindMandatoryLiteral(); 2149 } else { 2150 setImmKindLiteral(); 2151 } 2152 return; 2153 } 2154 default: 2155 llvm_unreachable("invalid operand size"); 2156 } 2157 2158 return; 2159 } 2160 2161 // We got int literal token. 2162 // Only sign extend inline immediates.
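  // Broadly: inlinable values are emitted unchanged, anything else is
  // truncated to the operand width and emitted as a literal, and KImm
  // operands always take the mandatory-literal path.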
2163 switch (OpTy) { 2164 case AMDGPU::OPERAND_REG_IMM_INT32: 2165 case AMDGPU::OPERAND_REG_IMM_FP32: 2166 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2167 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2168 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2169 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2170 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2171 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2172 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2173 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2174 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2175 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2176 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2177 if (isSafeTruncation(Val, 32) && 2178 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2179 AsmParser->hasInv2PiInlineImm())) { 2180 Inst.addOperand(MCOperand::createImm(Val)); 2181 setImmKindConst(); 2182 return; 2183 } 2184 2185 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2186 setImmKindLiteral(); 2187 return; 2188 2189 case AMDGPU::OPERAND_REG_IMM_INT64: 2190 case AMDGPU::OPERAND_REG_IMM_FP64: 2191 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2192 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2193 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2194 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2195 Inst.addOperand(MCOperand::createImm(Val)); 2196 setImmKindConst(); 2197 return; 2198 } 2199 2200 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2201 setImmKindLiteral(); 2202 return; 2203 2204 case AMDGPU::OPERAND_REG_IMM_INT16: 2205 case AMDGPU::OPERAND_REG_IMM_FP16: 2206 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2207 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2208 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2209 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2210 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2211 if (isSafeTruncation(Val, 16) && 2212 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2213 AsmParser->hasInv2PiInlineImm())) { 2214 Inst.addOperand(MCOperand::createImm(Val)); 2215 setImmKindConst(); 2216 return; 2217 } 2218 2219 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2220 setImmKindLiteral(); 2221 return; 2222 2223 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2224 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2225 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2226 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2227 assert(isSafeTruncation(Val, 16)); 2228 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2229 AsmParser->hasInv2PiInlineImm())); 2230 2231 Inst.addOperand(MCOperand::createImm(Val)); 2232 return; 2233 } 2234 case AMDGPU::OPERAND_KIMM32: 2235 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2236 setImmKindMandatoryLiteral(); 2237 return; 2238 case AMDGPU::OPERAND_KIMM16: 2239 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2240 setImmKindMandatoryLiteral(); 2241 return; 2242 default: 2243 llvm_unreachable("invalid operand size"); 2244 } 2245 } 2246 2247 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2248 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2249 } 2250 2251 bool AMDGPUOperand::isInlineValue() const { 2252 return isRegKind() && ::isInlineValue(getReg()); 2253 } 2254 2255 //===----------------------------------------------------------------------===// 2256 // AsmParser 2257 //===----------------------------------------------------------------------===// 2258 2259 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2260 if (Is == IS_VGPR) { 2261 switch (RegWidth) { 2262 default: return -1; 2263 
case 32: 2264 return AMDGPU::VGPR_32RegClassID; 2265 case 64: 2266 return AMDGPU::VReg_64RegClassID; 2267 case 96: 2268 return AMDGPU::VReg_96RegClassID; 2269 case 128: 2270 return AMDGPU::VReg_128RegClassID; 2271 case 160: 2272 return AMDGPU::VReg_160RegClassID; 2273 case 192: 2274 return AMDGPU::VReg_192RegClassID; 2275 case 224: 2276 return AMDGPU::VReg_224RegClassID; 2277 case 256: 2278 return AMDGPU::VReg_256RegClassID; 2279 case 288: 2280 return AMDGPU::VReg_288RegClassID; 2281 case 320: 2282 return AMDGPU::VReg_320RegClassID; 2283 case 352: 2284 return AMDGPU::VReg_352RegClassID; 2285 case 384: 2286 return AMDGPU::VReg_384RegClassID; 2287 case 512: 2288 return AMDGPU::VReg_512RegClassID; 2289 case 1024: 2290 return AMDGPU::VReg_1024RegClassID; 2291 } 2292 } else if (Is == IS_TTMP) { 2293 switch (RegWidth) { 2294 default: return -1; 2295 case 32: 2296 return AMDGPU::TTMP_32RegClassID; 2297 case 64: 2298 return AMDGPU::TTMP_64RegClassID; 2299 case 128: 2300 return AMDGPU::TTMP_128RegClassID; 2301 case 256: 2302 return AMDGPU::TTMP_256RegClassID; 2303 case 512: 2304 return AMDGPU::TTMP_512RegClassID; 2305 } 2306 } else if (Is == IS_SGPR) { 2307 switch (RegWidth) { 2308 default: return -1; 2309 case 32: 2310 return AMDGPU::SGPR_32RegClassID; 2311 case 64: 2312 return AMDGPU::SGPR_64RegClassID; 2313 case 96: 2314 return AMDGPU::SGPR_96RegClassID; 2315 case 128: 2316 return AMDGPU::SGPR_128RegClassID; 2317 case 160: 2318 return AMDGPU::SGPR_160RegClassID; 2319 case 192: 2320 return AMDGPU::SGPR_192RegClassID; 2321 case 224: 2322 return AMDGPU::SGPR_224RegClassID; 2323 case 256: 2324 return AMDGPU::SGPR_256RegClassID; 2325 case 288: 2326 return AMDGPU::SGPR_288RegClassID; 2327 case 320: 2328 return AMDGPU::SGPR_320RegClassID; 2329 case 352: 2330 return AMDGPU::SGPR_352RegClassID; 2331 case 384: 2332 return AMDGPU::SGPR_384RegClassID; 2333 case 512: 2334 return AMDGPU::SGPR_512RegClassID; 2335 } 2336 } else if (Is == IS_AGPR) { 2337 switch (RegWidth) { 2338 default: return -1; 2339 case 32: 2340 return AMDGPU::AGPR_32RegClassID; 2341 case 64: 2342 return AMDGPU::AReg_64RegClassID; 2343 case 96: 2344 return AMDGPU::AReg_96RegClassID; 2345 case 128: 2346 return AMDGPU::AReg_128RegClassID; 2347 case 160: 2348 return AMDGPU::AReg_160RegClassID; 2349 case 192: 2350 return AMDGPU::AReg_192RegClassID; 2351 case 224: 2352 return AMDGPU::AReg_224RegClassID; 2353 case 256: 2354 return AMDGPU::AReg_256RegClassID; 2355 case 288: 2356 return AMDGPU::AReg_288RegClassID; 2357 case 320: 2358 return AMDGPU::AReg_320RegClassID; 2359 case 352: 2360 return AMDGPU::AReg_352RegClassID; 2361 case 384: 2362 return AMDGPU::AReg_384RegClassID; 2363 case 512: 2364 return AMDGPU::AReg_512RegClassID; 2365 case 1024: 2366 return AMDGPU::AReg_1024RegClassID; 2367 } 2368 } 2369 return -1; 2370 } 2371 2372 static unsigned getSpecialRegForName(StringRef RegName) { 2373 return StringSwitch<unsigned>(RegName) 2374 .Case("exec", AMDGPU::EXEC) 2375 .Case("vcc", AMDGPU::VCC) 2376 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2377 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2378 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2379 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2380 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2381 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2382 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2383 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2384 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2385 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2386 .Case("pops_exiting_wave_id", 
AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2387 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2388 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2389 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2390 .Case("m0", AMDGPU::M0) 2391 .Case("vccz", AMDGPU::SRC_VCCZ) 2392 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2393 .Case("execz", AMDGPU::SRC_EXECZ) 2394 .Case("src_execz", AMDGPU::SRC_EXECZ) 2395 .Case("scc", AMDGPU::SRC_SCC) 2396 .Case("src_scc", AMDGPU::SRC_SCC) 2397 .Case("tba", AMDGPU::TBA) 2398 .Case("tma", AMDGPU::TMA) 2399 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2400 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2401 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2402 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2403 .Case("vcc_lo", AMDGPU::VCC_LO) 2404 .Case("vcc_hi", AMDGPU::VCC_HI) 2405 .Case("exec_lo", AMDGPU::EXEC_LO) 2406 .Case("exec_hi", AMDGPU::EXEC_HI) 2407 .Case("tma_lo", AMDGPU::TMA_LO) 2408 .Case("tma_hi", AMDGPU::TMA_HI) 2409 .Case("tba_lo", AMDGPU::TBA_LO) 2410 .Case("tba_hi", AMDGPU::TBA_HI) 2411 .Case("pc", AMDGPU::PC_REG) 2412 .Case("null", AMDGPU::SGPR_NULL) 2413 .Default(AMDGPU::NoRegister); 2414 } 2415 2416 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, 2417 SMLoc &EndLoc, bool RestoreOnFailure) { 2418 auto R = parseRegister(); 2419 if (!R) return true; 2420 assert(R->isReg()); 2421 RegNo = R->getReg(); 2422 StartLoc = R->getStartLoc(); 2423 EndLoc = R->getEndLoc(); 2424 return false; 2425 } 2426 2427 bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc, 2428 SMLoc &EndLoc) { 2429 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2430 } 2431 2432 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(MCRegister &RegNo, 2433 SMLoc &StartLoc, 2434 SMLoc &EndLoc) { 2435 bool Result = 2436 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2437 bool PendingErrors = getParser().hasPendingError(); 2438 getParser().clearPendingErrors(); 2439 if (PendingErrors) 2440 return MatchOperand_ParseFail; 2441 if (Result) 2442 return MatchOperand_NoMatch; 2443 return MatchOperand_Success; 2444 } 2445 2446 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2447 RegisterKind RegKind, unsigned Reg1, 2448 SMLoc Loc) { 2449 switch (RegKind) { 2450 case IS_SPECIAL: 2451 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2452 Reg = AMDGPU::EXEC; 2453 RegWidth = 64; 2454 return true; 2455 } 2456 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2457 Reg = AMDGPU::FLAT_SCR; 2458 RegWidth = 64; 2459 return true; 2460 } 2461 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2462 Reg = AMDGPU::XNACK_MASK; 2463 RegWidth = 64; 2464 return true; 2465 } 2466 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2467 Reg = AMDGPU::VCC; 2468 RegWidth = 64; 2469 return true; 2470 } 2471 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2472 Reg = AMDGPU::TBA; 2473 RegWidth = 64; 2474 return true; 2475 } 2476 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2477 Reg = AMDGPU::TMA; 2478 RegWidth = 64; 2479 return true; 2480 } 2481 Error(Loc, "register does not fit in the list"); 2482 return false; 2483 case IS_VGPR: 2484 case IS_SGPR: 2485 case IS_AGPR: 2486 case IS_TTMP: 2487 if (Reg1 != Reg + RegWidth / 32) { 2488 Error(Loc, "registers in a list must have consecutive indices"); 2489 return false; 2490 } 2491 RegWidth += 32; 2492 return true; 2493 default: 2494 llvm_unreachable("unexpected register kind"); 2495 } 2496 } 2497 2498 struct 
RegInfo { 2499 StringLiteral Name; 2500 RegisterKind Kind; 2501 }; 2502 2503 static constexpr RegInfo RegularRegisters[] = { 2504 {{"v"}, IS_VGPR}, 2505 {{"s"}, IS_SGPR}, 2506 {{"ttmp"}, IS_TTMP}, 2507 {{"acc"}, IS_AGPR}, 2508 {{"a"}, IS_AGPR}, 2509 }; 2510 2511 static bool isRegularReg(RegisterKind Kind) { 2512 return Kind == IS_VGPR || 2513 Kind == IS_SGPR || 2514 Kind == IS_TTMP || 2515 Kind == IS_AGPR; 2516 } 2517 2518 static const RegInfo* getRegularRegInfo(StringRef Str) { 2519 for (const RegInfo &Reg : RegularRegisters) 2520 if (Str.startswith(Reg.Name)) 2521 return &Reg; 2522 return nullptr; 2523 } 2524 2525 static bool getRegNum(StringRef Str, unsigned& Num) { 2526 return !Str.getAsInteger(10, Num); 2527 } 2528 2529 bool 2530 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2531 const AsmToken &NextToken) const { 2532 2533 // A list of consecutive registers: [s0,s1,s2,s3] 2534 if (Token.is(AsmToken::LBrac)) 2535 return true; 2536 2537 if (!Token.is(AsmToken::Identifier)) 2538 return false; 2539 2540 // A single register like s0 or a range of registers like s[0:1] 2541 2542 StringRef Str = Token.getString(); 2543 const RegInfo *Reg = getRegularRegInfo(Str); 2544 if (Reg) { 2545 StringRef RegName = Reg->Name; 2546 StringRef RegSuffix = Str.substr(RegName.size()); 2547 if (!RegSuffix.empty()) { 2548 unsigned Num; 2549 // A single register with an index: rXX 2550 if (getRegNum(RegSuffix, Num)) 2551 return true; 2552 } else { 2553 // A range of registers: r[XX:YY]. 2554 if (NextToken.is(AsmToken::LBrac)) 2555 return true; 2556 } 2557 } 2558 2559 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2560 } 2561 2562 bool 2563 AMDGPUAsmParser::isRegister() 2564 { 2565 return isRegister(getToken(), peekToken()); 2566 } 2567 2568 unsigned 2569 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2570 unsigned RegNum, 2571 unsigned RegWidth, 2572 SMLoc Loc) { 2573 2574 assert(isRegularReg(RegKind)); 2575 2576 unsigned AlignSize = 1; 2577 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2578 // SGPR and TTMP registers must be aligned. 2579 // Max required alignment is 4 dwords. 
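    // For example, under this rule a 64-bit SGPR pair must start at an even
    // index: s[2:3] is accepted, while s[1:2] is rejected below with
    // "invalid register alignment". Ranges of 128 bits or more must start at
    // a multiple of 4.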
2580 AlignSize = std::min(RegWidth / 32, 4u); 2581 } 2582 2583 if (RegNum % AlignSize != 0) { 2584 Error(Loc, "invalid register alignment"); 2585 return AMDGPU::NoRegister; 2586 } 2587 2588 unsigned RegIdx = RegNum / AlignSize; 2589 int RCID = getRegClass(RegKind, RegWidth); 2590 if (RCID == -1) { 2591 Error(Loc, "invalid or unsupported register size"); 2592 return AMDGPU::NoRegister; 2593 } 2594 2595 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2596 const MCRegisterClass RC = TRI->getRegClass(RCID); 2597 if (RegIdx >= RC.getNumRegs()) { 2598 Error(Loc, "register index is out of range"); 2599 return AMDGPU::NoRegister; 2600 } 2601 2602 return RC.getRegister(RegIdx); 2603 } 2604 2605 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2606 int64_t RegLo, RegHi; 2607 if (!skipToken(AsmToken::LBrac, "missing register index")) 2608 return false; 2609 2610 SMLoc FirstIdxLoc = getLoc(); 2611 SMLoc SecondIdxLoc; 2612 2613 if (!parseExpr(RegLo)) 2614 return false; 2615 2616 if (trySkipToken(AsmToken::Colon)) { 2617 SecondIdxLoc = getLoc(); 2618 if (!parseExpr(RegHi)) 2619 return false; 2620 } else { 2621 RegHi = RegLo; 2622 } 2623 2624 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2625 return false; 2626 2627 if (!isUInt<32>(RegLo)) { 2628 Error(FirstIdxLoc, "invalid register index"); 2629 return false; 2630 } 2631 2632 if (!isUInt<32>(RegHi)) { 2633 Error(SecondIdxLoc, "invalid register index"); 2634 return false; 2635 } 2636 2637 if (RegLo > RegHi) { 2638 Error(FirstIdxLoc, "first register index should not exceed second index"); 2639 return false; 2640 } 2641 2642 Num = static_cast<unsigned>(RegLo); 2643 RegWidth = 32 * ((RegHi - RegLo) + 1); 2644 return true; 2645 } 2646 2647 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2648 unsigned &RegNum, unsigned &RegWidth, 2649 SmallVectorImpl<AsmToken> &Tokens) { 2650 assert(isToken(AsmToken::Identifier)); 2651 unsigned Reg = getSpecialRegForName(getTokenStr()); 2652 if (Reg) { 2653 RegNum = 0; 2654 RegWidth = 32; 2655 RegKind = IS_SPECIAL; 2656 Tokens.push_back(getToken()); 2657 lex(); // skip register name 2658 } 2659 return Reg; 2660 } 2661 2662 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2663 unsigned &RegNum, unsigned &RegWidth, 2664 SmallVectorImpl<AsmToken> &Tokens) { 2665 assert(isToken(AsmToken::Identifier)); 2666 StringRef RegName = getTokenStr(); 2667 auto Loc = getLoc(); 2668 2669 const RegInfo *RI = getRegularRegInfo(RegName); 2670 if (!RI) { 2671 Error(Loc, "invalid register name"); 2672 return AMDGPU::NoRegister; 2673 } 2674 2675 Tokens.push_back(getToken()); 2676 lex(); // skip register name 2677 2678 RegKind = RI->Kind; 2679 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2680 if (!RegSuffix.empty()) { 2681 // Single 32-bit register: vXX. 2682 if (!getRegNum(RegSuffix, RegNum)) { 2683 Error(Loc, "invalid register index"); 2684 return AMDGPU::NoRegister; 2685 } 2686 RegWidth = 32; 2687 } else { 2688 // Range of registers: v[XX:YY]. ":YY" is optional. 
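    // For example, "v[4:7]" yields a 128-bit tuple of four VGPRs, and "v[4]"
    // is accepted as shorthand for "v[4:4]".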
2689 if (!ParseRegRange(RegNum, RegWidth)) 2690 return AMDGPU::NoRegister; 2691 } 2692 2693 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2694 } 2695 2696 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2697 unsigned &RegWidth, 2698 SmallVectorImpl<AsmToken> &Tokens) { 2699 unsigned Reg = AMDGPU::NoRegister; 2700 auto ListLoc = getLoc(); 2701 2702 if (!skipToken(AsmToken::LBrac, 2703 "expected a register or a list of registers")) { 2704 return AMDGPU::NoRegister; 2705 } 2706 2707 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2708 2709 auto Loc = getLoc(); 2710 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2711 return AMDGPU::NoRegister; 2712 if (RegWidth != 32) { 2713 Error(Loc, "expected a single 32-bit register"); 2714 return AMDGPU::NoRegister; 2715 } 2716 2717 for (; trySkipToken(AsmToken::Comma); ) { 2718 RegisterKind NextRegKind; 2719 unsigned NextReg, NextRegNum, NextRegWidth; 2720 Loc = getLoc(); 2721 2722 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2723 NextRegNum, NextRegWidth, 2724 Tokens)) { 2725 return AMDGPU::NoRegister; 2726 } 2727 if (NextRegWidth != 32) { 2728 Error(Loc, "expected a single 32-bit register"); 2729 return AMDGPU::NoRegister; 2730 } 2731 if (NextRegKind != RegKind) { 2732 Error(Loc, "registers in a list must be of the same kind"); 2733 return AMDGPU::NoRegister; 2734 } 2735 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2736 return AMDGPU::NoRegister; 2737 } 2738 2739 if (!skipToken(AsmToken::RBrac, 2740 "expected a comma or a closing square bracket")) { 2741 return AMDGPU::NoRegister; 2742 } 2743 2744 if (isRegularReg(RegKind)) 2745 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2746 2747 return Reg; 2748 } 2749 2750 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2751 unsigned &RegNum, unsigned &RegWidth, 2752 SmallVectorImpl<AsmToken> &Tokens) { 2753 auto Loc = getLoc(); 2754 Reg = AMDGPU::NoRegister; 2755 2756 if (isToken(AsmToken::Identifier)) { 2757 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2758 if (Reg == AMDGPU::NoRegister) 2759 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2760 } else { 2761 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2762 } 2763 2764 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2765 if (Reg == AMDGPU::NoRegister) { 2766 assert(Parser.hasPendingError()); 2767 return false; 2768 } 2769 2770 if (!subtargetHasRegister(*TRI, Reg)) { 2771 if (Reg == AMDGPU::SGPR_NULL) { 2772 Error(Loc, "'null' operand is not supported on this GPU"); 2773 } else { 2774 Error(Loc, "register not available on this GPU"); 2775 } 2776 return false; 2777 } 2778 2779 return true; 2780 } 2781 2782 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2783 unsigned &RegNum, unsigned &RegWidth, 2784 bool RestoreOnFailure /*=false*/) { 2785 Reg = AMDGPU::NoRegister; 2786 2787 SmallVector<AsmToken, 1> Tokens; 2788 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2789 if (RestoreOnFailure) { 2790 while (!Tokens.empty()) { 2791 getLexer().UnLex(Tokens.pop_back_val()); 2792 } 2793 } 2794 return true; 2795 } 2796 return false; 2797 } 2798 2799 std::optional<StringRef> 2800 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2801 switch (RegKind) { 2802 case IS_VGPR: 2803 return StringRef(".amdgcn.next_free_vgpr"); 2804 case IS_SGPR: 2805 return StringRef(".amdgcn.next_free_sgpr"); 2806 default: 2807 return std::nullopt; 2808 } 2809 } 2810 2811 
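// The .amdgcn.next_free_{v,s}gpr symbols maintained below are absolute
// MCConstantExpr values that only ever grow: each parsed register bumps them
// to one past the highest register index seen so far (see
// updateGprCountSymbols), so hand-written assembly can refer to them later,
// for example when computing kernel descriptor register counts.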
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2812 auto SymbolName = getGprCountSymbolName(RegKind); 2813 assert(SymbolName && "initializing invalid register kind"); 2814 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2815 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2816 } 2817 2818 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2819 unsigned DwordRegIndex, 2820 unsigned RegWidth) { 2821 // Symbols are only defined for GCN targets 2822 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2823 return true; 2824 2825 auto SymbolName = getGprCountSymbolName(RegKind); 2826 if (!SymbolName) 2827 return true; 2828 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2829 2830 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2831 int64_t OldCount; 2832 2833 if (!Sym->isVariable()) 2834 return !Error(getLoc(), 2835 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2836 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2837 return !Error( 2838 getLoc(), 2839 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2840 2841 if (OldCount <= NewMax) 2842 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2843 2844 return true; 2845 } 2846 2847 std::unique_ptr<AMDGPUOperand> 2848 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2849 const auto &Tok = getToken(); 2850 SMLoc StartLoc = Tok.getLoc(); 2851 SMLoc EndLoc = Tok.getEndLoc(); 2852 RegisterKind RegKind; 2853 unsigned Reg, RegNum, RegWidth; 2854 2855 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2856 return nullptr; 2857 } 2858 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2859 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2860 return nullptr; 2861 } else 2862 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2863 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2864 } 2865 2866 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands, 2867 bool HasSP3AbsModifier) { 2868 // TODO: add syntactic sugar for 1/(2*PI) 2869 2870 if (isRegister()) 2871 return ParseStatus::NoMatch; 2872 assert(!isModifier()); 2873 2874 const auto& Tok = getToken(); 2875 const auto& NextTok = peekToken(); 2876 bool IsReal = Tok.is(AsmToken::Real); 2877 SMLoc S = getLoc(); 2878 bool Negate = false; 2879 2880 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2881 lex(); 2882 IsReal = true; 2883 Negate = true; 2884 } 2885 2886 if (IsReal) { 2887 // Floating-point expressions are not supported. 2888 // Can only allow floating-point literals with an 2889 // optional sign. 2890 2891 StringRef Num = getTokenStr(); 2892 lex(); 2893 2894 APFloat RealVal(APFloat::IEEEdouble()); 2895 auto roundMode = APFloat::rmNearestTiesToEven; 2896 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) 2897 return ParseStatus::Failure; 2898 if (Negate) 2899 RealVal.changeSign(); 2900 2901 Operands.push_back( 2902 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2903 AMDGPUOperand::ImmTyNone, true)); 2904 2905 return ParseStatus::Success; 2906 2907 } else { 2908 int64_t IntVal; 2909 const MCExpr *Expr; 2910 SMLoc S = getLoc(); 2911 2912 if (HasSP3AbsModifier) { 2913 // This is a workaround for handling expressions 2914 // as arguments of SP3 'abs' modifier, for example: 2915 // |1.0| 2916 // |-1| 2917 // |1+x| 2918 // This syntax is not compatible with syntax of standard 2919 // MC expressions (due to the trailing '|'). 
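      // Note that parsePrimaryExpr is used here rather than the full
      // parseExpression used in the branch below, so that parsing stops
      // before the trailing '|' that closes the SP3 'abs' modifier.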
2920 SMLoc EndLoc; 2921 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2922 return ParseStatus::Failure; 2923 } else { 2924 if (Parser.parseExpression(Expr)) 2925 return ParseStatus::Failure; 2926 } 2927 2928 if (Expr->evaluateAsAbsolute(IntVal)) { 2929 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2930 } else { 2931 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2932 } 2933 2934 return ParseStatus::Success; 2935 } 2936 2937 return ParseStatus::NoMatch; 2938 } 2939 2940 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2941 if (!isRegister()) 2942 return ParseStatus::NoMatch; 2943 2944 if (auto R = parseRegister()) { 2945 assert(R->isReg()); 2946 Operands.push_back(std::move(R)); 2947 return ParseStatus::Success; 2948 } 2949 return ParseStatus::Failure; 2950 } 2951 2952 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, 2953 bool HasSP3AbsMod) { 2954 ParseStatus Res = parseReg(Operands); 2955 if (!Res.isNoMatch()) 2956 return Res; 2957 if (isModifier()) 2958 return ParseStatus::NoMatch; 2959 return parseImm(Operands, HasSP3AbsMod); 2960 } 2961 2962 bool 2963 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2964 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2965 const auto &str = Token.getString(); 2966 return str == "abs" || str == "neg" || str == "sext"; 2967 } 2968 return false; 2969 } 2970 2971 bool 2972 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2973 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2974 } 2975 2976 bool 2977 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2978 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2979 } 2980 2981 bool 2982 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2983 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2984 } 2985 2986 // Check if this is an operand modifier or an opcode modifier 2987 // which may look like an expression but it is not. We should 2988 // avoid parsing these modifiers as expressions. Currently 2989 // recognized sequences are: 2990 // |...| 2991 // abs(...) 2992 // neg(...) 2993 // sext(...) 2994 // -reg 2995 // -|...| 2996 // -abs(...) 2997 // name:... 2998 // 2999 bool 3000 AMDGPUAsmParser::isModifier() { 3001 3002 AsmToken Tok = getToken(); 3003 AsmToken NextToken[2]; 3004 peekTokens(NextToken); 3005 3006 return isOperandModifier(Tok, NextToken[0]) || 3007 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3008 isOpcodeModifierWithVal(Tok, NextToken[0]); 3009 } 3010 3011 // Check if the current token is an SP3 'neg' modifier. 3012 // Currently this modifier is allowed in the following context: 3013 // 3014 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3015 // 2. Before an 'abs' modifier: -abs(...) 3016 // 3. Before an SP3 'abs' modifier: -|...| 3017 // 3018 // In all other cases "-" is handled as a part 3019 // of an expression that follows the sign. 3020 // 3021 // Note: When "-" is followed by an integer literal, 3022 // this is interpreted as integer negation rather 3023 // than a floating-point NEG modifier applied to N. 
3024 // Besides being counter-intuitive, such use of a floating-point 3025 // NEG modifier would have resulted in different meanings 3026 // of integer literals used with VOP1/2/C and VOP3, 3027 // for example: 3028 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3029 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3030 // Negative fp literals with preceding "-" are 3031 // handled likewise for uniformity 3032 // 3033 bool 3034 AMDGPUAsmParser::parseSP3NegModifier() { 3035 3036 AsmToken NextToken[2]; 3037 peekTokens(NextToken); 3038 3039 if (isToken(AsmToken::Minus) && 3040 (isRegister(NextToken[0], NextToken[1]) || 3041 NextToken[0].is(AsmToken::Pipe) || 3042 isId(NextToken[0], "abs"))) { 3043 lex(); 3044 return true; 3045 } 3046 3047 return false; 3048 } 3049 3050 ParseStatus 3051 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3052 bool AllowImm) { 3053 bool Neg, SP3Neg; 3054 bool Abs, SP3Abs; 3055 SMLoc Loc; 3056 3057 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3058 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) 3059 return Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3060 3061 SP3Neg = parseSP3NegModifier(); 3062 3063 Loc = getLoc(); 3064 Neg = trySkipId("neg"); 3065 if (Neg && SP3Neg) 3066 return Error(Loc, "expected register or immediate"); 3067 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3068 return ParseStatus::Failure; 3069 3070 Abs = trySkipId("abs"); 3071 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3072 return ParseStatus::Failure; 3073 3074 Loc = getLoc(); 3075 SP3Abs = trySkipToken(AsmToken::Pipe); 3076 if (Abs && SP3Abs) 3077 return Error(Loc, "expected register or immediate"); 3078 3079 ParseStatus Res; 3080 if (AllowImm) { 3081 Res = parseRegOrImm(Operands, SP3Abs); 3082 } else { 3083 Res = parseReg(Operands); 3084 } 3085 if (!Res.isSuccess()) 3086 return (SP3Neg || Neg || SP3Abs || Abs) ? ParseStatus::Failure : Res; 3087 3088 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3089 return ParseStatus::Failure; 3090 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3091 return ParseStatus::Failure; 3092 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3093 return ParseStatus::Failure; 3094 3095 AMDGPUOperand::Modifiers Mods; 3096 Mods.Abs = Abs || SP3Abs; 3097 Mods.Neg = Neg || SP3Neg; 3098 3099 if (Mods.hasFPModifiers()) { 3100 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3101 if (Op.isExpr()) 3102 return Error(Op.getStartLoc(), "expected an absolute expression"); 3103 Op.setModifiers(Mods); 3104 } 3105 return ParseStatus::Success; 3106 } 3107 3108 ParseStatus 3109 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3110 bool AllowImm) { 3111 bool Sext = trySkipId("sext"); 3112 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3113 return ParseStatus::Failure; 3114 3115 ParseStatus Res; 3116 if (AllowImm) { 3117 Res = parseRegOrImm(Operands); 3118 } else { 3119 Res = parseReg(Operands); 3120 } 3121 if (!Res.isSuccess()) 3122 return Sext ?
ParseStatus::Failure : Res; 3123 3124 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3125 return ParseStatus::Failure; 3126 3127 AMDGPUOperand::Modifiers Mods; 3128 Mods.Sext = Sext; 3129 3130 if (Mods.hasIntModifiers()) { 3131 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3132 if (Op.isExpr()) 3133 return Error(Op.getStartLoc(), "expected an absolute expression"); 3134 Op.setModifiers(Mods); 3135 } 3136 3137 return ParseStatus::Success; 3138 } 3139 3140 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3141 return parseRegOrImmWithFPInputMods(Operands, false); 3142 } 3143 3144 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3145 return parseRegOrImmWithIntInputMods(Operands, false); 3146 } 3147 3148 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3149 auto Loc = getLoc(); 3150 if (trySkipId("off")) { 3151 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3152 AMDGPUOperand::ImmTyOff, false)); 3153 return ParseStatus::Success; 3154 } 3155 3156 if (!isRegister()) 3157 return ParseStatus::NoMatch; 3158 3159 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3160 if (Reg) { 3161 Operands.push_back(std::move(Reg)); 3162 return ParseStatus::Success; 3163 } 3164 3165 return ParseStatus::Failure; 3166 } 3167 3168 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3169 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3170 3171 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3172 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3173 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3174 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3175 return Match_InvalidOperand; 3176 3177 if ((TSFlags & SIInstrFlags::VOP3) && 3178 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3179 getForcedEncodingSize() != 64) 3180 return Match_PreferE32; 3181 3182 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3183 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3184 // v_mac_f32/16 allow only dst_sel == DWORD; 3185 auto OpNum = 3186 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3187 const auto &Op = Inst.getOperand(OpNum); 3188 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3189 return Match_InvalidOperand; 3190 } 3191 } 3192 3193 return Match_Success; 3194 } 3195 3196 static ArrayRef<unsigned> getAllVariants() { 3197 static const unsigned Variants[] = { 3198 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3199 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3200 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3201 }; 3202 3203 return ArrayRef(Variants); 3204 } 3205 3206 // What asm variants we should check 3207 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3208 if (isForcedDPP() && isForcedVOP3()) { 3209 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3210 return ArrayRef(Variants); 3211 } 3212 if (getForcedEncodingSize() == 32) { 3213 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3214 return ArrayRef(Variants); 3215 } 3216 3217 if (isForcedVOP3()) { 3218 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3219 return ArrayRef(Variants); 3220 } 3221 3222 if (isForcedSDWA()) { 3223 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3224 AMDGPUAsmVariants::SDWA9}; 3225 return ArrayRef(Variants); 3226 } 3227 3228 if (isForcedDPP()) { 3229 static const unsigned 
Variants[] = {AMDGPUAsmVariants::DPP}; 3230 return ArrayRef(Variants); 3231 } 3232 3233 return getAllVariants(); 3234 } 3235 3236 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3237 if (isForcedDPP() && isForcedVOP3()) 3238 return "e64_dpp"; 3239 3240 if (getForcedEncodingSize() == 32) 3241 return "e32"; 3242 3243 if (isForcedVOP3()) 3244 return "e64"; 3245 3246 if (isForcedSDWA()) 3247 return "sdwa"; 3248 3249 if (isForcedDPP()) 3250 return "dpp"; 3251 3252 return ""; 3253 } 3254 3255 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3256 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3257 for (MCPhysReg Reg : Desc.implicit_uses()) { 3258 switch (Reg) { 3259 case AMDGPU::FLAT_SCR: 3260 case AMDGPU::VCC: 3261 case AMDGPU::VCC_LO: 3262 case AMDGPU::VCC_HI: 3263 case AMDGPU::M0: 3264 return Reg; 3265 default: 3266 break; 3267 } 3268 } 3269 return AMDGPU::NoRegister; 3270 } 3271 3272 // NB: This code is correct only when used to check constant 3273 // bus limitations because GFX7 supports no f16 inline constants. 3274 // Note that there are no cases when a GFX7 opcode violates 3275 // constant bus limitations due to the use of an f16 constant. 3276 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3277 unsigned OpIdx) const { 3278 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3279 3280 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) || 3281 AMDGPU::isKImmOperand(Desc, OpIdx)) { 3282 return false; 3283 } 3284 3285 const MCOperand &MO = Inst.getOperand(OpIdx); 3286 3287 int64_t Val = MO.getImm(); 3288 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3289 3290 switch (OpSize) { // expected operand size 3291 case 8: 3292 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3293 case 4: 3294 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3295 case 2: { 3296 const unsigned OperandType = Desc.operands()[OpIdx].OperandType; 3297 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3298 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3299 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3300 return AMDGPU::isInlinableIntLiteral(Val); 3301 3302 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3303 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3304 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3305 return AMDGPU::isInlinableIntLiteralV216(Val); 3306 3307 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3308 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3309 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3310 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3311 3312 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3313 } 3314 default: 3315 llvm_unreachable("invalid operand size"); 3316 } 3317 } 3318 3319 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3320 if (!isGFX10Plus()) 3321 return 1; 3322 3323 switch (Opcode) { 3324 // 64-bit shift instructions can use only one scalar value input 3325 case AMDGPU::V_LSHLREV_B64_e64: 3326 case AMDGPU::V_LSHLREV_B64_gfx10: 3327 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3328 case AMDGPU::V_LSHRREV_B64_e64: 3329 case AMDGPU::V_LSHRREV_B64_gfx10: 3330 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3331 case AMDGPU::V_ASHRREV_I64_e64: 3332 case AMDGPU::V_ASHRREV_I64_gfx10: 3333 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3334 case AMDGPU::V_LSHL_B64_e64: 3335 case AMDGPU::V_LSHR_B64_e64: 3336 case AMDGPU::V_ASHR_I64_e64: 3337 return 1; 3338 default: 3339 return 2; 3340 } 3341 } 3342 3343 constexpr
unsigned MAX_SRC_OPERANDS_NUM = 6; 3344 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>; 3345 3346 // Get regular operand indices in the same order as specified 3347 // in the instruction (but append mandatory literals to the end). 3348 static OperandIndices getSrcOperandIndices(unsigned Opcode, 3349 bool AddMandatoryLiterals = false) { 3350 3351 int16_t ImmIdx = 3352 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1; 3353 3354 if (isVOPD(Opcode)) { 3355 int16_t ImmDeferredIdx = 3356 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred) 3357 : -1; 3358 3359 return {getNamedOperandIdx(Opcode, OpName::src0X), 3360 getNamedOperandIdx(Opcode, OpName::vsrc1X), 3361 getNamedOperandIdx(Opcode, OpName::src0Y), 3362 getNamedOperandIdx(Opcode, OpName::vsrc1Y), 3363 ImmDeferredIdx, 3364 ImmIdx}; 3365 } 3366 3367 return {getNamedOperandIdx(Opcode, OpName::src0), 3368 getNamedOperandIdx(Opcode, OpName::src1), 3369 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx}; 3370 } 3371 3372 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3373 const MCOperand &MO = Inst.getOperand(OpIdx); 3374 if (MO.isImm()) { 3375 return !isInlineConstant(Inst, OpIdx); 3376 } else if (MO.isReg()) { 3377 auto Reg = MO.getReg(); 3378 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3379 auto PReg = mc2PseudoReg(Reg); 3380 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3381 } else { 3382 return true; 3383 } 3384 } 3385 3386 bool AMDGPUAsmParser::validateConstantBusLimitations( 3387 const MCInst &Inst, const OperandVector &Operands) { 3388 const unsigned Opcode = Inst.getOpcode(); 3389 const MCInstrDesc &Desc = MII.get(Opcode); 3390 unsigned LastSGPR = AMDGPU::NoRegister; 3391 unsigned ConstantBusUseCount = 0; 3392 unsigned NumLiterals = 0; 3393 unsigned LiteralSize; 3394 3395 if (!(Desc.TSFlags & 3396 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3397 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) && 3398 !isVOPD(Opcode)) 3399 return true; 3400 3401 // Check special imm operands (used by madmk, etc) 3402 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) { 3403 ++NumLiterals; 3404 LiteralSize = 4; 3405 } 3406 3407 SmallDenseSet<unsigned> SGPRsUsed; 3408 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3409 if (SGPRUsed != AMDGPU::NoRegister) { 3410 SGPRsUsed.insert(SGPRUsed); 3411 ++ConstantBusUseCount; 3412 } 3413 3414 OperandIndices OpIndices = getSrcOperandIndices(Opcode); 3415 3416 for (int OpIdx : OpIndices) { 3417 if (OpIdx == -1) 3418 continue; 3419 3420 const MCOperand &MO = Inst.getOperand(OpIdx); 3421 if (usesConstantBus(Inst, OpIdx)) { 3422 if (MO.isReg()) { 3423 LastSGPR = mc2PseudoReg(MO.getReg()); 3424 // Pairs of registers with partial intersections like these 3425 // s0, s[0:1] 3426 // flat_scratch_lo, flat_scratch 3427 // flat_scratch_lo, flat_scratch_hi 3428 // are theoretically valid but they are disabled anyway. 3429 // Note that this code mimics SIInstrInfo::verifyInstruction 3430 if (SGPRsUsed.insert(LastSGPR).second) { 3431 ++ConstantBusUseCount; 3432 } 3433 } else { // Expression or a literal 3434 3435 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3436 continue; // special operand like VINTERP attr_chan 3437 3438 // An instruction may use only one literal. 3439 // This has been validated in a previous step. 3440 // See validateVOPLiteral. 3441 // This literal may be used as more than one operand.
3442 // If all these operands are of the same size, 3443 // this literal counts as one scalar value. 3444 // Otherwise it counts as 2 scalar values. 3445 // See "GFX10 Shader Programming", section 3.6.2.3. 3446 3447 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3448 if (Size < 4) 3449 Size = 4; 3450 3451 if (NumLiterals == 0) { 3452 NumLiterals = 1; 3453 LiteralSize = Size; 3454 } else if (LiteralSize != Size) { 3455 NumLiterals = 2; 3456 } 3457 } 3458 } 3459 } 3460 ConstantBusUseCount += NumLiterals; 3461 3462 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3463 return true; 3464 3465 SMLoc LitLoc = getLitLoc(Operands); 3466 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3467 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; 3468 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3469 return false; 3470 } 3471 3472 bool AMDGPUAsmParser::validateVOPDRegBankConstraints( 3473 const MCInst &Inst, const OperandVector &Operands) { 3474 3475 const unsigned Opcode = Inst.getOpcode(); 3476 if (!isVOPD(Opcode)) 3477 return true; 3478 3479 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3480 3481 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) { 3482 const MCOperand &Opr = Inst.getOperand(OperandIdx); 3483 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI)) 3484 ? Opr.getReg() 3485 : MCRegister::NoRegister; 3486 }; 3487 3488 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); 3489 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx); 3490 if (!InvalidCompOprIdx) 3491 return true; 3492 3493 auto CompOprIdx = *InvalidCompOprIdx; 3494 auto ParsedIdx = 3495 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), 3496 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); 3497 assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); 3498 3499 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); 3500 if (CompOprIdx == VOPD::Component::DST) { 3501 Error(Loc, "one dst register must be even and the other odd"); 3502 } else { 3503 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; 3504 Error(Loc, Twine("src") + Twine(CompSrcIdx) + 3505 " operands must use different VGPR banks"); 3506 } 3507 3508 return false; 3509 } 3510 3511 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3512 3513 const unsigned Opc = Inst.getOpcode(); 3514 const MCInstrDesc &Desc = MII.get(Opc); 3515 3516 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3517 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3518 assert(ClampIdx != -1); 3519 return Inst.getOperand(ClampIdx).getImm() == 0; 3520 } 3521 3522 return true; 3523 } 3524 3525 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, 3526 const SMLoc &IDLoc) { 3527 3528 const unsigned Opc = Inst.getOpcode(); 3529 const MCInstrDesc &Desc = MII.get(Opc); 3530 3531 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3532 return true; 3533 3534 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3535 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3536 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3537 3538 assert(VDataIdx != -1); 3539 3540 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray 3541 return true; 3542 3543 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3544 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 
1 : 0; 3545 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3546 if (DMask == 0) 3547 DMask = 1; 3548 3549 bool IsPackedD16 = false; 3550 unsigned DataSize = 3551 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask); 3552 if (hasPackedD16()) { 3553 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3554 IsPackedD16 = D16Idx >= 0; 3555 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm()) 3556 DataSize = (DataSize + 1) / 2; 3557 } 3558 3559 if ((VDataSize / 4) == DataSize + TFESize) 3560 return true; 3561 3562 StringRef Modifiers; 3563 if (isGFX90A()) 3564 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask"; 3565 else 3566 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe"; 3567 3568 Error(IDLoc, Twine("image data size does not match ") + Modifiers); 3569 return false; 3570 } 3571 3572 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, 3573 const SMLoc &IDLoc) { 3574 const unsigned Opc = Inst.getOpcode(); 3575 const MCInstrDesc &Desc = MII.get(Opc); 3576 3577 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3578 return true; 3579 3580 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3581 3582 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3583 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3584 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3585 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3586 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3587 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3588 3589 assert(VAddr0Idx != -1); 3590 assert(SrsrcIdx != -1); 3591 assert(SrsrcIdx > VAddr0Idx); 3592 3593 bool IsA16 = Inst.getOperand(A16Idx).getImm(); 3594 if (BaseOpcode->BVH) { 3595 if (IsA16 == BaseOpcode->A16) 3596 return true; 3597 Error(IDLoc, "image address size does not match a16"); 3598 return false; 3599 } 3600 3601 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3602 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3603 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3604 unsigned ActualAddrSize = 3605 IsNSA ? SrsrcIdx - VAddr0Idx 3606 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3607 3608 unsigned ExpectedAddrSize = 3609 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3610 3611 if (IsNSA) { 3612 if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) { 3613 int VAddrLastIdx = SrsrcIdx - 1; 3614 unsigned VAddrLastSize = 3615 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4; 3616 3617 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; 3618 } 3619 } else { 3620 if (ExpectedAddrSize > 12) 3621 ExpectedAddrSize = 16; 3622 3623 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3624 // This provides backward compatibility for assembly created 3625 // before 160b/192b/224b types were directly supported. 
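// Illustrative example (assumption, not from the ISA docs): an image op that only needs 5 address dwords may still be written with an oversized 8-dword tuple such as v[0:7]; the check below accepts that encoding.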
3626 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3627 return true; 3628 } 3629 3630 if (ActualAddrSize == ExpectedAddrSize) 3631 return true; 3632 3633 Error(IDLoc, "image address size does not match dim and a16"); 3634 return false; 3635 } 3636 3637 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3638 3639 const unsigned Opc = Inst.getOpcode(); 3640 const MCInstrDesc &Desc = MII.get(Opc); 3641 3642 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3643 return true; 3644 if (!Desc.mayLoad() || !Desc.mayStore()) 3645 return true; // Not atomic 3646 3647 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3648 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3649 3650 // This is an incomplete check because image_atomic_cmpswap 3651 // may only use 0x3 and 0xf while other atomic operations 3652 // may use 0x1 and 0x3. However these limitations are 3653 // verified when we check that dmask matches dst size. 3654 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3655 } 3656 3657 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3658 3659 const unsigned Opc = Inst.getOpcode(); 3660 const MCInstrDesc &Desc = MII.get(Opc); 3661 3662 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3663 return true; 3664 3665 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3666 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3667 3668 // GATHER4 instructions use dmask in a different fashion compared to 3669 // other MIMG instructions. The only useful DMASK values are 3670 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3671 // (red,red,red,red) etc.) The ISA document doesn't mention 3672 // this. 3673 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3674 } 3675 3676 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3677 const unsigned Opc = Inst.getOpcode(); 3678 const MCInstrDesc &Desc = MII.get(Opc); 3679 3680 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3681 return true; 3682 3683 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3684 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3685 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3686 3687 if (!BaseOpcode->MSAA) 3688 return true; 3689 3690 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3691 assert(DimIdx != -1); 3692 3693 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3694 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3695 3696 return DimInfo->MSAA; 3697 } 3698 3699 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3700 { 3701 switch (Opcode) { 3702 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3703 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3704 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3705 return true; 3706 default: 3707 return false; 3708 } 3709 } 3710 3711 // movrels* opcodes should only allow VGPRS as src0. 3712 // This is specified in .td description for vop1/vop3, 3713 // but sdwa is handled differently. See isSDWAOperand. 
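// For example (illustrative), an SDWA form such as 'v_movrels_b32_sdwa v0, s0' is what this check targets; it is rejected below with 'source operand must be a VGPR'.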
3714 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3715 const OperandVector &Operands) { 3716 3717 const unsigned Opc = Inst.getOpcode(); 3718 const MCInstrDesc &Desc = MII.get(Opc); 3719 3720 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3721 return true; 3722 3723 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3724 assert(Src0Idx != -1); 3725 3726 SMLoc ErrLoc; 3727 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3728 if (Src0.isReg()) { 3729 auto Reg = mc2PseudoReg(Src0.getReg()); 3730 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3731 if (!isSGPR(Reg, TRI)) 3732 return true; 3733 ErrLoc = getRegLoc(Reg, Operands); 3734 } else { 3735 ErrLoc = getConstLoc(Operands); 3736 } 3737 3738 Error(ErrLoc, "source operand must be a VGPR"); 3739 return false; 3740 } 3741 3742 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3743 const OperandVector &Operands) { 3744 3745 const unsigned Opc = Inst.getOpcode(); 3746 3747 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3748 return true; 3749 3750 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3751 assert(Src0Idx != -1); 3752 3753 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3754 if (!Src0.isReg()) 3755 return true; 3756 3757 auto Reg = mc2PseudoReg(Src0.getReg()); 3758 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3759 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3760 Error(getRegLoc(Reg, Operands), 3761 "source operand must be either a VGPR or an inline constant"); 3762 return false; 3763 } 3764 3765 return true; 3766 } 3767 3768 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst, 3769 const OperandVector &Operands) { 3770 unsigned Opcode = Inst.getOpcode(); 3771 const MCInstrDesc &Desc = MII.get(Opcode); 3772 3773 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) || 3774 !getFeatureBits()[FeatureMFMAInlineLiteralBug]) 3775 return true; 3776 3777 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2); 3778 if (Src2Idx == -1) 3779 return true; 3780 3781 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) { 3782 Error(getConstLoc(Operands), 3783 "inline constants are not allowed for this operand"); 3784 return false; 3785 } 3786 3787 return true; 3788 } 3789 3790 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3791 const OperandVector &Operands) { 3792 const unsigned Opc = Inst.getOpcode(); 3793 const MCInstrDesc &Desc = MII.get(Opc); 3794 3795 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3796 return true; 3797 3798 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3799 if (Src2Idx == -1) 3800 return true; 3801 3802 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3803 if (!Src2.isReg()) 3804 return true; 3805 3806 MCRegister Src2Reg = Src2.getReg(); 3807 MCRegister DstReg = Inst.getOperand(0).getReg(); 3808 if (Src2Reg == DstReg) 3809 return true; 3810 3811 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3812 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128) 3813 return true; 3814 3815 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3816 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3817 "source 2 operand must not partially overlap with dst"); 3818 return false; 3819 } 3820 3821 return true; 3822 } 3823 3824 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3825 switch (Inst.getOpcode()) { 3826 default: 3827 return true; 3828 case V_DIV_SCALE_F32_gfx6_gfx7: 3829 case V_DIV_SCALE_F32_vi: 3830 case 
V_DIV_SCALE_F32_gfx10: 3831 case V_DIV_SCALE_F64_gfx6_gfx7: 3832 case V_DIV_SCALE_F64_vi: 3833 case V_DIV_SCALE_F64_gfx10: 3834 break; 3835 } 3836 3837 // TODO: Check that src0 = src1 or src2. 3838 3839 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3840 AMDGPU::OpName::src1_modifiers, 3841 AMDGPU::OpName::src2_modifiers}) { 3842 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3843 .getImm() & 3844 SISrcMods::ABS) { 3845 return false; 3846 } 3847 } 3848 3849 return true; 3850 } 3851 3852 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3853 3854 const unsigned Opc = Inst.getOpcode(); 3855 const MCInstrDesc &Desc = MII.get(Opc); 3856 3857 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3858 return true; 3859 3860 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3861 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3862 if (isCI() || isSI()) 3863 return false; 3864 } 3865 3866 return true; 3867 } 3868 3869 static bool IsRevOpcode(const unsigned Opcode) 3870 { 3871 switch (Opcode) { 3872 case AMDGPU::V_SUBREV_F32_e32: 3873 case AMDGPU::V_SUBREV_F32_e64: 3874 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3875 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3876 case AMDGPU::V_SUBREV_F32_e32_vi: 3877 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3878 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3879 case AMDGPU::V_SUBREV_F32_e64_vi: 3880 3881 case AMDGPU::V_SUBREV_CO_U32_e32: 3882 case AMDGPU::V_SUBREV_CO_U32_e64: 3883 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3884 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3885 3886 case AMDGPU::V_SUBBREV_U32_e32: 3887 case AMDGPU::V_SUBBREV_U32_e64: 3888 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3889 case AMDGPU::V_SUBBREV_U32_e32_vi: 3890 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3891 case AMDGPU::V_SUBBREV_U32_e64_vi: 3892 3893 case AMDGPU::V_SUBREV_U32_e32: 3894 case AMDGPU::V_SUBREV_U32_e64: 3895 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3896 case AMDGPU::V_SUBREV_U32_e32_vi: 3897 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3898 case AMDGPU::V_SUBREV_U32_e64_vi: 3899 3900 case AMDGPU::V_SUBREV_F16_e32: 3901 case AMDGPU::V_SUBREV_F16_e64: 3902 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3903 case AMDGPU::V_SUBREV_F16_e32_vi: 3904 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3905 case AMDGPU::V_SUBREV_F16_e64_vi: 3906 3907 case AMDGPU::V_SUBREV_U16_e32: 3908 case AMDGPU::V_SUBREV_U16_e64: 3909 case AMDGPU::V_SUBREV_U16_e32_vi: 3910 case AMDGPU::V_SUBREV_U16_e64_vi: 3911 3912 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3913 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3914 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3915 3916 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3917 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3918 3919 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3920 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3921 3922 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3923 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3924 3925 case AMDGPU::V_LSHRREV_B32_e32: 3926 case AMDGPU::V_LSHRREV_B32_e64: 3927 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3928 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3929 case AMDGPU::V_LSHRREV_B32_e32_vi: 3930 case AMDGPU::V_LSHRREV_B32_e64_vi: 3931 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3932 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3933 3934 case AMDGPU::V_ASHRREV_I32_e32: 3935 case AMDGPU::V_ASHRREV_I32_e64: 3936 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3937 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3938 case AMDGPU::V_ASHRREV_I32_e32_vi: 3939 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3940 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3941 case AMDGPU::V_ASHRREV_I32_e64_vi: 3942
3943 case AMDGPU::V_LSHLREV_B32_e32: 3944 case AMDGPU::V_LSHLREV_B32_e64: 3945 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3946 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3947 case AMDGPU::V_LSHLREV_B32_e32_vi: 3948 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3949 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3950 case AMDGPU::V_LSHLREV_B32_e64_vi: 3951 3952 case AMDGPU::V_LSHLREV_B16_e32: 3953 case AMDGPU::V_LSHLREV_B16_e64: 3954 case AMDGPU::V_LSHLREV_B16_e32_vi: 3955 case AMDGPU::V_LSHLREV_B16_e64_vi: 3956 case AMDGPU::V_LSHLREV_B16_gfx10: 3957 3958 case AMDGPU::V_LSHRREV_B16_e32: 3959 case AMDGPU::V_LSHRREV_B16_e64: 3960 case AMDGPU::V_LSHRREV_B16_e32_vi: 3961 case AMDGPU::V_LSHRREV_B16_e64_vi: 3962 case AMDGPU::V_LSHRREV_B16_gfx10: 3963 3964 case AMDGPU::V_ASHRREV_I16_e32: 3965 case AMDGPU::V_ASHRREV_I16_e64: 3966 case AMDGPU::V_ASHRREV_I16_e32_vi: 3967 case AMDGPU::V_ASHRREV_I16_e64_vi: 3968 case AMDGPU::V_ASHRREV_I16_gfx10: 3969 3970 case AMDGPU::V_LSHLREV_B64_e64: 3971 case AMDGPU::V_LSHLREV_B64_gfx10: 3972 case AMDGPU::V_LSHLREV_B64_vi: 3973 3974 case AMDGPU::V_LSHRREV_B64_e64: 3975 case AMDGPU::V_LSHRREV_B64_gfx10: 3976 case AMDGPU::V_LSHRREV_B64_vi: 3977 3978 case AMDGPU::V_ASHRREV_I64_e64: 3979 case AMDGPU::V_ASHRREV_I64_gfx10: 3980 case AMDGPU::V_ASHRREV_I64_vi: 3981 3982 case AMDGPU::V_PK_LSHLREV_B16: 3983 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3984 case AMDGPU::V_PK_LSHLREV_B16_vi: 3985 3986 case AMDGPU::V_PK_LSHRREV_B16: 3987 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3988 case AMDGPU::V_PK_LSHRREV_B16_vi: 3989 case AMDGPU::V_PK_ASHRREV_I16: 3990 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3991 case AMDGPU::V_PK_ASHRREV_I16_vi: 3992 return true; 3993 default: 3994 return false; 3995 } 3996 } 3997 3998 std::optional<StringRef> 3999 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4000 4001 using namespace SIInstrFlags; 4002 const unsigned Opcode = Inst.getOpcode(); 4003 const MCInstrDesc &Desc = MII.get(Opcode); 4004 4005 // lds_direct register is defined so that it can be used 4006 // with 9-bit operands only. Ignore encodings which do not accept these. 
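// Illustrative usage (assumption): 'v_mov_b32 v0, lds_direct' is accepted on targets that support LDS direct reads; the checks below reject it on gfx90a/gfx11+, on *rev* and SDWA forms, and in any position other than src0.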
4007 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4008 if ((Desc.TSFlags & Enc) == 0) 4009 return std::nullopt; 4010 4011 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4012 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4013 if (SrcIdx == -1) 4014 break; 4015 const auto &Src = Inst.getOperand(SrcIdx); 4016 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4017 4018 if (isGFX90A() || isGFX11Plus()) 4019 return StringRef("lds_direct is not supported on this GPU"); 4020 4021 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4022 return StringRef("lds_direct cannot be used with this instruction"); 4023 4024 if (SrcName != OpName::src0) 4025 return StringRef("lds_direct may be used as src0 only"); 4026 } 4027 } 4028 4029 return std::nullopt; 4030 } 4031 4032 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4033 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4034 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4035 if (Op.isFlatOffset()) 4036 return Op.getStartLoc(); 4037 } 4038 return getLoc(); 4039 } 4040 4041 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4042 const OperandVector &Operands) { 4043 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4044 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4045 return true; 4046 4047 auto Opcode = Inst.getOpcode(); 4048 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4049 assert(OpNum != -1); 4050 4051 const auto &Op = Inst.getOperand(OpNum); 4052 if (!hasFlatOffsets() && Op.getImm() != 0) { 4053 Error(getFlatOffsetLoc(Operands), 4054 "flat offset modifier is not supported on this GPU"); 4055 return false; 4056 } 4057 4058 // For FLAT segment the offset must be positive; 4059 // MSB is ignored and forced to zero. 4060 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI()); 4061 bool AllowNegative = 4062 TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); 4063 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { 4064 Error(getFlatOffsetLoc(Operands), 4065 Twine("expected a ") + 4066 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset" 4067 : Twine(OffsetSize - 1) + "-bit unsigned offset")); 4068 return false; 4069 } 4070 4071 return true; 4072 } 4073 4074 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4075 // Start with second operand because SMEM Offset cannot be dst or src0. 
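// (Operands[0] holds the mnemonic token in this parser, so the scan starts at index 2, past the destination.)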
4076 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4077 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4078 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod()) 4079 return Op.getStartLoc(); 4080 } 4081 return getLoc(); 4082 } 4083 4084 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4085 const OperandVector &Operands) { 4086 if (isCI() || isSI()) 4087 return true; 4088 4089 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4090 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4091 return true; 4092 4093 auto Opcode = Inst.getOpcode(); 4094 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4095 if (OpNum == -1) 4096 return true; 4097 4098 const auto &Op = Inst.getOperand(OpNum); 4099 if (!Op.isImm()) 4100 return true; 4101 4102 uint64_t Offset = Op.getImm(); 4103 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4104 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4105 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4106 return true; 4107 4108 Error(getSMEMOffsetLoc(Operands), 4109 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4110 "expected a 21-bit signed offset"); 4111 4112 return false; 4113 } 4114 4115 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4116 unsigned Opcode = Inst.getOpcode(); 4117 const MCInstrDesc &Desc = MII.get(Opcode); 4118 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4119 return true; 4120 4121 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4122 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4123 4124 const int OpIndices[] = { Src0Idx, Src1Idx }; 4125 4126 unsigned NumExprs = 0; 4127 unsigned NumLiterals = 0; 4128 uint32_t LiteralValue; 4129 4130 for (int OpIdx : OpIndices) { 4131 if (OpIdx == -1) break; 4132 4133 const MCOperand &MO = Inst.getOperand(OpIdx); 4134 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4135 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4136 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4137 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4138 if (NumLiterals == 0 || LiteralValue != Value) { 4139 LiteralValue = Value; 4140 ++NumLiterals; 4141 } 4142 } else if (MO.isExpr()) { 4143 ++NumExprs; 4144 } 4145 } 4146 } 4147 4148 return NumLiterals + NumExprs <= 1; 4149 } 4150 4151 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4152 const unsigned Opc = Inst.getOpcode(); 4153 if (isPermlane16(Opc)) { 4154 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4155 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4156 4157 if (OpSel & ~3) 4158 return false; 4159 } 4160 4161 uint64_t TSFlags = MII.get(Opc).TSFlags; 4162 4163 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { 4164 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4165 if (OpSelIdx != -1) { 4166 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4167 return false; 4168 } 4169 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4170 if (OpSelHiIdx != -1) { 4171 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4172 return false; 4173 } 4174 } 4175 4176 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). 
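// For example (illustrative), selecting the high half of src0 via op_sel on 'v_dot2_f16_f16' is rejected by the check below.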
4177 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && 4178 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { 4179 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4180 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4181 if (OpSel & 3) 4182 return false; 4183 } 4184 4185 return true; 4186 } 4187 4188 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4189 const OperandVector &Operands) { 4190 const unsigned Opc = Inst.getOpcode(); 4191 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4192 if (DppCtrlIdx < 0) 4193 return true; 4194 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4195 4196 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4197 // DPP64 is supported for row_newbcast only. 4198 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4199 if (Src0Idx >= 0 && 4200 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4201 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4202 Error(S, "64 bit dpp only supports row_newbcast"); 4203 return false; 4204 } 4205 } 4206 4207 return true; 4208 } 4209 4210 // Check if VCC register matches wavefront size 4211 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4212 auto FB = getFeatureBits(); 4213 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4214 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4215 } 4216 4217 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4218 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4219 const OperandVector &Operands) { 4220 unsigned Opcode = Inst.getOpcode(); 4221 const MCInstrDesc &Desc = MII.get(Opcode); 4222 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1; 4223 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4224 !HasMandatoryLiteral && !isVOPD(Opcode)) 4225 return true; 4226 4227 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral); 4228 4229 unsigned NumExprs = 0; 4230 unsigned NumLiterals = 0; 4231 uint32_t LiteralValue; 4232 4233 for (int OpIdx : OpIndices) { 4234 if (OpIdx == -1) 4235 continue; 4236 4237 const MCOperand &MO = Inst.getOperand(OpIdx); 4238 if (!MO.isImm() && !MO.isExpr()) 4239 continue; 4240 if (!isSISrcOperand(Desc, OpIdx)) 4241 continue; 4242 4243 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4244 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4245 if (NumLiterals == 0 || LiteralValue != Value) { 4246 LiteralValue = Value; 4247 ++NumLiterals; 4248 } 4249 } else if (MO.isExpr()) { 4250 ++NumExprs; 4251 } 4252 } 4253 NumLiterals += NumExprs; 4254 4255 if (!NumLiterals) 4256 return true; 4257 4258 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) { 4259 Error(getLitLoc(Operands), "literal operands are not supported"); 4260 return false; 4261 } 4262 4263 if (NumLiterals > 1) { 4264 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed"); 4265 return false; 4266 } 4267 4268 return true; 4269 } 4270 4271 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4272 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4273 const MCRegisterInfo *MRI) { 4274 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4275 if (OpIdx < 0) 4276 return -1; 4277 4278 const MCOperand &Op = Inst.getOperand(OpIdx); 4279 if (!Op.isReg()) 4280 return -1; 4281 4282 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4283 auto Reg = Sub ? 
Sub : Op.getReg(); 4284 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4285 return AGPR32.contains(Reg) ? 1 : 0; 4286 } 4287 4288 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4289 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4290 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4291 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4292 SIInstrFlags::DS)) == 0) 4293 return true; 4294 4295 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4296 : AMDGPU::OpName::vdata; 4297 4298 const MCRegisterInfo *MRI = getMRI(); 4299 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4300 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4301 4302 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4303 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4304 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4305 return false; 4306 } 4307 4308 auto FB = getFeatureBits(); 4309 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4310 if (DataAreg < 0 || DstAreg < 0) 4311 return true; 4312 return DstAreg == DataAreg; 4313 } 4314 4315 return DstAreg < 1 && DataAreg < 1; 4316 } 4317 4318 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4319 auto FB = getFeatureBits(); 4320 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4321 return true; 4322 4323 const MCRegisterInfo *MRI = getMRI(); 4324 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4325 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4326 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4327 const MCOperand &Op = Inst.getOperand(I); 4328 if (!Op.isReg()) 4329 continue; 4330 4331 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4332 if (!Sub) 4333 continue; 4334 4335 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4336 return false; 4337 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4338 return false; 4339 } 4340 4341 return true; 4342 } 4343 4344 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4345 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4346 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4347 if (Op.isBLGP()) 4348 return Op.getStartLoc(); 4349 } 4350 return SMLoc(); 4351 } 4352 4353 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4354 const OperandVector &Operands) { 4355 unsigned Opc = Inst.getOpcode(); 4356 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4357 if (BlgpIdx == -1) 4358 return true; 4359 SMLoc BLGPLoc = getBLGPLoc(Operands); 4360 if (!BLGPLoc.isValid()) 4361 return true; 4362 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4363 auto FB = getFeatureBits(); 4364 bool UsesNeg = false; 4365 if (FB[AMDGPU::FeatureGFX940Insts]) { 4366 switch (Opc) { 4367 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4368 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4369 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4370 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4371 UsesNeg = true; 4372 } 4373 } 4374 4375 if (IsNeg == UsesNeg) 4376 return true; 4377 4378 Error(BLGPLoc, 4379 UsesNeg ? 
"invalid modifier: blgp is not supported" 4380 : "invalid modifier: neg is not supported"); 4381 4382 return false; 4383 } 4384 4385 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst, 4386 const OperandVector &Operands) { 4387 if (!isGFX11Plus()) 4388 return true; 4389 4390 unsigned Opc = Inst.getOpcode(); 4391 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 && 4392 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 && 4393 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 && 4394 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11) 4395 return true; 4396 4397 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst); 4398 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg()); 4399 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()); 4400 if (Reg == AMDGPU::SGPR_NULL) 4401 return true; 4402 4403 SMLoc RegLoc = getRegLoc(Reg, Operands); 4404 Error(RegLoc, "src0 must be null"); 4405 return false; 4406 } 4407 4408 // gfx90a has an undocumented limitation: 4409 // DS_GWS opcodes must use even aligned registers. 4410 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4411 const OperandVector &Operands) { 4412 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4413 return true; 4414 4415 int Opc = Inst.getOpcode(); 4416 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4417 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4418 return true; 4419 4420 const MCRegisterInfo *MRI = getMRI(); 4421 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4422 int Data0Pos = 4423 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4424 assert(Data0Pos != -1); 4425 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4426 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4427 if (RegIdx & 1) { 4428 SMLoc RegLoc = getRegLoc(Reg, Operands); 4429 Error(RegLoc, "vgpr must be even aligned"); 4430 return false; 4431 } 4432 4433 return true; 4434 } 4435 4436 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4437 const OperandVector &Operands, 4438 const SMLoc &IDLoc) { 4439 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4440 AMDGPU::OpName::cpol); 4441 if (CPolPos == -1) 4442 return true; 4443 4444 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4445 4446 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4447 if (TSFlags & SIInstrFlags::SMRD) { 4448 if (CPol && (isSI() || isCI())) { 4449 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4450 Error(S, "cache policy is not supported for SMRD instructions"); 4451 return false; 4452 } 4453 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4454 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4455 return false; 4456 } 4457 } 4458 4459 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4460 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4461 StringRef CStr(S.getPointer()); 4462 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4463 Error(S, "scc is not supported on this GPU"); 4464 return false; 4465 } 4466 4467 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4468 return true; 4469 4470 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4471 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4472 Error(IDLoc, isGFX940() ? 
"instruction must use sc0" 4473 : "instruction must use glc"); 4474 return false; 4475 } 4476 } else { 4477 if (CPol & CPol::GLC) { 4478 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4479 StringRef CStr(S.getPointer()); 4480 S = SMLoc::getFromPointer( 4481 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4482 Error(S, isGFX940() ? "instruction must not use sc0" 4483 : "instruction must not use glc"); 4484 return false; 4485 } 4486 } 4487 4488 return true; 4489 } 4490 4491 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4492 if (!isGFX11Plus()) 4493 return true; 4494 for (auto &Operand : Operands) { 4495 if (!Operand->isReg()) 4496 continue; 4497 unsigned Reg = Operand->getReg(); 4498 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4499 Error(getRegLoc(Reg, Operands), 4500 "execz and vccz are not supported on this GPU"); 4501 return false; 4502 } 4503 } 4504 return true; 4505 } 4506 4507 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, 4508 const OperandVector &Operands) { 4509 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4510 if (Desc.mayStore() && 4511 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 4512 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands); 4513 if (Loc != getInstLoc(Operands)) { 4514 Error(Loc, "TFE modifier has no meaning for store instructions"); 4515 return false; 4516 } 4517 } 4518 4519 return true; 4520 } 4521 4522 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4523 const SMLoc &IDLoc, 4524 const OperandVector &Operands) { 4525 if (auto ErrMsg = validateLdsDirect(Inst)) { 4526 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4527 return false; 4528 } 4529 if (!validateSOPLiteral(Inst)) { 4530 Error(getLitLoc(Operands), 4531 "only one unique literal operand is allowed"); 4532 return false; 4533 } 4534 if (!validateVOPLiteral(Inst, Operands)) { 4535 return false; 4536 } 4537 if (!validateConstantBusLimitations(Inst, Operands)) { 4538 return false; 4539 } 4540 if (!validateVOPDRegBankConstraints(Inst, Operands)) { 4541 return false; 4542 } 4543 if (!validateIntClampSupported(Inst)) { 4544 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4545 "integer clamping is not supported on this GPU"); 4546 return false; 4547 } 4548 if (!validateOpSel(Inst)) { 4549 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4550 "invalid op_sel operand"); 4551 return false; 4552 } 4553 if (!validateDPP(Inst, Operands)) { 4554 return false; 4555 } 4556 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
4557 if (!validateMIMGD16(Inst)) { 4558 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4559 "d16 modifier is not supported on this GPU"); 4560 return false; 4561 } 4562 if (!validateMIMGMSAA(Inst)) { 4563 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4564 "invalid dim; must be MSAA type"); 4565 return false; 4566 } 4567 if (!validateMIMGDataSize(Inst, IDLoc)) { 4568 return false; 4569 } 4570 if (!validateMIMGAddrSize(Inst, IDLoc)) 4571 return false; 4572 if (!validateMIMGAtomicDMask(Inst)) { 4573 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4574 "invalid atomic image dmask"); 4575 return false; 4576 } 4577 if (!validateMIMGGatherDMask(Inst)) { 4578 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4579 "invalid image_gather dmask: only one bit must be set"); 4580 return false; 4581 } 4582 if (!validateMovrels(Inst, Operands)) { 4583 return false; 4584 } 4585 if (!validateFlatOffset(Inst, Operands)) { 4586 return false; 4587 } 4588 if (!validateSMEMOffset(Inst, Operands)) { 4589 return false; 4590 } 4591 if (!validateMAIAccWrite(Inst, Operands)) { 4592 return false; 4593 } 4594 if (!validateMAISrc2(Inst, Operands)) { 4595 return false; 4596 } 4597 if (!validateMFMA(Inst, Operands)) { 4598 return false; 4599 } 4600 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4601 return false; 4602 } 4603 4604 if (!validateAGPRLdSt(Inst)) { 4605 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4606 ? "invalid register class: data and dst should be all VGPR or AGPR" 4607 : "invalid register class: agpr loads and stores not supported on this GPU" 4608 ); 4609 return false; 4610 } 4611 if (!validateVGPRAlign(Inst)) { 4612 Error(IDLoc, 4613 "invalid register class: vgpr tuples must be 64 bit aligned"); 4614 return false; 4615 } 4616 if (!validateGWS(Inst, Operands)) { 4617 return false; 4618 } 4619 4620 if (!validateBLGP(Inst, Operands)) { 4621 return false; 4622 } 4623 4624 if (!validateDivScale(Inst)) { 4625 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4626 return false; 4627 } 4628 if (!validateWaitCnt(Inst, Operands)) { 4629 return false; 4630 } 4631 if (!validateExeczVcczOperands(Operands)) { 4632 return false; 4633 } 4634 if (!validateTFE(Inst, Operands)) { 4635 return false; 4636 } 4637 4638 return true; 4639 } 4640 4641 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4642 const FeatureBitset &FBS, 4643 unsigned VariantID = 0); 4644 4645 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4646 const FeatureBitset &AvailableFeatures, 4647 unsigned VariantID); 4648 4649 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4650 const FeatureBitset &FBS) { 4651 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4652 } 4653 4654 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4655 const FeatureBitset &FBS, 4656 ArrayRef<unsigned> Variants) { 4657 for (auto Variant : Variants) { 4658 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4659 return true; 4660 } 4661 4662 return false; 4663 } 4664 4665 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4666 const SMLoc &IDLoc) { 4667 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits()); 4668 4669 // Check if requested instruction variant is supported. 4670 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4671 return false; 4672 4673 // This instruction is not supported. 4674 // Clear any other pending errors because they are no longer relevant. 4675 getParser().clearPendingErrors(); 4676 4677 // Requested instruction variant is not supported. 
4678 // Check if any other variants are supported. 4679 StringRef VariantName = getMatchedVariantName(); 4680 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4681 return Error(IDLoc, 4682 Twine(VariantName, 4683 " variant of this instruction is not supported")); 4684 } 4685 4686 // Check if this instruction may be used with a different wavesize. 4687 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && 4688 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { 4689 4690 FeatureBitset FeaturesWS32 = getFeatureBits(); 4691 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64) 4692 .flip(AMDGPU::FeatureWavefrontSize32); 4693 FeatureBitset AvailableFeaturesWS32 = 4694 ComputeAvailableFeatures(FeaturesWS32); 4695 4696 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants())) 4697 return Error(IDLoc, "instruction requires wavesize=32"); 4698 } 4699 4700 // Finally check if this instruction is supported on any other GPU. 4701 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4702 return Error(IDLoc, "instruction not supported on this GPU"); 4703 } 4704 4705 // Instruction not supported on any GPU. Probably a typo. 4706 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4707 return Error(IDLoc, "invalid instruction" + Suggestion); 4708 } 4709 4710 static bool isInvalidVOPDY(const OperandVector &Operands, 4711 uint64_t InvalidOprIdx) { 4712 assert(InvalidOprIdx < Operands.size()); 4713 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]); 4714 if (Op.isToken() && InvalidOprIdx > 1) { 4715 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]); 4716 return PrevOp.isToken() && PrevOp.getToken() == "::"; 4717 } 4718 return false; 4719 } 4720 4721 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4722 OperandVector &Operands, 4723 MCStreamer &Out, 4724 uint64_t &ErrorInfo, 4725 bool MatchingInlineAsm) { 4726 MCInst Inst; 4727 unsigned Result = Match_Success; 4728 for (auto Variant : getMatchedVariants()) { 4729 uint64_t EI; 4730 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4731 Variant); 4732 // We order match statuses from least to most specific. We use most specific 4733 // status as resulting 4734 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4735 if ((R == Match_Success) || 4736 (R == Match_PreferE32) || 4737 (R == Match_MissingFeature && Result != Match_PreferE32) || 4738 (R == Match_InvalidOperand && Result != Match_MissingFeature 4739 && Result != Match_PreferE32) || 4740 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4741 && Result != Match_MissingFeature 4742 && Result != Match_PreferE32)) { 4743 Result = R; 4744 ErrorInfo = EI; 4745 } 4746 if (R == Match_Success) 4747 break; 4748 } 4749 4750 if (Result == Match_Success) { 4751 if (!validateInstruction(Inst, IDLoc, Operands)) { 4752 return true; 4753 } 4754 Inst.setLoc(IDLoc); 4755 Out.emitInstruction(Inst, getSTI()); 4756 return false; 4757 } 4758 4759 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4760 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4761 return true; 4762 } 4763 4764 switch (Result) { 4765 default: break; 4766 case Match_MissingFeature: 4767 // It has been verified that the specified instruction 4768 // mnemonic is valid. A match was found but it requires 4769 // features which are not supported on this GPU. 
4770 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4771 4772 case Match_InvalidOperand: { 4773 SMLoc ErrorLoc = IDLoc; 4774 if (ErrorInfo != ~0ULL) { 4775 if (ErrorInfo >= Operands.size()) { 4776 return Error(IDLoc, "too few operands for instruction"); 4777 } 4778 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4779 if (ErrorLoc == SMLoc()) 4780 ErrorLoc = IDLoc; 4781 4782 if (isInvalidVOPDY(Operands, ErrorInfo)) 4783 return Error(ErrorLoc, "invalid VOPDY instruction"); 4784 } 4785 return Error(ErrorLoc, "invalid operand for instruction"); 4786 } 4787 4788 case Match_PreferE32: 4789 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4790 "should be encoded as e32"); 4791 case Match_MnemonicFail: 4792 llvm_unreachable("Invalid instructions should have been handled already"); 4793 } 4794 llvm_unreachable("Implement any new match types added!"); 4795 } 4796 4797 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4798 int64_t Tmp = -1; 4799 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4800 return true; 4801 } 4802 if (getParser().parseAbsoluteExpression(Tmp)) { 4803 return true; 4804 } 4805 Ret = static_cast<uint32_t>(Tmp); 4806 return false; 4807 } 4808 4809 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4810 uint32_t &Minor) { 4811 if (ParseAsAbsoluteExpression(Major)) 4812 return TokError("invalid major version"); 4813 4814 if (!trySkipToken(AsmToken::Comma)) 4815 return TokError("minor version number required, comma expected"); 4816 4817 if (ParseAsAbsoluteExpression(Minor)) 4818 return TokError("invalid minor version"); 4819 4820 return false; 4821 } 4822 4823 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4824 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4825 return TokError("directive only supported for amdgcn architecture"); 4826 4827 std::string TargetIDDirective; 4828 SMLoc TargetStart = getTok().getLoc(); 4829 if (getParser().parseEscapedString(TargetIDDirective)) 4830 return true; 4831 4832 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4833 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4834 return getParser().Error(TargetRange.Start, 4835 (Twine(".amdgcn_target directive's target id ") + 4836 Twine(TargetIDDirective) + 4837 Twine(" does not match the specified target id ") + 4838 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4839 4840 return false; 4841 } 4842 4843 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4844 return Error(Range.Start, "value out of range", Range); 4845 } 4846 4847 bool AMDGPUAsmParser::calculateGPRBlocks( 4848 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4849 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32, 4850 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR, 4851 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4852 // TODO(scott.linder): These calculations are duplicated from 4853 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
4854 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4855 4856 unsigned NumVGPRs = NextFreeVGPR; 4857 unsigned NumSGPRs = NextFreeSGPR; 4858 4859 if (Version.Major >= 10) 4860 NumSGPRs = 0; 4861 else { 4862 unsigned MaxAddressableNumSGPRs = 4863 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4864 4865 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4866 NumSGPRs > MaxAddressableNumSGPRs) 4867 return OutOfRangeError(SGPRRange); 4868 4869 NumSGPRs += 4870 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4871 4872 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4873 NumSGPRs > MaxAddressableNumSGPRs) 4874 return OutOfRangeError(SGPRRange); 4875 4876 if (Features.test(FeatureSGPRInitBug)) 4877 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4878 } 4879 4880 VGPRBlocks = 4881 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4882 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4883 4884 return false; 4885 } 4886 4887 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4888 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4889 return TokError("directive only supported for amdgcn architecture"); 4890 4891 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4892 return TokError("directive only supported for amdhsa OS"); 4893 4894 StringRef KernelName; 4895 if (getParser().parseIdentifier(KernelName)) 4896 return true; 4897 4898 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4899 4900 StringSet<> Seen; 4901 4902 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4903 4904 SMRange VGPRRange; 4905 uint64_t NextFreeVGPR = 0; 4906 uint64_t AccumOffset = 0; 4907 uint64_t SharedVGPRCount = 0; 4908 SMRange SGPRRange; 4909 uint64_t NextFreeSGPR = 0; 4910 4911 // Count the number of user SGPRs implied from the enabled feature bits. 4912 unsigned ImpliedUserSGPRCount = 0; 4913 4914 // Track if the asm explicitly contains the directive for the user SGPR 4915 // count. 
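// (If .amdhsa_user_sgpr_count is given explicitly, it must not be smaller than the count implied by the enabled user SGPR directives; this is checked after the directive loop.)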
4916 std::optional<unsigned> ExplicitUserSGPRCount; 4917 bool ReserveVCC = true; 4918 bool ReserveFlatScr = true; 4919 std::optional<bool> EnableWavefrontSize32; 4920 4921 while (true) { 4922 while (trySkipToken(AsmToken::EndOfStatement)); 4923 4924 StringRef ID; 4925 SMRange IDRange = getTok().getLocRange(); 4926 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4927 return true; 4928 4929 if (ID == ".end_amdhsa_kernel") 4930 break; 4931 4932 if (!Seen.insert(ID).second) 4933 return TokError(".amdhsa_ directives cannot be repeated"); 4934 4935 SMLoc ValStart = getLoc(); 4936 int64_t IVal; 4937 if (getParser().parseAbsoluteExpression(IVal)) 4938 return true; 4939 SMLoc ValEnd = getLoc(); 4940 SMRange ValRange = SMRange(ValStart, ValEnd); 4941 4942 if (IVal < 0) 4943 return OutOfRangeError(ValRange); 4944 4945 uint64_t Val = IVal; 4946 4947 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4948 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4949 return OutOfRangeError(RANGE); \ 4950 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4951 4952 if (ID == ".amdhsa_group_segment_fixed_size") { 4953 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4954 return OutOfRangeError(ValRange); 4955 KD.group_segment_fixed_size = Val; 4956 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4957 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4958 return OutOfRangeError(ValRange); 4959 KD.private_segment_fixed_size = Val; 4960 } else if (ID == ".amdhsa_kernarg_size") { 4961 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4962 return OutOfRangeError(ValRange); 4963 KD.kernarg_size = Val; 4964 } else if (ID == ".amdhsa_user_sgpr_count") { 4965 ExplicitUserSGPRCount = Val; 4966 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4967 if (hasArchitectedFlatScratch()) 4968 return Error(IDRange.Start, 4969 "directive is not supported with architected flat scratch", 4970 IDRange); 4971 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4972 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4973 Val, ValRange); 4974 if (Val) 4975 ImpliedUserSGPRCount += 4; 4976 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4977 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4978 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4979 ValRange); 4980 if (Val) 4981 ImpliedUserSGPRCount += 2; 4982 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4983 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4984 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4985 ValRange); 4986 if (Val) 4987 ImpliedUserSGPRCount += 2; 4988 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4989 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4990 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4991 Val, ValRange); 4992 if (Val) 4993 ImpliedUserSGPRCount += 2; 4994 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4995 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4996 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4997 ValRange); 4998 if (Val) 4999 ImpliedUserSGPRCount += 2; 5000 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 5001 if (hasArchitectedFlatScratch()) 5002 return Error(IDRange.Start, 5003 "directive is not supported with architected flat scratch", 5004 IDRange); 5005 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5006 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 5007 ValRange); 5008 if (Val) 5009 ImpliedUserSGPRCount += 2; 5010 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 5011 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5012 
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 5013 Val, ValRange); 5014 if (Val) 5015 ImpliedUserSGPRCount += 1; 5016 } else if (ID == ".amdhsa_wavefront_size32") { 5017 if (IVersion.Major < 10) 5018 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5019 EnableWavefrontSize32 = Val; 5020 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5021 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5022 Val, ValRange); 5023 } else if (ID == ".amdhsa_uses_dynamic_stack") { 5024 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5025 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange); 5026 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5027 if (hasArchitectedFlatScratch()) 5028 return Error(IDRange.Start, 5029 "directive is not supported with architected flat scratch", 5030 IDRange); 5031 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5032 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5033 } else if (ID == ".amdhsa_enable_private_segment") { 5034 if (!hasArchitectedFlatScratch()) 5035 return Error( 5036 IDRange.Start, 5037 "directive is not supported without architected flat scratch", 5038 IDRange); 5039 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5040 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5041 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5042 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5043 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5044 ValRange); 5045 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5046 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5047 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5048 ValRange); 5049 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5050 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5051 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5052 ValRange); 5053 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5054 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5055 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5056 ValRange); 5057 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5058 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5059 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5060 ValRange); 5061 } else if (ID == ".amdhsa_next_free_vgpr") { 5062 VGPRRange = ValRange; 5063 NextFreeVGPR = Val; 5064 } else if (ID == ".amdhsa_next_free_sgpr") { 5065 SGPRRange = ValRange; 5066 NextFreeSGPR = Val; 5067 } else if (ID == ".amdhsa_accum_offset") { 5068 if (!isGFX90A()) 5069 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5070 AccumOffset = Val; 5071 } else if (ID == ".amdhsa_reserve_vcc") { 5072 if (!isUInt<1>(Val)) 5073 return OutOfRangeError(ValRange); 5074 ReserveVCC = Val; 5075 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5076 if (IVersion.Major < 7) 5077 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5078 if (hasArchitectedFlatScratch()) 5079 return Error(IDRange.Start, 5080 "directive is not supported with architected flat scratch", 5081 IDRange); 5082 if (!isUInt<1>(Val)) 5083 return OutOfRangeError(ValRange); 5084 ReserveFlatScr = Val; 5085 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5086 if (IVersion.Major < 8) 5087 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5088 if (!isUInt<1>(Val)) 5089 return OutOfRangeError(ValRange); 5090 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5091 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5092 IDRange); 5093 } else if (ID == ".amdhsa_float_round_mode_32") { 5094 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 
5095 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5096 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5097 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5098 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5099 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5100 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5101 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5102 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5103 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5104 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5105 ValRange); 5106 } else if (ID == ".amdhsa_dx10_clamp") { 5107 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5108 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5109 } else if (ID == ".amdhsa_ieee_mode") { 5110 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5111 Val, ValRange); 5112 } else if (ID == ".amdhsa_fp16_overflow") { 5113 if (IVersion.Major < 9) 5114 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5115 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5116 ValRange); 5117 } else if (ID == ".amdhsa_tg_split") { 5118 if (!isGFX90A()) 5119 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5120 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5121 ValRange); 5122 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5123 if (IVersion.Major < 10) 5124 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5125 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5126 ValRange); 5127 } else if (ID == ".amdhsa_memory_ordered") { 5128 if (IVersion.Major < 10) 5129 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5130 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5131 ValRange); 5132 } else if (ID == ".amdhsa_forward_progress") { 5133 if (IVersion.Major < 10) 5134 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5135 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5136 ValRange); 5137 } else if (ID == ".amdhsa_shared_vgpr_count") { 5138 if (IVersion.Major < 10) 5139 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5140 SharedVGPRCount = Val; 5141 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5142 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, 5143 ValRange); 5144 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5145 PARSE_BITS_ENTRY( 5146 KD.compute_pgm_rsrc2, 5147 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5148 ValRange); 5149 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5150 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5151 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5152 Val, ValRange); 5153 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5154 PARSE_BITS_ENTRY( 5155 KD.compute_pgm_rsrc2, 5156 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5157 ValRange); 5158 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5159 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5160 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5161 Val, ValRange); 5162 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5163 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5164 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5165 Val, ValRange); 5166 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5167 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5168 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5169 Val, ValRange); 5170 
} else if (ID == ".amdhsa_exception_int_div_zero") { 5171 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5172 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5173 Val, ValRange); 5174 } else { 5175 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5176 } 5177 5178 #undef PARSE_BITS_ENTRY 5179 } 5180 5181 if (!Seen.contains(".amdhsa_next_free_vgpr")) 5182 return TokError(".amdhsa_next_free_vgpr directive is required"); 5183 5184 if (!Seen.contains(".amdhsa_next_free_sgpr")) 5185 return TokError(".amdhsa_next_free_sgpr directive is required"); 5186 5187 unsigned VGPRBlocks; 5188 unsigned SGPRBlocks; 5189 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5190 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5191 EnableWavefrontSize32, NextFreeVGPR, 5192 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5193 SGPRBlocks)) 5194 return true; 5195 5196 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5197 VGPRBlocks)) 5198 return OutOfRangeError(VGPRRange); 5199 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5200 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5201 5202 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5203 SGPRBlocks)) 5204 return OutOfRangeError(SGPRRange); 5205 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5206 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5207 SGPRBlocks); 5208 5209 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5210 return TokError(".amdhsa_user_sgpr_count smaller than implied by " 5211 "enabled user SGPRs"); 5212 5213 unsigned UserSGPRCount = 5214 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5215 5216 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5217 return TokError("too many user SGPRs enabled"); 5218 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5219 UserSGPRCount); 5220 5221 if (isGFX90A()) { 5222 if (!Seen.contains(".amdhsa_accum_offset")) 5223 return TokError(".amdhsa_accum_offset directive is required"); 5224 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5225 return TokError("accum_offset should be in range [4..256] in " 5226 "increments of 4"); 5227 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5228 return TokError("accum_offset exceeds total VGPR allocation"); 5229 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5230 (AccumOffset / 4 - 1)); 5231 } 5232 5233 if (IVersion.Major >= 10) { 5234 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5235 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { 5236 return TokError("shared_vgpr_count directive not valid on " 5237 "wavefront size 32"); 5238 } 5239 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5240 return TokError("shared_vgpr_count*2 + " 5241 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5242 "exceed 63\n"); 5243 } 5244 } 5245 5246 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5247 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5248 ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion()); 5249 return false; 5250 } 5251 5252 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5253 uint32_t Major; 5254 uint32_t Minor; 5255 5256 if (ParseDirectiveMajorMinor(Major, Minor)) 5257 return true; 5258 5259 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5260 return false; 5261 } 5262 5263 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5264 uint32_t Major;
5265 uint32_t Minor; 5266 uint32_t Stepping; 5267 StringRef VendorName; 5268 StringRef ArchName; 5269 5270 // If this directive has no arguments, then use the ISA version for the 5271 // targeted GPU. 5272 if (isToken(AsmToken::EndOfStatement)) { 5273 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5274 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5275 ISA.Stepping, 5276 "AMD", "AMDGPU"); 5277 return false; 5278 } 5279 5280 if (ParseDirectiveMajorMinor(Major, Minor)) 5281 return true; 5282 5283 if (!trySkipToken(AsmToken::Comma)) 5284 return TokError("stepping version number required, comma expected"); 5285 5286 if (ParseAsAbsoluteExpression(Stepping)) 5287 return TokError("invalid stepping version"); 5288 5289 if (!trySkipToken(AsmToken::Comma)) 5290 return TokError("vendor name required, comma expected"); 5291 5292 if (!parseString(VendorName, "invalid vendor name")) 5293 return true; 5294 5295 if (!trySkipToken(AsmToken::Comma)) 5296 return TokError("arch name required, comma expected"); 5297 5298 if (!parseString(ArchName, "invalid arch name")) 5299 return true; 5300 5301 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5302 VendorName, ArchName); 5303 return false; 5304 } 5305 5306 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5307 amd_kernel_code_t &Header) { 5308 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5309 // assembly for backwards compatibility. 5310 if (ID == "max_scratch_backing_memory_byte_size") { 5311 Parser.eatToEndOfStatement(); 5312 return false; 5313 } 5314 5315 SmallString<40> ErrStr; 5316 raw_svector_ostream Err(ErrStr); 5317 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5318 return TokError(Err.str()); 5319 } 5320 Lex(); 5321 5322 if (ID == "enable_wavefront_size32") { 5323 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5324 if (!isGFX10Plus()) 5325 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5326 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5327 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5328 } else { 5329 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5330 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5331 } 5332 } 5333 5334 if (ID == "wavefront_size") { 5335 if (Header.wavefront_size == 5) { 5336 if (!isGFX10Plus()) 5337 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5338 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5339 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5340 } else if (Header.wavefront_size == 6) { 5341 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5342 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5343 } 5344 } 5345 5346 if (ID == "enable_wgp_mode") { 5347 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5348 !isGFX10Plus()) 5349 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5350 } 5351 5352 if (ID == "enable_mem_ordered") { 5353 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5354 !isGFX10Plus()) 5355 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5356 } 5357 5358 if (ID == "enable_fwd_progress") { 5359 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5360 !isGFX10Plus()) 5361 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5362 } 5363 5364 return false; 5365 } 5366 5367 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5368 amd_kernel_code_t Header; 5369 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5370 5371 while (true) { 5372 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5373 // will set the current token to EndOfStatement. 5374 while(trySkipToken(AsmToken::EndOfStatement)); 5375 5376 StringRef ID; 5377 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5378 return true; 5379 5380 if (ID == ".end_amd_kernel_code_t") 5381 break; 5382 5383 if (ParseAMDKernelCodeTValue(ID, Header)) 5384 return true; 5385 } 5386 5387 getTargetStreamer().EmitAMDKernelCodeT(Header); 5388 5389 return false; 5390 } 5391 5392 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5393 StringRef KernelName; 5394 if (!parseId(KernelName, "expected symbol name")) 5395 return true; 5396 5397 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5398 ELF::STT_AMDGPU_HSA_KERNEL); 5399 5400 KernelScope.initialize(getContext()); 5401 return false; 5402 } 5403 5404 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5405 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5406 return Error(getLoc(), 5407 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5408 "architectures"); 5409 } 5410 5411 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5412 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5413 return Error(getParser().getTok().getLoc(), "target id must match options"); 5414 5415 getTargetStreamer().EmitISAVersion(); 5416 Lex(); 5417 5418 return false; 5419 } 5420 5421 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5422 const char *AssemblerDirectiveBegin; 5423 const char *AssemblerDirectiveEnd; 5424 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5425 isHsaAbiVersion3AndAbove(&getSTI()) 5426 ? std::pair(HSAMD::V3::AssemblerDirectiveBegin, 5427 HSAMD::V3::AssemblerDirectiveEnd) 5428 : std::pair(HSAMD::AssemblerDirectiveBegin, 5429 HSAMD::AssemblerDirectiveEnd); 5430 5431 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5432 return Error(getLoc(), 5433 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5434 "not available on non-amdhsa OSes")).str()); 5435 } 5436 5437 std::string HSAMetadataString; 5438 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5439 HSAMetadataString)) 5440 return true; 5441 5442 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5443 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5444 return Error(getLoc(), "invalid HSA metadata"); 5445 } else { 5446 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5447 return Error(getLoc(), "invalid HSA metadata"); 5448 } 5449 5450 return false; 5451 } 5452 5453 /// Common code to parse out a block of text (typically YAML) between start and 5454 /// end directives. 
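/// For example, ParseDirectiveHSAMetadata and ParseDirectivePALMetadataBegin
/// below use this helper to collect the metadata text between their begin/end
/// directive markers into a single string. (Illustrative note; the callers are
/// the ones defined in this file.)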
5455 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5456 const char *AssemblerDirectiveEnd, 5457 std::string &CollectString) { 5458 5459 raw_string_ostream CollectStream(CollectString); 5460 5461 getLexer().setSkipSpace(false); 5462 5463 bool FoundEnd = false; 5464 while (!isToken(AsmToken::Eof)) { 5465 while (isToken(AsmToken::Space)) { 5466 CollectStream << getTokenStr(); 5467 Lex(); 5468 } 5469 5470 if (trySkipId(AssemblerDirectiveEnd)) { 5471 FoundEnd = true; 5472 break; 5473 } 5474 5475 CollectStream << Parser.parseStringToEndOfStatement() 5476 << getContext().getAsmInfo()->getSeparatorString(); 5477 5478 Parser.eatToEndOfStatement(); 5479 } 5480 5481 getLexer().setSkipSpace(true); 5482 5483 if (isToken(AsmToken::Eof) && !FoundEnd) { 5484 return TokError(Twine("expected directive ") + 5485 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5486 } 5487 5488 CollectStream.flush(); 5489 return false; 5490 } 5491 5492 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5493 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5494 std::string String; 5495 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5496 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5497 return true; 5498 5499 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5500 if (!PALMetadata->setFromString(String)) 5501 return Error(getLoc(), "invalid PAL metadata"); 5502 return false; 5503 } 5504 5505 /// Parse the assembler directive for old linear-format PAL metadata. 5506 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5507 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5508 return Error(getLoc(), 5509 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5510 "not available on non-amdpal OSes")).str()); 5511 } 5512 5513 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5514 PALMetadata->setLegacy(); 5515 for (;;) { 5516 uint32_t Key, Value; 5517 if (ParseAsAbsoluteExpression(Key)) { 5518 return TokError(Twine("invalid value in ") + 5519 Twine(PALMD::AssemblerDirective)); 5520 } 5521 if (!trySkipToken(AsmToken::Comma)) { 5522 return TokError(Twine("expected an even number of values in ") + 5523 Twine(PALMD::AssemblerDirective)); 5524 } 5525 if (ParseAsAbsoluteExpression(Value)) { 5526 return TokError(Twine("invalid value in ") + 5527 Twine(PALMD::AssemblerDirective)); 5528 } 5529 PALMetadata->setRegister(Key, Value); 5530 if (!trySkipToken(AsmToken::Comma)) 5531 break; 5532 } 5533 return false; 5534 } 5535 5536 /// ParseDirectiveAMDGPULDS 5537 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5538 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5539 if (getParser().checkForValidSection()) 5540 return true; 5541 5542 StringRef Name; 5543 SMLoc NameLoc = getLoc(); 5544 if (getParser().parseIdentifier(Name)) 5545 return TokError("expected identifier in directive"); 5546 5547 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5548 if (getParser().parseComma()) 5549 return true; 5550 5551 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5552 5553 int64_t Size; 5554 SMLoc SizeLoc = getLoc(); 5555 if (getParser().parseAbsoluteExpression(Size)) 5556 return true; 5557 if (Size < 0) 5558 return Error(SizeLoc, "size must be non-negative"); 5559 if (Size > LocalMemorySize) 5560 return Error(SizeLoc, "size is too large"); 5561 5562 int64_t Alignment = 4; 5563 if (trySkipToken(AsmToken::Comma)) { 5564 SMLoc AlignLoc = getLoc(); 5565 if 
(getParser().parseAbsoluteExpression(Alignment)) 5566 return true; 5567 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5568 return Error(AlignLoc, "alignment must be a power of two"); 5569 5570 // Alignment larger than the size of LDS is possible in theory, as long 5571 // as the linker manages to place the symbol at address 0, but we do want 5572 // to make sure the alignment fits nicely into a 32-bit integer. 5573 if (Alignment >= 1u << 31) 5574 return Error(AlignLoc, "alignment is too large"); 5575 } 5576 5577 if (parseEOL()) 5578 return true; 5579 5580 Symbol->redefineIfPossible(); 5581 if (!Symbol->isUndefined()) 5582 return Error(NameLoc, "invalid symbol redefinition"); 5583 5584 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5585 return false; 5586 } 5587 5588 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5589 StringRef IDVal = DirectiveID.getString(); 5590 5591 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5592 if (IDVal == ".amdhsa_kernel") 5593 return ParseDirectiveAMDHSAKernel(); 5594 5595 // TODO: Restructure/combine with PAL metadata directive. 5596 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5597 return ParseDirectiveHSAMetadata(); 5598 } else { 5599 if (IDVal == ".hsa_code_object_version") 5600 return ParseDirectiveHSACodeObjectVersion(); 5601 5602 if (IDVal == ".hsa_code_object_isa") 5603 return ParseDirectiveHSACodeObjectISA(); 5604 5605 if (IDVal == ".amd_kernel_code_t") 5606 return ParseDirectiveAMDKernelCodeT(); 5607 5608 if (IDVal == ".amdgpu_hsa_kernel") 5609 return ParseDirectiveAMDGPUHsaKernel(); 5610 5611 if (IDVal == ".amd_amdgpu_isa") 5612 return ParseDirectiveISAVersion(); 5613 5614 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5615 return ParseDirectiveHSAMetadata(); 5616 } 5617 5618 if (IDVal == ".amdgcn_target") 5619 return ParseDirectiveAMDGCNTarget(); 5620 5621 if (IDVal == ".amdgpu_lds") 5622 return ParseDirectiveAMDGPULDS(); 5623 5624 if (IDVal == PALMD::AssemblerDirectiveBegin) 5625 return ParseDirectivePALMetadataBegin(); 5626 5627 if (IDVal == PALMD::AssemblerDirective) 5628 return ParseDirectivePALMetadata(); 5629 5630 return true; 5631 } 5632 5633 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5634 unsigned RegNo) { 5635 5636 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5637 return isGFX9Plus(); 5638 5639 // GFX10+ has 2 more SGPRs 104 and 105. 5640 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5641 return hasSGPR104_SGPR105(); 5642 5643 switch (RegNo) { 5644 case AMDGPU::SRC_SHARED_BASE_LO: 5645 case AMDGPU::SRC_SHARED_BASE: 5646 case AMDGPU::SRC_SHARED_LIMIT_LO: 5647 case AMDGPU::SRC_SHARED_LIMIT: 5648 case AMDGPU::SRC_PRIVATE_BASE_LO: 5649 case AMDGPU::SRC_PRIVATE_BASE: 5650 case AMDGPU::SRC_PRIVATE_LIMIT_LO: 5651 case AMDGPU::SRC_PRIVATE_LIMIT: 5652 return isGFX9Plus(); 5653 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5654 return isGFX9Plus() && !isGFX11Plus(); 5655 case AMDGPU::TBA: 5656 case AMDGPU::TBA_LO: 5657 case AMDGPU::TBA_HI: 5658 case AMDGPU::TMA: 5659 case AMDGPU::TMA_LO: 5660 case AMDGPU::TMA_HI: 5661 return !isGFX9Plus(); 5662 case AMDGPU::XNACK_MASK: 5663 case AMDGPU::XNACK_MASK_LO: 5664 case AMDGPU::XNACK_MASK_HI: 5665 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5666 case AMDGPU::SGPR_NULL: 5667 return isGFX10Plus(); 5668 default: 5669 break; 5670 } 5671 5672 if (isCI()) 5673 return true; 5674 5675 if (isSI() || isGFX10Plus()) { 5676 // No flat_scr on SI.
5677 // On GFX10Plus flat scratch is not a valid register operand and can only be 5678 // accessed with s_setreg/s_getreg. 5679 switch (RegNo) { 5680 case AMDGPU::FLAT_SCR: 5681 case AMDGPU::FLAT_SCR_LO: 5682 case AMDGPU::FLAT_SCR_HI: 5683 return false; 5684 default: 5685 return true; 5686 } 5687 } 5688 5689 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5690 // SI/CI have. 5691 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5692 return hasSGPR102_SGPR103(); 5693 5694 return true; 5695 } 5696 5697 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands, 5698 StringRef Mnemonic, 5699 OperandMode Mode) { 5700 ParseStatus Res = parseVOPD(Operands); 5701 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement)) 5702 return Res; 5703 5704 // Try to parse with a custom parser 5705 Res = MatchOperandParserImpl(Operands, Mnemonic); 5706 5707 // If we successfully parsed the operand or if there was an error parsing, 5708 // we are done. 5709 // 5710 // If we are parsing after we reach EndOfStatement then this means we 5711 // are appending default values to the Operands list. This is only done 5712 // by a custom parser, so we shouldn't continue on to the generic parsing. 5713 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement)) 5714 return Res; 5715 5716 SMLoc RBraceLoc; 5717 SMLoc LBraceLoc = getLoc(); 5718 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5719 unsigned Prefix = Operands.size(); 5720 5721 for (;;) { 5722 auto Loc = getLoc(); 5723 Res = parseReg(Operands); 5724 if (Res.isNoMatch()) 5725 Error(Loc, "expected a register"); 5726 if (!Res.isSuccess()) 5727 return ParseStatus::Failure; 5728 5729 RBraceLoc = getLoc(); 5730 if (trySkipToken(AsmToken::RBrac)) 5731 break; 5732 5733 if (!skipToken(AsmToken::Comma, 5734 "expected a comma or a closing square bracket")) 5735 return ParseStatus::Failure; 5736 } 5737 5738 if (Operands.size() - Prefix > 1) { 5739 Operands.insert(Operands.begin() + Prefix, 5740 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5741 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5742 } 5743 5744 return ParseStatus::Success; 5745 } 5746 5747 return parseRegOrImm(Operands); 5748 } 5749 5750 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5751 // Clear any forced encodings from the previous instruction.
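// (Illustrative note on the suffixes handled below: a mnemonic such as
// "v_add_f32_e64" forces the 64-bit encoding and is returned with the "_e64"
// suffix stripped; "_e32", "_dpp", "_sdwa" and "_e64_dpp" are handled in the
// same way. The mnemonic is an example, not an exhaustive list.)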
5752 setForcedEncodingSize(0); 5753 setForcedDPP(false); 5754 setForcedSDWA(false); 5755 5756 if (Name.endswith("_e64_dpp")) { 5757 setForcedDPP(true); 5758 setForcedEncodingSize(64); 5759 return Name.substr(0, Name.size() - 8); 5760 } else if (Name.endswith("_e64")) { 5761 setForcedEncodingSize(64); 5762 return Name.substr(0, Name.size() - 4); 5763 } else if (Name.endswith("_e32")) { 5764 setForcedEncodingSize(32); 5765 return Name.substr(0, Name.size() - 4); 5766 } else if (Name.endswith("_dpp")) { 5767 setForcedDPP(true); 5768 return Name.substr(0, Name.size() - 4); 5769 } else if (Name.endswith("_sdwa")) { 5770 setForcedSDWA(true); 5771 return Name.substr(0, Name.size() - 5); 5772 } 5773 return Name; 5774 } 5775 5776 static void applyMnemonicAliases(StringRef &Mnemonic, 5777 const FeatureBitset &Features, 5778 unsigned VariantID); 5779 5780 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5781 StringRef Name, 5782 SMLoc NameLoc, OperandVector &Operands) { 5783 // Add the instruction mnemonic 5784 Name = parseMnemonicSuffix(Name); 5785 5786 // If the target architecture uses MnemonicAlias, call it here to parse 5787 // operands correctly. 5788 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5789 5790 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5791 5792 bool IsMIMG = Name.startswith("image_"); 5793 5794 while (!trySkipToken(AsmToken::EndOfStatement)) { 5795 OperandMode Mode = OperandMode_Default; 5796 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5797 Mode = OperandMode_NSA; 5798 ParseStatus Res = parseOperand(Operands, Name, Mode); 5799 5800 if (!Res.isSuccess()) { 5801 checkUnsupportedInstruction(Name, NameLoc); 5802 if (!Parser.hasPendingError()) { 5803 // FIXME: use real operand location rather than the current location. 5804 StringRef Msg = Res.isFailure() ? "failed parsing operand." 5805 : "not a valid operand."; 5806 Error(getLoc(), Msg); 5807 } 5808 while (!trySkipToken(AsmToken::EndOfStatement)) { 5809 lex(); 5810 } 5811 return true; 5812 } 5813 5814 // Eat the comma or space if there is one. 5815 trySkipToken(AsmToken::Comma); 5816 } 5817 5818 return false; 5819 } 5820 5821 //===----------------------------------------------------------------------===// 5822 // Utility functions 5823 //===----------------------------------------------------------------------===// 5824 5825 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name, 5826 OperandVector &Operands) { 5827 SMLoc S = getLoc(); 5828 if (!trySkipId(Name)) 5829 return ParseStatus::NoMatch; 5830 5831 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S)); 5832 return ParseStatus::Success; 5833 } 5834 5835 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, 5836 int64_t &IntVal) { 5837 5838 if (!trySkipId(Prefix, AsmToken::Colon)) 5839 return ParseStatus::NoMatch; 5840 5841 return parseExpr(IntVal) ? 
ParseStatus::Success : ParseStatus::Failure; 5842 } 5843 5844 ParseStatus AMDGPUAsmParser::parseIntWithPrefix( 5845 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 5846 std::function<bool(int64_t &)> ConvertResult) { 5847 SMLoc S = getLoc(); 5848 int64_t Value = 0; 5849 5850 ParseStatus Res = parseIntWithPrefix(Prefix, Value); 5851 if (!Res.isSuccess()) 5852 return Res; 5853 5854 if (ConvertResult && !ConvertResult(Value)) { 5855 Error(S, "invalid " + StringRef(Prefix) + " value."); 5856 } 5857 5858 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5859 return ParseStatus::Success; 5860 } 5861 5862 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix( 5863 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 5864 bool (*ConvertResult)(int64_t &)) { 5865 SMLoc S = getLoc(); 5866 if (!trySkipId(Prefix, AsmToken::Colon)) 5867 return ParseStatus::NoMatch; 5868 5869 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5870 return ParseStatus::Failure; 5871 5872 unsigned Val = 0; 5873 const unsigned MaxSize = 4; 5874 5875 // FIXME: How to verify the number of elements matches the number of src 5876 // operands? 5877 for (int I = 0; ; ++I) { 5878 int64_t Op; 5879 SMLoc Loc = getLoc(); 5880 if (!parseExpr(Op)) 5881 return ParseStatus::Failure; 5882 5883 if (Op != 0 && Op != 1) 5884 return Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5885 5886 Val |= (Op << I); 5887 5888 if (trySkipToken(AsmToken::RBrac)) 5889 break; 5890 5891 if (I + 1 == MaxSize) 5892 return Error(getLoc(), "expected a closing square bracket"); 5893 5894 if (!skipToken(AsmToken::Comma, "expected a comma")) 5895 return ParseStatus::Failure; 5896 } 5897 5898 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5899 return ParseStatus::Success; 5900 } 5901 5902 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name, 5903 OperandVector &Operands, 5904 AMDGPUOperand::ImmTy ImmTy) { 5905 int64_t Bit; 5906 SMLoc S = getLoc(); 5907 5908 if (trySkipId(Name)) { 5909 Bit = 1; 5910 } else if (trySkipId("no", Name)) { 5911 Bit = 0; 5912 } else { 5913 return ParseStatus::NoMatch; 5914 } 5915 5916 if (Name == "r128" && !hasMIMG_R128()) 5917 return Error(S, "r128 modifier is not supported on this GPU"); 5918 if (Name == "a16" && !hasA16()) 5919 return Error(S, "a16 modifier is not supported on this GPU"); 5920 5921 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5922 ImmTy = AMDGPUOperand::ImmTyR128A16; 5923 5924 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5925 return ParseStatus::Success; 5926 } 5927 5928 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, 5929 bool &Disabling) const { 5930 Disabling = Id.consume_front("no"); 5931 5932 if (isGFX940() && !Mnemo.startswith("s_")) { 5933 return StringSwitch<unsigned>(Id) 5934 .Case("nt", AMDGPU::CPol::NT) 5935 .Case("sc0", AMDGPU::CPol::SC0) 5936 .Case("sc1", AMDGPU::CPol::SC1) 5937 .Default(0); 5938 } 5939 5940 return StringSwitch<unsigned>(Id) 5941 .Case("dlc", AMDGPU::CPol::DLC) 5942 .Case("glc", AMDGPU::CPol::GLC) 5943 .Case("scc", AMDGPU::CPol::SCC) 5944 .Case("slc", AMDGPU::CPol::SLC) 5945 .Default(0); 5946 } 5947 5948 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5949 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5950 SMLoc OpLoc = getLoc(); 5951 unsigned Enabled = 0, Seen = 0; 5952 for (;;) { 5953 SMLoc S = getLoc(); 5954 bool Disabling; 5955 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling); 5956 if 
(!CPol) 5957 break; 5958 5959 lex(); 5960 5961 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) 5962 return Error(S, "dlc modifier is not supported on this GPU"); 5963 5964 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) 5965 return Error(S, "scc modifier is not supported on this GPU"); 5966 5967 if (Seen & CPol) 5968 return Error(S, "duplicate cache policy modifier"); 5969 5970 if (!Disabling) 5971 Enabled |= CPol; 5972 5973 Seen |= CPol; 5974 } 5975 5976 if (!Seen) 5977 return ParseStatus::NoMatch; 5978 5979 Operands.push_back( 5980 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol)); 5981 return ParseStatus::Success; 5982 } 5983 5984 static void addOptionalImmOperand( 5985 MCInst& Inst, const OperandVector& Operands, 5986 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5987 AMDGPUOperand::ImmTy ImmT, 5988 int64_t Default = 0) { 5989 auto i = OptionalIdx.find(ImmT); 5990 if (i != OptionalIdx.end()) { 5991 unsigned Idx = i->second; 5992 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5993 } else { 5994 Inst.addOperand(MCOperand::createImm(Default)); 5995 } 5996 } 5997 5998 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5999 StringRef &Value, 6000 SMLoc &StringLoc) { 6001 if (!trySkipId(Prefix, AsmToken::Colon)) 6002 return ParseStatus::NoMatch; 6003 6004 StringLoc = getLoc(); 6005 return parseId(Value, "expected an identifier") ? ParseStatus::Success 6006 : ParseStatus::Failure; 6007 } 6008 6009 //===----------------------------------------------------------------------===// 6010 // MTBUF format 6011 //===----------------------------------------------------------------------===// 6012 6013 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6014 int64_t MaxVal, 6015 int64_t &Fmt) { 6016 int64_t Val; 6017 SMLoc Loc = getLoc(); 6018 6019 auto Res = parseIntWithPrefix(Pref, Val); 6020 if (Res.isFailure()) 6021 return false; 6022 if (Res.isNoMatch()) 6023 return true; 6024 6025 if (Val < 0 || Val > MaxVal) { 6026 Error(Loc, Twine("out of range ", StringRef(Pref))); 6027 return false; 6028 } 6029 6030 Fmt = Val; 6031 return true; 6032 } 6033 6034 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6035 // values to live in a joint format operand in the MCInst encoding. 6036 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6037 using namespace llvm::AMDGPU::MTBUFFormat; 6038 6039 int64_t Dfmt = DFMT_UNDEF; 6040 int64_t Nfmt = NFMT_UNDEF; 6041 6042 // dfmt and nfmt can appear in either order, and each is optional. 6043 for (int I = 0; I < 2; ++I) { 6044 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6045 return ParseStatus::Failure; 6046 6047 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) 6048 return ParseStatus::Failure; 6049 6050 // Skip optional comma between dfmt/nfmt 6051 // but guard against 2 commas following each other. 6052 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6053 !peekToken().is(AsmToken::Comma)) { 6054 trySkipToken(AsmToken::Comma); 6055 } 6056 } 6057 6058 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6059 return ParseStatus::NoMatch; 6060 6061 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6062 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6063 6064 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6065 return ParseStatus::Success; 6066 } 6067 6068 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6069 using namespace llvm::AMDGPU::MTBUFFormat; 6070 6071 int64_t Fmt = UFMT_UNDEF; 6072 6073 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6074 return ParseStatus::Failure; 6075 6076 if (Fmt == UFMT_UNDEF) 6077 return ParseStatus::NoMatch; 6078 6079 Format = Fmt; 6080 return ParseStatus::Success; 6081 } 6082 6083 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6084 int64_t &Nfmt, 6085 StringRef FormatStr, 6086 SMLoc Loc) { 6087 using namespace llvm::AMDGPU::MTBUFFormat; 6088 int64_t Format; 6089 6090 Format = getDfmt(FormatStr); 6091 if (Format != DFMT_UNDEF) { 6092 Dfmt = Format; 6093 return true; 6094 } 6095 6096 Format = getNfmt(FormatStr, getSTI()); 6097 if (Format != NFMT_UNDEF) { 6098 Nfmt = Format; 6099 return true; 6100 } 6101 6102 Error(Loc, "unsupported format"); 6103 return false; 6104 } 6105 6106 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6107 SMLoc FormatLoc, 6108 int64_t &Format) { 6109 using namespace llvm::AMDGPU::MTBUFFormat; 6110 6111 int64_t Dfmt = DFMT_UNDEF; 6112 int64_t Nfmt = NFMT_UNDEF; 6113 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6114 return ParseStatus::Failure; 6115 6116 if (trySkipToken(AsmToken::Comma)) { 6117 StringRef Str; 6118 SMLoc Loc = getLoc(); 6119 if (!parseId(Str, "expected a format string") || 6120 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) 6121 return ParseStatus::Failure; 6122 if (Dfmt == DFMT_UNDEF) 6123 return Error(Loc, "duplicate numeric format"); 6124 if (Nfmt == NFMT_UNDEF) 6125 return Error(Loc, "duplicate data format"); 6126 } 6127 6128 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6129 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6130 6131 if (isGFX10Plus()) { 6132 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6133 if (Ufmt == UFMT_UNDEF) 6134 return Error(FormatLoc, "unsupported format"); 6135 Format = Ufmt; 6136 } else { 6137 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6138 } 6139 6140 return ParseStatus::Success; 6141 } 6142 6143 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6144 SMLoc Loc, 6145 int64_t &Format) { 6146 using namespace llvm::AMDGPU::MTBUFFormat; 6147 6148 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6149 if (Id == UFMT_UNDEF) 6150 return ParseStatus::NoMatch; 6151 6152 if (!isGFX10Plus()) 6153 return Error(Loc, "unified format is not supported on this GPU"); 6154 6155 Format = Id; 6156 return ParseStatus::Success; 6157 } 6158 6159 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6160 using namespace llvm::AMDGPU::MTBUFFormat; 6161 SMLoc Loc = getLoc(); 6162 6163 if (!parseExpr(Format)) 6164 return ParseStatus::Failure; 6165 if (!isValidFormatEncoding(Format, getSTI())) 6166 return Error(Loc, "out of range format"); 6167 6168 return ParseStatus::Success; 6169 } 6170 6171 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6172 using namespace llvm::AMDGPU::MTBUFFormat; 6173 6174 if (!trySkipId("format", AsmToken::Colon)) 6175 return ParseStatus::NoMatch; 6176 6177 if (trySkipToken(AsmToken::LBrac)) { 6178 StringRef FormatStr; 6179 SMLoc Loc = getLoc(); 6180 if (!parseId(FormatStr, "expected a format string")) 6181 return ParseStatus::Failure; 6182 6183 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6184 if (Res.isNoMatch()) 6185 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6186 if (!Res.isSuccess()) 6187 return Res; 6188 6189 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6190 return ParseStatus::Failure; 6191 6192 return ParseStatus::Success; 6193 } 6194 6195 return parseNumericFormat(Format); 6196 } 6197 6198 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6199 using namespace llvm::AMDGPU::MTBUFFormat; 6200 6201 int64_t Format = getDefaultFormatEncoding(getSTI()); 6202 ParseStatus Res; 6203 SMLoc Loc = getLoc(); 6204 6205 // Parse legacy format syntax. 6206 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6207 if (Res.isFailure()) 6208 return Res; 6209 6210 bool FormatFound = Res.isSuccess(); 6211 6212 Operands.push_back( 6213 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6214 6215 if (FormatFound) 6216 trySkipToken(AsmToken::Comma); 6217 6218 if (isToken(AsmToken::EndOfStatement)) { 6219 // We are expecting an soffset operand, 6220 // but let matcher handle the error. 6221 return ParseStatus::Success; 6222 } 6223 6224 // Parse soffset. 
6225 Res = parseRegOrImm(Operands); 6226 if (!Res.isSuccess()) 6227 return Res; 6228 6229 trySkipToken(AsmToken::Comma); 6230 6231 if (!FormatFound) { 6232 Res = parseSymbolicOrNumericFormat(Format); 6233 if (Res.isFailure()) 6234 return Res; 6235 if (Res.isSuccess()) { 6236 auto Size = Operands.size(); 6237 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6238 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6239 Op.setImm(Format); 6240 } 6241 return ParseStatus::Success; 6242 } 6243 6244 if (isId("format") && peekToken().is(AsmToken::Colon)) 6245 return Error(getLoc(), "duplicate format"); 6246 return ParseStatus::Success; 6247 } 6248 6249 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) { 6250 ParseStatus Res = 6251 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); 6252 if (Res.isNoMatch()) { 6253 Res = parseIntWithPrefix("inst_offset", Operands, 6254 AMDGPUOperand::ImmTyInstOffset); 6255 } 6256 return Res; 6257 } 6258 6259 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) { 6260 ParseStatus Res = 6261 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16); 6262 if (Res.isNoMatch()) 6263 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16); 6264 return Res; 6265 } 6266 6267 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) { 6268 ParseStatus Res = 6269 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP); 6270 if (Res.isNoMatch()) { 6271 Res = 6272 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP); 6273 } 6274 return Res; 6275 } 6276 6277 //===----------------------------------------------------------------------===// 6278 // Exp 6279 //===----------------------------------------------------------------------===// 6280 6281 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6282 OptionalImmIndexMap OptionalIdx; 6283 6284 unsigned OperandIdx[4]; 6285 unsigned EnMask = 0; 6286 int SrcIdx = 0; 6287 6288 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6289 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6290 6291 // Add the register arguments 6292 if (Op.isReg()) { 6293 assert(SrcIdx < 4); 6294 OperandIdx[SrcIdx] = Inst.size(); 6295 Op.addRegOperands(Inst, 1); 6296 ++SrcIdx; 6297 continue; 6298 } 6299 6300 if (Op.isOff()) { 6301 assert(SrcIdx < 4); 6302 OperandIdx[SrcIdx] = Inst.size(); 6303 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6304 ++SrcIdx; 6305 continue; 6306 } 6307 6308 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6309 Op.addImmOperands(Inst, 1); 6310 continue; 6311 } 6312 6313 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6314 continue; 6315 6316 // Handle optional arguments 6317 OptionalIdx[Op.getImmTy()] = i; 6318 } 6319 6320 assert(SrcIdx == 4); 6321 6322 bool Compr = false; 6323 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6324 Compr = true; 6325 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6326 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6327 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6328 } 6329 6330 for (auto i = 0; i < SrcIdx; ++i) { 6331 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6332 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6333 } 6334 } 6335 6336 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6337 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6338 6339 Inst.addOperand(MCOperand::createImm(EnMask)); 6340 } 6341 6342 //===----------------------------------------------------------------------===// 6343 // s_waitcnt 6344 //===----------------------------------------------------------------------===// 6345 6346 static bool 6347 encodeCnt( 6348 const AMDGPU::IsaVersion ISA, 6349 int64_t &IntVal, 6350 int64_t CntVal, 6351 bool Saturate, 6352 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6353 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6354 { 6355 bool Failed = false; 6356 6357 IntVal = encode(ISA, IntVal, CntVal); 6358 if (CntVal != decode(ISA, IntVal)) { 6359 if (Saturate) { 6360 IntVal = encode(ISA, IntVal, -1); 6361 } else { 6362 Failed = true; 6363 } 6364 } 6365 return Failed; 6366 } 6367 6368 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6369 6370 SMLoc CntLoc = getLoc(); 6371 StringRef CntName = getTokenStr(); 6372 6373 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6374 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6375 return false; 6376 6377 int64_t CntVal; 6378 SMLoc ValLoc = getLoc(); 6379 if (!parseExpr(CntVal)) 6380 return false; 6381 6382 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6383 6384 bool Failed = true; 6385 bool Sat = CntName.endswith("_sat"); 6386 6387 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6388 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6389 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6390 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6391 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6392 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6393 } else { 6394 Error(CntLoc, "invalid counter name " + CntName); 6395 return false; 6396 } 6397 6398 if (Failed) { 6399 Error(ValLoc, "too large value for " + CntName); 6400 return false; 6401 } 6402 6403 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6404 return false; 6405 6406 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6407 if (isToken(AsmToken::EndOfStatement)) { 6408 Error(getLoc(), "expected a counter name"); 6409 return false; 6410 } 6411 } 6412 6413 return true; 6414 } 6415 6416 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) { 6417 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6418 int64_t Waitcnt = getWaitcntBitMask(ISA); 6419 SMLoc S = getLoc(); 6420 6421 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6422 while (!isToken(AsmToken::EndOfStatement)) { 6423 if (!parseCnt(Waitcnt)) 6424 return ParseStatus::Failure; 6425 } 6426 } else { 6427 if (!parseExpr(Waitcnt)) 6428 return ParseStatus::Failure; 6429 } 6430 6431 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6432 return ParseStatus::Success; 6433 } 6434 6435 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6436 SMLoc FieldLoc = getLoc(); 6437 StringRef FieldName = getTokenStr(); 6438 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6439 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6440 return false; 6441 6442 SMLoc ValueLoc = getLoc(); 6443 StringRef ValueName = getTokenStr(); 6444 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6445 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6446 return false; 6447 6448 unsigned Shift; 6449 if (FieldName == "instid0") { 6450 Shift = 0; 6451 } else if (FieldName == "instskip") { 6452 Shift = 4; 6453 } else if (FieldName == "instid1") { 6454 Shift = 7; 6455 } else { 6456 Error(FieldLoc, "invalid field name " + FieldName); 6457 return false; 6458 } 6459 6460 int Value; 6461 if (Shift == 4) { 6462 // Parse values for instskip. 6463 Value = StringSwitch<int>(ValueName) 6464 .Case("SAME", 0) 6465 .Case("NEXT", 1) 6466 .Case("SKIP_1", 2) 6467 .Case("SKIP_2", 3) 6468 .Case("SKIP_3", 4) 6469 .Case("SKIP_4", 5) 6470 .Default(-1); 6471 } else { 6472 // Parse values for instid0 and instid1. 6473 Value = StringSwitch<int>(ValueName) 6474 .Case("NO_DEP", 0) 6475 .Case("VALU_DEP_1", 1) 6476 .Case("VALU_DEP_2", 2) 6477 .Case("VALU_DEP_3", 3) 6478 .Case("VALU_DEP_4", 4) 6479 .Case("TRANS32_DEP_1", 5) 6480 .Case("TRANS32_DEP_2", 6) 6481 .Case("TRANS32_DEP_3", 7) 6482 .Case("FMA_ACCUM_CYCLE_1", 8) 6483 .Case("SALU_CYCLE_1", 9) 6484 .Case("SALU_CYCLE_2", 10) 6485 .Case("SALU_CYCLE_3", 11) 6486 .Default(-1); 6487 } 6488 if (Value < 0) { 6489 Error(ValueLoc, "invalid value name " + ValueName); 6490 return false; 6491 } 6492 6493 Delay |= Value << Shift; 6494 return true; 6495 } 6496 6497 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) { 6498 int64_t Delay = 0; 6499 SMLoc S = getLoc(); 6500 6501 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6502 do { 6503 if (!parseDelay(Delay)) 6504 return ParseStatus::Failure; 6505 } while (trySkipToken(AsmToken::Pipe)); 6506 } else { 6507 if (!parseExpr(Delay)) 6508 return ParseStatus::Failure; 6509 } 6510 6511 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6512 return ParseStatus::Success; 6513 } 6514 6515 bool 6516 AMDGPUOperand::isSWaitCnt() const { 6517 return isImm(); 6518 } 6519 6520 bool AMDGPUOperand::isSDelayALU() const { return isImm(); } 6521 6522 //===----------------------------------------------------------------------===// 6523 // DepCtr 6524 //===----------------------------------------------------------------------===// 6525 6526 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6527 StringRef DepCtrName) { 6528 switch (ErrorId) { 6529 case OPR_ID_UNKNOWN: 6530 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6531 return; 6532 case OPR_ID_UNSUPPORTED: 6533 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6534 return; 6535 case OPR_ID_DUPLICATE: 6536 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6537 return; 6538 case OPR_VAL_INVALID: 6539 Error(Loc, Twine("invalid value for ", DepCtrName)); 6540 return; 6541 default: 6542 assert(false); 6543 } 6544 } 6545 6546 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6547 6548 using namespace llvm::AMDGPU::DepCtr; 6549 6550 SMLoc DepCtrLoc = getLoc(); 6551 StringRef DepCtrName = getTokenStr(); 6552 6553 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6554 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6555 return false; 6556 6557 int64_t ExprVal; 6558 if (!parseExpr(ExprVal)) 6559 return false; 6560 6561 unsigned PrevOprMask = UsedOprMask; 6562 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6563 6564 if (CntVal < 0) { 6565 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6566 return false; 6567 } 6568 6569 if (!skipToken(AsmToken::RParen, "expected 
a closing parenthesis")) 6570 return false; 6571 6572 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6573 if (isToken(AsmToken::EndOfStatement)) { 6574 Error(getLoc(), "expected a counter name"); 6575 return false; 6576 } 6577 } 6578 6579 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6580 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6581 return true; 6582 } 6583 6584 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) { 6585 using namespace llvm::AMDGPU::DepCtr; 6586 6587 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6588 SMLoc Loc = getLoc(); 6589 6590 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6591 unsigned UsedOprMask = 0; 6592 while (!isToken(AsmToken::EndOfStatement)) { 6593 if (!parseDepCtr(DepCtr, UsedOprMask)) 6594 return ParseStatus::Failure; 6595 } 6596 } else { 6597 if (!parseExpr(DepCtr)) 6598 return ParseStatus::Failure; 6599 } 6600 6601 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6602 return ParseStatus::Success; 6603 } 6604 6605 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6606 6607 //===----------------------------------------------------------------------===// 6608 // hwreg 6609 //===----------------------------------------------------------------------===// 6610 6611 bool 6612 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6613 OperandInfoTy &Offset, 6614 OperandInfoTy &Width) { 6615 using namespace llvm::AMDGPU::Hwreg; 6616 6617 // The register may be specified by name or using a numeric code 6618 HwReg.Loc = getLoc(); 6619 if (isToken(AsmToken::Identifier) && 6620 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6621 HwReg.IsSymbolic = true; 6622 lex(); // skip register name 6623 } else if (!parseExpr(HwReg.Id, "a register name")) { 6624 return false; 6625 } 6626 6627 if (trySkipToken(AsmToken::RParen)) 6628 return true; 6629 6630 // parse optional params 6631 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6632 return false; 6633 6634 Offset.Loc = getLoc(); 6635 if (!parseExpr(Offset.Id)) 6636 return false; 6637 6638 if (!skipToken(AsmToken::Comma, "expected a comma")) 6639 return false; 6640 6641 Width.Loc = getLoc(); 6642 return parseExpr(Width.Id) && 6643 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6644 } 6645 6646 bool 6647 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6648 const OperandInfoTy &Offset, 6649 const OperandInfoTy &Width) { 6650 6651 using namespace llvm::AMDGPU::Hwreg; 6652 6653 if (HwReg.IsSymbolic) { 6654 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6655 Error(HwReg.Loc, 6656 "specified hardware register is not supported on this GPU"); 6657 return false; 6658 } 6659 } else { 6660 if (!isValidHwreg(HwReg.Id)) { 6661 Error(HwReg.Loc, 6662 "invalid code of hardware register: only 6-bit values are legal"); 6663 return false; 6664 } 6665 } 6666 if (!isValidHwregOffset(Offset.Id)) { 6667 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6668 return false; 6669 } 6670 if (!isValidHwregWidth(Width.Id)) { 6671 Error(Width.Loc, 6672 "invalid bitfield width: only values from 1 to 32 are legal"); 6673 return false; 6674 } 6675 return true; 6676 } 6677 6678 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6679 using namespace llvm::AMDGPU::Hwreg; 6680 6681 int64_t ImmVal = 0; 6682 SMLoc Loc = getLoc(); 6683 6684 if (trySkipId("hwreg", AsmToken::LParen)) { 6685 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6686 OperandInfoTy 
Offset(OFFSET_DEFAULT_); 6687 OperandInfoTy Width(WIDTH_DEFAULT_); 6688 if (parseHwregBody(HwReg, Offset, Width) && 6689 validateHwreg(HwReg, Offset, Width)) { 6690 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6691 } else { 6692 return ParseStatus::Failure; 6693 } 6694 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6695 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 6696 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 6697 } else { 6698 return ParseStatus::Failure; 6699 } 6700 6701 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6702 return ParseStatus::Success; 6703 } 6704 6705 bool AMDGPUOperand::isHwreg() const { 6706 return isImmTy(ImmTyHwreg); 6707 } 6708 6709 //===----------------------------------------------------------------------===// 6710 // sendmsg 6711 //===----------------------------------------------------------------------===// 6712 6713 bool 6714 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6715 OperandInfoTy &Op, 6716 OperandInfoTy &Stream) { 6717 using namespace llvm::AMDGPU::SendMsg; 6718 6719 Msg.Loc = getLoc(); 6720 if (isToken(AsmToken::Identifier) && 6721 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6722 Msg.IsSymbolic = true; 6723 lex(); // skip message name 6724 } else if (!parseExpr(Msg.Id, "a message name")) { 6725 return false; 6726 } 6727 6728 if (trySkipToken(AsmToken::Comma)) { 6729 Op.IsDefined = true; 6730 Op.Loc = getLoc(); 6731 if (isToken(AsmToken::Identifier) && 6732 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6733 lex(); // skip operation name 6734 } else if (!parseExpr(Op.Id, "an operation name")) { 6735 return false; 6736 } 6737 6738 if (trySkipToken(AsmToken::Comma)) { 6739 Stream.IsDefined = true; 6740 Stream.Loc = getLoc(); 6741 if (!parseExpr(Stream.Id)) 6742 return false; 6743 } 6744 } 6745 6746 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6747 } 6748 6749 bool 6750 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6751 const OperandInfoTy &Op, 6752 const OperandInfoTy &Stream) { 6753 using namespace llvm::AMDGPU::SendMsg; 6754 6755 // Validation strictness depends on whether message is specified 6756 // in a symbolic or in a numeric form. In the latter case 6757 // only encoding possibility is checked. 
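  // For example, a symbolic form such as sendmsg(MSG_INTERRUPT) is subject to
  // all of the checks below, whereas a plain numeric message id is only
  // checked for encodability. (MSG_INTERRUPT is an illustrative message name.)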
6758 bool Strict = Msg.IsSymbolic; 6759 6760 if (Strict) { 6761 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6762 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6763 return false; 6764 } 6765 } else { 6766 if (!isValidMsgId(Msg.Id, getSTI())) { 6767 Error(Msg.Loc, "invalid message id"); 6768 return false; 6769 } 6770 } 6771 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6772 if (Op.IsDefined) { 6773 Error(Op.Loc, "message does not support operations"); 6774 } else { 6775 Error(Msg.Loc, "missing message operation"); 6776 } 6777 return false; 6778 } 6779 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6780 Error(Op.Loc, "invalid operation id"); 6781 return false; 6782 } 6783 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6784 Stream.IsDefined) { 6785 Error(Stream.Loc, "message operation does not support streams"); 6786 return false; 6787 } 6788 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6789 Error(Stream.Loc, "invalid message stream id"); 6790 return false; 6791 } 6792 return true; 6793 } 6794 6795 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) { 6796 using namespace llvm::AMDGPU::SendMsg; 6797 6798 int64_t ImmVal = 0; 6799 SMLoc Loc = getLoc(); 6800 6801 if (trySkipId("sendmsg", AsmToken::LParen)) { 6802 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6803 OperandInfoTy Op(OP_NONE_); 6804 OperandInfoTy Stream(STREAM_ID_NONE_); 6805 if (parseSendMsgBody(Msg, Op, Stream) && 6806 validateSendMsg(Msg, Op, Stream)) { 6807 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6808 } else { 6809 return ParseStatus::Failure; 6810 } 6811 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6812 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 6813 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 6814 } else { 6815 return ParseStatus::Failure; 6816 } 6817 6818 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6819 return ParseStatus::Success; 6820 } 6821 6822 bool AMDGPUOperand::isSendMsg() const { 6823 return isImmTy(ImmTySendMsg); 6824 } 6825 6826 //===----------------------------------------------------------------------===// 6827 // v_interp 6828 //===----------------------------------------------------------------------===// 6829 6830 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6831 StringRef Str; 6832 SMLoc S = getLoc(); 6833 6834 if (!parseId(Str)) 6835 return ParseStatus::NoMatch; 6836 6837 int Slot = StringSwitch<int>(Str) 6838 .Case("p10", 0) 6839 .Case("p20", 1) 6840 .Case("p0", 2) 6841 .Default(-1); 6842 6843 if (Slot == -1) 6844 return Error(S, "invalid interpolation slot"); 6845 6846 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6847 AMDGPUOperand::ImmTyInterpSlot)); 6848 return ParseStatus::Success; 6849 } 6850 6851 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6852 StringRef Str; 6853 SMLoc S = getLoc(); 6854 6855 if (!parseId(Str)) 6856 return ParseStatus::NoMatch; 6857 6858 if (!Str.startswith("attr")) 6859 return Error(S, "invalid interpolation attribute"); 6860 6861 StringRef Chan = Str.take_back(2); 6862 int AttrChan = StringSwitch<int>(Chan) 6863 .Case(".x", 0) 6864 .Case(".y", 1) 6865 .Case(".z", 2) 6866 .Case(".w", 3) 6867 .Default(-1); 6868 if (AttrChan == -1) 6869 return Error(S, "invalid or missing interpolation attribute channel"); 6870 6871 Str = Str.drop_back(2).drop_front(4); 6872 6873 uint8_t Attr; 6874 if (Str.getAsInteger(10, Attr)) 6875 return Error(S, "invalid or missing 
interpolation attribute number"); 6876 6877 if (Attr > 32) 6878 return Error(S, "out of bounds interpolation attribute number"); 6879 6880 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6881 6882 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6883 AMDGPUOperand::ImmTyInterpAttr)); 6884 Operands.push_back(AMDGPUOperand::CreateImm( 6885 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan)); 6886 return ParseStatus::Success; 6887 } 6888 6889 //===----------------------------------------------------------------------===// 6890 // exp 6891 //===----------------------------------------------------------------------===// 6892 6893 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6894 using namespace llvm::AMDGPU::Exp; 6895 6896 StringRef Str; 6897 SMLoc S = getLoc(); 6898 6899 if (!parseId(Str)) 6900 return ParseStatus::NoMatch; 6901 6902 unsigned Id = getTgtId(Str); 6903 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) 6904 return Error(S, (Id == ET_INVALID) 6905 ? "invalid exp target" 6906 : "exp target is not supported on this GPU"); 6907 6908 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6909 AMDGPUOperand::ImmTyExpTgt)); 6910 return ParseStatus::Success; 6911 } 6912 6913 //===----------------------------------------------------------------------===// 6914 // parser helpers 6915 //===----------------------------------------------------------------------===// 6916 6917 bool 6918 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6919 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6920 } 6921 6922 bool 6923 AMDGPUAsmParser::isId(const StringRef Id) const { 6924 return isId(getToken(), Id); 6925 } 6926 6927 bool 6928 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6929 return getTokenKind() == Kind; 6930 } 6931 6932 StringRef AMDGPUAsmParser::getId() const { 6933 return isToken(AsmToken::Identifier) ? 
getTokenStr() : StringRef(); 6934 } 6935 6936 bool 6937 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6938 if (isId(Id)) { 6939 lex(); 6940 return true; 6941 } 6942 return false; 6943 } 6944 6945 bool 6946 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6947 if (isToken(AsmToken::Identifier)) { 6948 StringRef Tok = getTokenStr(); 6949 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6950 lex(); 6951 return true; 6952 } 6953 } 6954 return false; 6955 } 6956 6957 bool 6958 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6959 if (isId(Id) && peekToken().is(Kind)) { 6960 lex(); 6961 lex(); 6962 return true; 6963 } 6964 return false; 6965 } 6966 6967 bool 6968 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6969 if (isToken(Kind)) { 6970 lex(); 6971 return true; 6972 } 6973 return false; 6974 } 6975 6976 bool 6977 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6978 const StringRef ErrMsg) { 6979 if (!trySkipToken(Kind)) { 6980 Error(getLoc(), ErrMsg); 6981 return false; 6982 } 6983 return true; 6984 } 6985 6986 bool 6987 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6988 SMLoc S = getLoc(); 6989 6990 const MCExpr *Expr; 6991 if (Parser.parseExpression(Expr)) 6992 return false; 6993 6994 if (Expr->evaluateAsAbsolute(Imm)) 6995 return true; 6996 6997 if (Expected.empty()) { 6998 Error(S, "expected absolute expression"); 6999 } else { 7000 Error(S, Twine("expected ", Expected) + 7001 Twine(" or an absolute expression")); 7002 } 7003 return false; 7004 } 7005 7006 bool 7007 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7008 SMLoc S = getLoc(); 7009 7010 const MCExpr *Expr; 7011 if (Parser.parseExpression(Expr)) 7012 return false; 7013 7014 int64_t IntVal; 7015 if (Expr->evaluateAsAbsolute(IntVal)) { 7016 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7017 } else { 7018 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7019 } 7020 return true; 7021 } 7022 7023 bool 7024 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7025 if (isToken(AsmToken::String)) { 7026 Val = getToken().getStringContents(); 7027 lex(); 7028 return true; 7029 } else { 7030 Error(getLoc(), ErrMsg); 7031 return false; 7032 } 7033 } 7034 7035 bool 7036 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7037 if (isToken(AsmToken::Identifier)) { 7038 Val = getTokenStr(); 7039 lex(); 7040 return true; 7041 } else { 7042 if (!ErrMsg.empty()) 7043 Error(getLoc(), ErrMsg); 7044 return false; 7045 } 7046 } 7047 7048 AsmToken 7049 AMDGPUAsmParser::getToken() const { 7050 return Parser.getTok(); 7051 } 7052 7053 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 7054 return isToken(AsmToken::EndOfStatement) 7055 ? 
getToken() 7056 : getLexer().peekTok(ShouldSkipSpace); 7057 } 7058 7059 void 7060 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7061 auto TokCount = getLexer().peekTokens(Tokens); 7062 7063 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7064 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7065 } 7066 7067 AsmToken::TokenKind 7068 AMDGPUAsmParser::getTokenKind() const { 7069 return getLexer().getKind(); 7070 } 7071 7072 SMLoc 7073 AMDGPUAsmParser::getLoc() const { 7074 return getToken().getLoc(); 7075 } 7076 7077 StringRef 7078 AMDGPUAsmParser::getTokenStr() const { 7079 return getToken().getString(); 7080 } 7081 7082 void 7083 AMDGPUAsmParser::lex() { 7084 Parser.Lex(); 7085 } 7086 7087 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const { 7088 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7089 } 7090 7091 SMLoc 7092 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7093 const OperandVector &Operands) const { 7094 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7095 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7096 if (Test(Op)) 7097 return Op.getStartLoc(); 7098 } 7099 return getInstLoc(Operands); 7100 } 7101 7102 SMLoc 7103 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7104 const OperandVector &Operands) const { 7105 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7106 return getOperandLoc(Test, Operands); 7107 } 7108 7109 SMLoc 7110 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7111 const OperandVector &Operands) const { 7112 auto Test = [=](const AMDGPUOperand& Op) { 7113 return Op.isRegKind() && Op.getReg() == Reg; 7114 }; 7115 return getOperandLoc(Test, Operands); 7116 } 7117 7118 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands, 7119 bool SearchMandatoryLiterals) const { 7120 auto Test = [](const AMDGPUOperand& Op) { 7121 return Op.IsImmKindLiteral() || Op.isExpr(); 7122 }; 7123 SMLoc Loc = getOperandLoc(Test, Operands); 7124 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands)) 7125 Loc = getMandatoryLitLoc(Operands); 7126 return Loc; 7127 } 7128 7129 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const { 7130 auto Test = [](const AMDGPUOperand &Op) { 7131 return Op.IsImmKindMandatoryLiteral(); 7132 }; 7133 return getOperandLoc(Test, Operands); 7134 } 7135 7136 SMLoc 7137 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7138 auto Test = [](const AMDGPUOperand& Op) { 7139 return Op.isImmKindConst(); 7140 }; 7141 return getOperandLoc(Test, Operands); 7142 } 7143 7144 //===----------------------------------------------------------------------===// 7145 // swizzle 7146 //===----------------------------------------------------------------------===// 7147 7148 LLVM_READNONE 7149 static unsigned 7150 encodeBitmaskPerm(const unsigned AndMask, 7151 const unsigned OrMask, 7152 const unsigned XorMask) { 7153 using namespace llvm::AMDGPU::Swizzle; 7154 7155 return BITMASK_PERM_ENC | 7156 (AndMask << BITMASK_AND_SHIFT) | 7157 (OrMask << BITMASK_OR_SHIFT) | 7158 (XorMask << BITMASK_XOR_SHIFT); 7159 } 7160 7161 bool 7162 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7163 const unsigned MinVal, 7164 const unsigned MaxVal, 7165 const StringRef ErrMsg, 7166 SMLoc &Loc) { 7167 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7168 return false; 7169 } 7170 Loc = getLoc(); 7171 if (!parseExpr(Op)) { 7172 return false; 7173 } 7174 if (Op < MinVal || Op > MaxVal) { 7175 Error(Loc, ErrMsg); 7176 return false; 7177 } 
7178 7179 return true; 7180 } 7181 7182 bool 7183 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7184 const unsigned MinVal, 7185 const unsigned MaxVal, 7186 const StringRef ErrMsg) { 7187 SMLoc Loc; 7188 for (unsigned i = 0; i < OpNum; ++i) { 7189 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7190 return false; 7191 } 7192 7193 return true; 7194 } 7195 7196 bool 7197 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7198 using namespace llvm::AMDGPU::Swizzle; 7199 7200 int64_t Lane[LANE_NUM]; 7201 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7202 "expected a 2-bit lane id")) { 7203 Imm = QUAD_PERM_ENC; 7204 for (unsigned I = 0; I < LANE_NUM; ++I) { 7205 Imm |= Lane[I] << (LANE_SHIFT * I); 7206 } 7207 return true; 7208 } 7209 return false; 7210 } 7211 7212 bool 7213 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7214 using namespace llvm::AMDGPU::Swizzle; 7215 7216 SMLoc Loc; 7217 int64_t GroupSize; 7218 int64_t LaneIdx; 7219 7220 if (!parseSwizzleOperand(GroupSize, 7221 2, 32, 7222 "group size must be in the interval [2,32]", 7223 Loc)) { 7224 return false; 7225 } 7226 if (!isPowerOf2_64(GroupSize)) { 7227 Error(Loc, "group size must be a power of two"); 7228 return false; 7229 } 7230 if (parseSwizzleOperand(LaneIdx, 7231 0, GroupSize - 1, 7232 "lane id must be in the interval [0,group size - 1]", 7233 Loc)) { 7234 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7235 return true; 7236 } 7237 return false; 7238 } 7239 7240 bool 7241 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7242 using namespace llvm::AMDGPU::Swizzle; 7243 7244 SMLoc Loc; 7245 int64_t GroupSize; 7246 7247 if (!parseSwizzleOperand(GroupSize, 7248 2, 32, 7249 "group size must be in the interval [2,32]", 7250 Loc)) { 7251 return false; 7252 } 7253 if (!isPowerOf2_64(GroupSize)) { 7254 Error(Loc, "group size must be a power of two"); 7255 return false; 7256 } 7257 7258 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7259 return true; 7260 } 7261 7262 bool 7263 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7264 using namespace llvm::AMDGPU::Swizzle; 7265 7266 SMLoc Loc; 7267 int64_t GroupSize; 7268 7269 if (!parseSwizzleOperand(GroupSize, 7270 1, 16, 7271 "group size must be in the interval [1,16]", 7272 Loc)) { 7273 return false; 7274 } 7275 if (!isPowerOf2_64(GroupSize)) { 7276 Error(Loc, "group size must be a power of two"); 7277 return false; 7278 } 7279 7280 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7281 return true; 7282 } 7283 7284 bool 7285 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7286 using namespace llvm::AMDGPU::Swizzle; 7287 7288 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7289 return false; 7290 } 7291 7292 StringRef Ctl; 7293 SMLoc StrLoc = getLoc(); 7294 if (!parseString(Ctl)) { 7295 return false; 7296 } 7297 if (Ctl.size() != BITMASK_WIDTH) { 7298 Error(StrLoc, "expected a 5-character mask"); 7299 return false; 7300 } 7301 7302 unsigned AndMask = 0; 7303 unsigned OrMask = 0; 7304 unsigned XorMask = 0; 7305 7306 for (size_t i = 0; i < Ctl.size(); ++i) { 7307 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7308 switch(Ctl[i]) { 7309 default: 7310 Error(StrLoc, "invalid mask"); 7311 return false; 7312 case '0': 7313 break; 7314 case '1': 7315 OrMask |= Mask; 7316 break; 7317 case 'p': 7318 AndMask |= Mask; 7319 break; 7320 case 'i': 7321 AndMask |= Mask; 7322 XorMask |= Mask; 7323 break; 7324 } 7325 } 7326 7327 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7328 return true; 
7329 } 7330 7331 bool 7332 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7333 7334 SMLoc OffsetLoc = getLoc(); 7335 7336 if (!parseExpr(Imm, "a swizzle macro")) { 7337 return false; 7338 } 7339 if (!isUInt<16>(Imm)) { 7340 Error(OffsetLoc, "expected a 16-bit offset"); 7341 return false; 7342 } 7343 return true; 7344 } 7345 7346 bool 7347 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7348 using namespace llvm::AMDGPU::Swizzle; 7349 7350 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 7351 7352 SMLoc ModeLoc = getLoc(); 7353 bool Ok = false; 7354 7355 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7356 Ok = parseSwizzleQuadPerm(Imm); 7357 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7358 Ok = parseSwizzleBitmaskPerm(Imm); 7359 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7360 Ok = parseSwizzleBroadcast(Imm); 7361 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7362 Ok = parseSwizzleSwap(Imm); 7363 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7364 Ok = parseSwizzleReverse(Imm); 7365 } else { 7366 Error(ModeLoc, "expected a swizzle mode"); 7367 } 7368 7369 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7370 } 7371 7372 return false; 7373 } 7374 7375 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) { 7376 SMLoc S = getLoc(); 7377 int64_t Imm = 0; 7378 7379 if (trySkipId("offset")) { 7380 7381 bool Ok = false; 7382 if (skipToken(AsmToken::Colon, "expected a colon")) { 7383 if (trySkipId("swizzle")) { 7384 Ok = parseSwizzleMacro(Imm); 7385 } else { 7386 Ok = parseSwizzleOffset(Imm); 7387 } 7388 } 7389 7390 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7391 7392 return Ok ? ParseStatus::Success : ParseStatus::Failure; 7393 } 7394 return ParseStatus::NoMatch; 7395 } 7396 7397 bool 7398 AMDGPUOperand::isSwizzle() const { 7399 return isImmTy(ImmTySwizzle); 7400 } 7401 7402 //===----------------------------------------------------------------------===// 7403 // VGPR Index Mode 7404 //===----------------------------------------------------------------------===// 7405 7406 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7407 7408 using namespace llvm::AMDGPU::VGPRIndexMode; 7409 7410 if (trySkipToken(AsmToken::RParen)) { 7411 return OFF; 7412 } 7413 7414 int64_t Imm = 0; 7415 7416 while (true) { 7417 unsigned Mode = 0; 7418 SMLoc S = getLoc(); 7419 7420 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7421 if (trySkipId(IdSymbolic[ModeId])) { 7422 Mode = 1 << ModeId; 7423 break; 7424 } 7425 } 7426 7427 if (Mode == 0) { 7428 Error(S, (Imm == 0)?
7429 "expected a VGPR index mode or a closing parenthesis" : 7430 "expected a VGPR index mode"); 7431 return UNDEF; 7432 } 7433 7434 if (Imm & Mode) { 7435 Error(S, "duplicate VGPR index mode"); 7436 return UNDEF; 7437 } 7438 Imm |= Mode; 7439 7440 if (trySkipToken(AsmToken::RParen)) 7441 break; 7442 if (!skipToken(AsmToken::Comma, 7443 "expected a comma or a closing parenthesis")) 7444 return UNDEF; 7445 } 7446 7447 return Imm; 7448 } 7449 7450 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7451 7452 using namespace llvm::AMDGPU::VGPRIndexMode; 7453 7454 int64_t Imm = 0; 7455 SMLoc S = getLoc(); 7456 7457 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7458 Imm = parseGPRIdxMacro(); 7459 if (Imm == UNDEF) 7460 return ParseStatus::Failure; 7461 } else { 7462 if (getParser().parseAbsoluteExpression(Imm)) 7463 return ParseStatus::Failure; 7464 if (Imm < 0 || !isUInt<4>(Imm)) 7465 return Error(S, "invalid immediate: only 4-bit values are legal"); 7466 } 7467 7468 Operands.push_back( 7469 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7470 return ParseStatus::Success; 7471 } 7472 7473 bool AMDGPUOperand::isGPRIdxMode() const { 7474 return isImmTy(ImmTyGprIdxMode); 7475 } 7476 7477 //===----------------------------------------------------------------------===// 7478 // sopp branch targets 7479 //===----------------------------------------------------------------------===// 7480 7481 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) { 7482 7483 // Make sure we are not parsing something 7484 // that looks like a label or an expression but is not. 7485 // This will improve error messages. 7486 if (isRegister() || isModifier()) 7487 return ParseStatus::NoMatch; 7488 7489 if (!parseExpr(Operands)) 7490 return ParseStatus::Failure; 7491 7492 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7493 assert(Opr.isImm() || Opr.isExpr()); 7494 SMLoc Loc = Opr.getStartLoc(); 7495 7496 // Currently we do not support arbitrary expressions as branch targets. 7497 // Only labels and absolute expressions are accepted. 
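  // Illustrative examples (the label name below is hypothetical): a bare
  // label reference such as
  //   s_branch loop_end
  // or a small absolute expression such as
  //   s_branch 8
  // is accepted, while a relocatable expression like "loop_end+4" is an
  // MCExpr that is not a plain symbol reference and is rejected below with
  // "expected an absolute expression or a label".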
7498 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7499 Error(Loc, "expected an absolute expression or a label"); 7500 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7501 Error(Loc, "expected a 16-bit signed jump offset"); 7502 } 7503 7504 return ParseStatus::Success; 7505 } 7506 7507 //===----------------------------------------------------------------------===// 7508 // Boolean holding registers 7509 //===----------------------------------------------------------------------===// 7510 7511 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7512 return parseReg(Operands); 7513 } 7514 7515 //===----------------------------------------------------------------------===// 7516 // mubuf 7517 //===----------------------------------------------------------------------===// 7518 7519 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7520 const OperandVector &Operands, 7521 bool IsAtomic) { 7522 OptionalImmIndexMap OptionalIdx; 7523 unsigned FirstOperandIdx = 1; 7524 bool IsAtomicReturn = false; 7525 7526 if (IsAtomic) { 7527 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7528 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7529 if (!Op.isCPol()) 7530 continue; 7531 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7532 break; 7533 } 7534 7535 if (!IsAtomicReturn) { 7536 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7537 if (NewOpc != -1) 7538 Inst.setOpcode(NewOpc); 7539 } 7540 7541 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7542 SIInstrFlags::IsAtomicRet; 7543 } 7544 7545 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7546 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7547 7548 // Add the register arguments 7549 if (Op.isReg()) { 7550 Op.addRegOperands(Inst, 1); 7551 // Insert a tied src for atomic return dst. 7552 // This cannot be postponed as subsequent calls to 7553 // addImmOperands rely on correct number of MC operands. 7554 if (IsAtomicReturn && i == FirstOperandIdx) 7555 Op.addRegOperands(Inst, 1); 7556 continue; 7557 } 7558 7559 // Handle the case where soffset is an immediate 7560 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7561 Op.addImmOperands(Inst, 1); 7562 continue; 7563 } 7564 7565 // Handle tokens like 'offen' which are sometimes hard-coded into the 7566 // asm string. There are no MCInst operands for these. 
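    // Illustrative example (register numbers are hypothetical): in
    //   buffer_load_dword v0, v1, s[4:7], 0 offen offset:16
    // the registers and the plain soffset immediate 0 are added above, the
    // "offen" token is skipped here with no MCInst operand, and "offset:16"
    // is recorded in OptionalIdx and emitted by addOptionalImmOperand below.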
7567 if (Op.isToken()) { 7568 continue; 7569 } 7570 assert(Op.isImm()); 7571 7572 // Handle optional arguments 7573 OptionalIdx[Op.getImmTy()] = i; 7574 } 7575 7576 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7577 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7578 } 7579 7580 //===----------------------------------------------------------------------===// 7581 // SMEM 7582 //===----------------------------------------------------------------------===// 7583 7584 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7585 OptionalImmIndexMap OptionalIdx; 7586 bool IsAtomicReturn = false; 7587 7588 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7589 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7590 if (!Op.isCPol()) 7591 continue; 7592 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7593 break; 7594 } 7595 7596 if (!IsAtomicReturn) { 7597 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7598 if (NewOpc != -1) 7599 Inst.setOpcode(NewOpc); 7600 } 7601 7602 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7603 SIInstrFlags::IsAtomicRet; 7604 7605 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7606 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7607 7608 // Add the register arguments 7609 if (Op.isReg()) { 7610 Op.addRegOperands(Inst, 1); 7611 if (IsAtomicReturn && i == 1) 7612 Op.addRegOperands(Inst, 1); 7613 continue; 7614 } 7615 7616 // Handle the case where soffset is an immediate 7617 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7618 Op.addImmOperands(Inst, 1); 7619 continue; 7620 } 7621 7622 // Handle tokens like 'offen' which are sometimes hard-coded into the 7623 // asm string. There are no MCInst operands for these. 7624 if (Op.isToken()) { 7625 continue; 7626 } 7627 assert(Op.isImm()); 7628 7629 // Handle optional arguments 7630 OptionalIdx[Op.getImmTy()] = i; 7631 } 7632 7633 if ((int)Inst.getNumOperands() <= 7634 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7635 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7636 AMDGPUOperand::ImmTySMEMOffsetMod); 7637 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7638 } 7639 7640 //===----------------------------------------------------------------------===// 7641 // smrd 7642 //===----------------------------------------------------------------------===// 7643 7644 bool AMDGPUOperand::isSMRDOffset8() const { 7645 return isImmLiteral() && isUInt<8>(getImm()); 7646 } 7647 7648 bool AMDGPUOperand::isSMEMOffset() const { 7649 // Offset range is checked later by validator. 7650 return isImmLiteral(); 7651 } 7652 7653 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7654 // 32-bit literals are only supported on CI and we only want to use them 7655 // when the offset is > 8-bits. 
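  // For example (illustrative): an s_load_dword offset of 0x1ff does not fit
  // in the 8-bit SMRD offset field (see isSMRDOffset8 above), so on CI it
  // falls back to this 32-bit literal form, while an offset of 0xff still
  // uses the 8-bit encoding.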
7656 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7657 } 7658 7659 //===----------------------------------------------------------------------===// 7660 // vop3 7661 //===----------------------------------------------------------------------===// 7662 7663 static bool ConvertOmodMul(int64_t &Mul) { 7664 if (Mul != 1 && Mul != 2 && Mul != 4) 7665 return false; 7666 7667 Mul >>= 1; 7668 return true; 7669 } 7670 7671 static bool ConvertOmodDiv(int64_t &Div) { 7672 if (Div == 1) { 7673 Div = 0; 7674 return true; 7675 } 7676 7677 if (Div == 2) { 7678 Div = 3; 7679 return true; 7680 } 7681 7682 return false; 7683 } 7684 7685 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7686 // This is intentional and ensures compatibility with sp3. 7687 // See bug 35397 for details. 7688 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) { 7689 if (BoundCtrl == 0 || BoundCtrl == 1) { 7690 if (!isGFX11Plus()) 7691 BoundCtrl = 1; 7692 return true; 7693 } 7694 return false; 7695 } 7696 7697 void AMDGPUAsmParser::onBeginOfFile() { 7698 if (!getParser().getStreamer().getTargetStreamer() || 7699 getSTI().getTargetTriple().getArch() == Triple::r600) 7700 return; 7701 7702 if (!getTargetStreamer().getTargetID()) 7703 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(), 7704 // TODO: Should try to check code object version from directive??? 7705 AMDGPU::getAmdhsaCodeObjectVersion()); 7706 7707 if (isHsaAbiVersion3AndAbove(&getSTI())) 7708 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7709 } 7710 7711 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) { 7712 StringRef Name = getTokenStr(); 7713 if (Name == "mul") { 7714 return parseIntWithPrefix("mul", Operands, 7715 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7716 } 7717 7718 if (Name == "div") { 7719 return parseIntWithPrefix("div", Operands, 7720 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7721 } 7722 7723 return ParseStatus::NoMatch; 7724 } 7725 7726 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to 7727 // the number of src operands present, then copies that bit into src0_modifiers. 7728 void cvtVOP3DstOpSelOnly(MCInst &Inst) { 7729 int Opc = Inst.getOpcode(); 7730 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7731 if (OpSelIdx == -1) 7732 return; 7733 7734 int SrcNum; 7735 const int Ops[] = { AMDGPU::OpName::src0, 7736 AMDGPU::OpName::src1, 7737 AMDGPU::OpName::src2 }; 7738 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]); 7739 ++SrcNum) 7740 ; 7741 assert(SrcNum > 0); 7742 7743 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7744 7745 if ((OpSel & (1 << SrcNum)) != 0) { 7746 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7747 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7748 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7749 } 7750 } 7751 7752 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, 7753 const OperandVector &Operands) { 7754 cvtVOP3P(Inst, Operands); 7755 cvtVOP3DstOpSelOnly(Inst); 7756 } 7757 7758 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 7759 OptionalImmIndexMap &OptionalIdx) { 7760 cvtVOP3P(Inst, Operands, OptionalIdx); 7761 cvtVOP3DstOpSelOnly(Inst); 7762 } 7763 7764 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7765 return 7766 // 1. 
This operand is input modifiers 7767 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7768 // 2. This is not last operand 7769 && Desc.NumOperands > (OpNum + 1) 7770 // 3. Next operand is register class 7771 && Desc.operands()[OpNum + 1].RegClass != -1 7772 // 4. Next register is not tied to any other operand 7773 && Desc.getOperandConstraint(OpNum + 1, 7774 MCOI::OperandConstraint::TIED_TO) == -1; 7775 } 7776 7777 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7778 { 7779 OptionalImmIndexMap OptionalIdx; 7780 unsigned Opc = Inst.getOpcode(); 7781 7782 unsigned I = 1; 7783 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7784 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7785 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7786 } 7787 7788 for (unsigned E = Operands.size(); I != E; ++I) { 7789 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7790 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7791 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7792 } else if (Op.isInterpSlot() || Op.isInterpAttr() || 7793 Op.isInterpAttrChan()) { 7794 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7795 } else if (Op.isImmModifier()) { 7796 OptionalIdx[Op.getImmTy()] = I; 7797 } else { 7798 llvm_unreachable("unhandled operand type"); 7799 } 7800 } 7801 7802 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high)) 7803 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7804 AMDGPUOperand::ImmTyHigh); 7805 7806 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 7807 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7808 AMDGPUOperand::ImmTyClampSI); 7809 7810 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 7811 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7812 AMDGPUOperand::ImmTyOModSI); 7813 } 7814 7815 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 7816 { 7817 OptionalImmIndexMap OptionalIdx; 7818 unsigned Opc = Inst.getOpcode(); 7819 7820 unsigned I = 1; 7821 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7822 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7823 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7824 } 7825 7826 for (unsigned E = Operands.size(); I != E; ++I) { 7827 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7828 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7829 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7830 } else if (Op.isImmModifier()) { 7831 OptionalIdx[Op.getImmTy()] = I; 7832 } else { 7833 llvm_unreachable("unhandled operand type"); 7834 } 7835 } 7836 7837 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7838 7839 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7840 if (OpSelIdx != -1) 7841 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 7842 7843 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 7844 7845 if (OpSelIdx == -1) 7846 return; 7847 7848 const int Ops[] = { AMDGPU::OpName::src0, 7849 AMDGPU::OpName::src1, 7850 AMDGPU::OpName::src2 }; 7851 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7852 AMDGPU::OpName::src1_modifiers, 7853 AMDGPU::OpName::src2_modifiers }; 7854 7855 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7856 7857 for (int J = 0; J < 3; ++J) { 7858 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7859 if (OpIdx == -1) 7860 break; 7861 7862 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7863 uint32_t ModVal = 
Inst.getOperand(ModIdx).getImm(); 7864 7865 if ((OpSel & (1 << J)) != 0) 7866 ModVal |= SISrcMods::OP_SEL_0; 7867 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 7868 (OpSel & (1 << 3)) != 0) 7869 ModVal |= SISrcMods::DST_OP_SEL; 7870 7871 Inst.getOperand(ModIdx).setImm(ModVal); 7872 } 7873 } 7874 7875 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7876 OptionalImmIndexMap &OptionalIdx) { 7877 unsigned Opc = Inst.getOpcode(); 7878 7879 unsigned I = 1; 7880 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7881 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7882 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7883 } 7884 7885 for (unsigned E = Operands.size(); I != E; ++I) { 7886 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7887 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7888 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7889 } else if (Op.isImmModifier()) { 7890 OptionalIdx[Op.getImmTy()] = I; 7891 } else if (Op.isRegOrImm()) { 7892 Op.addRegOrImmOperands(Inst, 1); 7893 } else { 7894 llvm_unreachable("unhandled operand type"); 7895 } 7896 } 7897 7898 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 7899 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7900 AMDGPUOperand::ImmTyClampSI); 7901 7902 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 7903 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7904 AMDGPUOperand::ImmTyOModSI); 7905 7906 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7907 // it has src2 register operand that is tied to dst operand 7908 // we don't allow modifiers for this operand in assembler so src2_modifiers 7909 // should be 0. 7910 if (isMAC(Opc)) { 7911 auto it = Inst.begin(); 7912 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7913 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7914 ++it; 7915 // Copy the operand to ensure it's not invalidated when Inst grows. 7916 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7917 } 7918 } 7919 7920 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7921 OptionalImmIndexMap OptionalIdx; 7922 cvtVOP3(Inst, Operands, OptionalIdx); 7923 } 7924 7925 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7926 OptionalImmIndexMap &OptIdx) { 7927 const int Opc = Inst.getOpcode(); 7928 const MCInstrDesc &Desc = MII.get(Opc); 7929 7930 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7931 7932 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || 7933 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) { 7934 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods 7935 Inst.addOperand(Inst.getOperand(0)); 7936 } 7937 7938 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) { 7939 assert(!IsPacked); 7940 Inst.addOperand(Inst.getOperand(0)); 7941 } 7942 7943 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7944 // instruction, and then figure out where to actually put the modifiers 7945 7946 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7947 if (OpSelIdx != -1) { 7948 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7949 } 7950 7951 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7952 if (OpSelHiIdx != -1) { 7953 int DefaultVal = IsPacked ? 
-1 : 0; 7954 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7955 DefaultVal); 7956 } 7957 7958 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7959 if (NegLoIdx != -1) { 7960 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7961 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7962 } 7963 7964 const int Ops[] = { AMDGPU::OpName::src0, 7965 AMDGPU::OpName::src1, 7966 AMDGPU::OpName::src2 }; 7967 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7968 AMDGPU::OpName::src1_modifiers, 7969 AMDGPU::OpName::src2_modifiers }; 7970 7971 unsigned OpSel = 0; 7972 unsigned OpSelHi = 0; 7973 unsigned NegLo = 0; 7974 unsigned NegHi = 0; 7975 7976 if (OpSelIdx != -1) 7977 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7978 7979 if (OpSelHiIdx != -1) 7980 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7981 7982 if (NegLoIdx != -1) { 7983 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7984 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7985 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7986 } 7987 7988 for (int J = 0; J < 3; ++J) { 7989 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7990 if (OpIdx == -1) 7991 break; 7992 7993 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7994 7995 if (ModIdx == -1) 7996 continue; 7997 7998 uint32_t ModVal = 0; 7999 8000 if ((OpSel & (1 << J)) != 0) 8001 ModVal |= SISrcMods::OP_SEL_0; 8002 8003 if ((OpSelHi & (1 << J)) != 0) 8004 ModVal |= SISrcMods::OP_SEL_1; 8005 8006 if ((NegLo & (1 << J)) != 0) 8007 ModVal |= SISrcMods::NEG; 8008 8009 if ((NegHi & (1 << J)) != 0) 8010 ModVal |= SISrcMods::NEG_HI; 8011 8012 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8013 } 8014 } 8015 8016 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8017 OptionalImmIndexMap OptIdx; 8018 cvtVOP3(Inst, Operands, OptIdx); 8019 cvtVOP3P(Inst, Operands, OptIdx); 8020 } 8021 8022 //===----------------------------------------------------------------------===// 8023 // VOPD 8024 //===----------------------------------------------------------------------===// 8025 8026 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { 8027 if (!hasVOPD(getSTI())) 8028 return ParseStatus::NoMatch; 8029 8030 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { 8031 SMLoc S = getLoc(); 8032 lex(); 8033 lex(); 8034 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); 8035 SMLoc OpYLoc = getLoc(); 8036 StringRef OpYName; 8037 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) { 8038 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc)); 8039 return ParseStatus::Success; 8040 } 8041 return Error(OpYLoc, "expected a VOPDY instruction after ::"); 8042 } 8043 return ParseStatus::NoMatch; 8044 } 8045 8046 // Create VOPD MCInst operands using parsed assembler operands. 
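// Illustrative example (hypothetical register choices):
//   v_dual_mul_f32 v0, v1, v2 :: v_dual_add_f32 v3, v4, v5
// is converted so that the MCInst operands are dstX (v0), dstY (v3), then
// the OpX sources (v1, v2), then the OpY sources (v4, v5), matching the
// ordering comment inside the function.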
8047 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { 8048 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer 8049 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); 8050 if (Op.isReg()) { 8051 Op.addRegOperands(Inst, 1); 8052 return; 8053 } 8054 if (Op.isImm()) { 8055 Op.addImmOperands(Inst, 1); 8056 return; 8057 } 8058 llvm_unreachable("Unhandled operand type in cvtVOPD"); 8059 }; 8060 8061 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII); 8062 8063 // MCInst operands are ordered as follows: 8064 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 8065 8066 for (auto CompIdx : VOPD::COMPONENTS) { 8067 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands()); 8068 } 8069 8070 for (auto CompIdx : VOPD::COMPONENTS) { 8071 const auto &CInfo = InstInfo[CompIdx]; 8072 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum(); 8073 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx) 8074 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx)); 8075 if (CInfo.hasSrc2Acc()) 8076 addOp(CInfo.getIndexOfDstInParsedOperands()); 8077 } 8078 } 8079 8080 //===----------------------------------------------------------------------===// 8081 // dpp 8082 //===----------------------------------------------------------------------===// 8083 8084 bool AMDGPUOperand::isDPP8() const { 8085 return isImmTy(ImmTyDPP8); 8086 } 8087 8088 bool AMDGPUOperand::isDPPCtrl() const { 8089 using namespace AMDGPU::DPP; 8090 8091 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8092 if (result) { 8093 int64_t Imm = getImm(); 8094 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8095 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8096 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8097 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8098 (Imm == DppCtrl::WAVE_SHL1) || 8099 (Imm == DppCtrl::WAVE_ROL1) || 8100 (Imm == DppCtrl::WAVE_SHR1) || 8101 (Imm == DppCtrl::WAVE_ROR1) || 8102 (Imm == DppCtrl::ROW_MIRROR) || 8103 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8104 (Imm == DppCtrl::BCAST15) || 8105 (Imm == DppCtrl::BCAST31) || 8106 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8107 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8108 } 8109 return false; 8110 } 8111 8112 //===----------------------------------------------------------------------===// 8113 // mAI 8114 //===----------------------------------------------------------------------===// 8115 8116 bool AMDGPUOperand::isBLGP() const { 8117 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8118 } 8119 8120 bool AMDGPUOperand::isCBSZ() const { 8121 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8122 } 8123 8124 bool AMDGPUOperand::isABID() const { 8125 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8126 } 8127 8128 bool AMDGPUOperand::isS16Imm() const { 8129 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8130 } 8131 8132 bool AMDGPUOperand::isU16Imm() const { 8133 return isImmLiteral() && isUInt<16>(getImm()); 8134 } 8135 8136 //===----------------------------------------------------------------------===// 8137 // dim 8138 //===----------------------------------------------------------------------===// 8139 8140 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8141 // We want to allow "dim:1D" etc., 8142 // 
but the initial 1 is tokenized as an integer. 8143 std::string Token; 8144 if (isToken(AsmToken::Integer)) { 8145 SMLoc Loc = getToken().getEndLoc(); 8146 Token = std::string(getTokenStr()); 8147 lex(); 8148 if (getLoc() != Loc) 8149 return false; 8150 } 8151 8152 StringRef Suffix; 8153 if (!parseId(Suffix)) 8154 return false; 8155 Token += Suffix; 8156 8157 StringRef DimId = Token; 8158 if (DimId.startswith("SQ_RSRC_IMG_")) 8159 DimId = DimId.drop_front(12); 8160 8161 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8162 if (!DimInfo) 8163 return false; 8164 8165 Encoding = DimInfo->Encoding; 8166 return true; 8167 } 8168 8169 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8170 if (!isGFX10Plus()) 8171 return ParseStatus::NoMatch; 8172 8173 SMLoc S = getLoc(); 8174 8175 if (!trySkipId("dim", AsmToken::Colon)) 8176 return ParseStatus::NoMatch; 8177 8178 unsigned Encoding; 8179 SMLoc Loc = getLoc(); 8180 if (!parseDimId(Encoding)) 8181 return Error(Loc, "invalid dim value"); 8182 8183 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8184 AMDGPUOperand::ImmTyDim)); 8185 return ParseStatus::Success; 8186 } 8187 8188 //===----------------------------------------------------------------------===// 8189 // dpp 8190 //===----------------------------------------------------------------------===// 8191 8192 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8193 SMLoc S = getLoc(); 8194 8195 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8196 return ParseStatus::NoMatch; 8197 8198 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8199 8200 int64_t Sels[8]; 8201 8202 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8203 return ParseStatus::Failure; 8204 8205 for (size_t i = 0; i < 8; ++i) { 8206 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8207 return ParseStatus::Failure; 8208 8209 SMLoc Loc = getLoc(); 8210 if (getParser().parseAbsoluteExpression(Sels[i])) 8211 return ParseStatus::Failure; 8212 if (0 > Sels[i] || 7 < Sels[i]) 8213 return Error(Loc, "expected a 3-bit value"); 8214 } 8215 8216 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8217 return ParseStatus::Failure; 8218 8219 unsigned DPP8 = 0; 8220 for (size_t i = 0; i < 8; ++i) 8221 DPP8 |= (Sels[i] << (i * 3)); 8222 8223 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8224 return ParseStatus::Success; 8225 } 8226 8227 bool 8228 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8229 const OperandVector &Operands) { 8230 if (Ctrl == "row_newbcast") 8231 return isGFX90A(); 8232 8233 if (Ctrl == "row_share" || 8234 Ctrl == "row_xmask") 8235 return isGFX10Plus(); 8236 8237 if (Ctrl == "wave_shl" || 8238 Ctrl == "wave_shr" || 8239 Ctrl == "wave_rol" || 8240 Ctrl == "wave_ror" || 8241 Ctrl == "row_bcast") 8242 return isVI() || isGFX9(); 8243 8244 return Ctrl == "row_mirror" || 8245 Ctrl == "row_half_mirror" || 8246 Ctrl == "quad_perm" || 8247 Ctrl == "row_shl" || 8248 Ctrl == "row_shr" || 8249 Ctrl == "row_ror"; 8250 } 8251 8252 int64_t 8253 AMDGPUAsmParser::parseDPPCtrlPerm() { 8254 // quad_perm:[%d,%d,%d,%d] 8255 8256 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8257 return -1; 8258 8259 int64_t Val = 0; 8260 for (int i = 0; i < 4; ++i) { 8261 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8262 return -1; 8263 8264 int64_t Temp; 8265 SMLoc Loc = getLoc(); 8266 if (getParser().parseAbsoluteExpression(Temp)) 8267 return -1; 8268 if 
(Temp < 0 || Temp > 3) { 8269 Error(Loc, "expected a 2-bit value"); 8270 return -1; 8271 } 8272 8273 Val += (Temp << i * 2); 8274 } 8275 8276 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8277 return -1; 8278 8279 return Val; 8280 } 8281 8282 int64_t 8283 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8284 using namespace AMDGPU::DPP; 8285 8286 // sel:%d 8287 8288 int64_t Val; 8289 SMLoc Loc = getLoc(); 8290 8291 if (getParser().parseAbsoluteExpression(Val)) 8292 return -1; 8293 8294 struct DppCtrlCheck { 8295 int64_t Ctrl; 8296 int Lo; 8297 int Hi; 8298 }; 8299 8300 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8301 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8302 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8303 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8304 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8305 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8306 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8307 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8308 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8309 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8310 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8311 .Default({-1, 0, 0}); 8312 8313 bool Valid; 8314 if (Check.Ctrl == -1) { 8315 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8316 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8317 } else { 8318 Valid = Check.Lo <= Val && Val <= Check.Hi; 8319 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 8320 } 8321 8322 if (!Valid) { 8323 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8324 return -1; 8325 } 8326 8327 return Val; 8328 } 8329 8330 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8331 using namespace AMDGPU::DPP; 8332 8333 if (!isToken(AsmToken::Identifier) || 8334 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8335 return ParseStatus::NoMatch; 8336 8337 SMLoc S = getLoc(); 8338 int64_t Val = -1; 8339 StringRef Ctrl; 8340 8341 parseId(Ctrl); 8342 8343 if (Ctrl == "row_mirror") { 8344 Val = DppCtrl::ROW_MIRROR; 8345 } else if (Ctrl == "row_half_mirror") { 8346 Val = DppCtrl::ROW_HALF_MIRROR; 8347 } else { 8348 if (skipToken(AsmToken::Colon, "expected a colon")) { 8349 if (Ctrl == "quad_perm") { 8350 Val = parseDPPCtrlPerm(); 8351 } else { 8352 Val = parseDPPCtrlSel(Ctrl); 8353 } 8354 } 8355 } 8356 8357 if (Val == -1) 8358 return ParseStatus::Failure; 8359 8360 Operands.push_back( 8361 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8362 return ParseStatus::Success; 8363 } 8364 8365 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 8366 bool IsDPP8) { 8367 OptionalImmIndexMap OptionalIdx; 8368 unsigned Opc = Inst.getOpcode(); 8369 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8370 8371 // MAC instructions are special because they have 'old' 8372 // operand which is not tied to dst (but assumed to be). 8373 // They also have dummy unused src2_modifiers. 
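  // Illustrative note on the handling below: for such a MAC opcode the loop
  // synthesizes the missing MCInst operands in place, copying the dst
  // register into 'old' and inserting a zero immediate for the unused
  // src2_modifiers, so that operand indices stay aligned with the
  // MCInstrDesc.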
8374 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old); 8375 int Src2ModIdx = 8376 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); 8377 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 && 8378 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1; 8379 8380 unsigned I = 1; 8381 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8382 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8383 } 8384 8385 int Fi = 0; 8386 for (unsigned E = Operands.size(); I != E; ++I) { 8387 8388 if (IsMAC) { 8389 int NumOperands = Inst.getNumOperands(); 8390 if (OldIdx == NumOperands) { 8391 // Handle old operand 8392 constexpr int DST_IDX = 0; 8393 Inst.addOperand(Inst.getOperand(DST_IDX)); 8394 } else if (Src2ModIdx == NumOperands) { 8395 // Add unused dummy src2_modifiers 8396 Inst.addOperand(MCOperand::createImm(0)); 8397 } 8398 } 8399 8400 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8401 MCOI::TIED_TO); 8402 if (TiedTo != -1) { 8403 assert((unsigned)TiedTo < Inst.getNumOperands()); 8404 // handle tied old or src2 for MAC instructions 8405 Inst.addOperand(Inst.getOperand(TiedTo)); 8406 } 8407 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8408 // Add the register arguments 8409 if (IsDPP8 && Op.isDppFI()) { 8410 Fi = Op.getImm(); 8411 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8412 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8413 } else if (Op.isReg()) { 8414 Op.addRegOperands(Inst, 1); 8415 } else if (Op.isImm() && 8416 Desc.operands()[Inst.getNumOperands()].RegClass != -1) { 8417 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8418 Op.addImmOperands(Inst, 1); 8419 } else if (Op.isImm()) { 8420 OptionalIdx[Op.getImmTy()] = I; 8421 } else { 8422 llvm_unreachable("unhandled operand type"); 8423 } 8424 } 8425 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8426 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8427 8428 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8429 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8430 8431 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8432 cvtVOP3P(Inst, Operands, OptionalIdx); 8433 else if (Desc.TSFlags & SIInstrFlags::VOP3) 8434 cvtVOP3OpSel(Inst, Operands, OptionalIdx); 8435 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { 8436 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8437 } 8438 8439 if (IsDPP8) { 8440 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8441 using namespace llvm::AMDGPU::DPP; 8442 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8443 } else { 8444 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8445 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8446 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8447 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8448 8449 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) 8450 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8451 AMDGPUOperand::ImmTyDppFI); 8452 } 8453 } 8454 8455 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8456 OptionalImmIndexMap OptionalIdx; 8457 8458 unsigned I = 1; 8459 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8460 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8461 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8462 } 8463 8464 int Fi = 0; 8465 for (unsigned E = Operands.size(); I != E; ++I) { 8466 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8467 MCOI::TIED_TO); 8468 if (TiedTo != -1) { 8469 assert((unsigned)TiedTo < Inst.getNumOperands()); 8470 // handle tied old or src2 for MAC instructions 8471 Inst.addOperand(Inst.getOperand(TiedTo)); 8472 } 8473 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8474 // Add the register arguments 8475 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8476 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8477 // Skip it. 8478 continue; 8479 } 8480 8481 if (IsDPP8) { 8482 if (Op.isDPP8()) { 8483 Op.addImmOperands(Inst, 1); 8484 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8485 Op.addRegWithFPInputModsOperands(Inst, 2); 8486 } else if (Op.isDppFI()) { 8487 Fi = Op.getImm(); 8488 } else if (Op.isReg()) { 8489 Op.addRegOperands(Inst, 1); 8490 } else { 8491 llvm_unreachable("Invalid operand type"); 8492 } 8493 } else { 8494 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8495 Op.addRegWithFPInputModsOperands(Inst, 2); 8496 } else if (Op.isReg()) { 8497 Op.addRegOperands(Inst, 1); 8498 } else if (Op.isDPPCtrl()) { 8499 Op.addImmOperands(Inst, 1); 8500 } else if (Op.isImm()) { 8501 // Handle optional arguments 8502 OptionalIdx[Op.getImmTy()] = I; 8503 } else { 8504 llvm_unreachable("Invalid operand type"); 8505 } 8506 } 8507 } 8508 8509 if (IsDPP8) { 8510 using namespace llvm::AMDGPU::DPP; 8511 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8512 } else { 8513 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8514 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8515 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8516 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) { 8517 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8518 AMDGPUOperand::ImmTyDppFI); 8519 } 8520 } 8521 } 8522 8523 //===----------------------------------------------------------------------===// 8524 // sdwa 8525 //===----------------------------------------------------------------------===// 8526 8527 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, 8528 StringRef Prefix, 8529 AMDGPUOperand::ImmTy Type) { 8530 using namespace llvm::AMDGPU::SDWA; 8531 8532 SMLoc S = getLoc(); 8533 StringRef Value; 8534 8535 SMLoc StringLoc; 8536 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc); 8537 if (!Res.isSuccess()) 8538 return Res; 8539 8540 int64_t Int; 8541 Int = StringSwitch<int64_t>(Value) 8542 .Case("BYTE_0", SdwaSel::BYTE_0) 8543 .Case("BYTE_1", SdwaSel::BYTE_1) 8544 .Case("BYTE_2", SdwaSel::BYTE_2) 8545 .Case("BYTE_3", SdwaSel::BYTE_3) 8546 .Case("WORD_0", SdwaSel::WORD_0) 8547 .Case("WORD_1", SdwaSel::WORD_1) 8548 .Case("DWORD", SdwaSel::DWORD) 8549 .Default(0xffffffff); 8550 8551 if (Int == 0xffffffff) 8552 return Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8553 8554 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8555 return ParseStatus::Success; 8556 } 8557 8558 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8559 using namespace llvm::AMDGPU::SDWA; 8560 8561 SMLoc S = getLoc(); 8562 StringRef Value; 8563 8564 SMLoc StringLoc; 8565 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8566 if (!Res.isSuccess()) 8567 return Res; 8568 8569 int64_t Int; 8570 Int = StringSwitch<int64_t>(Value) 8571 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8572 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8573 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8574 .Default(0xffffffff); 8575 8576 if (Int == 0xffffffff) 8577 return Error(StringLoc, "invalid dst_unused value"); 8578 8579 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused)); 8580 return ParseStatus::Success; 8581 } 8582 8583 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8584 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8585 } 8586 8587 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8588 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8589 } 8590 8591 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8592 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8593 } 8594 8595 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8596 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8597 } 8598 8599 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8600 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8601 } 8602 8603 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8604 uint64_t BasicInstType, 8605 bool SkipDstVcc, 8606 bool SkipSrcVcc) { 8607 using namespace llvm::AMDGPU::SDWA; 8608 8609 OptionalImmIndexMap OptionalIdx; 8610 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8611 bool SkippedVcc = false; 8612 8613 
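  // Descriptive overview of the conversion below: defs are added first, each
  // source with modifiers then contributes two MCInst operands (the modifier
  // immediate and the value), optional immediates are collected into
  // OptionalIdx, and the switch over BasicInstType at the end re-emits them
  // (clamp, omod, dst_sel, dst_unused, src0_sel, src1_sel as appropriate).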
unsigned I = 1; 8614 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8615 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8616 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8617 } 8618 8619 for (unsigned E = Operands.size(); I != E; ++I) { 8620 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8621 if (SkipVcc && !SkippedVcc && Op.isReg() && 8622 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8623 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8624 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8625 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8626 // Skip VCC only if we didn't skip it on previous iteration. 8627 // Note that src0 and src1 occupy 2 slots each because of modifiers. 8628 if (BasicInstType == SIInstrFlags::VOP2 && 8629 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8630 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8631 SkippedVcc = true; 8632 continue; 8633 } else if (BasicInstType == SIInstrFlags::VOPC && 8634 Inst.getNumOperands() == 0) { 8635 SkippedVcc = true; 8636 continue; 8637 } 8638 } 8639 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8640 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8641 } else if (Op.isImm()) { 8642 // Handle optional arguments 8643 OptionalIdx[Op.getImmTy()] = I; 8644 } else { 8645 llvm_unreachable("Invalid operand type"); 8646 } 8647 SkippedVcc = false; 8648 } 8649 8650 const unsigned Opc = Inst.getOpcode(); 8651 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 && 8652 Opc != AMDGPU::V_NOP_sdwa_vi) { 8653 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8654 switch (BasicInstType) { 8655 case SIInstrFlags::VOP1: 8656 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8657 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8658 AMDGPUOperand::ImmTyClampSI, 0); 8659 8660 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8661 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8662 AMDGPUOperand::ImmTyOModSI, 0); 8663 8664 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel)) 8665 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8666 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); 8667 8668 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused)) 8669 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8670 AMDGPUOperand::ImmTySDWADstUnused, 8671 DstUnused::UNUSED_PRESERVE); 8672 8673 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 8674 break; 8675 8676 case SIInstrFlags::VOP2: 8677 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8678 8679 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod)) 8680 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8681 8682 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); 8683 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE); 8684 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 8685 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD); 8686 break; 8687 8688 case SIInstrFlags::VOPC: 8689 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp)) 8690 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8691 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 8692 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD); 8693 break; 8694 8695 default: 8696 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); 8697 } 8698 } 8699 8700 // special case v_mac_{f16, f32}: 8701 // it has src2 register operand that is tied to dst operand 8702 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8703 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8704 auto it = Inst.begin(); 8705 std::advance( 8706 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8707 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8708 } 8709 } 8710 8711 /// Force static initialization. 8712 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8713 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target()); 8714 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8715 } 8716 8717 #define GET_REGISTER_MATCHER 8718 #define GET_MATCHER_IMPLEMENTATION 8719 #define GET_MNEMONIC_SPELL_CHECKER 8720 #define GET_MNEMONIC_CHECKER 8721 #include "AMDGPUGenAsmMatcher.inc" 8722 8723 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands, 8724 unsigned MCK) { 8725 switch (MCK) { 8726 case MCK_addr64: 8727 return parseTokenOp("addr64", Operands); 8728 case MCK_done: 8729 return parseTokenOp("done", Operands); 8730 case MCK_idxen: 8731 return parseTokenOp("idxen", Operands); 8732 case MCK_lds: 8733 return parseTokenOp("lds", Operands); 8734 case MCK_offen: 8735 return parseTokenOp("offen", Operands); 8736 case MCK_off: 8737 return parseTokenOp("off", Operands); 8738 case MCK_row_95_en: 8739 return parseTokenOp("row_en", Operands); 8740 case MCK_gds: 8741 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS); 8742 case MCK_tfe: 8743 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE); 8744 } 8745 return tryCustomParseOperand(Operands, MCK); 8746 } 8747 8748 // This function should be defined after auto-generated include so that we have 8749 // MatchClassKind enum defined 8750 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8751 unsigned Kind) { 8752 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8753 // But MatchInstructionImpl() expects to meet token and fails to validate 8754 // operand. This method checks if we are given immediate operand but expect to 8755 // get corresponding token. 8756 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8757 switch (Kind) { 8758 case MCK_addr64: 8759 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8760 case MCK_gds: 8761 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8762 case MCK_lds: 8763 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8764 case MCK_idxen: 8765 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8766 case MCK_offen: 8767 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8768 case MCK_tfe: 8769 return Operand.isTFE() ? Match_Success : Match_InvalidOperand; 8770 case MCK_SSrcB32: 8771 // When operands have expression values, they will return true for isToken, 8772 // because it is not possible to distinguish between a token and an 8773 // expression at parse time. MatchInstructionImpl() will always try to 8774 // match an operand as a token, when isToken returns true, and when the 8775 // name of the expression is not a valid token, the match will fail, 8776 // so we need to handle it here. 
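    // Illustrative example (the symbol name is hypothetical): in
    //   s_mov_b32 s0, my_sym
    // the operand is an expression for which isToken() is also true, so the
    // generated matcher would fail to match it as a token; accepting it here
    // lets it match the SSrcB32 operand class instead.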
8777 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8778 case MCK_SSrcF32: 8779 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8780 case MCK_SOPPBrTarget: 8781 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand; 8782 case MCK_VReg32OrOff: 8783 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8784 case MCK_InterpSlot: 8785 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 8786 case MCK_InterpAttr: 8787 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8788 case MCK_InterpAttrChan: 8789 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand; 8790 case MCK_SReg_64: 8791 case MCK_SReg_64_XEXEC: 8792 // Null is defined as a 32-bit register but 8793 // it should also be enabled with 64-bit operands. 8794 // The following code enables it for SReg_64 operands 8795 // used as source and destination. Remaining source 8796 // operands are handled in isInlinableImm. 8797 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8798 default: 8799 return Match_InvalidOperand; 8800 } 8801 } 8802 8803 //===----------------------------------------------------------------------===// 8804 // endpgm 8805 //===----------------------------------------------------------------------===// 8806 8807 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) { 8808 SMLoc S = getLoc(); 8809 int64_t Imm = 0; 8810 8811 if (!parseExpr(Imm)) { 8812 // The operand is optional, if not present default to 0 8813 Imm = 0; 8814 } 8815 8816 if (!isUInt<16>(Imm)) 8817 return Error(S, "expected a 16-bit value"); 8818 8819 Operands.push_back( 8820 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8821 return ParseStatus::Success; 8822 } 8823 8824 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8825 8826 //===----------------------------------------------------------------------===// 8827 // LDSDIR 8828 //===----------------------------------------------------------------------===// 8829 8830 bool AMDGPUOperand::isWaitVDST() const { 8831 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); 8832 } 8833 8834 //===----------------------------------------------------------------------===// 8835 // VINTERP 8836 //===----------------------------------------------------------------------===// 8837 8838 bool AMDGPUOperand::isWaitEXP() const { 8839 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm()); 8840 } 8841