1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? SISrcMods::ABS : 0u; 107 Operand |= Neg ? 
SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
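    // Illustrative example (assumption, not from the original source): in
    //   ds_write_b32 v1, v2 gds
    // the trailing 'gds' may reach us as an MCSymbolRefExpr rather than a
    // plain token; reporting it as a token here lets the matcher use the
    // symbol name ("gds") as the token text.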
225 return isSymbolRefExpr(); 226 } 227 228 bool isSymbolRefExpr() const { 229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 230 } 231 232 bool isImm() const override { 233 return Kind == Immediate; 234 } 235 236 bool isInlinableImm(MVT type) const; 237 bool isLiteralImm(MVT type) const; 238 239 bool isRegKind() const { 240 return Kind == Register; 241 } 242 243 bool isReg() const override { 244 return isRegKind() && !hasModifiers(); 245 } 246 247 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 248 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 249 } 250 251 bool isRegOrImmWithInt16InputMods() const { 252 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 253 } 254 255 bool isRegOrImmWithInt32InputMods() const { 256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 257 } 258 259 bool isRegOrImmWithInt64InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 261 } 262 263 bool isRegOrImmWithFP16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 265 } 266 267 bool isRegOrImmWithFP32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 269 } 270 271 bool isRegOrImmWithFP64InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 273 } 274 275 bool isVReg() const { 276 return isRegClass(AMDGPU::VGPR_32RegClassID) || 277 isRegClass(AMDGPU::VReg_64RegClassID) || 278 isRegClass(AMDGPU::VReg_96RegClassID) || 279 isRegClass(AMDGPU::VReg_128RegClassID) || 280 isRegClass(AMDGPU::VReg_160RegClassID) || 281 isRegClass(AMDGPU::VReg_192RegClassID) || 282 isRegClass(AMDGPU::VReg_256RegClassID) || 283 isRegClass(AMDGPU::VReg_512RegClassID) || 284 isRegClass(AMDGPU::VReg_1024RegClassID); 285 } 286 287 bool isVReg32() const { 288 return isRegClass(AMDGPU::VGPR_32RegClassID); 289 } 290 291 bool isVReg32OrOff() const { 292 return isOff() || isVReg32(); 293 } 294 295 bool isNull() const { 296 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 297 } 298 299 bool isSDWAOperand(MVT type) const; 300 bool isSDWAFP16Operand() const; 301 bool isSDWAFP32Operand() const; 302 bool isSDWAInt16Operand() const; 303 bool isSDWAInt32Operand() const; 304 305 bool isImmTy(ImmTy ImmT) const { 306 return isImm() && Imm.Type == ImmT; 307 } 308 309 bool isImmModifier() const { 310 return isImm() && Imm.Type != ImmTyNone; 311 } 312 313 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 314 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 315 bool isDMask() const { return isImmTy(ImmTyDMask); } 316 bool isDim() const { return isImmTy(ImmTyDim); } 317 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 318 bool isDA() const { return isImmTy(ImmTyDA); } 319 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 320 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 321 bool isLWE() const { return isImmTy(ImmTyLWE); } 322 bool isOff() const { return isImmTy(ImmTyOff); } 323 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 324 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 325 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 326 bool isOffen() const { return isImmTy(ImmTyOffen); } 327 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 328 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 329 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 330 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 331 bool 
isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 332 333 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 334 bool isGDS() const { return isImmTy(ImmTyGDS); } 335 bool isLDS() const { return isImmTy(ImmTyLDS); } 336 bool isDLC() const { return isImmTy(ImmTyDLC); } 337 bool isGLC() const { return isImmTy(ImmTyGLC); } 338 bool isSLC() const { return isImmTy(ImmTySLC); } 339 bool isSWZ() const { return isImmTy(ImmTySWZ); } 340 bool isTFE() const { return isImmTy(ImmTyTFE); } 341 bool isD16() const { return isImmTy(ImmTyD16); } 342 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 343 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 344 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 345 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 346 bool isFI() const { return isImmTy(ImmTyDppFi); } 347 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 348 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 349 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 350 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 351 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 352 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 353 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 354 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 355 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 356 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 357 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 358 bool isHigh() const { return isImmTy(ImmTyHigh); } 359 360 bool isMod() const { 361 return isClampSI() || isOModSI(); 362 } 363 364 bool isRegOrImm() const { 365 return isReg() || isImm(); 366 } 367 368 bool isRegClass(unsigned RCID) const; 369 370 bool isInlineValue() const; 371 372 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 373 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 374 } 375 376 bool isSCSrcB16() const { 377 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 378 } 379 380 bool isSCSrcV2B16() const { 381 return isSCSrcB16(); 382 } 383 384 bool isSCSrcB32() const { 385 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 386 } 387 388 bool isSCSrcB64() const { 389 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 390 } 391 392 bool isBoolReg() const; 393 394 bool isSCSrcF16() const { 395 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 396 } 397 398 bool isSCSrcV2F16() const { 399 return isSCSrcF16(); 400 } 401 402 bool isSCSrcF32() const { 403 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 404 } 405 406 bool isSCSrcF64() const { 407 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 408 } 409 410 bool isSSrcB32() const { 411 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 412 } 413 414 bool isSSrcB16() const { 415 return isSCSrcB16() || isLiteralImm(MVT::i16); 416 } 417 418 bool isSSrcV2B16() const { 419 llvm_unreachable("cannot happen"); 420 return isSSrcB16(); 421 } 422 423 bool isSSrcB64() const { 424 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 425 // See isVSrc64(). 
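    // Illustrative example (assumption, not from the original comment): in
    //   s_and_b64 s[0:1], s[0:1], 0x1234
    // the 32-bit literal must be extended to 64 bits by the SALU; exactly how
    // that extension behaves is the open question noted in the TODO above.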
426 return isSCSrcB64() || isLiteralImm(MVT::i64); 427 } 428 429 bool isSSrcF32() const { 430 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 431 } 432 433 bool isSSrcF64() const { 434 return isSCSrcB64() || isLiteralImm(MVT::f64); 435 } 436 437 bool isSSrcF16() const { 438 return isSCSrcB16() || isLiteralImm(MVT::f16); 439 } 440 441 bool isSSrcV2F16() const { 442 llvm_unreachable("cannot happen"); 443 return isSSrcF16(); 444 } 445 446 bool isSSrcOrLdsB32() const { 447 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 448 isLiteralImm(MVT::i32) || isExpr(); 449 } 450 451 bool isVCSrcB32() const { 452 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 453 } 454 455 bool isVCSrcB64() const { 456 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 457 } 458 459 bool isVCSrcB16() const { 460 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 461 } 462 463 bool isVCSrcV2B16() const { 464 return isVCSrcB16(); 465 } 466 467 bool isVCSrcF32() const { 468 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 469 } 470 471 bool isVCSrcF64() const { 472 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 473 } 474 475 bool isVCSrcF16() const { 476 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 477 } 478 479 bool isVCSrcV2F16() const { 480 return isVCSrcF16(); 481 } 482 483 bool isVSrcB32() const { 484 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 485 } 486 487 bool isVSrcB64() const { 488 return isVCSrcF64() || isLiteralImm(MVT::i64); 489 } 490 491 bool isVSrcB16() const { 492 return isVCSrcB16() || isLiteralImm(MVT::i16); 493 } 494 495 bool isVSrcV2B16() const { 496 return isVSrcB16() || isLiteralImm(MVT::v2i16); 497 } 498 499 bool isVSrcF32() const { 500 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 501 } 502 503 bool isVSrcF64() const { 504 return isVCSrcF64() || isLiteralImm(MVT::f64); 505 } 506 507 bool isVSrcF16() const { 508 return isVCSrcF16() || isLiteralImm(MVT::f16); 509 } 510 511 bool isVSrcV2F16() const { 512 return isVSrcF16() || isLiteralImm(MVT::v2f16); 513 } 514 515 bool isVISrcB32() const { 516 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 517 } 518 519 bool isVISrcB16() const { 520 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 521 } 522 523 bool isVISrcV2B16() const { 524 return isVISrcB16(); 525 } 526 527 bool isVISrcF32() const { 528 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 529 } 530 531 bool isVISrcF16() const { 532 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 533 } 534 535 bool isVISrcV2F16() const { 536 return isVISrcF16() || isVISrcB32(); 537 } 538 539 bool isAISrcB32() const { 540 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 541 } 542 543 bool isAISrcB16() const { 544 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 545 } 546 547 bool isAISrcV2B16() const { 548 return isAISrcB16(); 549 } 550 551 bool isAISrcF32() const { 552 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 553 } 554 555 bool isAISrcF16() const { 556 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 557 } 558 559 bool isAISrcV2F16() const { 560 return isAISrcF16() || isAISrcB32(); 561 } 562 563 bool isAISrc_128B32() const { 564 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 565 } 566 567 bool isAISrc_128B16() const { 568 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 569 } 570 571 bool 
isAISrc_128V2B16() const { 572 return isAISrc_128B16(); 573 } 574 575 bool isAISrc_128F32() const { 576 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 577 } 578 579 bool isAISrc_128F16() const { 580 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 581 } 582 583 bool isAISrc_128V2F16() const { 584 return isAISrc_128F16() || isAISrc_128B32(); 585 } 586 587 bool isAISrc_512B32() const { 588 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 589 } 590 591 bool isAISrc_512B16() const { 592 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 593 } 594 595 bool isAISrc_512V2B16() const { 596 return isAISrc_512B16(); 597 } 598 599 bool isAISrc_512F32() const { 600 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 601 } 602 603 bool isAISrc_512F16() const { 604 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 605 } 606 607 bool isAISrc_512V2F16() const { 608 return isAISrc_512F16() || isAISrc_512B32(); 609 } 610 611 bool isAISrc_1024B32() const { 612 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 613 } 614 615 bool isAISrc_1024B16() const { 616 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 617 } 618 619 bool isAISrc_1024V2B16() const { 620 return isAISrc_1024B16(); 621 } 622 623 bool isAISrc_1024F32() const { 624 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 625 } 626 627 bool isAISrc_1024F16() const { 628 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 629 } 630 631 bool isAISrc_1024V2F16() const { 632 return isAISrc_1024F16() || isAISrc_1024B32(); 633 } 634 635 bool isKImmFP32() const { 636 return isLiteralImm(MVT::f32); 637 } 638 639 bool isKImmFP16() const { 640 return isLiteralImm(MVT::f16); 641 } 642 643 bool isMem() const override { 644 return false; 645 } 646 647 bool isExpr() const { 648 return Kind == Expression; 649 } 650 651 bool isSoppBrTarget() const { 652 return isExpr() || isImm(); 653 } 654 655 bool isSWaitCnt() const; 656 bool isHwreg() const; 657 bool isSendMsg() const; 658 bool isSwizzle() const; 659 bool isSMRDOffset8() const; 660 bool isSMEMOffset() const; 661 bool isSMRDLiteralOffset() const; 662 bool isDPP8() const; 663 bool isDPPCtrl() const; 664 bool isBLGP() const; 665 bool isCBSZ() const; 666 bool isABID() const; 667 bool isGPRIdxMode() const; 668 bool isS16Imm() const; 669 bool isU16Imm() const; 670 bool isEndpgm() const; 671 672 StringRef getExpressionAsToken() const { 673 assert(isExpr()); 674 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 675 return S->getSymbol().getName(); 676 } 677 678 StringRef getToken() const { 679 assert(isToken()); 680 681 if (Kind == Expression) 682 return getExpressionAsToken(); 683 684 return StringRef(Tok.Data, Tok.Length); 685 } 686 687 int64_t getImm() const { 688 assert(isImm()); 689 return Imm.Val; 690 } 691 692 ImmTy getImmTy() const { 693 assert(isImm()); 694 return Imm.Type; 695 } 696 697 unsigned getReg() const override { 698 assert(isRegKind()); 699 return Reg.RegNo; 700 } 701 702 SMLoc getStartLoc() const override { 703 return StartLoc; 704 } 705 706 SMLoc getEndLoc() const override { 707 return EndLoc; 708 } 709 710 SMRange getLocRange() const { 711 return SMRange(StartLoc, EndLoc); 712 } 713 714 Modifiers getModifiers() const { 715 assert(isRegKind() || isImmTy(ImmTyNone)); 716 return isRegKind() ? 
Reg.Mods : Imm.Mods; 717 } 718 719 void setModifiers(Modifiers Mods) { 720 assert(isRegKind() || isImmTy(ImmTyNone)); 721 if (isRegKind()) 722 Reg.Mods = Mods; 723 else 724 Imm.Mods = Mods; 725 } 726 727 bool hasModifiers() const { 728 return getModifiers().hasModifiers(); 729 } 730 731 bool hasFPModifiers() const { 732 return getModifiers().hasFPModifiers(); 733 } 734 735 bool hasIntModifiers() const { 736 return getModifiers().hasIntModifiers(); 737 } 738 739 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 740 741 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 742 743 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 744 745 template <unsigned Bitwidth> 746 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 747 748 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 749 addKImmFPOperands<16>(Inst, N); 750 } 751 752 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 753 addKImmFPOperands<32>(Inst, N); 754 } 755 756 void addRegOperands(MCInst &Inst, unsigned N) const; 757 758 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 759 addRegOperands(Inst, N); 760 } 761 762 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 763 if (isRegKind()) 764 addRegOperands(Inst, N); 765 else if (isExpr()) 766 Inst.addOperand(MCOperand::createExpr(Expr)); 767 else 768 addImmOperands(Inst, N); 769 } 770 771 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 772 Modifiers Mods = getModifiers(); 773 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 774 if (isRegKind()) { 775 addRegOperands(Inst, N); 776 } else { 777 addImmOperands(Inst, N, false); 778 } 779 } 780 781 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 782 assert(!hasIntModifiers()); 783 addRegOrImmWithInputModsOperands(Inst, N); 784 } 785 786 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 787 assert(!hasFPModifiers()); 788 addRegOrImmWithInputModsOperands(Inst, N); 789 } 790 791 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 792 Modifiers Mods = getModifiers(); 793 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 794 assert(isRegKind()); 795 addRegOperands(Inst, N); 796 } 797 798 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 799 assert(!hasIntModifiers()); 800 addRegWithInputModsOperands(Inst, N); 801 } 802 803 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 804 assert(!hasFPModifiers()); 805 addRegWithInputModsOperands(Inst, N); 806 } 807 808 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 809 if (isImm()) 810 addImmOperands(Inst, N); 811 else { 812 assert(isExpr()); 813 Inst.addOperand(MCOperand::createExpr(Expr)); 814 } 815 } 816 817 static void printImmTy(raw_ostream& OS, ImmTy Type) { 818 switch (Type) { 819 case ImmTyNone: OS << "None"; break; 820 case ImmTyGDS: OS << "GDS"; break; 821 case ImmTyLDS: OS << "LDS"; break; 822 case ImmTyOffen: OS << "Offen"; break; 823 case ImmTyIdxen: OS << "Idxen"; break; 824 case ImmTyAddr64: OS << "Addr64"; break; 825 case ImmTyOffset: OS << "Offset"; break; 826 case ImmTyInstOffset: OS << "InstOffset"; break; 827 case ImmTyOffset0: OS << "Offset0"; break; 828 case ImmTyOffset1: OS << "Offset1"; break; 829 case ImmTyDLC: OS << "DLC"; break; 830 case ImmTyGLC: OS << "GLC"; break; 831 case ImmTySLC: OS << "SLC"; break; 832 case ImmTySWZ: OS << "SWZ"; break; 833 case ImmTyTFE: OS << "TFE"; break; 834 case 
ImmTyD16: OS << "D16"; break; 835 case ImmTyFORMAT: OS << "FORMAT"; break; 836 case ImmTyClampSI: OS << "ClampSI"; break; 837 case ImmTyOModSI: OS << "OModSI"; break; 838 case ImmTyDPP8: OS << "DPP8"; break; 839 case ImmTyDppCtrl: OS << "DppCtrl"; break; 840 case ImmTyDppRowMask: OS << "DppRowMask"; break; 841 case ImmTyDppBankMask: OS << "DppBankMask"; break; 842 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 843 case ImmTyDppFi: OS << "FI"; break; 844 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 845 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 846 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 847 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 848 case ImmTyDMask: OS << "DMask"; break; 849 case ImmTyDim: OS << "Dim"; break; 850 case ImmTyUNorm: OS << "UNorm"; break; 851 case ImmTyDA: OS << "DA"; break; 852 case ImmTyR128A16: OS << "R128A16"; break; 853 case ImmTyA16: OS << "A16"; break; 854 case ImmTyLWE: OS << "LWE"; break; 855 case ImmTyOff: OS << "Off"; break; 856 case ImmTyExpTgt: OS << "ExpTgt"; break; 857 case ImmTyExpCompr: OS << "ExpCompr"; break; 858 case ImmTyExpVM: OS << "ExpVM"; break; 859 case ImmTyHwreg: OS << "Hwreg"; break; 860 case ImmTySendMsg: OS << "SendMsg"; break; 861 case ImmTyInterpSlot: OS << "InterpSlot"; break; 862 case ImmTyInterpAttr: OS << "InterpAttr"; break; 863 case ImmTyAttrChan: OS << "AttrChan"; break; 864 case ImmTyOpSel: OS << "OpSel"; break; 865 case ImmTyOpSelHi: OS << "OpSelHi"; break; 866 case ImmTyNegLo: OS << "NegLo"; break; 867 case ImmTyNegHi: OS << "NegHi"; break; 868 case ImmTySwizzle: OS << "Swizzle"; break; 869 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 870 case ImmTyHigh: OS << "High"; break; 871 case ImmTyBLGP: OS << "BLGP"; break; 872 case ImmTyCBSZ: OS << "CBSZ"; break; 873 case ImmTyABID: OS << "ABID"; break; 874 case ImmTyEndpgm: OS << "Endpgm"; break; 875 } 876 } 877 878 void print(raw_ostream &OS) const override { 879 switch (Kind) { 880 case Register: 881 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 882 break; 883 case Immediate: 884 OS << '<' << getImm(); 885 if (getImmTy() != ImmTyNone) { 886 OS << " type: "; printImmTy(OS, getImmTy()); 887 } 888 OS << " mods: " << Imm.Mods << '>'; 889 break; 890 case Token: 891 OS << '\'' << getToken() << '\''; 892 break; 893 case Expression: 894 OS << "<expr " << *Expr << '>'; 895 break; 896 } 897 } 898 899 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 900 int64_t Val, SMLoc Loc, 901 ImmTy Type = ImmTyNone, 902 bool IsFPImm = false) { 903 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 904 Op->Imm.Val = Val; 905 Op->Imm.IsFPImm = IsFPImm; 906 Op->Imm.Type = Type; 907 Op->Imm.Mods = Modifiers(); 908 Op->StartLoc = Loc; 909 Op->EndLoc = Loc; 910 return Op; 911 } 912 913 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 914 StringRef Str, SMLoc Loc, 915 bool HasExplicitEncodingSize = true) { 916 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 917 Res->Tok.Data = Str.data(); 918 Res->Tok.Length = Str.size(); 919 Res->StartLoc = Loc; 920 Res->EndLoc = Loc; 921 return Res; 922 } 923 924 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 925 unsigned RegNo, SMLoc S, 926 SMLoc E) { 927 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 928 Op->Reg.RegNo = RegNo; 929 Op->Reg.Mods = Modifiers(); 930 Op->StartLoc = S; 931 Op->EndLoc = E; 932 return Op; 933 } 934 935 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 936 
const class MCExpr *Expr, SMLoc S) { 937 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 938 Op->Expr = Expr; 939 Op->StartLoc = S; 940 Op->EndLoc = S; 941 return Op; 942 } 943 }; 944 945 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 946 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 947 return OS; 948 } 949 950 //===----------------------------------------------------------------------===// 951 // AsmParser 952 //===----------------------------------------------------------------------===// 953 954 // Holds info related to the current kernel, e.g. count of SGPRs used. 955 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 956 // .amdgpu_hsa_kernel or at EOF. 957 class KernelScopeInfo { 958 int SgprIndexUnusedMin = -1; 959 int VgprIndexUnusedMin = -1; 960 MCContext *Ctx = nullptr; 961 962 void usesSgprAt(int i) { 963 if (i >= SgprIndexUnusedMin) { 964 SgprIndexUnusedMin = ++i; 965 if (Ctx) { 966 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 967 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 968 } 969 } 970 } 971 972 void usesVgprAt(int i) { 973 if (i >= VgprIndexUnusedMin) { 974 VgprIndexUnusedMin = ++i; 975 if (Ctx) { 976 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 977 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 978 } 979 } 980 } 981 982 public: 983 KernelScopeInfo() = default; 984 985 void initialize(MCContext &Context) { 986 Ctx = &Context; 987 usesSgprAt(SgprIndexUnusedMin = -1); 988 usesVgprAt(VgprIndexUnusedMin = -1); 989 } 990 991 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 992 switch (RegKind) { 993 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 994 case IS_AGPR: // fall through 995 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 996 default: break; 997 } 998 } 999 }; 1000 1001 class AMDGPUAsmParser : public MCTargetAsmParser { 1002 MCAsmParser &Parser; 1003 1004 // Number of extra operands parsed after the first optional operand. 1005 // This may be necessary to skip hardcoded mandatory operands. 1006 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1007 1008 unsigned ForcedEncodingSize = 0; 1009 bool ForcedDPP = false; 1010 bool ForcedSDWA = false; 1011 KernelScopeInfo KernelScope; 1012 1013 /// @name Auto-generated Match Functions 1014 /// { 1015 1016 #define GET_ASSEMBLER_HEADER 1017 #include "AMDGPUGenAsmMatcher.inc" 1018 1019 /// } 1020 1021 private: 1022 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1023 bool OutOfRangeError(SMRange Range); 1024 /// Calculate VGPR/SGPR blocks required for given target, reserved 1025 /// registers, and user-specified NextFreeXGPR values. 1026 /// 1027 /// \param Features [in] Target features, used for bug corrections. 1028 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1029 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1030 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1031 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1032 /// descriptor field, if valid. 1033 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1034 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1035 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1036 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 
1037 /// \param VGPRBlocks [out] Result VGPR block count. 1038 /// \param SGPRBlocks [out] Result SGPR block count. 1039 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1040 bool FlatScrUsed, bool XNACKUsed, 1041 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1042 SMRange VGPRRange, unsigned NextFreeSGPR, 1043 SMRange SGPRRange, unsigned &VGPRBlocks, 1044 unsigned &SGPRBlocks); 1045 bool ParseDirectiveAMDGCNTarget(); 1046 bool ParseDirectiveAMDHSAKernel(); 1047 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1048 bool ParseDirectiveHSACodeObjectVersion(); 1049 bool ParseDirectiveHSACodeObjectISA(); 1050 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1051 bool ParseDirectiveAMDKernelCodeT(); 1052 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1053 bool ParseDirectiveAMDGPUHsaKernel(); 1054 1055 bool ParseDirectiveISAVersion(); 1056 bool ParseDirectiveHSAMetadata(); 1057 bool ParseDirectivePALMetadataBegin(); 1058 bool ParseDirectivePALMetadata(); 1059 bool ParseDirectiveAMDGPULDS(); 1060 1061 /// Common code to parse out a block of text (typically YAML) between start and 1062 /// end directives. 1063 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1064 const char *AssemblerDirectiveEnd, 1065 std::string &CollectString); 1066 1067 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1068 RegisterKind RegKind, unsigned Reg1); 1069 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1070 unsigned &RegNum, unsigned &RegWidth, 1071 bool RestoreOnFailure = false); 1072 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1073 unsigned &RegNum, unsigned &RegWidth, 1074 SmallVectorImpl<AsmToken> &Tokens); 1075 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1076 unsigned &RegWidth, 1077 SmallVectorImpl<AsmToken> &Tokens); 1078 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1079 unsigned &RegWidth, 1080 SmallVectorImpl<AsmToken> &Tokens); 1081 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1082 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1083 bool ParseRegRange(unsigned& Num, unsigned& Width); 1084 unsigned getRegularReg(RegisterKind RegKind, 1085 unsigned RegNum, 1086 unsigned RegWidth); 1087 1088 bool isRegister(); 1089 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1090 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1091 void initializeGprCountSymbol(RegisterKind RegKind); 1092 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1093 unsigned RegWidth); 1094 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1095 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1096 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1097 bool IsGdsHardcoded); 1098 1099 public: 1100 enum AMDGPUMatchResultTy { 1101 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1102 }; 1103 enum OperandMode { 1104 OperandMode_Default, 1105 OperandMode_NSA, 1106 }; 1107 1108 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1109 1110 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1111 const MCInstrInfo &MII, 1112 const MCTargetOptions &Options) 1113 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1114 MCAsmParserExtension::Initialize(Parser); 1115 1116 if (getFeatureBits().none()) { 1117 // Set default features. 
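      // No explicit CPU or features were requested, so fall back to the
      // southern-islands (GFX6) feature set as a baseline for the
      // feature-dependent parsing below.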
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
1223 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1224 } 1225 1226 const MCInstrInfo *getMII() const { 1227 return &MII; 1228 } 1229 1230 const FeatureBitset &getFeatureBits() const { 1231 return getSTI().getFeatureBits(); 1232 } 1233 1234 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1235 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1236 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1237 1238 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1239 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1240 bool isForcedDPP() const { return ForcedDPP; } 1241 bool isForcedSDWA() const { return ForcedSDWA; } 1242 ArrayRef<unsigned> getMatchedVariants() const; 1243 1244 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1245 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1246 bool RestoreOnFailure); 1247 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1248 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1249 SMLoc &EndLoc) override; 1250 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1251 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1252 unsigned Kind) override; 1253 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1254 OperandVector &Operands, MCStreamer &Out, 1255 uint64_t &ErrorInfo, 1256 bool MatchingInlineAsm) override; 1257 bool ParseDirective(AsmToken DirectiveID) override; 1258 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1259 OperandMode Mode = OperandMode_Default); 1260 StringRef parseMnemonicSuffix(StringRef Name); 1261 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1262 SMLoc NameLoc, OperandVector &Operands) override; 1263 //bool ProcessInstruction(MCInst &Inst); 1264 1265 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1266 1267 OperandMatchResultTy 1268 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1269 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1270 bool (*ConvertResult)(int64_t &) = nullptr); 1271 1272 OperandMatchResultTy 1273 parseOperandArrayWithPrefix(const char *Prefix, 1274 OperandVector &Operands, 1275 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1276 bool (*ConvertResult)(int64_t&) = nullptr); 1277 1278 OperandMatchResultTy 1279 parseNamedBit(const char *Name, OperandVector &Operands, 1280 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1281 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1282 StringRef &Value); 1283 1284 bool isModifier(); 1285 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1286 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1287 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1288 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1289 bool parseSP3NegModifier(); 1290 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1291 OperandMatchResultTy parseReg(OperandVector &Operands); 1292 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1293 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1294 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1295 
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1296 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1297 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1298 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); 1299 1300 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1301 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1302 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1303 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1304 1305 bool parseCnt(int64_t &IntVal); 1306 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1307 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1308 1309 private: 1310 struct OperandInfoTy { 1311 int64_t Id; 1312 bool IsSymbolic = false; 1313 bool IsDefined = false; 1314 1315 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1316 }; 1317 1318 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1319 bool validateSendMsg(const OperandInfoTy &Msg, 1320 const OperandInfoTy &Op, 1321 const OperandInfoTy &Stream, 1322 const SMLoc Loc); 1323 1324 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1325 bool validateHwreg(const OperandInfoTy &HwReg, 1326 const int64_t Offset, 1327 const int64_t Width, 1328 const SMLoc Loc); 1329 1330 void errorExpTgt(); 1331 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1332 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1333 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1334 1335 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1336 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1337 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1338 bool validateSOPLiteral(const MCInst &Inst) const; 1339 bool validateConstantBusLimitations(const MCInst &Inst); 1340 bool validateEarlyClobberLimitations(const MCInst &Inst); 1341 bool validateIntClampSupported(const MCInst &Inst); 1342 bool validateMIMGAtomicDMask(const MCInst &Inst); 1343 bool validateMIMGGatherDMask(const MCInst &Inst); 1344 bool validateMovrels(const MCInst &Inst); 1345 bool validateMIMGDataSize(const MCInst &Inst); 1346 bool validateMIMGAddrSize(const MCInst &Inst); 1347 bool validateMIMGD16(const MCInst &Inst); 1348 bool validateMIMGDim(const MCInst &Inst); 1349 bool validateLdsDirect(const MCInst &Inst); 1350 bool validateOpSel(const MCInst &Inst); 1351 bool validateVccOperand(unsigned Reg) const; 1352 bool validateVOP3Literal(const MCInst &Inst) const; 1353 bool validateMAIAccWrite(const MCInst &Inst); 1354 unsigned getConstantBusLimit(unsigned Opcode) const; 1355 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1356 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1357 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1358 1359 bool isId(const StringRef Id) const; 1360 bool isId(const AsmToken &Token, const StringRef Id) const; 1361 bool isToken(const AsmToken::TokenKind Kind) const; 1362 bool trySkipId(const StringRef Id); 1363 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1364 bool trySkipToken(const AsmToken::TokenKind Kind); 1365 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1366 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1367 void 
peekTokens(MutableArrayRef<AsmToken> Tokens); 1368 AsmToken::TokenKind getTokenKind() const; 1369 bool parseExpr(int64_t &Imm); 1370 bool parseExpr(OperandVector &Operands); 1371 StringRef getTokenStr() const; 1372 AsmToken peekToken(); 1373 AsmToken getToken() const; 1374 SMLoc getLoc() const; 1375 void lex(); 1376 1377 public: 1378 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1379 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1380 1381 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1382 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1383 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1384 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1385 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1386 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1387 1388 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1389 const unsigned MinVal, 1390 const unsigned MaxVal, 1391 const StringRef ErrMsg); 1392 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1393 bool parseSwizzleOffset(int64_t &Imm); 1394 bool parseSwizzleMacro(int64_t &Imm); 1395 bool parseSwizzleQuadPerm(int64_t &Imm); 1396 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1397 bool parseSwizzleBroadcast(int64_t &Imm); 1398 bool parseSwizzleSwap(int64_t &Imm); 1399 bool parseSwizzleReverse(int64_t &Imm); 1400 1401 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1402 int64_t parseGPRIdxMacro(); 1403 1404 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1405 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1406 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1407 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1408 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1409 1410 AMDGPUOperand::Ptr defaultDLC() const; 1411 AMDGPUOperand::Ptr defaultGLC() const; 1412 AMDGPUOperand::Ptr defaultSLC() const; 1413 1414 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1415 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1416 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1417 AMDGPUOperand::Ptr defaultFlatOffset() const; 1418 1419 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1420 1421 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1422 OptionalImmIndexMap &OptionalIdx); 1423 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1424 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1425 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1426 1427 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1428 1429 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1430 bool IsAtomic = false); 1431 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1432 1433 OperandMatchResultTy parseDim(OperandVector &Operands); 1434 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1435 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1436 AMDGPUOperand::Ptr defaultRowMask() const; 1437 AMDGPUOperand::Ptr defaultBankMask() const; 1438 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1439 AMDGPUOperand::Ptr defaultFI() const; 1440 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1441 void cvtDPP8(MCInst &Inst, const 
OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1442 1443 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1444 AMDGPUOperand::ImmTy Type); 1445 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1446 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1447 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1448 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1449 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1450 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1451 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1452 uint64_t BasicInstType, 1453 bool SkipDstVcc = false, 1454 bool SkipSrcVcc = false); 1455 1456 AMDGPUOperand::Ptr defaultBLGP() const; 1457 AMDGPUOperand::Ptr defaultCBSZ() const; 1458 AMDGPUOperand::Ptr defaultABID() const; 1459 1460 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1461 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1462 }; 1463 1464 struct OptionalOperand { 1465 const char *Name; 1466 AMDGPUOperand::ImmTy Type; 1467 bool IsBit; 1468 bool (*ConvertResult)(int64_t&); 1469 }; 1470 1471 } // end anonymous namespace 1472 1473 // May be called with integer type with equivalent bitwidth. 1474 static const fltSemantics *getFltSemantics(unsigned Size) { 1475 switch (Size) { 1476 case 4: 1477 return &APFloat::IEEEsingle(); 1478 case 8: 1479 return &APFloat::IEEEdouble(); 1480 case 2: 1481 return &APFloat::IEEEhalf(); 1482 default: 1483 llvm_unreachable("unsupported fp type"); 1484 } 1485 } 1486 1487 static const fltSemantics *getFltSemantics(MVT VT) { 1488 return getFltSemantics(VT.getSizeInBits() / 8); 1489 } 1490 1491 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1492 switch (OperandType) { 1493 case AMDGPU::OPERAND_REG_IMM_INT32: 1494 case AMDGPU::OPERAND_REG_IMM_FP32: 1495 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1496 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1497 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1498 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1499 return &APFloat::IEEEsingle(); 1500 case AMDGPU::OPERAND_REG_IMM_INT64: 1501 case AMDGPU::OPERAND_REG_IMM_FP64: 1502 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1503 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1504 return &APFloat::IEEEdouble(); 1505 case AMDGPU::OPERAND_REG_IMM_INT16: 1506 case AMDGPU::OPERAND_REG_IMM_FP16: 1507 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1508 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1509 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1510 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1511 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1512 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1513 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1514 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1515 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1516 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1517 return &APFloat::IEEEhalf(); 1518 default: 1519 llvm_unreachable("unsupported fp type"); 1520 } 1521 } 1522 1523 //===----------------------------------------------------------------------===// 1524 // Operand 1525 //===----------------------------------------------------------------------===// 1526 1527 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1528 bool Lost; 1529 1530 // Convert literal to single precision 1531 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1532 APFloat::rmNearestTiesToEven, 1533 &Lost); 1534 // We allow precision lost but not overflow or underflow 1535 if (Status != APFloat::opOK && 1536 Lost && 1537 
((Status & APFloat::opOverflow) != 0 || 1538 (Status & APFloat::opUnderflow) != 0)) { 1539 return false; 1540 } 1541 1542 return true; 1543 } 1544 1545 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1546 return isUIntN(Size, Val) || isIntN(Size, Val); 1547 } 1548 1549 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1550 if (VT.getScalarType() == MVT::i16) { 1551 // FP immediate values are broken. 1552 return isInlinableIntLiteral(Val); 1553 } 1554 1555 // f16/v2f16 operands work correctly for all values. 1556 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1557 } 1558 1559 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1560 1561 // This is a hack to enable named inline values like 1562 // shared_base with both 32-bit and 64-bit operands. 1563 // Note that these values are defined as 1564 // 32-bit operands only. 1565 if (isInlineValue()) { 1566 return true; 1567 } 1568 1569 if (!isImmTy(ImmTyNone)) { 1570 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1571 return false; 1572 } 1573 // TODO: We should avoid using host float here. It would be better to 1574 // check the float bit values which is what a few other places do. 1575 // We've had bot failures before due to weird NaN support on mips hosts. 1576 1577 APInt Literal(64, Imm.Val); 1578 1579 if (Imm.IsFPImm) { // We got fp literal token 1580 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1581 return AMDGPU::isInlinableLiteral64(Imm.Val, 1582 AsmParser->hasInv2PiInlineImm()); 1583 } 1584 1585 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1586 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1587 return false; 1588 1589 if (type.getScalarSizeInBits() == 16) { 1590 return isInlineableLiteralOp16( 1591 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1592 type, AsmParser->hasInv2PiInlineImm()); 1593 } 1594 1595 // Check if single precision literal is inlinable 1596 return AMDGPU::isInlinableLiteral32( 1597 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1598 AsmParser->hasInv2PiInlineImm()); 1599 } 1600 1601 // We got int literal token. 1602 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1603 return AMDGPU::isInlinableLiteral64(Imm.Val, 1604 AsmParser->hasInv2PiInlineImm()); 1605 } 1606 1607 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1608 return false; 1609 } 1610 1611 if (type.getScalarSizeInBits() == 16) { 1612 return isInlineableLiteralOp16( 1613 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1614 type, AsmParser->hasInv2PiInlineImm()); 1615 } 1616 1617 return AMDGPU::isInlinableLiteral32( 1618 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1619 AsmParser->hasInv2PiInlineImm()); 1620 } 1621 1622 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1623 // Check that this immediate can be added as literal 1624 if (!isImmTy(ImmTyNone)) { 1625 return false; 1626 } 1627 1628 if (!Imm.IsFPImm) { 1629 // We got int literal token. 1630 1631 if (type == MVT::f64 && hasFPModifiers()) { 1632 // Cannot apply fp modifiers to int literals preserving the same semantics 1633 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1634 // disable these cases. 1635 return false; 1636 } 1637 1638 unsigned Size = type.getSizeInBits(); 1639 if (Size == 64) 1640 Size = 32; 1641 1642 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1643 // types. 
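  // Only a 32-bit literal can actually be encoded in the instruction stream,
  // so for a 64-bit integer operand we merely check that the value survives
  // truncation to 32 bits (Size was reduced to 32 above). Illustrative
  // example (assumption, not from the original source):
  //   s_mov_b64 s[0:1], 0x12345678   // accepted as a literal here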
1644 return isSafeTruncation(Imm.Val, Size); 1645 } 1646 1647 // We got fp literal token 1648 if (type == MVT::f64) { // Expected 64-bit fp operand 1649 // We would set low 64-bits of literal to zeroes but we accept this literals 1650 return true; 1651 } 1652 1653 if (type == MVT::i64) { // Expected 64-bit int operand 1654 // We don't allow fp literals in 64-bit integer instructions. It is 1655 // unclear how we should encode them. 1656 return false; 1657 } 1658 1659 // We allow fp literals with f16x2 operands assuming that the specified 1660 // literal goes into the lower half and the upper half is zero. We also 1661 // require that the literal may be losslesly converted to f16. 1662 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1663 (type == MVT::v2i16)? MVT::i16 : type; 1664 1665 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1666 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1667 } 1668 1669 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1670 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1671 } 1672 1673 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1674 if (AsmParser->isVI()) 1675 return isVReg32(); 1676 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1677 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1678 else 1679 return false; 1680 } 1681 1682 bool AMDGPUOperand::isSDWAFP16Operand() const { 1683 return isSDWAOperand(MVT::f16); 1684 } 1685 1686 bool AMDGPUOperand::isSDWAFP32Operand() const { 1687 return isSDWAOperand(MVT::f32); 1688 } 1689 1690 bool AMDGPUOperand::isSDWAInt16Operand() const { 1691 return isSDWAOperand(MVT::i16); 1692 } 1693 1694 bool AMDGPUOperand::isSDWAInt32Operand() const { 1695 return isSDWAOperand(MVT::i32); 1696 } 1697 1698 bool AMDGPUOperand::isBoolReg() const { 1699 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1700 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1701 } 1702 1703 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1704 { 1705 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1706 assert(Size == 2 || Size == 4 || Size == 8); 1707 1708 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1709 1710 if (Imm.Mods.Abs) { 1711 Val &= ~FpSignMask; 1712 } 1713 if (Imm.Mods.Neg) { 1714 Val ^= FpSignMask; 1715 } 1716 1717 return Val; 1718 } 1719 1720 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1721 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1722 Inst.getNumOperands())) { 1723 addLiteralImmOperand(Inst, Imm.Val, 1724 ApplyModifiers & 1725 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1726 } else { 1727 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1728 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1729 } 1730 } 1731 1732 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1733 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1734 auto OpNum = Inst.getNumOperands(); 1735 // Check that this operand accepts literals 1736 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1737 1738 if (ApplyModifiers) { 1739 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1740 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1741 Val = applyInputFPModifiers(Val, Size); 1742 } 1743 1744 APInt Literal(64, Val); 1745 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1746 1747 if (Imm.IsFPImm) { // We got fp literal token 1748 switch (OpTy) { 1749 case AMDGPU::OPERAND_REG_IMM_INT64: 1750 case AMDGPU::OPERAND_REG_IMM_FP64: 1751 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1752 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1753 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1754 AsmParser->hasInv2PiInlineImm())) { 1755 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1756 return; 1757 } 1758 1759 // Non-inlineable 1760 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1761 // For fp operands we check if low 32 bits are zeros 1762 if (Literal.getLoBits(32) != 0) { 1763 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1764 "Can't encode literal as exact 64-bit floating-point operand. " 1765 "Low 32-bits will be set to zero"); 1766 } 1767 1768 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1769 return; 1770 } 1771 1772 // We don't allow fp literals in 64-bit integer instructions. It is 1773 // unclear how we should encode them. This case should be checked earlier 1774 // in predicate methods (isLiteralImm()) 1775 llvm_unreachable("fp literal in 64-bit integer instruction."); 1776 1777 case AMDGPU::OPERAND_REG_IMM_INT32: 1778 case AMDGPU::OPERAND_REG_IMM_FP32: 1779 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1780 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1781 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1782 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1783 case AMDGPU::OPERAND_REG_IMM_INT16: 1784 case AMDGPU::OPERAND_REG_IMM_FP16: 1785 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1786 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1787 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1788 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1789 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1790 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1791 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1792 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1793 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1794 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1795 bool lost; 1796 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1797 // Convert literal to single precision 1798 FPLiteral.convert(*getOpFltSemantics(OpTy), 1799 APFloat::rmNearestTiesToEven, &lost); 1800 // We allow precision lost but not overflow or underflow. This should be 1801 // checked earlier in isLiteralImm() 1802 1803 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1804 Inst.addOperand(MCOperand::createImm(ImmVal)); 1805 return; 1806 } 1807 default: 1808 llvm_unreachable("invalid operand size"); 1809 } 1810 1811 return; 1812 } 1813 1814 // We got int literal token. 1815 // Only sign extend inline immediates. 
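  // A rough sketch of the cases below (the inline-constant ranges are
  // assumptions based on isInlinableLiteral*): a small token such as 64
  // can be encoded as an inline constant and is added unchanged, while a
  // value like 0x12345678 cannot, so only its low 32 (or 16) bits are
  // added as a literal operand.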
1816 switch (OpTy) { 1817 case AMDGPU::OPERAND_REG_IMM_INT32: 1818 case AMDGPU::OPERAND_REG_IMM_FP32: 1819 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1820 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1821 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1822 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1823 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1824 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1825 if (isSafeTruncation(Val, 32) && 1826 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1827 AsmParser->hasInv2PiInlineImm())) { 1828 Inst.addOperand(MCOperand::createImm(Val)); 1829 return; 1830 } 1831 1832 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1833 return; 1834 1835 case AMDGPU::OPERAND_REG_IMM_INT64: 1836 case AMDGPU::OPERAND_REG_IMM_FP64: 1837 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1838 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1839 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1840 Inst.addOperand(MCOperand::createImm(Val)); 1841 return; 1842 } 1843 1844 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1845 return; 1846 1847 case AMDGPU::OPERAND_REG_IMM_INT16: 1848 case AMDGPU::OPERAND_REG_IMM_FP16: 1849 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1850 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1851 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1852 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1853 if (isSafeTruncation(Val, 16) && 1854 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1855 AsmParser->hasInv2PiInlineImm())) { 1856 Inst.addOperand(MCOperand::createImm(Val)); 1857 return; 1858 } 1859 1860 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1861 return; 1862 1863 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1864 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1865 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1866 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1867 assert(isSafeTruncation(Val, 16)); 1868 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1869 AsmParser->hasInv2PiInlineImm())); 1870 1871 Inst.addOperand(MCOperand::createImm(Val)); 1872 return; 1873 } 1874 default: 1875 llvm_unreachable("invalid operand size"); 1876 } 1877 } 1878 1879 template <unsigned Bitwidth> 1880 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1881 APInt Literal(64, Imm.Val); 1882 1883 if (!Imm.IsFPImm) { 1884 // We got int literal token. 
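    // KImm operands carry a fixed-width literal encoded in the instruction
    // (used by opcodes such as v_madmk_f32 -- an assumption, see the
    // special 'imm' operand handling elsewhere in this file), so only the
    // low Bitwidth bits of the integer token are kept here.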
1885 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1886 return; 1887 } 1888 1889 bool Lost; 1890 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1891 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1892 APFloat::rmNearestTiesToEven, &Lost); 1893 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1894 } 1895 1896 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1897 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1898 } 1899 1900 static bool isInlineValue(unsigned Reg) { 1901 switch (Reg) { 1902 case AMDGPU::SRC_SHARED_BASE: 1903 case AMDGPU::SRC_SHARED_LIMIT: 1904 case AMDGPU::SRC_PRIVATE_BASE: 1905 case AMDGPU::SRC_PRIVATE_LIMIT: 1906 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1907 return true; 1908 case AMDGPU::SRC_VCCZ: 1909 case AMDGPU::SRC_EXECZ: 1910 case AMDGPU::SRC_SCC: 1911 return true; 1912 case AMDGPU::SGPR_NULL: 1913 return true; 1914 default: 1915 return false; 1916 } 1917 } 1918 1919 bool AMDGPUOperand::isInlineValue() const { 1920 return isRegKind() && ::isInlineValue(getReg()); 1921 } 1922 1923 //===----------------------------------------------------------------------===// 1924 // AsmParser 1925 //===----------------------------------------------------------------------===// 1926 1927 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1928 if (Is == IS_VGPR) { 1929 switch (RegWidth) { 1930 default: return -1; 1931 case 1: return AMDGPU::VGPR_32RegClassID; 1932 case 2: return AMDGPU::VReg_64RegClassID; 1933 case 3: return AMDGPU::VReg_96RegClassID; 1934 case 4: return AMDGPU::VReg_128RegClassID; 1935 case 5: return AMDGPU::VReg_160RegClassID; 1936 case 6: return AMDGPU::VReg_192RegClassID; 1937 case 8: return AMDGPU::VReg_256RegClassID; 1938 case 16: return AMDGPU::VReg_512RegClassID; 1939 case 32: return AMDGPU::VReg_1024RegClassID; 1940 } 1941 } else if (Is == IS_TTMP) { 1942 switch (RegWidth) { 1943 default: return -1; 1944 case 1: return AMDGPU::TTMP_32RegClassID; 1945 case 2: return AMDGPU::TTMP_64RegClassID; 1946 case 4: return AMDGPU::TTMP_128RegClassID; 1947 case 8: return AMDGPU::TTMP_256RegClassID; 1948 case 16: return AMDGPU::TTMP_512RegClassID; 1949 } 1950 } else if (Is == IS_SGPR) { 1951 switch (RegWidth) { 1952 default: return -1; 1953 case 1: return AMDGPU::SGPR_32RegClassID; 1954 case 2: return AMDGPU::SGPR_64RegClassID; 1955 case 3: return AMDGPU::SGPR_96RegClassID; 1956 case 4: return AMDGPU::SGPR_128RegClassID; 1957 case 5: return AMDGPU::SGPR_160RegClassID; 1958 case 6: return AMDGPU::SGPR_192RegClassID; 1959 case 8: return AMDGPU::SGPR_256RegClassID; 1960 case 16: return AMDGPU::SGPR_512RegClassID; 1961 } 1962 } else if (Is == IS_AGPR) { 1963 switch (RegWidth) { 1964 default: return -1; 1965 case 1: return AMDGPU::AGPR_32RegClassID; 1966 case 2: return AMDGPU::AReg_64RegClassID; 1967 case 3: return AMDGPU::AReg_96RegClassID; 1968 case 4: return AMDGPU::AReg_128RegClassID; 1969 case 5: return AMDGPU::AReg_160RegClassID; 1970 case 6: return AMDGPU::AReg_192RegClassID; 1971 case 8: return AMDGPU::AReg_256RegClassID; 1972 case 16: return AMDGPU::AReg_512RegClassID; 1973 case 32: return AMDGPU::AReg_1024RegClassID; 1974 } 1975 } 1976 return -1; 1977 } 1978 1979 static unsigned getSpecialRegForName(StringRef RegName) { 1980 return StringSwitch<unsigned>(RegName) 1981 .Case("exec", AMDGPU::EXEC) 1982 .Case("vcc", AMDGPU::VCC) 1983 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1984 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1985 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1986 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1987 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1988 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1989 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1990 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1991 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1992 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1993 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1994 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1995 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1996 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1997 .Case("m0", AMDGPU::M0) 1998 .Case("vccz", AMDGPU::SRC_VCCZ) 1999 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2000 .Case("execz", AMDGPU::SRC_EXECZ) 2001 .Case("src_execz", AMDGPU::SRC_EXECZ) 2002 .Case("scc", AMDGPU::SRC_SCC) 2003 .Case("src_scc", AMDGPU::SRC_SCC) 2004 .Case("tba", AMDGPU::TBA) 2005 .Case("tma", AMDGPU::TMA) 2006 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2007 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2008 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2009 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2010 .Case("vcc_lo", AMDGPU::VCC_LO) 2011 .Case("vcc_hi", AMDGPU::VCC_HI) 2012 .Case("exec_lo", AMDGPU::EXEC_LO) 2013 .Case("exec_hi", AMDGPU::EXEC_HI) 2014 .Case("tma_lo", AMDGPU::TMA_LO) 2015 .Case("tma_hi", AMDGPU::TMA_HI) 2016 .Case("tba_lo", AMDGPU::TBA_LO) 2017 .Case("tba_hi", AMDGPU::TBA_HI) 2018 .Case("pc", AMDGPU::PC_REG) 2019 .Case("null", AMDGPU::SGPR_NULL) 2020 .Default(AMDGPU::NoRegister); 2021 } 2022 2023 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2024 SMLoc &EndLoc, bool RestoreOnFailure) { 2025 auto R = parseRegister(); 2026 if (!R) return true; 2027 assert(R->isReg()); 2028 RegNo = R->getReg(); 2029 StartLoc = R->getStartLoc(); 2030 EndLoc = R->getEndLoc(); 2031 return false; 2032 } 2033 2034 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2035 SMLoc &EndLoc) { 2036 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2037 } 2038 2039 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2040 SMLoc &StartLoc, 2041 SMLoc &EndLoc) { 2042 bool Result = 2043 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2044 bool PendingErrors = getParser().hasPendingError(); 2045 getParser().clearPendingErrors(); 2046 if (PendingErrors) 2047 return MatchOperand_ParseFail; 2048 if (Result) 2049 return MatchOperand_NoMatch; 2050 return MatchOperand_Success; 2051 } 2052 2053 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2054 RegisterKind RegKind, unsigned Reg1) { 2055 switch (RegKind) { 2056 case IS_SPECIAL: 2057 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2058 Reg = AMDGPU::EXEC; 2059 RegWidth = 2; 2060 return true; 2061 } 2062 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2063 Reg = AMDGPU::FLAT_SCR; 2064 RegWidth = 2; 2065 return true; 2066 } 2067 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2068 Reg = AMDGPU::XNACK_MASK; 2069 RegWidth = 2; 2070 return true; 2071 } 2072 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2073 Reg = AMDGPU::VCC; 2074 RegWidth = 2; 2075 return true; 2076 } 2077 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2078 Reg = AMDGPU::TBA; 2079 RegWidth = 2; 2080 return true; 2081 } 2082 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2083 Reg = AMDGPU::TMA; 2084 RegWidth = 2; 
2085 return true; 2086 } 2087 return false; 2088 case IS_VGPR: 2089 case IS_SGPR: 2090 case IS_AGPR: 2091 case IS_TTMP: 2092 if (Reg1 != Reg + RegWidth) { 2093 return false; 2094 } 2095 RegWidth++; 2096 return true; 2097 default: 2098 llvm_unreachable("unexpected register kind"); 2099 } 2100 } 2101 2102 struct RegInfo { 2103 StringLiteral Name; 2104 RegisterKind Kind; 2105 }; 2106 2107 static constexpr RegInfo RegularRegisters[] = { 2108 {{"v"}, IS_VGPR}, 2109 {{"s"}, IS_SGPR}, 2110 {{"ttmp"}, IS_TTMP}, 2111 {{"acc"}, IS_AGPR}, 2112 {{"a"}, IS_AGPR}, 2113 }; 2114 2115 static bool isRegularReg(RegisterKind Kind) { 2116 return Kind == IS_VGPR || 2117 Kind == IS_SGPR || 2118 Kind == IS_TTMP || 2119 Kind == IS_AGPR; 2120 } 2121 2122 static const RegInfo* getRegularRegInfo(StringRef Str) { 2123 for (const RegInfo &Reg : RegularRegisters) 2124 if (Str.startswith(Reg.Name)) 2125 return &Reg; 2126 return nullptr; 2127 } 2128 2129 static bool getRegNum(StringRef Str, unsigned& Num) { 2130 return !Str.getAsInteger(10, Num); 2131 } 2132 2133 bool 2134 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2135 const AsmToken &NextToken) const { 2136 2137 // A list of consecutive registers: [s0,s1,s2,s3] 2138 if (Token.is(AsmToken::LBrac)) 2139 return true; 2140 2141 if (!Token.is(AsmToken::Identifier)) 2142 return false; 2143 2144 // A single register like s0 or a range of registers like s[0:1] 2145 2146 StringRef Str = Token.getString(); 2147 const RegInfo *Reg = getRegularRegInfo(Str); 2148 if (Reg) { 2149 StringRef RegName = Reg->Name; 2150 StringRef RegSuffix = Str.substr(RegName.size()); 2151 if (!RegSuffix.empty()) { 2152 unsigned Num; 2153 // A single register with an index: rXX 2154 if (getRegNum(RegSuffix, Num)) 2155 return true; 2156 } else { 2157 // A range of registers: r[XX:YY]. 2158 if (NextToken.is(AsmToken::LBrac)) 2159 return true; 2160 } 2161 } 2162 2163 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2164 } 2165 2166 bool 2167 AMDGPUAsmParser::isRegister() 2168 { 2169 return isRegister(getToken(), peekToken()); 2170 } 2171 2172 unsigned 2173 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2174 unsigned RegNum, 2175 unsigned RegWidth) { 2176 2177 assert(isRegularReg(RegKind)); 2178 2179 unsigned AlignSize = 1; 2180 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2181 // SGPR and TTMP registers must be aligned. 2182 // Max required alignment is 4 dwords. 
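    // E.g. with RegWidth == 4 the alignment below is 4, so a range like
    // s[4:7] passes the modulo check while s[2:5] is rejected (register
    // ranges here are illustrative examples only).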
2183 AlignSize = std::min(RegWidth, 4u); 2184 } 2185 2186 if (RegNum % AlignSize != 0) 2187 return AMDGPU::NoRegister; 2188 2189 unsigned RegIdx = RegNum / AlignSize; 2190 int RCID = getRegClass(RegKind, RegWidth); 2191 if (RCID == -1) 2192 return AMDGPU::NoRegister; 2193 2194 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2195 const MCRegisterClass RC = TRI->getRegClass(RCID); 2196 if (RegIdx >= RC.getNumRegs()) 2197 return AMDGPU::NoRegister; 2198 2199 return RC.getRegister(RegIdx); 2200 } 2201 2202 bool 2203 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2204 int64_t RegLo, RegHi; 2205 if (!trySkipToken(AsmToken::LBrac)) 2206 return false; 2207 2208 if (!parseExpr(RegLo)) 2209 return false; 2210 2211 if (trySkipToken(AsmToken::Colon)) { 2212 if (!parseExpr(RegHi)) 2213 return false; 2214 } else { 2215 RegHi = RegLo; 2216 } 2217 2218 if (!trySkipToken(AsmToken::RBrac)) 2219 return false; 2220 2221 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2222 return false; 2223 2224 Num = static_cast<unsigned>(RegLo); 2225 Width = (RegHi - RegLo) + 1; 2226 return true; 2227 } 2228 2229 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2230 unsigned &RegNum, unsigned &RegWidth, 2231 SmallVectorImpl<AsmToken> &Tokens) { 2232 assert(isToken(AsmToken::Identifier)); 2233 unsigned Reg = getSpecialRegForName(getTokenStr()); 2234 if (Reg) { 2235 RegNum = 0; 2236 RegWidth = 1; 2237 RegKind = IS_SPECIAL; 2238 Tokens.push_back(getToken()); 2239 lex(); // skip register name 2240 } 2241 return Reg; 2242 } 2243 2244 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2245 unsigned &RegNum, unsigned &RegWidth, 2246 SmallVectorImpl<AsmToken> &Tokens) { 2247 assert(isToken(AsmToken::Identifier)); 2248 StringRef RegName = getTokenStr(); 2249 2250 const RegInfo *RI = getRegularRegInfo(RegName); 2251 if (!RI) 2252 return AMDGPU::NoRegister; 2253 Tokens.push_back(getToken()); 2254 lex(); // skip register name 2255 2256 RegKind = RI->Kind; 2257 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2258 if (!RegSuffix.empty()) { 2259 // Single 32-bit register: vXX. 2260 if (!getRegNum(RegSuffix, RegNum)) 2261 return AMDGPU::NoRegister; 2262 RegWidth = 1; 2263 } else { 2264 // Range of registers: v[XX:YY]. ":YY" is optional. 
2265 if (!ParseRegRange(RegNum, RegWidth)) 2266 return AMDGPU::NoRegister; 2267 } 2268 2269 return getRegularReg(RegKind, RegNum, RegWidth); 2270 } 2271 2272 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2273 unsigned &RegWidth, 2274 SmallVectorImpl<AsmToken> &Tokens) { 2275 unsigned Reg = AMDGPU::NoRegister; 2276 2277 if (!trySkipToken(AsmToken::LBrac)) 2278 return AMDGPU::NoRegister; 2279 2280 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2281 2282 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2283 return AMDGPU::NoRegister; 2284 if (RegWidth != 1) 2285 return AMDGPU::NoRegister; 2286 2287 for (; trySkipToken(AsmToken::Comma); ) { 2288 RegisterKind NextRegKind; 2289 unsigned NextReg, NextRegNum, NextRegWidth; 2290 2291 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth, 2292 Tokens)) 2293 return AMDGPU::NoRegister; 2294 if (NextRegWidth != 1) 2295 return AMDGPU::NoRegister; 2296 if (NextRegKind != RegKind) 2297 return AMDGPU::NoRegister; 2298 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2299 return AMDGPU::NoRegister; 2300 } 2301 2302 if (!trySkipToken(AsmToken::RBrac)) 2303 return AMDGPU::NoRegister; 2304 2305 if (isRegularReg(RegKind)) 2306 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2307 2308 return Reg; 2309 } 2310 2311 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2312 unsigned &RegNum, unsigned &RegWidth, 2313 SmallVectorImpl<AsmToken> &Tokens) { 2314 Reg = AMDGPU::NoRegister; 2315 2316 if (isToken(AsmToken::Identifier)) { 2317 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2318 if (Reg == AMDGPU::NoRegister) 2319 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2320 } else { 2321 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2322 } 2323 2324 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2325 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2326 } 2327 2328 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2329 unsigned &RegNum, unsigned &RegWidth, 2330 bool RestoreOnFailure) { 2331 Reg = AMDGPU::NoRegister; 2332 2333 SmallVector<AsmToken, 1> Tokens; 2334 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2335 if (RestoreOnFailure) { 2336 while (!Tokens.empty()) { 2337 getLexer().UnLex(Tokens.pop_back_val()); 2338 } 2339 } 2340 return true; 2341 } 2342 return false; 2343 } 2344 2345 Optional<StringRef> 2346 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2347 switch (RegKind) { 2348 case IS_VGPR: 2349 return StringRef(".amdgcn.next_free_vgpr"); 2350 case IS_SGPR: 2351 return StringRef(".amdgcn.next_free_sgpr"); 2352 default: 2353 return None; 2354 } 2355 } 2356 2357 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2358 auto SymbolName = getGprCountSymbolName(RegKind); 2359 assert(SymbolName && "initializing invalid register kind"); 2360 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2361 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2362 } 2363 2364 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2365 unsigned DwordRegIndex, 2366 unsigned RegWidth) { 2367 // Symbols are only defined for GCN targets 2368 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2369 return true; 2370 2371 auto SymbolName = getGprCountSymbolName(RegKind); 2372 if (!SymbolName) 2373 return true; 2374 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2375 2376 int64_t NewMax 
= DwordRegIndex + RegWidth - 1; 2377 int64_t OldCount; 2378 2379 if (!Sym->isVariable()) 2380 return !Error(getParser().getTok().getLoc(), 2381 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2382 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2383 return !Error( 2384 getParser().getTok().getLoc(), 2385 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2386 2387 if (OldCount <= NewMax) 2388 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2389 2390 return true; 2391 } 2392 2393 std::unique_ptr<AMDGPUOperand> 2394 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2395 const auto &Tok = Parser.getTok(); 2396 SMLoc StartLoc = Tok.getLoc(); 2397 SMLoc EndLoc = Tok.getEndLoc(); 2398 RegisterKind RegKind; 2399 unsigned Reg, RegNum, RegWidth; 2400 2401 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2402 //FIXME: improve error messages (bug 41303). 2403 Error(StartLoc, "not a valid operand."); 2404 return nullptr; 2405 } 2406 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2407 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2408 return nullptr; 2409 } else 2410 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2411 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2412 } 2413 2414 OperandMatchResultTy 2415 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2416 // TODO: add syntactic sugar for 1/(2*PI) 2417 2418 assert(!isRegister()); 2419 assert(!isModifier()); 2420 2421 const auto& Tok = getToken(); 2422 const auto& NextTok = peekToken(); 2423 bool IsReal = Tok.is(AsmToken::Real); 2424 SMLoc S = getLoc(); 2425 bool Negate = false; 2426 2427 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2428 lex(); 2429 IsReal = true; 2430 Negate = true; 2431 } 2432 2433 if (IsReal) { 2434 // Floating-point expressions are not supported. 2435 // Can only allow floating-point literals with an 2436 // optional sign. 2437 2438 StringRef Num = getTokenStr(); 2439 lex(); 2440 2441 APFloat RealVal(APFloat::IEEEdouble()); 2442 auto roundMode = APFloat::rmNearestTiesToEven; 2443 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2444 return MatchOperand_ParseFail; 2445 } 2446 if (Negate) 2447 RealVal.changeSign(); 2448 2449 Operands.push_back( 2450 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2451 AMDGPUOperand::ImmTyNone, true)); 2452 2453 return MatchOperand_Success; 2454 2455 } else { 2456 int64_t IntVal; 2457 const MCExpr *Expr; 2458 SMLoc S = getLoc(); 2459 2460 if (HasSP3AbsModifier) { 2461 // This is a workaround for handling expressions 2462 // as arguments of SP3 'abs' modifier, for example: 2463 // |1.0| 2464 // |-1| 2465 // |1+x| 2466 // This syntax is not compatible with syntax of standard 2467 // MC expressions (due to the trailing '|'). 
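      // Sketch of the difference (an assumption about the two parsers, not
      // verified for every SP3 input): parseExpression() would treat the
      // trailing '|' as a binary OR and look for another operand, whereas
      // parsePrimaryExpr() stops before it, letting the caller consume the
      // closing bar.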
2468 SMLoc EndLoc; 2469 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2470 return MatchOperand_ParseFail; 2471 } else { 2472 if (Parser.parseExpression(Expr)) 2473 return MatchOperand_ParseFail; 2474 } 2475 2476 if (Expr->evaluateAsAbsolute(IntVal)) { 2477 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2478 } else { 2479 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2480 } 2481 2482 return MatchOperand_Success; 2483 } 2484 2485 return MatchOperand_NoMatch; 2486 } 2487 2488 OperandMatchResultTy 2489 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2490 if (!isRegister()) 2491 return MatchOperand_NoMatch; 2492 2493 if (auto R = parseRegister()) { 2494 assert(R->isReg()); 2495 Operands.push_back(std::move(R)); 2496 return MatchOperand_Success; 2497 } 2498 return MatchOperand_ParseFail; 2499 } 2500 2501 OperandMatchResultTy 2502 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2503 auto res = parseReg(Operands); 2504 if (res != MatchOperand_NoMatch) { 2505 return res; 2506 } else if (isModifier()) { 2507 return MatchOperand_NoMatch; 2508 } else { 2509 return parseImm(Operands, HasSP3AbsMod); 2510 } 2511 } 2512 2513 bool 2514 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2515 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2516 const auto &str = Token.getString(); 2517 return str == "abs" || str == "neg" || str == "sext"; 2518 } 2519 return false; 2520 } 2521 2522 bool 2523 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2524 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2525 } 2526 2527 bool 2528 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2529 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2530 } 2531 2532 bool 2533 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2534 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2535 } 2536 2537 // Check if this is an operand modifier or an opcode modifier 2538 // which may look like an expression but it is not. We should 2539 // avoid parsing these modifiers as expressions. Currently 2540 // recognized sequences are: 2541 // |...| 2542 // abs(...) 2543 // neg(...) 2544 // sext(...) 2545 // -reg 2546 // -|...| 2547 // -abs(...) 2548 // name:... 2549 // Note that simple opcode modifiers like 'gds' may be parsed as 2550 // expressions; this is a special case. See getExpressionAsToken. 2551 // 2552 bool 2553 AMDGPUAsmParser::isModifier() { 2554 2555 AsmToken Tok = getToken(); 2556 AsmToken NextToken[2]; 2557 peekTokens(NextToken); 2558 2559 return isOperandModifier(Tok, NextToken[0]) || 2560 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2561 isOpcodeModifierWithVal(Tok, NextToken[0]); 2562 } 2563 2564 // Check if the current token is an SP3 'neg' modifier. 2565 // Currently this modifier is allowed in the following context: 2566 // 2567 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2568 // 2. Before an 'abs' modifier: -abs(...) 2569 // 3. Before an SP3 'abs' modifier: -|...| 2570 // 2571 // In all other cases "-" is handled as a part 2572 // of an expression that follows the sign. 
2573 // 2574 // Note: When "-" is followed by an integer literal, 2575 // this is interpreted as integer negation rather 2576 // than a floating-point NEG modifier applied to N. 2577 // Beside being contr-intuitive, such use of floating-point 2578 // NEG modifier would have resulted in different meaning 2579 // of integer literals used with VOP1/2/C and VOP3, 2580 // for example: 2581 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2582 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2583 // Negative fp literals with preceding "-" are 2584 // handled likewise for unifomtity 2585 // 2586 bool 2587 AMDGPUAsmParser::parseSP3NegModifier() { 2588 2589 AsmToken NextToken[2]; 2590 peekTokens(NextToken); 2591 2592 if (isToken(AsmToken::Minus) && 2593 (isRegister(NextToken[0], NextToken[1]) || 2594 NextToken[0].is(AsmToken::Pipe) || 2595 isId(NextToken[0], "abs"))) { 2596 lex(); 2597 return true; 2598 } 2599 2600 return false; 2601 } 2602 2603 OperandMatchResultTy 2604 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2605 bool AllowImm) { 2606 bool Neg, SP3Neg; 2607 bool Abs, SP3Abs; 2608 SMLoc Loc; 2609 2610 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2611 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2612 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2613 return MatchOperand_ParseFail; 2614 } 2615 2616 SP3Neg = parseSP3NegModifier(); 2617 2618 Loc = getLoc(); 2619 Neg = trySkipId("neg"); 2620 if (Neg && SP3Neg) { 2621 Error(Loc, "expected register or immediate"); 2622 return MatchOperand_ParseFail; 2623 } 2624 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2625 return MatchOperand_ParseFail; 2626 2627 Abs = trySkipId("abs"); 2628 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2629 return MatchOperand_ParseFail; 2630 2631 Loc = getLoc(); 2632 SP3Abs = trySkipToken(AsmToken::Pipe); 2633 if (Abs && SP3Abs) { 2634 Error(Loc, "expected register or immediate"); 2635 return MatchOperand_ParseFail; 2636 } 2637 2638 OperandMatchResultTy Res; 2639 if (AllowImm) { 2640 Res = parseRegOrImm(Operands, SP3Abs); 2641 } else { 2642 Res = parseReg(Operands); 2643 } 2644 if (Res != MatchOperand_Success) { 2645 return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; 2646 } 2647 2648 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2649 return MatchOperand_ParseFail; 2650 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2651 return MatchOperand_ParseFail; 2652 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2653 return MatchOperand_ParseFail; 2654 2655 AMDGPUOperand::Modifiers Mods; 2656 Mods.Abs = Abs || SP3Abs; 2657 Mods.Neg = Neg || SP3Neg; 2658 2659 if (Mods.hasFPModifiers()) { 2660 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2661 if (Op.isExpr()) { 2662 Error(Op.getStartLoc(), "expected an absolute expression"); 2663 return MatchOperand_ParseFail; 2664 } 2665 Op.setModifiers(Mods); 2666 } 2667 return MatchOperand_Success; 2668 } 2669 2670 OperandMatchResultTy 2671 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2672 bool AllowImm) { 2673 bool Sext = trySkipId("sext"); 2674 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2675 return MatchOperand_ParseFail; 2676 2677 OperandMatchResultTy Res; 2678 if (AllowImm) { 2679 Res = parseRegOrImm(Operands); 2680 } else { 2681 Res = parseReg(Operands); 2682 } 2683 if (Res != MatchOperand_Success) { 2684 return Sext? MatchOperand_ParseFail : Res; 2685 } 2686 2687 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2688 return MatchOperand_ParseFail; 2689 2690 AMDGPUOperand::Modifiers Mods; 2691 Mods.Sext = Sext; 2692 2693 if (Mods.hasIntModifiers()) { 2694 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2695 if (Op.isExpr()) { 2696 Error(Op.getStartLoc(), "expected an absolute expression"); 2697 return MatchOperand_ParseFail; 2698 } 2699 Op.setModifiers(Mods); 2700 } 2701 2702 return MatchOperand_Success; 2703 } 2704 2705 OperandMatchResultTy 2706 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2707 return parseRegOrImmWithFPInputMods(Operands, false); 2708 } 2709 2710 OperandMatchResultTy 2711 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2712 return parseRegOrImmWithIntInputMods(Operands, false); 2713 } 2714 2715 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2716 auto Loc = getLoc(); 2717 if (trySkipId("off")) { 2718 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2719 AMDGPUOperand::ImmTyOff, false)); 2720 return MatchOperand_Success; 2721 } 2722 2723 if (!isRegister()) 2724 return MatchOperand_NoMatch; 2725 2726 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2727 if (Reg) { 2728 Operands.push_back(std::move(Reg)); 2729 return MatchOperand_Success; 2730 } 2731 2732 return MatchOperand_ParseFail; 2733 2734 } 2735 2736 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2737 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2738 2739 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2740 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2741 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2742 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2743 return Match_InvalidOperand; 2744 2745 if ((TSFlags & SIInstrFlags::VOP3) && 2746 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2747 getForcedEncodingSize() != 64) 2748 return Match_PreferE32; 2749 2750 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2751 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2752 // v_mac_f32/16 allow only dst_sel == DWORD; 2753 auto OpNum = 2754 
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
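// Illustrative values (assumptions based on the inline-constant helpers
// called below): for a 4-byte operand, 64 or the bit pattern of 1.0f is
// an inline constant, while 65 or the bit pattern of 3.0f is not and
// would instead occupy a literal slot counted against the constant bus.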
2818 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2819 unsigned OpIdx) const { 2820 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2821 2822 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2823 return false; 2824 } 2825 2826 const MCOperand &MO = Inst.getOperand(OpIdx); 2827 2828 int64_t Val = MO.getImm(); 2829 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2830 2831 switch (OpSize) { // expected operand size 2832 case 8: 2833 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2834 case 4: 2835 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2836 case 2: { 2837 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2838 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2839 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2840 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2841 return AMDGPU::isInlinableIntLiteral(Val); 2842 2843 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2844 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2845 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2846 return AMDGPU::isInlinableIntLiteralV216(Val); 2847 2848 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2849 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2850 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2851 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2852 2853 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2854 } 2855 default: 2856 llvm_unreachable("invalid operand size"); 2857 } 2858 } 2859 2860 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2861 if (!isGFX10()) 2862 return 1; 2863 2864 switch (Opcode) { 2865 // 64-bit shift instructions can use only one scalar value input 2866 case AMDGPU::V_LSHLREV_B64: 2867 case AMDGPU::V_LSHLREV_B64_gfx10: 2868 case AMDGPU::V_LSHL_B64: 2869 case AMDGPU::V_LSHRREV_B64: 2870 case AMDGPU::V_LSHRREV_B64_gfx10: 2871 case AMDGPU::V_LSHR_B64: 2872 case AMDGPU::V_ASHRREV_I64: 2873 case AMDGPU::V_ASHRREV_I64_gfx10: 2874 case AMDGPU::V_ASHR_I64: 2875 return 1; 2876 default: 2877 return 2; 2878 } 2879 } 2880 2881 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2882 const MCOperand &MO = Inst.getOperand(OpIdx); 2883 if (MO.isImm()) { 2884 return !isInlineConstant(Inst, OpIdx); 2885 } else if (MO.isReg()) { 2886 auto Reg = MO.getReg(); 2887 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2888 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2889 } else { 2890 return true; 2891 } 2892 } 2893 2894 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2895 const unsigned Opcode = Inst.getOpcode(); 2896 const MCInstrDesc &Desc = MII.get(Opcode); 2897 unsigned ConstantBusUseCount = 0; 2898 unsigned NumLiterals = 0; 2899 unsigned LiteralSize; 2900 2901 if (Desc.TSFlags & 2902 (SIInstrFlags::VOPC | 2903 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2904 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2905 SIInstrFlags::SDWA)) { 2906 // Check special imm operands (used by madmk, etc) 2907 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2908 ++ConstantBusUseCount; 2909 } 2910 2911 SmallDenseSet<unsigned> SGPRsUsed; 2912 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2913 if (SGPRUsed != AMDGPU::NoRegister) { 2914 SGPRsUsed.insert(SGPRUsed); 2915 ++ConstantBusUseCount; 2916 } 2917 2918 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2919 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, 
AMDGPU::OpName::src1); 2920 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2921 2922 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2923 2924 for (int OpIdx : OpIndices) { 2925 if (OpIdx == -1) break; 2926 2927 const MCOperand &MO = Inst.getOperand(OpIdx); 2928 if (usesConstantBus(Inst, OpIdx)) { 2929 if (MO.isReg()) { 2930 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2931 // Pairs of registers with a partial intersections like these 2932 // s0, s[0:1] 2933 // flat_scratch_lo, flat_scratch 2934 // flat_scratch_lo, flat_scratch_hi 2935 // are theoretically valid but they are disabled anyway. 2936 // Note that this code mimics SIInstrInfo::verifyInstruction 2937 if (!SGPRsUsed.count(Reg)) { 2938 SGPRsUsed.insert(Reg); 2939 ++ConstantBusUseCount; 2940 } 2941 } else { // Expression or a literal 2942 2943 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2944 continue; // special operand like VINTERP attr_chan 2945 2946 // An instruction may use only one literal. 2947 // This has been validated on the previous step. 2948 // See validateVOP3Literal. 2949 // This literal may be used as more than one operand. 2950 // If all these operands are of the same size, 2951 // this literal counts as one scalar value. 2952 // Otherwise it counts as 2 scalar values. 2953 // See "GFX10 Shader Programming", section 3.6.2.3. 2954 2955 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2956 if (Size < 4) Size = 4; 2957 2958 if (NumLiterals == 0) { 2959 NumLiterals = 1; 2960 LiteralSize = Size; 2961 } else if (LiteralSize != Size) { 2962 NumLiterals = 2; 2963 } 2964 } 2965 } 2966 } 2967 } 2968 ConstantBusUseCount += NumLiterals; 2969 2970 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2971 } 2972 2973 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2974 const unsigned Opcode = Inst.getOpcode(); 2975 const MCInstrDesc &Desc = MII.get(Opcode); 2976 2977 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2978 if (DstIdx == -1 || 2979 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2980 return true; 2981 } 2982 2983 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2984 2985 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2986 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2987 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2988 2989 assert(DstIdx != -1); 2990 const MCOperand &Dst = Inst.getOperand(DstIdx); 2991 assert(Dst.isReg()); 2992 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2993 2994 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2995 2996 for (int SrcIdx : SrcIndices) { 2997 if (SrcIdx == -1) break; 2998 const MCOperand &Src = Inst.getOperand(SrcIdx); 2999 if (Src.isReg()) { 3000 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3001 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3002 return false; 3003 } 3004 } 3005 } 3006 3007 return true; 3008 } 3009 3010 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3011 3012 const unsigned Opc = Inst.getOpcode(); 3013 const MCInstrDesc &Desc = MII.get(Opc); 3014 3015 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3016 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3017 assert(ClampIdx != -1); 3018 return Inst.getOperand(ClampIdx).getImm() == 0; 3019 } 3020 3021 return true; 3022 } 3023 3024 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst 
&Inst) { 3025 3026 const unsigned Opc = Inst.getOpcode(); 3027 const MCInstrDesc &Desc = MII.get(Opc); 3028 3029 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3030 return true; 3031 3032 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3033 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3034 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3035 3036 assert(VDataIdx != -1); 3037 assert(DMaskIdx != -1); 3038 assert(TFEIdx != -1); 3039 3040 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3041 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3042 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3043 if (DMask == 0) 3044 DMask = 1; 3045 3046 unsigned DataSize = 3047 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3048 if (hasPackedD16()) { 3049 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3050 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3051 DataSize = (DataSize + 1) / 2; 3052 } 3053 3054 return (VDataSize / 4) == DataSize + TFESize; 3055 } 3056 3057 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3058 const unsigned Opc = Inst.getOpcode(); 3059 const MCInstrDesc &Desc = MII.get(Opc); 3060 3061 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3062 return true; 3063 3064 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3065 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3066 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3067 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3068 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3069 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3070 3071 assert(VAddr0Idx != -1); 3072 assert(SrsrcIdx != -1); 3073 assert(DimIdx != -1); 3074 assert(SrsrcIdx > VAddr0Idx); 3075 3076 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3077 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3078 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3079 unsigned VAddrSize = 3080 IsNSA ? SrsrcIdx - VAddr0Idx 3081 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3082 3083 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3084 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3085 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3086 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3087 if (!IsNSA) { 3088 if (AddrSize > 8) 3089 AddrSize = 16; 3090 else if (AddrSize > 4) 3091 AddrSize = 8; 3092 } 3093 3094 return VAddrSize == AddrSize; 3095 } 3096 3097 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3098 3099 const unsigned Opc = Inst.getOpcode(); 3100 const MCInstrDesc &Desc = MII.get(Opc); 3101 3102 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3103 return true; 3104 if (!Desc.mayLoad() || !Desc.mayStore()) 3105 return true; // Not atomic 3106 3107 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3108 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3109 3110 // This is an incomplete check because image_atomic_cmpswap 3111 // may only use 0x3 and 0xf while other atomic operations 3112 // may use 0x1 and 0x3. However these limitations are 3113 // verified when we check that dmask matches dst size. 
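  // E.g. "image_atomic_add ... dmask:0x1" and "image_atomic_cmpswap ...
  // dmask:0x3" both pass this check (the spellings are illustrative, not
  // taken from a specific test).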
3114 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3115 } 3116 3117 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3118 3119 const unsigned Opc = Inst.getOpcode(); 3120 const MCInstrDesc &Desc = MII.get(Opc); 3121 3122 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3123 return true; 3124 3125 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3126 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3127 3128 // GATHER4 instructions use dmask in a different fashion compared to 3129 // other MIMG instructions. The only useful DMASK values are 3130 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3131 // (red,red,red,red) etc.) The ISA document doesn't mention 3132 // this. 3133 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3134 } 3135 3136 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3137 { 3138 switch (Opcode) { 3139 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3140 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3141 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3142 return true; 3143 default: 3144 return false; 3145 } 3146 } 3147 3148 // movrels* opcodes should only allow VGPRS as src0. 3149 // This is specified in .td description for vop1/vop3, 3150 // but sdwa is handled differently. See isSDWAOperand. 3151 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3152 3153 const unsigned Opc = Inst.getOpcode(); 3154 const MCInstrDesc &Desc = MII.get(Opc); 3155 3156 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3157 return true; 3158 3159 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3160 assert(Src0Idx != -1); 3161 3162 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3163 if (!Src0.isReg()) 3164 return false; 3165 3166 auto Reg = Src0.getReg(); 3167 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3168 return !isSGPR(mc2PseudoReg(Reg), TRI); 3169 } 3170 3171 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3172 3173 const unsigned Opc = Inst.getOpcode(); 3174 3175 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3176 return true; 3177 3178 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3179 assert(Src0Idx != -1); 3180 3181 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3182 if (!Src0.isReg()) 3183 return true; 3184 3185 auto Reg = Src0.getReg(); 3186 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3187 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3188 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3189 return false; 3190 } 3191 3192 return true; 3193 } 3194 3195 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3196 3197 const unsigned Opc = Inst.getOpcode(); 3198 const MCInstrDesc &Desc = MII.get(Opc); 3199 3200 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3201 return true; 3202 3203 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3204 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3205 if (isCI() || isSI()) 3206 return false; 3207 } 3208 3209 return true; 3210 } 3211 3212 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3213 const unsigned Opc = Inst.getOpcode(); 3214 const MCInstrDesc &Desc = MII.get(Opc); 3215 3216 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3217 return true; 3218 3219 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3220 if (DimIdx < 0) 3221 return true; 3222 3223 long Imm = Inst.getOperand(DimIdx).getImm(); 3224 if (Imm < 0 || Imm >= 8) 3225 return false; 3226 3227 return 
true; 3228 } 3229 3230 static bool IsRevOpcode(const unsigned Opcode) 3231 { 3232 switch (Opcode) { 3233 case AMDGPU::V_SUBREV_F32_e32: 3234 case AMDGPU::V_SUBREV_F32_e64: 3235 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3236 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3237 case AMDGPU::V_SUBREV_F32_e32_vi: 3238 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3239 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3240 case AMDGPU::V_SUBREV_F32_e64_vi: 3241 3242 case AMDGPU::V_SUBREV_I32_e32: 3243 case AMDGPU::V_SUBREV_I32_e64: 3244 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3245 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3246 3247 case AMDGPU::V_SUBBREV_U32_e32: 3248 case AMDGPU::V_SUBBREV_U32_e64: 3249 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3250 case AMDGPU::V_SUBBREV_U32_e32_vi: 3251 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3252 case AMDGPU::V_SUBBREV_U32_e64_vi: 3253 3254 case AMDGPU::V_SUBREV_U32_e32: 3255 case AMDGPU::V_SUBREV_U32_e64: 3256 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3257 case AMDGPU::V_SUBREV_U32_e32_vi: 3258 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3259 case AMDGPU::V_SUBREV_U32_e64_vi: 3260 3261 case AMDGPU::V_SUBREV_F16_e32: 3262 case AMDGPU::V_SUBREV_F16_e64: 3263 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3264 case AMDGPU::V_SUBREV_F16_e32_vi: 3265 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3266 case AMDGPU::V_SUBREV_F16_e64_vi: 3267 3268 case AMDGPU::V_SUBREV_U16_e32: 3269 case AMDGPU::V_SUBREV_U16_e64: 3270 case AMDGPU::V_SUBREV_U16_e32_vi: 3271 case AMDGPU::V_SUBREV_U16_e64_vi: 3272 3273 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3274 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3275 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3276 3277 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3278 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3279 3280 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3281 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3282 3283 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3284 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3285 3286 case AMDGPU::V_LSHRREV_B32_e32: 3287 case AMDGPU::V_LSHRREV_B32_e64: 3288 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3289 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3290 case AMDGPU::V_LSHRREV_B32_e32_vi: 3291 case AMDGPU::V_LSHRREV_B32_e64_vi: 3292 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3293 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3294 3295 case AMDGPU::V_ASHRREV_I32_e32: 3296 case AMDGPU::V_ASHRREV_I32_e64: 3297 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3298 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3299 case AMDGPU::V_ASHRREV_I32_e32_vi: 3300 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3301 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3302 case AMDGPU::V_ASHRREV_I32_e64_vi: 3303 3304 case AMDGPU::V_LSHLREV_B32_e32: 3305 case AMDGPU::V_LSHLREV_B32_e64: 3306 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3307 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3308 case AMDGPU::V_LSHLREV_B32_e32_vi: 3309 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3310 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3311 case AMDGPU::V_LSHLREV_B32_e64_vi: 3312 3313 case AMDGPU::V_LSHLREV_B16_e32: 3314 case AMDGPU::V_LSHLREV_B16_e64: 3315 case AMDGPU::V_LSHLREV_B16_e32_vi: 3316 case AMDGPU::V_LSHLREV_B16_e64_vi: 3317 case AMDGPU::V_LSHLREV_B16_gfx10: 3318 3319 case AMDGPU::V_LSHRREV_B16_e32: 3320 case AMDGPU::V_LSHRREV_B16_e64: 3321 case AMDGPU::V_LSHRREV_B16_e32_vi: 3322 case AMDGPU::V_LSHRREV_B16_e64_vi: 3323 case AMDGPU::V_LSHRREV_B16_gfx10: 3324 3325 case AMDGPU::V_ASHRREV_I16_e32: 3326 case AMDGPU::V_ASHRREV_I16_e64: 3327 case AMDGPU::V_ASHRREV_I16_e32_vi: 3328 case AMDGPU::V_ASHRREV_I16_e64_vi: 3329 case AMDGPU::V_ASHRREV_I16_gfx10: 3330 3331 case 
AMDGPU::V_LSHLREV_B64: 3332 case AMDGPU::V_LSHLREV_B64_gfx10: 3333 case AMDGPU::V_LSHLREV_B64_vi: 3334 3335 case AMDGPU::V_LSHRREV_B64: 3336 case AMDGPU::V_LSHRREV_B64_gfx10: 3337 case AMDGPU::V_LSHRREV_B64_vi: 3338 3339 case AMDGPU::V_ASHRREV_I64: 3340 case AMDGPU::V_ASHRREV_I64_gfx10: 3341 case AMDGPU::V_ASHRREV_I64_vi: 3342 3343 case AMDGPU::V_PK_LSHLREV_B16: 3344 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3345 case AMDGPU::V_PK_LSHLREV_B16_vi: 3346 3347 case AMDGPU::V_PK_LSHRREV_B16: 3348 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3349 case AMDGPU::V_PK_LSHRREV_B16_vi: 3350 case AMDGPU::V_PK_ASHRREV_I16: 3351 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3352 case AMDGPU::V_PK_ASHRREV_I16_vi: 3353 return true; 3354 default: 3355 return false; 3356 } 3357 } 3358 3359 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3360 3361 using namespace SIInstrFlags; 3362 const unsigned Opcode = Inst.getOpcode(); 3363 const MCInstrDesc &Desc = MII.get(Opcode); 3364 3365 // lds_direct register is defined so that it can be used 3366 // with 9-bit operands only. Ignore encodings which do not accept these. 3367 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3368 return true; 3369 3370 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3371 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3372 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3373 3374 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3375 3376 // lds_direct cannot be specified as either src1 or src2. 3377 for (int SrcIdx : SrcIndices) { 3378 if (SrcIdx == -1) break; 3379 const MCOperand &Src = Inst.getOperand(SrcIdx); 3380 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3381 return false; 3382 } 3383 } 3384 3385 if (Src0Idx == -1) 3386 return true; 3387 3388 const MCOperand &Src = Inst.getOperand(Src0Idx); 3389 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3390 return true; 3391 3392 // lds_direct is specified as src0. Check additional limitations. 3393 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3394 } 3395 3396 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3397 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3398 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3399 if (Op.isFlatOffset()) 3400 return Op.getStartLoc(); 3401 } 3402 return getLoc(); 3403 } 3404 3405 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3406 const OperandVector &Operands) { 3407 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3408 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3409 return true; 3410 3411 auto Opcode = Inst.getOpcode(); 3412 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3413 assert(OpNum != -1); 3414 3415 const auto &Op = Inst.getOperand(OpNum); 3416 if (!hasFlatOffsets() && Op.getImm() != 0) { 3417 Error(getFlatOffsetLoc(Operands), 3418 "flat offset modifier is not supported on this GPU"); 3419 return false; 3420 } 3421 3422 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3423 // For FLAT segment the offset must be positive; 3424 // MSB is ignored and forced to zero. 3425 unsigned OffsetSize = isGFX9() ? 13 : 12; 3426 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3427 if (!isIntN(OffsetSize, Op.getImm())) { 3428 Error(getFlatOffsetLoc(Operands), 3429 isGFX9() ? 
"expected a 13-bit signed offset" : 3430 "expected a 12-bit signed offset"); 3431 return false; 3432 } 3433 } else { 3434 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3435 Error(getFlatOffsetLoc(Operands), 3436 isGFX9() ? "expected a 12-bit unsigned offset" : 3437 "expected an 11-bit unsigned offset"); 3438 return false; 3439 } 3440 } 3441 3442 return true; 3443 } 3444 3445 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3446 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3447 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3448 if (Op.isSMEMOffset()) 3449 return Op.getStartLoc(); 3450 } 3451 return getLoc(); 3452 } 3453 3454 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3455 const OperandVector &Operands) { 3456 if (isCI() || isSI()) 3457 return true; 3458 3459 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3460 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3461 return true; 3462 3463 auto Opcode = Inst.getOpcode(); 3464 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3465 if (OpNum == -1) 3466 return true; 3467 3468 const auto &Op = Inst.getOperand(OpNum); 3469 if (!Op.isImm()) 3470 return true; 3471 3472 uint64_t Offset = Op.getImm(); 3473 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3474 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3475 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3476 return true; 3477 3478 Error(getSMEMOffsetLoc(Operands), 3479 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3480 "expected a 21-bit signed offset"); 3481 3482 return false; 3483 } 3484 3485 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3486 unsigned Opcode = Inst.getOpcode(); 3487 const MCInstrDesc &Desc = MII.get(Opcode); 3488 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3489 return true; 3490 3491 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3492 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3493 3494 const int OpIndices[] = { Src0Idx, Src1Idx }; 3495 3496 unsigned NumExprs = 0; 3497 unsigned NumLiterals = 0; 3498 uint32_t LiteralValue; 3499 3500 for (int OpIdx : OpIndices) { 3501 if (OpIdx == -1) break; 3502 3503 const MCOperand &MO = Inst.getOperand(OpIdx); 3504 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3505 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3506 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3507 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3508 if (NumLiterals == 0 || LiteralValue != Value) { 3509 LiteralValue = Value; 3510 ++NumLiterals; 3511 } 3512 } else if (MO.isExpr()) { 3513 ++NumExprs; 3514 } 3515 } 3516 } 3517 3518 return NumLiterals + NumExprs <= 1; 3519 } 3520 3521 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3522 const unsigned Opc = Inst.getOpcode(); 3523 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3524 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3525 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3526 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3527 3528 if (OpSel & ~3) 3529 return false; 3530 } 3531 return true; 3532 } 3533 3534 // Check if VCC register matches wavefront size 3535 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3536 auto FB = getFeatureBits(); 3537 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3538 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3539 } 3540 3541 // 
VOP3 literal is only allowed in GFX10+ and only one can be used 3542 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3543 unsigned Opcode = Inst.getOpcode(); 3544 const MCInstrDesc &Desc = MII.get(Opcode); 3545 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3546 return true; 3547 3548 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3549 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3550 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3551 3552 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3553 3554 unsigned NumExprs = 0; 3555 unsigned NumLiterals = 0; 3556 uint32_t LiteralValue; 3557 3558 for (int OpIdx : OpIndices) { 3559 if (OpIdx == -1) break; 3560 3561 const MCOperand &MO = Inst.getOperand(OpIdx); 3562 if (!MO.isImm() && !MO.isExpr()) 3563 continue; 3564 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3565 continue; 3566 3567 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3568 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3569 return false; 3570 3571 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3572 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3573 if (NumLiterals == 0 || LiteralValue != Value) { 3574 LiteralValue = Value; 3575 ++NumLiterals; 3576 } 3577 } else if (MO.isExpr()) { 3578 ++NumExprs; 3579 } 3580 } 3581 NumLiterals += NumExprs; 3582 3583 return !NumLiterals || 3584 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3585 } 3586 3587 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3588 const SMLoc &IDLoc, 3589 const OperandVector &Operands) { 3590 if (!validateLdsDirect(Inst)) { 3591 Error(IDLoc, 3592 "invalid use of lds_direct"); 3593 return false; 3594 } 3595 if (!validateSOPLiteral(Inst)) { 3596 Error(IDLoc, 3597 "only one literal operand is allowed"); 3598 return false; 3599 } 3600 if (!validateVOP3Literal(Inst)) { 3601 Error(IDLoc, 3602 "invalid literal operand"); 3603 return false; 3604 } 3605 if (!validateConstantBusLimitations(Inst)) { 3606 Error(IDLoc, 3607 "invalid operand (violates constant bus restrictions)"); 3608 return false; 3609 } 3610 if (!validateEarlyClobberLimitations(Inst)) { 3611 Error(IDLoc, 3612 "destination must be different than all sources"); 3613 return false; 3614 } 3615 if (!validateIntClampSupported(Inst)) { 3616 Error(IDLoc, 3617 "integer clamping is not supported on this GPU"); 3618 return false; 3619 } 3620 if (!validateOpSel(Inst)) { 3621 Error(IDLoc, 3622 "invalid op_sel operand"); 3623 return false; 3624 } 3625 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
3626 if (!validateMIMGD16(Inst)) { 3627 Error(IDLoc, 3628 "d16 modifier is not supported on this GPU"); 3629 return false; 3630 } 3631 if (!validateMIMGDim(Inst)) { 3632 Error(IDLoc, "dim modifier is required on this GPU"); 3633 return false; 3634 } 3635 if (!validateMIMGDataSize(Inst)) { 3636 Error(IDLoc, 3637 "image data size does not match dmask and tfe"); 3638 return false; 3639 } 3640 if (!validateMIMGAddrSize(Inst)) { 3641 Error(IDLoc, 3642 "image address size does not match dim and a16"); 3643 return false; 3644 } 3645 if (!validateMIMGAtomicDMask(Inst)) { 3646 Error(IDLoc, 3647 "invalid atomic image dmask"); 3648 return false; 3649 } 3650 if (!validateMIMGGatherDMask(Inst)) { 3651 Error(IDLoc, 3652 "invalid image_gather dmask: only one bit must be set"); 3653 return false; 3654 } 3655 if (!validateMovrels(Inst)) { 3656 Error(IDLoc, "source operand must be a VGPR"); 3657 return false; 3658 } 3659 if (!validateFlatOffset(Inst, Operands)) { 3660 return false; 3661 } 3662 if (!validateSMEMOffset(Inst, Operands)) { 3663 return false; 3664 } 3665 if (!validateMAIAccWrite(Inst)) { 3666 return false; 3667 } 3668 3669 return true; 3670 } 3671 3672 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3673 const FeatureBitset &FBS, 3674 unsigned VariantID = 0); 3675 3676 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3677 OperandVector &Operands, 3678 MCStreamer &Out, 3679 uint64_t &ErrorInfo, 3680 bool MatchingInlineAsm) { 3681 MCInst Inst; 3682 unsigned Result = Match_Success; 3683 for (auto Variant : getMatchedVariants()) { 3684 uint64_t EI; 3685 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3686 Variant); 3687 // We order match statuses from least to most specific. We use most specific 3688 // status as resulting 3689 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3690 if ((R == Match_Success) || 3691 (R == Match_PreferE32) || 3692 (R == Match_MissingFeature && Result != Match_PreferE32) || 3693 (R == Match_InvalidOperand && Result != Match_MissingFeature 3694 && Result != Match_PreferE32) || 3695 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3696 && Result != Match_MissingFeature 3697 && Result != Match_PreferE32)) { 3698 Result = R; 3699 ErrorInfo = EI; 3700 } 3701 if (R == Match_Success) 3702 break; 3703 } 3704 3705 switch (Result) { 3706 default: break; 3707 case Match_Success: 3708 if (!validateInstruction(Inst, IDLoc, Operands)) { 3709 return true; 3710 } 3711 Inst.setLoc(IDLoc); 3712 Out.emitInstruction(Inst, getSTI()); 3713 return false; 3714 3715 case Match_MissingFeature: 3716 return Error(IDLoc, "instruction not supported on this GPU"); 3717 3718 case Match_MnemonicFail: { 3719 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3720 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3721 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3722 return Error(IDLoc, "invalid instruction" + Suggestion, 3723 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3724 } 3725 3726 case Match_InvalidOperand: { 3727 SMLoc ErrorLoc = IDLoc; 3728 if (ErrorInfo != ~0ULL) { 3729 if (ErrorInfo >= Operands.size()) { 3730 return Error(IDLoc, "too few operands for instruction"); 3731 } 3732 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3733 if (ErrorLoc == SMLoc()) 3734 ErrorLoc = IDLoc; 3735 } 3736 return Error(ErrorLoc, "invalid operand for instruction"); 3737 } 3738 3739 case Match_PreferE32: 3740 return Error(IDLoc, "internal error: instruction 
without _e64 suffix " 3741 "should be encoded as e32"); 3742 } 3743 llvm_unreachable("Implement any new match types added!"); 3744 } 3745 3746 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3747 int64_t Tmp = -1; 3748 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3749 return true; 3750 } 3751 if (getParser().parseAbsoluteExpression(Tmp)) { 3752 return true; 3753 } 3754 Ret = static_cast<uint32_t>(Tmp); 3755 return false; 3756 } 3757 3758 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3759 uint32_t &Minor) { 3760 if (ParseAsAbsoluteExpression(Major)) 3761 return TokError("invalid major version"); 3762 3763 if (getLexer().isNot(AsmToken::Comma)) 3764 return TokError("minor version number required, comma expected"); 3765 Lex(); 3766 3767 if (ParseAsAbsoluteExpression(Minor)) 3768 return TokError("invalid minor version"); 3769 3770 return false; 3771 } 3772 3773 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3774 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3775 return TokError("directive only supported for amdgcn architecture"); 3776 3777 std::string Target; 3778 3779 SMLoc TargetStart = getTok().getLoc(); 3780 if (getParser().parseEscapedString(Target)) 3781 return true; 3782 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3783 3784 std::string ExpectedTarget; 3785 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3786 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3787 3788 if (Target != ExpectedTargetOS.str()) 3789 return getParser().Error(TargetRange.Start, "target must match options", 3790 TargetRange); 3791 3792 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3793 return false; 3794 } 3795 3796 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3797 return getParser().Error(Range.Start, "value out of range", Range); 3798 } 3799 3800 bool AMDGPUAsmParser::calculateGPRBlocks( 3801 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3802 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3803 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3804 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3805 // TODO(scott.linder): These calculations are duplicated from 3806 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
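  // The VGPRBlocks/SGPRBlocks results are granularity-rounded block counts;
  // the caller (ParseDirectiveAMDHSAKernel below) encodes them into the
  // GRANULATED_WORKITEM_VGPR_COUNT / GRANULATED_WAVEFRONT_SGPR_COUNT fields
  // of compute_pgm_rsrc1.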
3807 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3808 3809 unsigned NumVGPRs = NextFreeVGPR; 3810 unsigned NumSGPRs = NextFreeSGPR; 3811 3812 if (Version.Major >= 10) 3813 NumSGPRs = 0; 3814 else { 3815 unsigned MaxAddressableNumSGPRs = 3816 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3817 3818 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3819 NumSGPRs > MaxAddressableNumSGPRs) 3820 return OutOfRangeError(SGPRRange); 3821 3822 NumSGPRs += 3823 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3824 3825 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3826 NumSGPRs > MaxAddressableNumSGPRs) 3827 return OutOfRangeError(SGPRRange); 3828 3829 if (Features.test(FeatureSGPRInitBug)) 3830 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3831 } 3832 3833 VGPRBlocks = 3834 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3835 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3836 3837 return false; 3838 } 3839 3840 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3841 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3842 return TokError("directive only supported for amdgcn architecture"); 3843 3844 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3845 return TokError("directive only supported for amdhsa OS"); 3846 3847 StringRef KernelName; 3848 if (getParser().parseIdentifier(KernelName)) 3849 return true; 3850 3851 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3852 3853 StringSet<> Seen; 3854 3855 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3856 3857 SMRange VGPRRange; 3858 uint64_t NextFreeVGPR = 0; 3859 SMRange SGPRRange; 3860 uint64_t NextFreeSGPR = 0; 3861 unsigned UserSGPRCount = 0; 3862 bool ReserveVCC = true; 3863 bool ReserveFlatScr = true; 3864 bool ReserveXNACK = hasXNACK(); 3865 Optional<bool> EnableWavefrontSize32; 3866 3867 while (true) { 3868 while (getLexer().is(AsmToken::EndOfStatement)) 3869 Lex(); 3870 3871 if (getLexer().isNot(AsmToken::Identifier)) 3872 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3873 3874 StringRef ID = getTok().getIdentifier(); 3875 SMRange IDRange = getTok().getLocRange(); 3876 Lex(); 3877 3878 if (ID == ".end_amdhsa_kernel") 3879 break; 3880 3881 if (Seen.find(ID) != Seen.end()) 3882 return TokError(".amdhsa_ directives cannot be repeated"); 3883 Seen.insert(ID); 3884 3885 SMLoc ValStart = getTok().getLoc(); 3886 int64_t IVal; 3887 if (getParser().parseAbsoluteExpression(IVal)) 3888 return true; 3889 SMLoc ValEnd = getTok().getLoc(); 3890 SMRange ValRange = SMRange(ValStart, ValEnd); 3891 3892 if (IVal < 0) 3893 return OutOfRangeError(ValRange); 3894 3895 uint64_t Val = IVal; 3896 3897 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3898 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3899 return OutOfRangeError(RANGE); \ 3900 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3901 3902 if (ID == ".amdhsa_group_segment_fixed_size") { 3903 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3904 return OutOfRangeError(ValRange); 3905 KD.group_segment_fixed_size = Val; 3906 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3907 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3908 return OutOfRangeError(ValRange); 3909 KD.private_segment_fixed_size = Val; 3910 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3911 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3912 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3913 Val, ValRange); 
3914 if (Val) 3915 UserSGPRCount += 4; 3916 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3917 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3918 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3919 ValRange); 3920 if (Val) 3921 UserSGPRCount += 2; 3922 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3923 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3924 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3925 ValRange); 3926 if (Val) 3927 UserSGPRCount += 2; 3928 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3929 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3930 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3931 Val, ValRange); 3932 if (Val) 3933 UserSGPRCount += 2; 3934 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3935 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3936 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3937 ValRange); 3938 if (Val) 3939 UserSGPRCount += 2; 3940 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3941 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3942 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3943 ValRange); 3944 if (Val) 3945 UserSGPRCount += 2; 3946 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3947 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3948 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3949 Val, ValRange); 3950 if (Val) 3951 UserSGPRCount += 1; 3952 } else if (ID == ".amdhsa_wavefront_size32") { 3953 if (IVersion.Major < 10) 3954 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3955 IDRange); 3956 EnableWavefrontSize32 = Val; 3957 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3958 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3959 Val, ValRange); 3960 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3961 PARSE_BITS_ENTRY( 3962 KD.compute_pgm_rsrc2, 3963 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3964 ValRange); 3965 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3966 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3967 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3968 ValRange); 3969 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3970 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3971 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3972 ValRange); 3973 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3974 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3975 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3976 ValRange); 3977 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3978 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3979 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3980 ValRange); 3981 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3982 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3983 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3984 ValRange); 3985 } else if (ID == ".amdhsa_next_free_vgpr") { 3986 VGPRRange = ValRange; 3987 NextFreeVGPR = Val; 3988 } else if (ID == ".amdhsa_next_free_sgpr") { 3989 SGPRRange = ValRange; 3990 NextFreeSGPR = Val; 3991 } else if (ID == ".amdhsa_reserve_vcc") { 3992 if (!isUInt<1>(Val)) 3993 return OutOfRangeError(ValRange); 3994 ReserveVCC = Val; 3995 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3996 if (IVersion.Major < 7) 3997 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3998 IDRange); 3999 if (!isUInt<1>(Val)) 4000 return OutOfRangeError(ValRange); 4001 ReserveFlatScr = Val; 4002 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4003 if (IVersion.Major < 8) 4004 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 4005 IDRange); 4006 if (!isUInt<1>(Val)) 4007 return OutOfRangeError(ValRange); 4008 ReserveXNACK = Val; 4009 } else if (ID == ".amdhsa_float_round_mode_32") { 4010 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4011 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4012 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4013 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4014 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4015 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4016 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4017 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4018 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4019 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4020 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4021 ValRange); 4022 } else if (ID == ".amdhsa_dx10_clamp") { 4023 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4024 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4025 } else if (ID == ".amdhsa_ieee_mode") { 4026 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4027 Val, ValRange); 4028 } else if (ID == ".amdhsa_fp16_overflow") { 4029 if (IVersion.Major < 9) 4030 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4031 IDRange); 4032 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4033 ValRange); 4034 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4035 if (IVersion.Major < 10) 4036 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4037 IDRange); 4038 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4039 ValRange); 4040 } else if (ID == ".amdhsa_memory_ordered") { 4041 if (IVersion.Major < 10) 4042 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4043 IDRange); 4044 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4045 ValRange); 4046 } else if (ID == ".amdhsa_forward_progress") { 4047 if (IVersion.Major < 10) 4048 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4049 IDRange); 4050 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4051 ValRange); 4052 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4053 PARSE_BITS_ENTRY( 4054 KD.compute_pgm_rsrc2, 4055 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4056 ValRange); 4057 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4058 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4059 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4060 Val, ValRange); 4061 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4062 PARSE_BITS_ENTRY( 4063 KD.compute_pgm_rsrc2, 4064 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4065 ValRange); 4066 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4067 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4068 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4069 Val, ValRange); 4070 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4071 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4072 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4073 Val, ValRange); 4074 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4075 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4076 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4077 Val, ValRange); 4078 } else if (ID == ".amdhsa_exception_int_div_zero") { 4079 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4080 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4081 Val, ValRange); 4082 } else { 4083 return getParser().Error(IDRange.Start, 
4084 "unknown .amdhsa_kernel directive", IDRange); 4085 } 4086 4087 #undef PARSE_BITS_ENTRY 4088 } 4089 4090 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4091 return TokError(".amdhsa_next_free_vgpr directive is required"); 4092 4093 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4094 return TokError(".amdhsa_next_free_sgpr directive is required"); 4095 4096 unsigned VGPRBlocks; 4097 unsigned SGPRBlocks; 4098 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4099 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4100 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4101 SGPRBlocks)) 4102 return true; 4103 4104 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4105 VGPRBlocks)) 4106 return OutOfRangeError(VGPRRange); 4107 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4108 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4109 4110 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4111 SGPRBlocks)) 4112 return OutOfRangeError(SGPRRange); 4113 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4114 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4115 SGPRBlocks); 4116 4117 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4118 return TokError("too many user SGPRs enabled"); 4119 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4120 UserSGPRCount); 4121 4122 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4123 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4124 ReserveFlatScr, ReserveXNACK); 4125 return false; 4126 } 4127 4128 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4129 uint32_t Major; 4130 uint32_t Minor; 4131 4132 if (ParseDirectiveMajorMinor(Major, Minor)) 4133 return true; 4134 4135 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4136 return false; 4137 } 4138 4139 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4140 uint32_t Major; 4141 uint32_t Minor; 4142 uint32_t Stepping; 4143 StringRef VendorName; 4144 StringRef ArchName; 4145 4146 // If this directive has no arguments, then use the ISA version for the 4147 // targeted GPU. 
4148 if (getLexer().is(AsmToken::EndOfStatement)) { 4149 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4150 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4151 ISA.Stepping, 4152 "AMD", "AMDGPU"); 4153 return false; 4154 } 4155 4156 if (ParseDirectiveMajorMinor(Major, Minor)) 4157 return true; 4158 4159 if (getLexer().isNot(AsmToken::Comma)) 4160 return TokError("stepping version number required, comma expected"); 4161 Lex(); 4162 4163 if (ParseAsAbsoluteExpression(Stepping)) 4164 return TokError("invalid stepping version"); 4165 4166 if (getLexer().isNot(AsmToken::Comma)) 4167 return TokError("vendor name required, comma expected"); 4168 Lex(); 4169 4170 if (getLexer().isNot(AsmToken::String)) 4171 return TokError("invalid vendor name"); 4172 4173 VendorName = getLexer().getTok().getStringContents(); 4174 Lex(); 4175 4176 if (getLexer().isNot(AsmToken::Comma)) 4177 return TokError("arch name required, comma expected"); 4178 Lex(); 4179 4180 if (getLexer().isNot(AsmToken::String)) 4181 return TokError("invalid arch name"); 4182 4183 ArchName = getLexer().getTok().getStringContents(); 4184 Lex(); 4185 4186 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4187 VendorName, ArchName); 4188 return false; 4189 } 4190 4191 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4192 amd_kernel_code_t &Header) { 4193 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4194 // assembly for backwards compatibility. 4195 if (ID == "max_scratch_backing_memory_byte_size") { 4196 Parser.eatToEndOfStatement(); 4197 return false; 4198 } 4199 4200 SmallString<40> ErrStr; 4201 raw_svector_ostream Err(ErrStr); 4202 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4203 return TokError(Err.str()); 4204 } 4205 Lex(); 4206 4207 if (ID == "enable_wavefront_size32") { 4208 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4209 if (!isGFX10()) 4210 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4211 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4212 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4213 } else { 4214 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4215 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4216 } 4217 } 4218 4219 if (ID == "wavefront_size") { 4220 if (Header.wavefront_size == 5) { 4221 if (!isGFX10()) 4222 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4223 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4224 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4225 } else if (Header.wavefront_size == 6) { 4226 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4227 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4228 } 4229 } 4230 4231 if (ID == "enable_wgp_mode") { 4232 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4233 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4234 } 4235 4236 if (ID == "enable_mem_ordered") { 4237 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4238 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4239 } 4240 4241 if (ID == "enable_fwd_progress") { 4242 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4243 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4244 } 4245 4246 return false; 4247 } 4248 4249 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4250 amd_kernel_code_t Header; 4251 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4252 4253 while (true) { 4254 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4255 // will set the current token to EndOfStatement. 4256 while(getLexer().is(AsmToken::EndOfStatement)) 4257 Lex(); 4258 4259 if (getLexer().isNot(AsmToken::Identifier)) 4260 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4261 4262 StringRef ID = getLexer().getTok().getIdentifier(); 4263 Lex(); 4264 4265 if (ID == ".end_amd_kernel_code_t") 4266 break; 4267 4268 if (ParseAMDKernelCodeTValue(ID, Header)) 4269 return true; 4270 } 4271 4272 getTargetStreamer().EmitAMDKernelCodeT(Header); 4273 4274 return false; 4275 } 4276 4277 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4278 if (getLexer().isNot(AsmToken::Identifier)) 4279 return TokError("expected symbol name"); 4280 4281 StringRef KernelName = Parser.getTok().getString(); 4282 4283 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4284 ELF::STT_AMDGPU_HSA_KERNEL); 4285 Lex(); 4286 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4287 KernelScope.initialize(getContext()); 4288 return false; 4289 } 4290 4291 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4292 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4293 return Error(getParser().getTok().getLoc(), 4294 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4295 "architectures"); 4296 } 4297 4298 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4299 4300 std::string ISAVersionStringFromSTI; 4301 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4302 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4303 4304 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4305 return Error(getParser().getTok().getLoc(), 4306 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4307 "arguments specified through the command line"); 4308 } 4309 4310 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4311 Lex(); 4312 4313 return false; 4314 } 4315 4316 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4317 const char *AssemblerDirectiveBegin; 4318 const char *AssemblerDirectiveEnd; 4319 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4320 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4321 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4322 HSAMD::V3::AssemblerDirectiveEnd) 4323 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4324 HSAMD::AssemblerDirectiveEnd); 4325 4326 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4327 return Error(getParser().getTok().getLoc(), 4328 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4329 "not available on non-amdhsa OSes")).str()); 4330 } 4331 4332 std::string HSAMetadataString; 4333 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4334 HSAMetadataString)) 4335 return true; 4336 4337 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4338 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4339 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4340 } else { 4341 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4342 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4343 } 4344 4345 return false; 4346 } 4347 4348 /// Common code to parse out a block of text (typically YAML) between start and 4349 /// end directives. 
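/// The collected region has the shape (directive names depend on the caller,
/// e.g. the HSA or PAL metadata directives):
///   <begin directive>
///     ... free-form text collected verbatim ...
///   <end directive>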
4350 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4351 const char *AssemblerDirectiveEnd, 4352 std::string &CollectString) { 4353 4354 raw_string_ostream CollectStream(CollectString); 4355 4356 getLexer().setSkipSpace(false); 4357 4358 bool FoundEnd = false; 4359 while (!getLexer().is(AsmToken::Eof)) { 4360 while (getLexer().is(AsmToken::Space)) { 4361 CollectStream << getLexer().getTok().getString(); 4362 Lex(); 4363 } 4364 4365 if (getLexer().is(AsmToken::Identifier)) { 4366 StringRef ID = getLexer().getTok().getIdentifier(); 4367 if (ID == AssemblerDirectiveEnd) { 4368 Lex(); 4369 FoundEnd = true; 4370 break; 4371 } 4372 } 4373 4374 CollectStream << Parser.parseStringToEndOfStatement() 4375 << getContext().getAsmInfo()->getSeparatorString(); 4376 4377 Parser.eatToEndOfStatement(); 4378 } 4379 4380 getLexer().setSkipSpace(true); 4381 4382 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4383 return TokError(Twine("expected directive ") + 4384 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4385 } 4386 4387 CollectStream.flush(); 4388 return false; 4389 } 4390 4391 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4392 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4393 std::string String; 4394 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4395 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4396 return true; 4397 4398 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4399 if (!PALMetadata->setFromString(String)) 4400 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4401 return false; 4402 } 4403 4404 /// Parse the assembler directive for old linear-format PAL metadata. 4405 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4406 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4407 return Error(getParser().getTok().getLoc(), 4408 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4409 "not available on non-amdpal OSes")).str()); 4410 } 4411 4412 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4413 PALMetadata->setLegacy(); 4414 for (;;) { 4415 uint32_t Key, Value; 4416 if (ParseAsAbsoluteExpression(Key)) { 4417 return TokError(Twine("invalid value in ") + 4418 Twine(PALMD::AssemblerDirective)); 4419 } 4420 if (getLexer().isNot(AsmToken::Comma)) { 4421 return TokError(Twine("expected an even number of values in ") + 4422 Twine(PALMD::AssemblerDirective)); 4423 } 4424 Lex(); 4425 if (ParseAsAbsoluteExpression(Value)) { 4426 return TokError(Twine("invalid value in ") + 4427 Twine(PALMD::AssemblerDirective)); 4428 } 4429 PALMetadata->setRegister(Key, Value); 4430 if (getLexer().isNot(AsmToken::Comma)) 4431 break; 4432 Lex(); 4433 } 4434 return false; 4435 } 4436 4437 /// ParseDirectiveAMDGPULDS 4438 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4439 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4440 if (getParser().checkForValidSection()) 4441 return true; 4442 4443 StringRef Name; 4444 SMLoc NameLoc = getLexer().getLoc(); 4445 if (getParser().parseIdentifier(Name)) 4446 return TokError("expected identifier in directive"); 4447 4448 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4449 if (parseToken(AsmToken::Comma, "expected ','")) 4450 return true; 4451 4452 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4453 4454 int64_t Size; 4455 SMLoc SizeLoc = getLexer().getLoc(); 4456 if (getParser().parseAbsoluteExpression(Size)) 4457 return true; 4458 if (Size < 0) 4459 return 
Error(SizeLoc, "size must be non-negative"); 4460 if (Size > LocalMemorySize) 4461 return Error(SizeLoc, "size is too large"); 4462 4463 int64_t Alignment = 4; 4464 if (getLexer().is(AsmToken::Comma)) { 4465 Lex(); 4466 SMLoc AlignLoc = getLexer().getLoc(); 4467 if (getParser().parseAbsoluteExpression(Alignment)) 4468 return true; 4469 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4470 return Error(AlignLoc, "alignment must be a power of two"); 4471 4472 // Alignment larger than the size of LDS is possible in theory, as long 4473 // as the linker manages to place to symbol at address 0, but we do want 4474 // to make sure the alignment fits nicely into a 32-bit integer. 4475 if (Alignment >= 1u << 31) 4476 return Error(AlignLoc, "alignment is too large"); 4477 } 4478 4479 if (parseToken(AsmToken::EndOfStatement, 4480 "unexpected token in '.amdgpu_lds' directive")) 4481 return true; 4482 4483 Symbol->redefineIfPossible(); 4484 if (!Symbol->isUndefined()) 4485 return Error(NameLoc, "invalid symbol redefinition"); 4486 4487 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4488 return false; 4489 } 4490 4491 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4492 StringRef IDVal = DirectiveID.getString(); 4493 4494 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4495 if (IDVal == ".amdgcn_target") 4496 return ParseDirectiveAMDGCNTarget(); 4497 4498 if (IDVal == ".amdhsa_kernel") 4499 return ParseDirectiveAMDHSAKernel(); 4500 4501 // TODO: Restructure/combine with PAL metadata directive. 4502 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4503 return ParseDirectiveHSAMetadata(); 4504 } else { 4505 if (IDVal == ".hsa_code_object_version") 4506 return ParseDirectiveHSACodeObjectVersion(); 4507 4508 if (IDVal == ".hsa_code_object_isa") 4509 return ParseDirectiveHSACodeObjectISA(); 4510 4511 if (IDVal == ".amd_kernel_code_t") 4512 return ParseDirectiveAMDKernelCodeT(); 4513 4514 if (IDVal == ".amdgpu_hsa_kernel") 4515 return ParseDirectiveAMDGPUHsaKernel(); 4516 4517 if (IDVal == ".amd_amdgpu_isa") 4518 return ParseDirectiveISAVersion(); 4519 4520 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4521 return ParseDirectiveHSAMetadata(); 4522 } 4523 4524 if (IDVal == ".amdgpu_lds") 4525 return ParseDirectiveAMDGPULDS(); 4526 4527 if (IDVal == PALMD::AssemblerDirectiveBegin) 4528 return ParseDirectivePALMetadataBegin(); 4529 4530 if (IDVal == PALMD::AssemblerDirective) 4531 return ParseDirectivePALMetadata(); 4532 4533 return true; 4534 } 4535 4536 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4537 unsigned RegNo) const { 4538 4539 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4540 R.isValid(); ++R) { 4541 if (*R == RegNo) 4542 return isGFX9() || isGFX10(); 4543 } 4544 4545 // GFX10 has 2 more SGPRs 104 and 105. 
4546 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4547 R.isValid(); ++R) { 4548 if (*R == RegNo) 4549 return hasSGPR104_SGPR105(); 4550 } 4551 4552 switch (RegNo) { 4553 case AMDGPU::SRC_SHARED_BASE: 4554 case AMDGPU::SRC_SHARED_LIMIT: 4555 case AMDGPU::SRC_PRIVATE_BASE: 4556 case AMDGPU::SRC_PRIVATE_LIMIT: 4557 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4558 return !isCI() && !isSI() && !isVI(); 4559 case AMDGPU::TBA: 4560 case AMDGPU::TBA_LO: 4561 case AMDGPU::TBA_HI: 4562 case AMDGPU::TMA: 4563 case AMDGPU::TMA_LO: 4564 case AMDGPU::TMA_HI: 4565 return !isGFX9() && !isGFX10(); 4566 case AMDGPU::XNACK_MASK: 4567 case AMDGPU::XNACK_MASK_LO: 4568 case AMDGPU::XNACK_MASK_HI: 4569 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4570 case AMDGPU::SGPR_NULL: 4571 return isGFX10(); 4572 default: 4573 break; 4574 } 4575 4576 if (isCI()) 4577 return true; 4578 4579 if (isSI() || isGFX10()) { 4580 // No flat_scr on SI. 4581 // On GFX10 flat scratch is not a valid register operand and can only be 4582 // accessed with s_setreg/s_getreg. 4583 switch (RegNo) { 4584 case AMDGPU::FLAT_SCR: 4585 case AMDGPU::FLAT_SCR_LO: 4586 case AMDGPU::FLAT_SCR_HI: 4587 return false; 4588 default: 4589 return true; 4590 } 4591 } 4592 4593 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4594 // SI/CI have. 4595 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4596 R.isValid(); ++R) { 4597 if (*R == RegNo) 4598 return hasSGPR102_SGPR103(); 4599 } 4600 4601 return true; 4602 } 4603 4604 OperandMatchResultTy 4605 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4606 OperandMode Mode) { 4607 // Try to parse with a custom parser 4608 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4609 4610 // If we successfully parsed the operand or if there was an error parsing, 4611 // we are done. 4612 // 4613 // If we are parsing after we reach EndOfStatement then this means we 4614 // are appending default values to the Operands list. This is only done 4615 // by a custom parser, so we shouldn't continue on to the generic parsing. 4616 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4617 getLexer().is(AsmToken::EndOfStatement)) 4618 return ResTy; 4619 4620 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4621 unsigned Prefix = Operands.size(); 4622 SMLoc LBraceLoc = getTok().getLoc(); 4623 Parser.Lex(); // eat the '[' 4624 4625 for (;;) { 4626 ResTy = parseReg(Operands); 4627 if (ResTy != MatchOperand_Success) 4628 return ResTy; 4629 4630 if (getLexer().is(AsmToken::RBrac)) 4631 break; 4632 4633 if (getLexer().isNot(AsmToken::Comma)) 4634 return MatchOperand_ParseFail; 4635 Parser.Lex(); 4636 } 4637 4638 if (Operands.size() - Prefix > 1) { 4639 Operands.insert(Operands.begin() + Prefix, 4640 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4641 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4642 getTok().getLoc())); 4643 } 4644 4645 Parser.Lex(); // eat the ']' 4646 return MatchOperand_Success; 4647 } 4648 4649 return parseRegOrImm(Operands); 4650 } 4651 4652 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4653 // Clear any forced encodings from the previous instruction.
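  // For example (mnemonics are illustrative): "v_add_f32_e64" forces the
  // 64-bit VOP3 encoding, "v_add_f32_e32" forces the 32-bit encoding, and
  // the "_dpp" / "_sdwa" suffixes force the DPP and SDWA forms. The suffix
  // is stripped from the returned mnemonic.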
4654 setForcedEncodingSize(0); 4655 setForcedDPP(false); 4656 setForcedSDWA(false); 4657 4658 if (Name.endswith("_e64")) { 4659 setForcedEncodingSize(64); 4660 return Name.substr(0, Name.size() - 4); 4661 } else if (Name.endswith("_e32")) { 4662 setForcedEncodingSize(32); 4663 return Name.substr(0, Name.size() - 4); 4664 } else if (Name.endswith("_dpp")) { 4665 setForcedDPP(true); 4666 return Name.substr(0, Name.size() - 4); 4667 } else if (Name.endswith("_sdwa")) { 4668 setForcedSDWA(true); 4669 return Name.substr(0, Name.size() - 5); 4670 } 4671 return Name; 4672 } 4673 4674 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4675 StringRef Name, 4676 SMLoc NameLoc, OperandVector &Operands) { 4677 // Add the instruction mnemonic 4678 Name = parseMnemonicSuffix(Name); 4679 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4680 4681 bool IsMIMG = Name.startswith("image_"); 4682 4683 while (!getLexer().is(AsmToken::EndOfStatement)) { 4684 OperandMode Mode = OperandMode_Default; 4685 if (IsMIMG && isGFX10() && Operands.size() == 2) 4686 Mode = OperandMode_NSA; 4687 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4688 4689 // Eat the comma or space if there is one. 4690 if (getLexer().is(AsmToken::Comma)) 4691 Parser.Lex(); 4692 4693 switch (Res) { 4694 case MatchOperand_Success: break; 4695 case MatchOperand_ParseFail: 4696 // FIXME: use real operand location rather than the current location. 4697 Error(getLexer().getLoc(), "failed parsing operand."); 4698 while (!getLexer().is(AsmToken::EndOfStatement)) { 4699 Parser.Lex(); 4700 } 4701 return true; 4702 case MatchOperand_NoMatch: 4703 // FIXME: use real operand location rather than the current location. 4704 Error(getLexer().getLoc(), "not a valid operand."); 4705 while (!getLexer().is(AsmToken::EndOfStatement)) { 4706 Parser.Lex(); 4707 } 4708 return true; 4709 } 4710 } 4711 4712 return false; 4713 } 4714 4715 //===----------------------------------------------------------------------===// 4716 // Utility functions 4717 //===----------------------------------------------------------------------===// 4718 4719 OperandMatchResultTy 4720 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4721 4722 if (!trySkipId(Prefix, AsmToken::Colon)) 4723 return MatchOperand_NoMatch; 4724 4725 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4726 } 4727 4728 OperandMatchResultTy 4729 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4730 AMDGPUOperand::ImmTy ImmTy, 4731 bool (*ConvertResult)(int64_t&)) { 4732 SMLoc S = getLoc(); 4733 int64_t Value = 0; 4734 4735 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4736 if (Res != MatchOperand_Success) 4737 return Res; 4738 4739 if (ConvertResult && !ConvertResult(Value)) { 4740 Error(S, "invalid " + StringRef(Prefix) + " value."); 4741 } 4742 4743 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4744 return MatchOperand_Success; 4745 } 4746 4747 OperandMatchResultTy 4748 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4749 OperandVector &Operands, 4750 AMDGPUOperand::ImmTy ImmTy, 4751 bool (*ConvertResult)(int64_t&)) { 4752 SMLoc S = getLoc(); 4753 if (!trySkipId(Prefix, AsmToken::Colon)) 4754 return MatchOperand_NoMatch; 4755 4756 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4757 return MatchOperand_ParseFail; 4758 4759 unsigned Val = 0; 4760 const unsigned MaxSize = 4; 4761 4762 // FIXME: How to verify the number of elements matches the number of src 4763 // operands? 4764 for (int I = 0; ; ++I) { 4765 int64_t Op; 4766 SMLoc Loc = getLoc(); 4767 if (!parseExpr(Op)) 4768 return MatchOperand_ParseFail; 4769 4770 if (Op != 0 && Op != 1) { 4771 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4772 return MatchOperand_ParseFail; 4773 } 4774 4775 Val |= (Op << I); 4776 4777 if (trySkipToken(AsmToken::RBrac)) 4778 break; 4779 4780 if (I + 1 == MaxSize) { 4781 Error(getLoc(), "expected a closing square bracket"); 4782 return MatchOperand_ParseFail; 4783 } 4784 4785 if (!skipToken(AsmToken::Comma, "expected a comma")) 4786 return MatchOperand_ParseFail; 4787 } 4788 4789 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4790 return MatchOperand_Success; 4791 } 4792 4793 OperandMatchResultTy 4794 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4795 AMDGPUOperand::ImmTy ImmTy) { 4796 int64_t Bit = 0; 4797 SMLoc S = Parser.getTok().getLoc(); 4798 4799 // We are at the end of the statement, and this is a default argument, so 4800 // use a default value. 
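  // A named bit is written either as the bare name, which sets the bit, or
  // with a "no" prefix, which clears it (e.g. "gds" vs "nogds"; names are
  // illustrative). Both forms are matched below.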
4801 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4802 switch(getLexer().getKind()) { 4803 case AsmToken::Identifier: { 4804 StringRef Tok = Parser.getTok().getString(); 4805 if (Tok == Name) { 4806 if (Tok == "r128" && !hasMIMG_R128()) 4807 Error(S, "r128 modifier is not supported on this GPU"); 4808 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4809 Error(S, "a16 modifier is not supported on this GPU"); 4810 Bit = 1; 4811 Parser.Lex(); 4812 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4813 Bit = 0; 4814 Parser.Lex(); 4815 } else { 4816 return MatchOperand_NoMatch; 4817 } 4818 break; 4819 } 4820 default: 4821 return MatchOperand_NoMatch; 4822 } 4823 } 4824 4825 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4826 return MatchOperand_ParseFail; 4827 4828 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4829 ImmTy = AMDGPUOperand::ImmTyR128A16; 4830 4831 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4832 return MatchOperand_Success; 4833 } 4834 4835 static void addOptionalImmOperand( 4836 MCInst& Inst, const OperandVector& Operands, 4837 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4838 AMDGPUOperand::ImmTy ImmT, 4839 int64_t Default = 0) { 4840 auto i = OptionalIdx.find(ImmT); 4841 if (i != OptionalIdx.end()) { 4842 unsigned Idx = i->second; 4843 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4844 } else { 4845 Inst.addOperand(MCOperand::createImm(Default)); 4846 } 4847 } 4848 4849 OperandMatchResultTy 4850 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4851 if (getLexer().isNot(AsmToken::Identifier)) { 4852 return MatchOperand_NoMatch; 4853 } 4854 StringRef Tok = Parser.getTok().getString(); 4855 if (Tok != Prefix) { 4856 return MatchOperand_NoMatch; 4857 } 4858 4859 Parser.Lex(); 4860 if (getLexer().isNot(AsmToken::Colon)) { 4861 return MatchOperand_ParseFail; 4862 } 4863 4864 Parser.Lex(); 4865 if (getLexer().isNot(AsmToken::Identifier)) { 4866 return MatchOperand_ParseFail; 4867 } 4868 4869 Value = Parser.getTok().getString(); 4870 return MatchOperand_Success; 4871 } 4872 4873 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4874 // values to live in a joint format operand in the MCInst encoding. 4875 OperandMatchResultTy 4876 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4877 SMLoc S = Parser.getTok().getLoc(); 4878 int64_t Dfmt = 0, Nfmt = 0; 4879 // dfmt and nfmt can appear in either order, and each is optional. 
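  // Accepted forms are, e.g., "dfmt:12, nfmt:2" or "nfmt:2, dfmt:12"
  // (example values); the two fields are packed into the single
  // ImmTyFORMAT operand created at the end of this function.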
4880 bool GotDfmt = false, GotNfmt = false; 4881 while (!GotDfmt || !GotNfmt) { 4882 if (!GotDfmt) { 4883 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4884 if (Res != MatchOperand_NoMatch) { 4885 if (Res != MatchOperand_Success) 4886 return Res; 4887 if (Dfmt >= 16) { 4888 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4889 return MatchOperand_ParseFail; 4890 } 4891 GotDfmt = true; 4892 Parser.Lex(); 4893 continue; 4894 } 4895 } 4896 if (!GotNfmt) { 4897 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4898 if (Res != MatchOperand_NoMatch) { 4899 if (Res != MatchOperand_Success) 4900 return Res; 4901 if (Nfmt >= 8) { 4902 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4903 return MatchOperand_ParseFail; 4904 } 4905 GotNfmt = true; 4906 Parser.Lex(); 4907 continue; 4908 } 4909 } 4910 break; 4911 } 4912 if (!GotDfmt && !GotNfmt) 4913 return MatchOperand_NoMatch; 4914 auto Format = Dfmt | Nfmt << 4; 4915 Operands.push_back( 4916 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4917 return MatchOperand_Success; 4918 } 4919 4920 //===----------------------------------------------------------------------===// 4921 // ds 4922 //===----------------------------------------------------------------------===// 4923 4924 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4925 const OperandVector &Operands) { 4926 OptionalImmIndexMap OptionalIdx; 4927 4928 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4929 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4930 4931 // Add the register arguments 4932 if (Op.isReg()) { 4933 Op.addRegOperands(Inst, 1); 4934 continue; 4935 } 4936 4937 // Handle optional arguments 4938 OptionalIdx[Op.getImmTy()] = i; 4939 } 4940 4941 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4942 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4943 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4944 4945 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4946 } 4947 4948 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4949 bool IsGdsHardcoded) { 4950 OptionalImmIndexMap OptionalIdx; 4951 4952 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4953 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4954 4955 // Add the register arguments 4956 if (Op.isReg()) { 4957 Op.addRegOperands(Inst, 1); 4958 continue; 4959 } 4960 4961 if (Op.isToken() && Op.getToken() == "gds") { 4962 IsGdsHardcoded = true; 4963 continue; 4964 } 4965 4966 // Handle optional arguments 4967 OptionalIdx[Op.getImmTy()] = i; 4968 } 4969 4970 AMDGPUOperand::ImmTy OffsetType = 4971 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4972 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4973 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4974 AMDGPUOperand::ImmTyOffset; 4975 4976 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4977 4978 if (!IsGdsHardcoded) { 4979 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4980 } 4981 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4982 } 4983 4984 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4985 OptionalImmIndexMap OptionalIdx; 4986 4987 unsigned OperandIdx[4]; 4988 unsigned EnMask = 0; 4989 int SrcIdx = 0; 4990 4991 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4992 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4993 4994 // Add the register arguments 4995 if (Op.isReg()) { 4996 assert(SrcIdx < 4); 4997 OperandIdx[SrcIdx] = Inst.size(); 4998 Op.addRegOperands(Inst, 1); 4999 ++SrcIdx; 5000 continue; 5001 } 5002 5003 if (Op.isOff()) { 5004 assert(SrcIdx < 4); 5005 OperandIdx[SrcIdx] = Inst.size(); 5006 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5007 ++SrcIdx; 5008 continue; 5009 } 5010 5011 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5012 Op.addImmOperands(Inst, 1); 5013 continue; 5014 } 5015 5016 if (Op.isToken() && Op.getToken() == "done") 5017 continue; 5018 5019 // Handle optional arguments 5020 OptionalIdx[Op.getImmTy()] = i; 5021 } 5022 5023 assert(SrcIdx == 4); 5024 5025 bool Compr = false; 5026 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5027 Compr = true; 5028 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5029 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5030 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5031 } 5032 5033 for (auto i = 0; i < SrcIdx; ++i) { 5034 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5035 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5036 } 5037 } 5038 5039 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5040 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5041 5042 Inst.addOperand(MCOperand::createImm(EnMask)); 5043 } 5044 5045 //===----------------------------------------------------------------------===// 5046 // s_waitcnt 5047 //===----------------------------------------------------------------------===// 5048 5049 static bool 5050 encodeCnt( 5051 const AMDGPU::IsaVersion ISA, 5052 int64_t &IntVal, 5053 int64_t CntVal, 5054 bool Saturate, 5055 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5056 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5057 { 5058 bool Failed = false; 5059 5060 IntVal = encode(ISA, IntVal, CntVal); 5061 if (CntVal != decode(ISA, IntVal)) { 5062 if (Saturate) { 5063 IntVal = encode(ISA, IntVal, -1); 5064 } else { 5065 Failed = true; 5066 } 5067 } 5068 return Failed; 5069 } 5070 5071 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5072 5073 SMLoc CntLoc = getLoc(); 5074 StringRef CntName = getTokenStr(); 5075 5076 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5077 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5078 return false; 5079 5080 int64_t CntVal; 5081 SMLoc ValLoc = getLoc(); 5082 if (!parseExpr(CntVal)) 5083 return false; 5084 5085 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5086 5087 bool Failed = true; 5088 bool Sat = CntName.endswith("_sat"); 5089 5090 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5091 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5092 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5093 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5094 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5095 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5096 } else { 5097 Error(CntLoc, "invalid counter name " + CntName); 5098 return false; 5099 } 5100 5101 if (Failed) { 5102 Error(ValLoc, "too large value for " + CntName); 5103 return false; 5104 } 5105 5106 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5107 return false; 5108 5109 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5110 if (isToken(AsmToken::EndOfStatement)) { 5111 Error(getLoc(), "expected a counter name"); 5112 return false; 5113 } 5114 } 5115 5116 return true; 5117 } 5118 5119 OperandMatchResultTy 5120 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5121 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5122 int64_t Waitcnt = getWaitcntBitMask(ISA); 5123 SMLoc S = getLoc(); 5124 5125 // If parse failed, do not return error code 5126 // to avoid excessive error messages. 
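  // Accepted forms (example values): a symbolic list such as
  //   vmcnt(0) expcnt(0) lgkmcnt(0)
  // where counters may be separated by '&' or ',', and a "_sat" suffix on a
  // counter name clamps an out-of-range value to its maximum; alternatively,
  // a plain integer expression gives the raw waitcnt encoding.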
5127 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5128 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 5129 } else { 5130 parseExpr(Waitcnt); 5131 } 5132 5133 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5134 return MatchOperand_Success; 5135 } 5136 5137 bool 5138 AMDGPUOperand::isSWaitCnt() const { 5139 return isImm(); 5140 } 5141 5142 //===----------------------------------------------------------------------===// 5143 // hwreg 5144 //===----------------------------------------------------------------------===// 5145 5146 bool 5147 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5148 int64_t &Offset, 5149 int64_t &Width) { 5150 using namespace llvm::AMDGPU::Hwreg; 5151 5152 // The register may be specified by name or using a numeric code 5153 if (isToken(AsmToken::Identifier) && 5154 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5155 HwReg.IsSymbolic = true; 5156 lex(); // skip message name 5157 } else if (!parseExpr(HwReg.Id)) { 5158 return false; 5159 } 5160 5161 if (trySkipToken(AsmToken::RParen)) 5162 return true; 5163 5164 // parse optional params 5165 return 5166 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5167 parseExpr(Offset) && 5168 skipToken(AsmToken::Comma, "expected a comma") && 5169 parseExpr(Width) && 5170 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5171 } 5172 5173 bool 5174 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5175 const int64_t Offset, 5176 const int64_t Width, 5177 const SMLoc Loc) { 5178 5179 using namespace llvm::AMDGPU::Hwreg; 5180 5181 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5182 Error(Loc, "specified hardware register is not supported on this GPU"); 5183 return false; 5184 } else if (!isValidHwreg(HwReg.Id)) { 5185 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5186 return false; 5187 } else if (!isValidHwregOffset(Offset)) { 5188 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5189 return false; 5190 } else if (!isValidHwregWidth(Width)) { 5191 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5192 return false; 5193 } 5194 return true; 5195 } 5196 5197 OperandMatchResultTy 5198 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5199 using namespace llvm::AMDGPU::Hwreg; 5200 5201 int64_t ImmVal = 0; 5202 SMLoc Loc = getLoc(); 5203 5204 // If parse failed, do not return error code 5205 // to avoid excessive error messages. 
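  // Accepted forms (example values): a symbolic spec such as
  //   hwreg(HW_REG_MODE, 0, 3)
  // i.e. register name optionally followed by a bit offset and a field
  // width, or a plain 16-bit immediate holding the already-encoded value.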
5206 if (trySkipId("hwreg", AsmToken::LParen)) { 5207 OperandInfoTy HwReg(ID_UNKNOWN_); 5208 int64_t Offset = OFFSET_DEFAULT_; 5209 int64_t Width = WIDTH_DEFAULT_; 5210 if (parseHwregBody(HwReg, Offset, Width) && 5211 validateHwreg(HwReg, Offset, Width, Loc)) { 5212 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5213 } 5214 } else if (parseExpr(ImmVal)) { 5215 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5216 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5217 } 5218 5219 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5220 return MatchOperand_Success; 5221 } 5222 5223 bool AMDGPUOperand::isHwreg() const { 5224 return isImmTy(ImmTyHwreg); 5225 } 5226 5227 //===----------------------------------------------------------------------===// 5228 // sendmsg 5229 //===----------------------------------------------------------------------===// 5230 5231 bool 5232 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5233 OperandInfoTy &Op, 5234 OperandInfoTy &Stream) { 5235 using namespace llvm::AMDGPU::SendMsg; 5236 5237 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5238 Msg.IsSymbolic = true; 5239 lex(); // skip message name 5240 } else if (!parseExpr(Msg.Id)) { 5241 return false; 5242 } 5243 5244 if (trySkipToken(AsmToken::Comma)) { 5245 Op.IsDefined = true; 5246 if (isToken(AsmToken::Identifier) && 5247 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5248 lex(); // skip operation name 5249 } else if (!parseExpr(Op.Id)) { 5250 return false; 5251 } 5252 5253 if (trySkipToken(AsmToken::Comma)) { 5254 Stream.IsDefined = true; 5255 if (!parseExpr(Stream.Id)) 5256 return false; 5257 } 5258 } 5259 5260 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5261 } 5262 5263 bool 5264 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5265 const OperandInfoTy &Op, 5266 const OperandInfoTy &Stream, 5267 const SMLoc S) { 5268 using namespace llvm::AMDGPU::SendMsg; 5269 5270 // Validation strictness depends on whether message is specified 5271 // in a symbolc or in a numeric form. In the latter case 5272 // only encoding possibility is checked. 5273 bool Strict = Msg.IsSymbolic; 5274 5275 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5276 Error(S, "invalid message id"); 5277 return false; 5278 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5279 Error(S, Op.IsDefined ? 5280 "message does not support operations" : 5281 "missing message operation"); 5282 return false; 5283 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5284 Error(S, "invalid operation id"); 5285 return false; 5286 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5287 Error(S, "message operation does not support streams"); 5288 return false; 5289 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5290 Error(S, "invalid message stream id"); 5291 return false; 5292 } 5293 return true; 5294 } 5295 5296 OperandMatchResultTy 5297 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5298 using namespace llvm::AMDGPU::SendMsg; 5299 5300 int64_t ImmVal = 0; 5301 SMLoc Loc = getLoc(); 5302 5303 // If parse failed, do not return error code 5304 // to avoid excessive error messages. 
5305 if (trySkipId("sendmsg", AsmToken::LParen)) { 5306 OperandInfoTy Msg(ID_UNKNOWN_); 5307 OperandInfoTy Op(OP_NONE_); 5308 OperandInfoTy Stream(STREAM_ID_NONE_); 5309 if (parseSendMsgBody(Msg, Op, Stream) && 5310 validateSendMsg(Msg, Op, Stream, Loc)) { 5311 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5312 } 5313 } else if (parseExpr(ImmVal)) { 5314 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5315 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5316 } 5317 5318 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5319 return MatchOperand_Success; 5320 } 5321 5322 bool AMDGPUOperand::isSendMsg() const { 5323 return isImmTy(ImmTySendMsg); 5324 } 5325 5326 //===----------------------------------------------------------------------===// 5327 // v_interp 5328 //===----------------------------------------------------------------------===// 5329 5330 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5331 if (getLexer().getKind() != AsmToken::Identifier) 5332 return MatchOperand_NoMatch; 5333 5334 StringRef Str = Parser.getTok().getString(); 5335 int Slot = StringSwitch<int>(Str) 5336 .Case("p10", 0) 5337 .Case("p20", 1) 5338 .Case("p0", 2) 5339 .Default(-1); 5340 5341 SMLoc S = Parser.getTok().getLoc(); 5342 if (Slot == -1) 5343 return MatchOperand_ParseFail; 5344 5345 Parser.Lex(); 5346 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5347 AMDGPUOperand::ImmTyInterpSlot)); 5348 return MatchOperand_Success; 5349 } 5350 5351 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5352 if (getLexer().getKind() != AsmToken::Identifier) 5353 return MatchOperand_NoMatch; 5354 5355 StringRef Str = Parser.getTok().getString(); 5356 if (!Str.startswith("attr")) 5357 return MatchOperand_NoMatch; 5358 5359 StringRef Chan = Str.take_back(2); 5360 int AttrChan = StringSwitch<int>(Chan) 5361 .Case(".x", 0) 5362 .Case(".y", 1) 5363 .Case(".z", 2) 5364 .Case(".w", 3) 5365 .Default(-1); 5366 if (AttrChan == -1) 5367 return MatchOperand_ParseFail; 5368 5369 Str = Str.drop_back(2).drop_front(4); 5370 5371 uint8_t Attr; 5372 if (Str.getAsInteger(10, Attr)) 5373 return MatchOperand_ParseFail; 5374 5375 SMLoc S = Parser.getTok().getLoc(); 5376 Parser.Lex(); 5377 if (Attr > 63) { 5378 Error(S, "out of bounds attr"); 5379 return MatchOperand_Success; 5380 } 5381 5382 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5383 5384 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5385 AMDGPUOperand::ImmTyInterpAttr)); 5386 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5387 AMDGPUOperand::ImmTyAttrChan)); 5388 return MatchOperand_Success; 5389 } 5390 5391 //===----------------------------------------------------------------------===// 5392 // exp 5393 //===----------------------------------------------------------------------===// 5394 5395 void AMDGPUAsmParser::errorExpTgt() { 5396 Error(Parser.getTok().getLoc(), "invalid exp target"); 5397 } 5398 5399 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5400 uint8_t &Val) { 5401 if (Str == "null") { 5402 Val = 9; 5403 return MatchOperand_Success; 5404 } 5405 5406 if (Str.startswith("mrt")) { 5407 Str = Str.drop_front(3); 5408 if (Str == "z") { // == mrtz 5409 Val = 8; 5410 return MatchOperand_Success; 5411 } 5412 5413 if (Str.getAsInteger(10, Val)) 5414 return MatchOperand_ParseFail; 5415 5416 if (Val > 7) 5417 errorExpTgt(); 5418 5419 return MatchOperand_Success; 5420 } 5421 5422 if (Str.startswith("pos")) 
{ 5423 Str = Str.drop_front(3); 5424 if (Str.getAsInteger(10, Val)) 5425 return MatchOperand_ParseFail; 5426 5427 if (Val > 4 || (Val == 4 && !isGFX10())) 5428 errorExpTgt(); 5429 5430 Val += 12; 5431 return MatchOperand_Success; 5432 } 5433 5434 if (isGFX10() && Str == "prim") { 5435 Val = 20; 5436 return MatchOperand_Success; 5437 } 5438 5439 if (Str.startswith("param")) { 5440 Str = Str.drop_front(5); 5441 if (Str.getAsInteger(10, Val)) 5442 return MatchOperand_ParseFail; 5443 5444 if (Val >= 32) 5445 errorExpTgt(); 5446 5447 Val += 32; 5448 return MatchOperand_Success; 5449 } 5450 5451 if (Str.startswith("invalid_target_")) { 5452 Str = Str.drop_front(15); 5453 if (Str.getAsInteger(10, Val)) 5454 return MatchOperand_ParseFail; 5455 5456 errorExpTgt(); 5457 return MatchOperand_Success; 5458 } 5459 5460 return MatchOperand_NoMatch; 5461 } 5462 5463 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5464 uint8_t Val; 5465 StringRef Str = Parser.getTok().getString(); 5466 5467 auto Res = parseExpTgtImpl(Str, Val); 5468 if (Res != MatchOperand_Success) 5469 return Res; 5470 5471 SMLoc S = Parser.getTok().getLoc(); 5472 Parser.Lex(); 5473 5474 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5475 AMDGPUOperand::ImmTyExpTgt)); 5476 return MatchOperand_Success; 5477 } 5478 5479 //===----------------------------------------------------------------------===// 5480 // parser helpers 5481 //===----------------------------------------------------------------------===// 5482 5483 bool 5484 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5485 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5486 } 5487 5488 bool 5489 AMDGPUAsmParser::isId(const StringRef Id) const { 5490 return isId(getToken(), Id); 5491 } 5492 5493 bool 5494 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5495 return getTokenKind() == Kind; 5496 } 5497 5498 bool 5499 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5500 if (isId(Id)) { 5501 lex(); 5502 return true; 5503 } 5504 return false; 5505 } 5506 5507 bool 5508 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5509 if (isId(Id) && peekToken().is(Kind)) { 5510 lex(); 5511 lex(); 5512 return true; 5513 } 5514 return false; 5515 } 5516 5517 bool 5518 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5519 if (isToken(Kind)) { 5520 lex(); 5521 return true; 5522 } 5523 return false; 5524 } 5525 5526 bool 5527 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5528 const StringRef ErrMsg) { 5529 if (!trySkipToken(Kind)) { 5530 Error(getLoc(), ErrMsg); 5531 return false; 5532 } 5533 return true; 5534 } 5535 5536 bool 5537 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5538 return !getParser().parseAbsoluteExpression(Imm); 5539 } 5540 5541 bool 5542 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5543 SMLoc S = getLoc(); 5544 5545 const MCExpr *Expr; 5546 if (Parser.parseExpression(Expr)) 5547 return false; 5548 5549 int64_t IntVal; 5550 if (Expr->evaluateAsAbsolute(IntVal)) { 5551 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5552 } else { 5553 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5554 } 5555 return true; 5556 } 5557 5558 bool 5559 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5560 if (isToken(AsmToken::String)) { 5561 Val = getToken().getStringContents(); 5562 lex(); 5563 return true; 5564 } else { 5565 Error(getLoc(), ErrMsg); 5566 return false; 5567 } 5568 } 5569 5570 
AsmToken 5571 AMDGPUAsmParser::getToken() const { 5572 return Parser.getTok(); 5573 } 5574 5575 AsmToken 5576 AMDGPUAsmParser::peekToken() { 5577 return getLexer().peekTok(); 5578 } 5579 5580 void 5581 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5582 auto TokCount = getLexer().peekTokens(Tokens); 5583 5584 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5585 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5586 } 5587 5588 AsmToken::TokenKind 5589 AMDGPUAsmParser::getTokenKind() const { 5590 return getLexer().getKind(); 5591 } 5592 5593 SMLoc 5594 AMDGPUAsmParser::getLoc() const { 5595 return getToken().getLoc(); 5596 } 5597 5598 StringRef 5599 AMDGPUAsmParser::getTokenStr() const { 5600 return getToken().getString(); 5601 } 5602 5603 void 5604 AMDGPUAsmParser::lex() { 5605 Parser.Lex(); 5606 } 5607 5608 //===----------------------------------------------------------------------===// 5609 // swizzle 5610 //===----------------------------------------------------------------------===// 5611 5612 LLVM_READNONE 5613 static unsigned 5614 encodeBitmaskPerm(const unsigned AndMask, 5615 const unsigned OrMask, 5616 const unsigned XorMask) { 5617 using namespace llvm::AMDGPU::Swizzle; 5618 5619 return BITMASK_PERM_ENC | 5620 (AndMask << BITMASK_AND_SHIFT) | 5621 (OrMask << BITMASK_OR_SHIFT) | 5622 (XorMask << BITMASK_XOR_SHIFT); 5623 } 5624 5625 bool 5626 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5627 const unsigned MinVal, 5628 const unsigned MaxVal, 5629 const StringRef ErrMsg) { 5630 for (unsigned i = 0; i < OpNum; ++i) { 5631 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5632 return false; 5633 } 5634 SMLoc ExprLoc = Parser.getTok().getLoc(); 5635 if (!parseExpr(Op[i])) { 5636 return false; 5637 } 5638 if (Op[i] < MinVal || Op[i] > MaxVal) { 5639 Error(ExprLoc, ErrMsg); 5640 return false; 5641 } 5642 } 5643 5644 return true; 5645 } 5646 5647 bool 5648 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5649 using namespace llvm::AMDGPU::Swizzle; 5650 5651 int64_t Lane[LANE_NUM]; 5652 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5653 "expected a 2-bit lane id")) { 5654 Imm = QUAD_PERM_ENC; 5655 for (unsigned I = 0; I < LANE_NUM; ++I) { 5656 Imm |= Lane[I] << (LANE_SHIFT * I); 5657 } 5658 return true; 5659 } 5660 return false; 5661 } 5662 5663 bool 5664 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5665 using namespace llvm::AMDGPU::Swizzle; 5666 5667 SMLoc S = Parser.getTok().getLoc(); 5668 int64_t GroupSize; 5669 int64_t LaneIdx; 5670 5671 if (!parseSwizzleOperands(1, &GroupSize, 5672 2, 32, 5673 "group size must be in the interval [2,32]")) { 5674 return false; 5675 } 5676 if (!isPowerOf2_64(GroupSize)) { 5677 Error(S, "group size must be a power of two"); 5678 return false; 5679 } 5680 if (parseSwizzleOperands(1, &LaneIdx, 5681 0, GroupSize - 1, 5682 "lane id must be in the interval [0,group size - 1]")) { 5683 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5684 return true; 5685 } 5686 return false; 5687 } 5688 5689 bool 5690 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5691 using namespace llvm::AMDGPU::Swizzle; 5692 5693 SMLoc S = Parser.getTok().getLoc(); 5694 int64_t GroupSize; 5695 5696 if (!parseSwizzleOperands(1, &GroupSize, 5697 2, 32, "group size must be in the interval [2,32]")) { 5698 return false; 5699 } 5700 if (!isPowerOf2_64(GroupSize)) { 5701 Error(S, "group size must be a power of two"); 5702 return false; 5703 } 5704 5705 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1); 5706 return true; 5707 } 5708 5709 bool 5710 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5711 using namespace llvm::AMDGPU::Swizzle; 5712 5713 SMLoc S = Parser.getTok().getLoc(); 5714 int64_t GroupSize; 5715 5716 if (!parseSwizzleOperands(1, &GroupSize, 5717 1, 16, "group size must be in the interval [1,16]")) { 5718 return false; 5719 } 5720 if (!isPowerOf2_64(GroupSize)) { 5721 Error(S, "group size must be a power of two"); 5722 return false; 5723 } 5724 5725 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5726 return true; 5727 } 5728 5729 bool 5730 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5731 using namespace llvm::AMDGPU::Swizzle; 5732 5733 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5734 return false; 5735 } 5736 5737 StringRef Ctl; 5738 SMLoc StrLoc = Parser.getTok().getLoc(); 5739 if (!parseString(Ctl)) { 5740 return false; 5741 } 5742 if (Ctl.size() != BITMASK_WIDTH) { 5743 Error(StrLoc, "expected a 5-character mask"); 5744 return false; 5745 } 5746 5747 unsigned AndMask = 0; 5748 unsigned OrMask = 0; 5749 unsigned XorMask = 0; 5750 5751 for (size_t i = 0; i < Ctl.size(); ++i) { 5752 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5753 switch(Ctl[i]) { 5754 default: 5755 Error(StrLoc, "invalid mask"); 5756 return false; 5757 case '0': 5758 break; 5759 case '1': 5760 OrMask |= Mask; 5761 break; 5762 case 'p': 5763 AndMask |= Mask; 5764 break; 5765 case 'i': 5766 AndMask |= Mask; 5767 XorMask |= Mask; 5768 break; 5769 } 5770 } 5771 5772 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5773 return true; 5774 } 5775 5776 bool 5777 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5778 5779 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5780 5781 if (!parseExpr(Imm)) { 5782 return false; 5783 } 5784 if (!isUInt<16>(Imm)) { 5785 Error(OffsetLoc, "expected a 16-bit offset"); 5786 return false; 5787 } 5788 return true; 5789 } 5790 5791 bool 5792 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5793 using namespace llvm::AMDGPU::Swizzle; 5794 5795 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5796 5797 SMLoc ModeLoc = Parser.getTok().getLoc(); 5798 bool Ok = false; 5799 5800 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5801 Ok = parseSwizzleQuadPerm(Imm); 5802 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5803 Ok = parseSwizzleBitmaskPerm(Imm); 5804 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5805 Ok = parseSwizzleBroadcast(Imm); 5806 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5807 Ok = parseSwizzleSwap(Imm); 5808 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5809 Ok = parseSwizzleReverse(Imm); 5810 } else { 5811 Error(ModeLoc, "expected a swizzle mode"); 5812 } 5813 5814 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5815 } 5816 5817 return false; 5818 } 5819 5820 OperandMatchResultTy 5821 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5822 SMLoc S = Parser.getTok().getLoc(); 5823 int64_t Imm = 0; 5824 5825 if (trySkipId("offset")) { 5826 5827 bool Ok = false; 5828 if (skipToken(AsmToken::Colon, "expected a colon")) { 5829 if (trySkipId("swizzle")) { 5830 Ok = parseSwizzleMacro(Imm); 5831 } else { 5832 Ok = parseSwizzleOffset(Imm); 5833 } 5834 } 5835 5836 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5837 5838 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5839 } else { 5840 // Swizzle "offset" operand is optional. 5841 // If it is omitted, try parsing other optional operands. 
5842 return parseOptionalOpr(Operands); 5843 } 5844 } 5845 5846 bool 5847 AMDGPUOperand::isSwizzle() const { 5848 return isImmTy(ImmTySwizzle); 5849 } 5850 5851 //===----------------------------------------------------------------------===// 5852 // VGPR Index Mode 5853 //===----------------------------------------------------------------------===// 5854 5855 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5856 5857 using namespace llvm::AMDGPU::VGPRIndexMode; 5858 5859 if (trySkipToken(AsmToken::RParen)) { 5860 return OFF; 5861 } 5862 5863 int64_t Imm = 0; 5864 5865 while (true) { 5866 unsigned Mode = 0; 5867 SMLoc S = Parser.getTok().getLoc(); 5868 5869 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5870 if (trySkipId(IdSymbolic[ModeId])) { 5871 Mode = 1 << ModeId; 5872 break; 5873 } 5874 } 5875 5876 if (Mode == 0) { 5877 Error(S, (Imm == 0)? 5878 "expected a VGPR index mode or a closing parenthesis" : 5879 "expected a VGPR index mode"); 5880 break; 5881 } 5882 5883 if (Imm & Mode) { 5884 Error(S, "duplicate VGPR index mode"); 5885 break; 5886 } 5887 Imm |= Mode; 5888 5889 if (trySkipToken(AsmToken::RParen)) 5890 break; 5891 if (!skipToken(AsmToken::Comma, 5892 "expected a comma or a closing parenthesis")) 5893 break; 5894 } 5895 5896 return Imm; 5897 } 5898 5899 OperandMatchResultTy 5900 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5901 5902 int64_t Imm = 0; 5903 SMLoc S = Parser.getTok().getLoc(); 5904 5905 if (getLexer().getKind() == AsmToken::Identifier && 5906 Parser.getTok().getString() == "gpr_idx" && 5907 getLexer().peekTok().is(AsmToken::LParen)) { 5908 5909 Parser.Lex(); 5910 Parser.Lex(); 5911 5912 // If parse failed, trigger an error but do not return error code 5913 // to avoid excessive error messages. 5914 Imm = parseGPRIdxMacro(); 5915 5916 } else { 5917 if (getParser().parseAbsoluteExpression(Imm)) 5918 return MatchOperand_NoMatch; 5919 if (Imm < 0 || !isUInt<4>(Imm)) { 5920 Error(S, "invalid immediate: only 4-bit values are legal"); 5921 } 5922 } 5923 5924 Operands.push_back( 5925 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5926 return MatchOperand_Success; 5927 } 5928 5929 bool AMDGPUOperand::isGPRIdxMode() const { 5930 return isImmTy(ImmTyGprIdxMode); 5931 } 5932 5933 //===----------------------------------------------------------------------===// 5934 // sopp branch targets 5935 //===----------------------------------------------------------------------===// 5936 5937 OperandMatchResultTy 5938 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5939 5940 // Make sure we are not parsing something 5941 // that looks like a label or an expression but is not. 5942 // This will improve error messages. 5943 if (isRegister() || isModifier()) 5944 return MatchOperand_NoMatch; 5945 5946 if (parseExpr(Operands)) { 5947 5948 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5949 assert(Opr.isImm() || Opr.isExpr()); 5950 SMLoc Loc = Opr.getStartLoc(); 5951 5952 // Currently we do not support arbitrary expressions as branch targets. 5953 // Only labels and absolute expressions are accepted. 
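// For example (illustrative): "s_branch skip_block" (a label) and
// "s_branch 0x10" (an absolute expression) are accepted, while a
// relocatable expression such as "skip_block+4" is diagnosed below.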
5954 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5955 Error(Loc, "expected an absolute expression or a label"); 5956 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5957 Error(Loc, "expected a 16-bit signed jump offset"); 5958 } 5959 } 5960 5961 return MatchOperand_Success; // avoid excessive error messages 5962 } 5963 5964 //===----------------------------------------------------------------------===// 5965 // Boolean holding registers 5966 //===----------------------------------------------------------------------===// 5967 5968 OperandMatchResultTy 5969 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5970 return parseReg(Operands); 5971 } 5972 5973 //===----------------------------------------------------------------------===// 5974 // mubuf 5975 //===----------------------------------------------------------------------===// 5976 5977 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5978 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5979 } 5980 5981 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5982 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5983 } 5984 5985 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5986 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5987 } 5988 5989 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5990 const OperandVector &Operands, 5991 bool IsAtomic, 5992 bool IsAtomicReturn, 5993 bool IsLds) { 5994 bool IsLdsOpcode = IsLds; 5995 bool HasLdsModifier = false; 5996 OptionalImmIndexMap OptionalIdx; 5997 assert(IsAtomicReturn ? IsAtomic : true); 5998 unsigned FirstOperandIdx = 1; 5999 6000 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6001 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6002 6003 // Add the register arguments 6004 if (Op.isReg()) { 6005 Op.addRegOperands(Inst, 1); 6006 // Insert a tied src for atomic return dst. 6007 // This cannot be postponed as subsequent calls to 6008 // addImmOperands rely on correct number of MC operands. 6009 if (IsAtomicReturn && i == FirstOperandIdx) 6010 Op.addRegOperands(Inst, 1); 6011 continue; 6012 } 6013 6014 // Handle the case where soffset is an immediate 6015 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6016 Op.addImmOperands(Inst, 1); 6017 continue; 6018 } 6019 6020 HasLdsModifier |= Op.isLDS(); 6021 6022 // Handle tokens like 'offen' which are sometimes hard-coded into the 6023 // asm string. There are no MCInst operands for these. 6024 if (Op.isToken()) { 6025 continue; 6026 } 6027 assert(Op.isImm()); 6028 6029 // Handle optional arguments 6030 OptionalIdx[Op.getImmTy()] = i; 6031 } 6032 6033 // This is a workaround for an llvm quirk which may result in an 6034 // incorrect instruction selection. Lds and non-lds versions of 6035 // MUBUF instructions are identical except that lds versions 6036 // have mandatory 'lds' modifier. However this modifier follows 6037 // optional modifiers and llvm asm matcher regards this 'lds' 6038 // modifier as an optional one. As a result, an lds version 6039 // of opcode may be selected even if it has no 'lds' modifier. 6040 if (IsLdsOpcode && !HasLdsModifier) { 6041 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6042 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6043 Inst.setOpcode(NoLdsOpcode); 6044 IsLdsOpcode = false; 6045 } 6046 } 6047 6048 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6049 if (!IsAtomic) { // glc is hard-coded. 
6050 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6051 } 6052 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6053 6054 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6055 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6056 } 6057 6058 if (isGFX10()) 6059 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6060 } 6061 6062 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6063 OptionalImmIndexMap OptionalIdx; 6064 6065 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6066 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6067 6068 // Add the register arguments 6069 if (Op.isReg()) { 6070 Op.addRegOperands(Inst, 1); 6071 continue; 6072 } 6073 6074 // Handle the case where soffset is an immediate 6075 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6076 Op.addImmOperands(Inst, 1); 6077 continue; 6078 } 6079 6080 // Handle tokens like 'offen' which are sometimes hard-coded into the 6081 // asm string. There are no MCInst operands for these. 6082 if (Op.isToken()) { 6083 continue; 6084 } 6085 assert(Op.isImm()); 6086 6087 // Handle optional arguments 6088 OptionalIdx[Op.getImmTy()] = i; 6089 } 6090 6091 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6092 AMDGPUOperand::ImmTyOffset); 6093 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6094 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6095 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6096 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6097 6098 if (isGFX10()) 6099 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6100 } 6101 6102 //===----------------------------------------------------------------------===// 6103 // mimg 6104 //===----------------------------------------------------------------------===// 6105 6106 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6107 bool IsAtomic) { 6108 unsigned I = 1; 6109 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6110 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6111 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6112 } 6113 6114 if (IsAtomic) { 6115 // Add src, same as dst 6116 assert(Desc.getNumDefs() == 1); 6117 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6118 } 6119 6120 OptionalImmIndexMap OptionalIdx; 6121 6122 for (unsigned E = Operands.size(); I != E; ++I) { 6123 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6124 6125 // Add the register arguments 6126 if (Op.isReg()) { 6127 Op.addRegOperands(Inst, 1); 6128 } else if (Op.isImmModifier()) { 6129 OptionalIdx[Op.getImmTy()] = I; 6130 } else if (!Op.isToken()) { 6131 llvm_unreachable("unexpected operand type"); 6132 } 6133 } 6134 6135 bool IsGFX10 = isGFX10(); 6136 6137 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6138 if (IsGFX10) 6139 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6140 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6141 if (IsGFX10) 6142 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6143 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6144 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6145 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6146 if (IsGFX10) 6147 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6148 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6149 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6150 if (!IsGFX10) 6151 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6152 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6153 } 6154 6155 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6156 cvtMIMG(Inst, Operands, true); 6157 } 6158 6159 //===----------------------------------------------------------------------===// 6160 // smrd 6161 //===----------------------------------------------------------------------===// 6162 6163 bool AMDGPUOperand::isSMRDOffset8() const { 6164 return isImm() && isUInt<8>(getImm()); 6165 } 6166 6167 bool AMDGPUOperand::isSMEMOffset() const { 6168 return isImm(); // Offset range is checked later by validator. 6169 } 6170 6171 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6172 // 32-bit literals are only supported on CI and we only want to use them 6173 // when the offset is > 8-bits. 6174 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6175 } 6176 6177 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6178 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6179 } 6180 6181 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6182 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6183 } 6184 6185 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6186 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6187 } 6188 6189 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6190 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6191 } 6192 6193 //===----------------------------------------------------------------------===// 6194 // vop3 6195 //===----------------------------------------------------------------------===// 6196 6197 static bool ConvertOmodMul(int64_t &Mul) { 6198 if (Mul != 1 && Mul != 2 && Mul != 4) 6199 return false; 6200 6201 Mul >>= 1; 6202 return true; 6203 } 6204 6205 static bool ConvertOmodDiv(int64_t &Div) { 6206 if (Div == 1) { 6207 Div = 0; 6208 return true; 6209 } 6210 6211 if (Div == 2) { 6212 Div = 3; 6213 return true; 6214 } 6215 6216 return false; 6217 } 6218 6219 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6220 if (BoundCtrl == 0) { 6221 BoundCtrl = 1; 6222 return true; 6223 } 6224 6225 if (BoundCtrl == -1) { 6226 BoundCtrl = 0; 6227 return true; 6228 } 6229 6230 return false; 6231 } 6232 6233 // Note: the order in this table matches the order of operands in AsmString. 
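// Each entry gives the operand's asm name, its immediate type, whether it is
// a single-bit flag, and an optional converter for the parsed value; see how
// Name, Type, IsBit and ConvertResult are used in parseOptionalOpr below.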
6234 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6235 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6236 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6237 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6238 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6239 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6240 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6241 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6242 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6243 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6244 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6245 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 6246 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6247 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6248 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6249 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6250 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6251 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6252 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6253 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6254 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6255 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6256 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6257 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6258 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6259 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6260 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6261 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6262 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6263 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6264 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6265 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6266 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6267 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6268 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6269 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6270 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6271 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6272 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6273 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6274 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6275 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6276 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6277 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6278 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6279 }; 6280 6281 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6282 6283 OperandMatchResultTy res = parseOptionalOpr(Operands); 6284 6285 // This is a hack to enable hardcoded mandatory operands which follow 6286 // optional operands. 6287 // 6288 // Current design assumes that all operands after the first optional operand 6289 // are also optional. However implementation of some instructions violates 6290 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6291 // 6292 // To alleviate this problem, we have to (implicitly) parse extra operands 6293 // to make sure autogenerated parser of custom operands never hit hardcoded 6294 // mandatory operands. 
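// MAX_OPR_LOOKAHEAD bounds how many extra operands are speculatively parsed
// below for this purpose.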
6295 6296 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6297 if (res != MatchOperand_Success || 6298 isToken(AsmToken::EndOfStatement)) 6299 break; 6300 6301 trySkipToken(AsmToken::Comma); 6302 res = parseOptionalOpr(Operands); 6303 } 6304 6305 return res; 6306 } 6307 6308 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6309 OperandMatchResultTy res; 6310 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6311 // try to parse any optional operand here 6312 if (Op.IsBit) { 6313 res = parseNamedBit(Op.Name, Operands, Op.Type); 6314 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6315 res = parseOModOperand(Operands); 6316 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6317 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6318 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6319 res = parseSDWASel(Operands, Op.Name, Op.Type); 6320 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6321 res = parseSDWADstUnused(Operands); 6322 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6323 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6324 Op.Type == AMDGPUOperand::ImmTyNegLo || 6325 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6326 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6327 Op.ConvertResult); 6328 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6329 res = parseDim(Operands); 6330 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6331 res = parseDfmtNfmt(Operands); 6332 } else { 6333 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6334 } 6335 if (res != MatchOperand_NoMatch) { 6336 return res; 6337 } 6338 } 6339 return MatchOperand_NoMatch; 6340 } 6341 6342 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6343 StringRef Name = Parser.getTok().getString(); 6344 if (Name == "mul") { 6345 return parseIntWithPrefix("mul", Operands, 6346 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6347 } 6348 6349 if (Name == "div") { 6350 return parseIntWithPrefix("div", Operands, 6351 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6352 } 6353 6354 return MatchOperand_NoMatch; 6355 } 6356 6357 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6358 cvtVOP3P(Inst, Operands); 6359 6360 int Opc = Inst.getOpcode(); 6361 6362 int SrcNum; 6363 const int Ops[] = { AMDGPU::OpName::src0, 6364 AMDGPU::OpName::src1, 6365 AMDGPU::OpName::src2 }; 6366 for (SrcNum = 0; 6367 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6368 ++SrcNum); 6369 assert(SrcNum > 0); 6370 6371 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6372 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6373 6374 if ((OpSel & (1 << SrcNum)) != 0) { 6375 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6376 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6377 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6378 } 6379 } 6380 6381 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6382 // 1. This operand is input modifiers 6383 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6384 // 2. This is not last operand 6385 && Desc.NumOperands > (OpNum + 1) 6386 // 3. Next operand is register class 6387 && Desc.OpInfo[OpNum + 1].RegClass != -1 6388 // 4. 
Next register is not tied to any other operand 6389 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6390 } 6391 6392 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6393 { 6394 OptionalImmIndexMap OptionalIdx; 6395 unsigned Opc = Inst.getOpcode(); 6396 6397 unsigned I = 1; 6398 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6399 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6400 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6401 } 6402 6403 for (unsigned E = Operands.size(); I != E; ++I) { 6404 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6405 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6406 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6407 } else if (Op.isInterpSlot() || 6408 Op.isInterpAttr() || 6409 Op.isAttrChan()) { 6410 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6411 } else if (Op.isImmModifier()) { 6412 OptionalIdx[Op.getImmTy()] = I; 6413 } else { 6414 llvm_unreachable("unhandled operand type"); 6415 } 6416 } 6417 6418 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6419 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6420 } 6421 6422 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6423 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6424 } 6425 6426 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6427 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6428 } 6429 } 6430 6431 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6432 OptionalImmIndexMap &OptionalIdx) { 6433 unsigned Opc = Inst.getOpcode(); 6434 6435 unsigned I = 1; 6436 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6437 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6438 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6439 } 6440 6441 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6442 // This instruction has src modifiers 6443 for (unsigned E = Operands.size(); I != E; ++I) { 6444 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6445 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6446 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6447 } else if (Op.isImmModifier()) { 6448 OptionalIdx[Op.getImmTy()] = I; 6449 } else if (Op.isRegOrImm()) { 6450 Op.addRegOrImmOperands(Inst, 1); 6451 } else { 6452 llvm_unreachable("unhandled operand type"); 6453 } 6454 } 6455 } else { 6456 // No src modifiers 6457 for (unsigned E = Operands.size(); I != E; ++I) { 6458 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6459 if (Op.isMod()) { 6460 OptionalIdx[Op.getImmTy()] = I; 6461 } else { 6462 Op.addRegOrImmOperands(Inst, 1); 6463 } 6464 } 6465 } 6466 6467 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6468 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6469 } 6470 6471 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6472 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6473 } 6474 6475 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6476 // it has src2 register operand that is tied to dst operand 6477 // we don't allow modifiers for this operand in assembler so src2_modifiers 6478 // should be 0. 
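// For example (illustrative): for "v_mac_f32_e64 v0, v1, v2" the code below
// inserts an implicit src2_modifiers of 0 and a src2 operand tied to the
// destination (v0).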
6479 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6480 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6481 Opc == AMDGPU::V_MAC_F32_e64_vi || 6482 Opc == AMDGPU::V_MAC_F16_e64_vi || 6483 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6484 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6485 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6486 auto it = Inst.begin(); 6487 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6488 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6489 ++it; 6490 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6491 } 6492 } 6493 6494 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6495 OptionalImmIndexMap OptionalIdx; 6496 cvtVOP3(Inst, Operands, OptionalIdx); 6497 } 6498 6499 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6500 const OperandVector &Operands) { 6501 OptionalImmIndexMap OptIdx; 6502 const int Opc = Inst.getOpcode(); 6503 const MCInstrDesc &Desc = MII.get(Opc); 6504 6505 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6506 6507 cvtVOP3(Inst, Operands, OptIdx); 6508 6509 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6510 assert(!IsPacked); 6511 Inst.addOperand(Inst.getOperand(0)); 6512 } 6513 6514 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6515 // instruction, and then figure out where to actually put the modifiers 6516 6517 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6518 6519 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6520 if (OpSelHiIdx != -1) { 6521 int DefaultVal = IsPacked ? -1 : 0; 6522 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6523 DefaultVal); 6524 } 6525 6526 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6527 if (NegLoIdx != -1) { 6528 assert(IsPacked); 6529 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6530 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6531 } 6532 6533 const int Ops[] = { AMDGPU::OpName::src0, 6534 AMDGPU::OpName::src1, 6535 AMDGPU::OpName::src2 }; 6536 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6537 AMDGPU::OpName::src1_modifiers, 6538 AMDGPU::OpName::src2_modifiers }; 6539 6540 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6541 6542 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6543 unsigned OpSelHi = 0; 6544 unsigned NegLo = 0; 6545 unsigned NegHi = 0; 6546 6547 if (OpSelHiIdx != -1) { 6548 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6549 } 6550 6551 if (NegLoIdx != -1) { 6552 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6553 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6554 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6555 } 6556 6557 for (int J = 0; J < 3; ++J) { 6558 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6559 if (OpIdx == -1) 6560 break; 6561 6562 uint32_t ModVal = 0; 6563 6564 if ((OpSel & (1 << J)) != 0) 6565 ModVal |= SISrcMods::OP_SEL_0; 6566 6567 if ((OpSelHi & (1 << J)) != 0) 6568 ModVal |= SISrcMods::OP_SEL_1; 6569 6570 if ((NegLo & (1 << J)) != 0) 6571 ModVal |= SISrcMods::NEG; 6572 6573 if ((NegHi & (1 << J)) != 0) 6574 ModVal |= SISrcMods::NEG_HI; 6575 6576 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6577 6578 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6579 } 6580 } 6581 6582 //===----------------------------------------------------------------------===// 6583 // dpp 6584 
//===----------------------------------------------------------------------===// 6585 6586 bool AMDGPUOperand::isDPP8() const { 6587 return isImmTy(ImmTyDPP8); 6588 } 6589 6590 bool AMDGPUOperand::isDPPCtrl() const { 6591 using namespace AMDGPU::DPP; 6592 6593 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6594 if (result) { 6595 int64_t Imm = getImm(); 6596 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6597 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6598 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6599 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6600 (Imm == DppCtrl::WAVE_SHL1) || 6601 (Imm == DppCtrl::WAVE_ROL1) || 6602 (Imm == DppCtrl::WAVE_SHR1) || 6603 (Imm == DppCtrl::WAVE_ROR1) || 6604 (Imm == DppCtrl::ROW_MIRROR) || 6605 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6606 (Imm == DppCtrl::BCAST15) || 6607 (Imm == DppCtrl::BCAST31) || 6608 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6609 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6610 } 6611 return false; 6612 } 6613 6614 //===----------------------------------------------------------------------===// 6615 // mAI 6616 //===----------------------------------------------------------------------===// 6617 6618 bool AMDGPUOperand::isBLGP() const { 6619 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6620 } 6621 6622 bool AMDGPUOperand::isCBSZ() const { 6623 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6624 } 6625 6626 bool AMDGPUOperand::isABID() const { 6627 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6628 } 6629 6630 bool AMDGPUOperand::isS16Imm() const { 6631 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6632 } 6633 6634 bool AMDGPUOperand::isU16Imm() const { 6635 return isImm() && isUInt<16>(getImm()); 6636 } 6637 6638 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6639 if (!isGFX10()) 6640 return MatchOperand_NoMatch; 6641 6642 SMLoc S = Parser.getTok().getLoc(); 6643 6644 if (getLexer().isNot(AsmToken::Identifier)) 6645 return MatchOperand_NoMatch; 6646 if (getLexer().getTok().getString() != "dim") 6647 return MatchOperand_NoMatch; 6648 6649 Parser.Lex(); 6650 if (getLexer().isNot(AsmToken::Colon)) 6651 return MatchOperand_ParseFail; 6652 6653 Parser.Lex(); 6654 6655 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6656 // integer. 
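// For example (illustrative): both "dim:SQ_RSRC_IMG_2D" and the short form
// "dim:2D" resolve to the same dimension; in the short form the leading "2"
// is lexed as an integer and "D" as an identifier, so they are rejoined here.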
6657 std::string Token; 6658 if (getLexer().is(AsmToken::Integer)) { 6659 SMLoc Loc = getLexer().getTok().getEndLoc(); 6660 Token = std::string(getLexer().getTok().getString()); 6661 Parser.Lex(); 6662 if (getLexer().getTok().getLoc() != Loc) 6663 return MatchOperand_ParseFail; 6664 } 6665 if (getLexer().isNot(AsmToken::Identifier)) 6666 return MatchOperand_ParseFail; 6667 Token += getLexer().getTok().getString(); 6668 6669 StringRef DimId = Token; 6670 if (DimId.startswith("SQ_RSRC_IMG_")) 6671 DimId = DimId.substr(12); 6672 6673 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6674 if (!DimInfo) 6675 return MatchOperand_ParseFail; 6676 6677 Parser.Lex(); 6678 6679 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6680 AMDGPUOperand::ImmTyDim)); 6681 return MatchOperand_Success; 6682 } 6683 6684 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6685 SMLoc S = Parser.getTok().getLoc(); 6686 StringRef Prefix; 6687 6688 if (getLexer().getKind() == AsmToken::Identifier) { 6689 Prefix = Parser.getTok().getString(); 6690 } else { 6691 return MatchOperand_NoMatch; 6692 } 6693 6694 if (Prefix != "dpp8") 6695 return parseDPPCtrl(Operands); 6696 if (!isGFX10()) 6697 return MatchOperand_NoMatch; 6698 6699 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6700 6701 int64_t Sels[8]; 6702 6703 Parser.Lex(); 6704 if (getLexer().isNot(AsmToken::Colon)) 6705 return MatchOperand_ParseFail; 6706 6707 Parser.Lex(); 6708 if (getLexer().isNot(AsmToken::LBrac)) 6709 return MatchOperand_ParseFail; 6710 6711 Parser.Lex(); 6712 if (getParser().parseAbsoluteExpression(Sels[0])) 6713 return MatchOperand_ParseFail; 6714 if (0 > Sels[0] || 7 < Sels[0]) 6715 return MatchOperand_ParseFail; 6716 6717 for (size_t i = 1; i < 8; ++i) { 6718 if (getLexer().isNot(AsmToken::Comma)) 6719 return MatchOperand_ParseFail; 6720 6721 Parser.Lex(); 6722 if (getParser().parseAbsoluteExpression(Sels[i])) 6723 return MatchOperand_ParseFail; 6724 if (0 > Sels[i] || 7 < Sels[i]) 6725 return MatchOperand_ParseFail; 6726 } 6727 6728 if (getLexer().isNot(AsmToken::RBrac)) 6729 return MatchOperand_ParseFail; 6730 Parser.Lex(); 6731 6732 unsigned DPP8 = 0; 6733 for (size_t i = 0; i < 8; ++i) 6734 DPP8 |= (Sels[i] << (i * 3)); 6735 6736 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6737 return MatchOperand_Success; 6738 } 6739 6740 OperandMatchResultTy 6741 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6742 using namespace AMDGPU::DPP; 6743 6744 SMLoc S = Parser.getTok().getLoc(); 6745 StringRef Prefix; 6746 int64_t Int; 6747 6748 if (getLexer().getKind() == AsmToken::Identifier) { 6749 Prefix = Parser.getTok().getString(); 6750 } else { 6751 return MatchOperand_NoMatch; 6752 } 6753 6754 if (Prefix == "row_mirror") { 6755 Int = DppCtrl::ROW_MIRROR; 6756 Parser.Lex(); 6757 } else if (Prefix == "row_half_mirror") { 6758 Int = DppCtrl::ROW_HALF_MIRROR; 6759 Parser.Lex(); 6760 } else { 6761 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6762 if (Prefix != "quad_perm" 6763 && Prefix != "row_shl" 6764 && Prefix != "row_shr" 6765 && Prefix != "row_ror" 6766 && Prefix != "wave_shl" 6767 && Prefix != "wave_rol" 6768 && Prefix != "wave_shr" 6769 && Prefix != "wave_ror" 6770 && Prefix != "row_bcast" 6771 && Prefix != "row_share" 6772 && Prefix != "row_xmask") { 6773 return MatchOperand_NoMatch; 6774 } 6775 6776 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6777 return MatchOperand_NoMatch; 6778 6779 if 
(!isVI() && !isGFX9() && 6780 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6781 Prefix == "wave_rol" || Prefix == "wave_ror" || 6782 Prefix == "row_bcast")) 6783 return MatchOperand_NoMatch; 6784 6785 Parser.Lex(); 6786 if (getLexer().isNot(AsmToken::Colon)) 6787 return MatchOperand_ParseFail; 6788 6789 if (Prefix == "quad_perm") { 6790 // quad_perm:[%d,%d,%d,%d] 6791 Parser.Lex(); 6792 if (getLexer().isNot(AsmToken::LBrac)) 6793 return MatchOperand_ParseFail; 6794 Parser.Lex(); 6795 6796 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6797 return MatchOperand_ParseFail; 6798 6799 for (int i = 0; i < 3; ++i) { 6800 if (getLexer().isNot(AsmToken::Comma)) 6801 return MatchOperand_ParseFail; 6802 Parser.Lex(); 6803 6804 int64_t Temp; 6805 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6806 return MatchOperand_ParseFail; 6807 const int shift = i*2 + 2; 6808 Int += (Temp << shift); 6809 } 6810 6811 if (getLexer().isNot(AsmToken::RBrac)) 6812 return MatchOperand_ParseFail; 6813 Parser.Lex(); 6814 } else { 6815 // sel:%d 6816 Parser.Lex(); 6817 if (getParser().parseAbsoluteExpression(Int)) 6818 return MatchOperand_ParseFail; 6819 6820 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6821 Int |= DppCtrl::ROW_SHL0; 6822 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6823 Int |= DppCtrl::ROW_SHR0; 6824 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6825 Int |= DppCtrl::ROW_ROR0; 6826 } else if (Prefix == "wave_shl" && 1 == Int) { 6827 Int = DppCtrl::WAVE_SHL1; 6828 } else if (Prefix == "wave_rol" && 1 == Int) { 6829 Int = DppCtrl::WAVE_ROL1; 6830 } else if (Prefix == "wave_shr" && 1 == Int) { 6831 Int = DppCtrl::WAVE_SHR1; 6832 } else if (Prefix == "wave_ror" && 1 == Int) { 6833 Int = DppCtrl::WAVE_ROR1; 6834 } else if (Prefix == "row_bcast") { 6835 if (Int == 15) { 6836 Int = DppCtrl::BCAST15; 6837 } else if (Int == 31) { 6838 Int = DppCtrl::BCAST31; 6839 } else { 6840 return MatchOperand_ParseFail; 6841 } 6842 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6843 Int |= DppCtrl::ROW_SHARE_FIRST; 6844 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6845 Int |= DppCtrl::ROW_XMASK_FIRST; 6846 } else { 6847 return MatchOperand_ParseFail; 6848 } 6849 } 6850 } 6851 6852 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6853 return MatchOperand_Success; 6854 } 6855 6856 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6857 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6858 } 6859 6860 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6861 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6862 } 6863 6864 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6865 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6866 } 6867 6868 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6869 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6870 } 6871 6872 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6873 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6874 } 6875 6876 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6877 OptionalImmIndexMap OptionalIdx; 6878 6879 unsigned I = 1; 6880 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6881 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6882 
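// Add the def (destination) registers first, as in the other cvt* converters.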
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6883 } 6884 6885 int Fi = 0; 6886 for (unsigned E = Operands.size(); I != E; ++I) { 6887 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6888 MCOI::TIED_TO); 6889 if (TiedTo != -1) { 6890 assert((unsigned)TiedTo < Inst.getNumOperands()); 6891 // handle tied old or src2 for MAC instructions 6892 Inst.addOperand(Inst.getOperand(TiedTo)); 6893 } 6894 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6895 // Add the register arguments 6896 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6897 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6898 // Skip it. 6899 continue; 6900 } 6901 6902 if (IsDPP8) { 6903 if (Op.isDPP8()) { 6904 Op.addImmOperands(Inst, 1); 6905 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6906 Op.addRegWithFPInputModsOperands(Inst, 2); 6907 } else if (Op.isFI()) { 6908 Fi = Op.getImm(); 6909 } else if (Op.isReg()) { 6910 Op.addRegOperands(Inst, 1); 6911 } else { 6912 llvm_unreachable("Invalid operand type"); 6913 } 6914 } else { 6915 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6916 Op.addRegWithFPInputModsOperands(Inst, 2); 6917 } else if (Op.isDPPCtrl()) { 6918 Op.addImmOperands(Inst, 1); 6919 } else if (Op.isImm()) { 6920 // Handle optional arguments 6921 OptionalIdx[Op.getImmTy()] = I; 6922 } else { 6923 llvm_unreachable("Invalid operand type"); 6924 } 6925 } 6926 } 6927 6928 if (IsDPP8) { 6929 using namespace llvm::AMDGPU::DPP; 6930 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6931 } else { 6932 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6933 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6934 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6935 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6936 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6937 } 6938 } 6939 } 6940 6941 //===----------------------------------------------------------------------===// 6942 // sdwa 6943 //===----------------------------------------------------------------------===// 6944 6945 OperandMatchResultTy 6946 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6947 AMDGPUOperand::ImmTy Type) { 6948 using namespace llvm::AMDGPU::SDWA; 6949 6950 SMLoc S = Parser.getTok().getLoc(); 6951 StringRef Value; 6952 OperandMatchResultTy res; 6953 6954 res = parseStringWithPrefix(Prefix, Value); 6955 if (res != MatchOperand_Success) { 6956 return res; 6957 } 6958 6959 int64_t Int; 6960 Int = StringSwitch<int64_t>(Value) 6961 .Case("BYTE_0", SdwaSel::BYTE_0) 6962 .Case("BYTE_1", SdwaSel::BYTE_1) 6963 .Case("BYTE_2", SdwaSel::BYTE_2) 6964 .Case("BYTE_3", SdwaSel::BYTE_3) 6965 .Case("WORD_0", SdwaSel::WORD_0) 6966 .Case("WORD_1", SdwaSel::WORD_1) 6967 .Case("DWORD", SdwaSel::DWORD) 6968 .Default(0xffffffff); 6969 Parser.Lex(); // eat last token 6970 6971 if (Int == 0xffffffff) { 6972 return MatchOperand_ParseFail; 6973 } 6974 6975 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6976 return MatchOperand_Success; 6977 } 6978 6979 OperandMatchResultTy 6980 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6981 using namespace llvm::AMDGPU::SDWA; 6982 6983 SMLoc S = Parser.getTok().getLoc(); 6984 StringRef Value; 6985 OperandMatchResultTy res; 6986 6987 res = parseStringWithPrefix("dst_unused", Value); 6988 if (res != 
MatchOperand_Success) { 6989 return res; 6990 } 6991 6992 int64_t Int; 6993 Int = StringSwitch<int64_t>(Value) 6994 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6995 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6996 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6997 .Default(0xffffffff); 6998 Parser.Lex(); // eat last token 6999 7000 if (Int == 0xffffffff) { 7001 return MatchOperand_ParseFail; 7002 } 7003 7004 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 7005 return MatchOperand_Success; 7006 } 7007 7008 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 7009 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 7010 } 7011 7012 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 7013 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 7014 } 7015 7016 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 7017 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 7018 } 7019 7020 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 7021 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 7022 } 7023 7024 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 7025 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 7026 } 7027 7028 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 7029 uint64_t BasicInstType, 7030 bool SkipDstVcc, 7031 bool SkipSrcVcc) { 7032 using namespace llvm::AMDGPU::SDWA; 7033 7034 OptionalImmIndexMap OptionalIdx; 7035 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 7036 bool SkippedVcc = false; 7037 7038 unsigned I = 1; 7039 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7040 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7041 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7042 } 7043 7044 for (unsigned E = Operands.size(); I != E; ++I) { 7045 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7046 if (SkipVcc && !SkippedVcc && Op.isReg() && 7047 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 7048 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 7049 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 7050 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 7051 // Skip VCC only if we didn't skip it on previous iteration. 7052 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
7053 if (BasicInstType == SIInstrFlags::VOP2 && 7054 ((SkipDstVcc && Inst.getNumOperands() == 1) || 7055 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 7056 SkippedVcc = true; 7057 continue; 7058 } else if (BasicInstType == SIInstrFlags::VOPC && 7059 Inst.getNumOperands() == 0) { 7060 SkippedVcc = true; 7061 continue; 7062 } 7063 } 7064 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7065 Op.addRegOrImmWithInputModsOperands(Inst, 2); 7066 } else if (Op.isImm()) { 7067 // Handle optional arguments 7068 OptionalIdx[Op.getImmTy()] = I; 7069 } else { 7070 llvm_unreachable("Invalid operand type"); 7071 } 7072 SkippedVcc = false; 7073 } 7074 7075 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 7076 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 7077 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 7078 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 7079 switch (BasicInstType) { 7080 case SIInstrFlags::VOP1: 7081 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7082 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 7083 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 7084 } 7085 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 7086 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 7087 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7088 break; 7089 7090 case SIInstrFlags::VOP2: 7091 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7092 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 7093 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 7094 } 7095 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 7096 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 7097 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7098 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7099 break; 7100 7101 case SIInstrFlags::VOPC: 7102 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 7103 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7104 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7105 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7106 break; 7107 7108 default: 7109 llvm_unreachable("Invalid instruction type. 
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//
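
// Parse the optional immediate operand of an ENDPGM instruction (e.g.
// s_endpgm). When omitted it defaults to 0; otherwise it must be a 16-bit
// value.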
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }