//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
226 return isa<MCSymbolRefExpr>(Expr); 227 } 228 229 bool isImm() const override { 230 return Kind == Immediate; 231 } 232 233 bool isInlinableImm(MVT type) const; 234 bool isLiteralImm(MVT type) const; 235 236 bool isRegKind() const { 237 return Kind == Register; 238 } 239 240 bool isReg() const override { 241 return isRegKind() && !hasModifiers(); 242 } 243 244 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 245 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 246 } 247 248 bool isRegOrImmWithInt16InputMods() const { 249 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 250 } 251 252 bool isRegOrImmWithInt32InputMods() const { 253 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 254 } 255 256 bool isRegOrImmWithInt64InputMods() const { 257 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 258 } 259 260 bool isRegOrImmWithFP16InputMods() const { 261 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 262 } 263 264 bool isRegOrImmWithFP32InputMods() const { 265 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 266 } 267 268 bool isRegOrImmWithFP64InputMods() const { 269 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 270 } 271 272 bool isVReg() const { 273 return isRegClass(AMDGPU::VGPR_32RegClassID) || 274 isRegClass(AMDGPU::VReg_64RegClassID) || 275 isRegClass(AMDGPU::VReg_96RegClassID) || 276 isRegClass(AMDGPU::VReg_128RegClassID) || 277 isRegClass(AMDGPU::VReg_256RegClassID) || 278 isRegClass(AMDGPU::VReg_512RegClassID); 279 } 280 281 bool isVReg32() const { 282 return isRegClass(AMDGPU::VGPR_32RegClassID); 283 } 284 285 bool isVReg32OrOff() const { 286 return isOff() || isVReg32(); 287 } 288 289 bool isSDWAOperand(MVT type) const; 290 bool isSDWAFP16Operand() const; 291 bool isSDWAFP32Operand() const; 292 bool isSDWAInt16Operand() const; 293 bool isSDWAInt32Operand() const; 294 295 bool isImmTy(ImmTy ImmT) const { 296 return isImm() && Imm.Type == ImmT; 297 } 298 299 bool isImmModifier() const { 300 return isImm() && Imm.Type != ImmTyNone; 301 } 302 303 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 304 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 305 bool isDMask() const { return isImmTy(ImmTyDMask); } 306 bool isDim() const { return isImmTy(ImmTyDim); } 307 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 308 bool isDA() const { return isImmTy(ImmTyDA); } 309 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 310 bool isLWE() const { return isImmTy(ImmTyLWE); } 311 bool isOff() const { return isImmTy(ImmTyOff); } 312 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 313 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 314 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 315 bool isOffen() const { return isImmTy(ImmTyOffen); } 316 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 317 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 318 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 319 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 320 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 321 322 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 323 bool isGDS() const { return isImmTy(ImmTyGDS); } 324 bool isLDS() const { return isImmTy(ImmTyLDS); } 325 bool isDLC() const { return isImmTy(ImmTyDLC); } 326 bool isGLC() const { return isImmTy(ImmTyGLC); } 327 bool 
isSLC() const { return isImmTy(ImmTySLC); } 328 bool isTFE() const { return isImmTy(ImmTyTFE); } 329 bool isD16() const { return isImmTy(ImmTyD16); } 330 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 331 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 332 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 333 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 334 bool isFI() const { return isImmTy(ImmTyDppFi); } 335 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 336 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 337 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 338 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 339 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 340 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 341 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 342 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 343 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 344 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 345 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 346 bool isHigh() const { return isImmTy(ImmTyHigh); } 347 348 bool isMod() const { 349 return isClampSI() || isOModSI(); 350 } 351 352 bool isRegOrImm() const { 353 return isReg() || isImm(); 354 } 355 356 bool isRegClass(unsigned RCID) const; 357 358 bool isInlineValue() const; 359 360 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 361 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 362 } 363 364 bool isSCSrcB16() const { 365 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 366 } 367 368 bool isSCSrcV2B16() const { 369 return isSCSrcB16(); 370 } 371 372 bool isSCSrcB32() const { 373 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 374 } 375 376 bool isSCSrcB64() const { 377 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 378 } 379 380 bool isBoolReg() const; 381 382 bool isSCSrcF16() const { 383 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 384 } 385 386 bool isSCSrcV2F16() const { 387 return isSCSrcF16(); 388 } 389 390 bool isSCSrcF32() const { 391 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 392 } 393 394 bool isSCSrcF64() const { 395 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 396 } 397 398 bool isSSrcB32() const { 399 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 400 } 401 402 bool isSSrcB16() const { 403 return isSCSrcB16() || isLiteralImm(MVT::i16); 404 } 405 406 bool isSSrcV2B16() const { 407 llvm_unreachable("cannot happen"); 408 return isSSrcB16(); 409 } 410 411 bool isSSrcB64() const { 412 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 413 // See isVSrc64(). 
414 return isSCSrcB64() || isLiteralImm(MVT::i64); 415 } 416 417 bool isSSrcF32() const { 418 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 419 } 420 421 bool isSSrcF64() const { 422 return isSCSrcB64() || isLiteralImm(MVT::f64); 423 } 424 425 bool isSSrcF16() const { 426 return isSCSrcB16() || isLiteralImm(MVT::f16); 427 } 428 429 bool isSSrcV2F16() const { 430 llvm_unreachable("cannot happen"); 431 return isSSrcF16(); 432 } 433 434 bool isSSrcOrLdsB32() const { 435 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 436 isLiteralImm(MVT::i32) || isExpr(); 437 } 438 439 bool isVCSrcB32() const { 440 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 441 } 442 443 bool isVCSrcB64() const { 444 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 445 } 446 447 bool isVCSrcB16() const { 448 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 449 } 450 451 bool isVCSrcV2B16() const { 452 return isVCSrcB16(); 453 } 454 455 bool isVCSrcF32() const { 456 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 457 } 458 459 bool isVCSrcF64() const { 460 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 461 } 462 463 bool isVCSrcF16() const { 464 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 465 } 466 467 bool isVCSrcV2F16() const { 468 return isVCSrcF16(); 469 } 470 471 bool isVSrcB32() const { 472 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 473 } 474 475 bool isVSrcB64() const { 476 return isVCSrcF64() || isLiteralImm(MVT::i64); 477 } 478 479 bool isVSrcB16() const { 480 return isVCSrcF16() || isLiteralImm(MVT::i16); 481 } 482 483 bool isVSrcV2B16() const { 484 return isVSrcB16() || isLiteralImm(MVT::v2i16); 485 } 486 487 bool isVSrcF32() const { 488 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 489 } 490 491 bool isVSrcF64() const { 492 return isVCSrcF64() || isLiteralImm(MVT::f64); 493 } 494 495 bool isVSrcF16() const { 496 return isVCSrcF16() || isLiteralImm(MVT::f16); 497 } 498 499 bool isVSrcV2F16() const { 500 return isVSrcF16() || isLiteralImm(MVT::v2f16); 501 } 502 503 bool isVISrcB32() const { 504 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 505 } 506 507 bool isVISrcB16() const { 508 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 509 } 510 511 bool isVISrcV2B16() const { 512 return isVISrcB16(); 513 } 514 515 bool isVISrcF32() const { 516 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 517 } 518 519 bool isVISrcF16() const { 520 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 521 } 522 523 bool isVISrcV2F16() const { 524 return isVISrcF16() || isVISrcB32(); 525 } 526 527 bool isAISrcB32() const { 528 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 529 } 530 531 bool isAISrcB16() const { 532 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 533 } 534 535 bool isAISrcV2B16() const { 536 return isAISrcB16(); 537 } 538 539 bool isAISrcF32() const { 540 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 541 } 542 543 bool isAISrcF16() const { 544 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 545 } 546 547 bool isAISrcV2F16() const { 548 return isAISrcF16() || isAISrcB32(); 549 } 550 551 bool isAISrc_128B32() const { 552 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 553 } 554 555 bool isAISrc_128B16() const { 556 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 557 } 558 559 bool 
isAISrc_128V2B16() const { 560 return isAISrc_128B16(); 561 } 562 563 bool isAISrc_128F32() const { 564 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 565 } 566 567 bool isAISrc_128F16() const { 568 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 569 } 570 571 bool isAISrc_128V2F16() const { 572 return isAISrc_128F16() || isAISrc_128B32(); 573 } 574 575 bool isAISrc_512B32() const { 576 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 577 } 578 579 bool isAISrc_512B16() const { 580 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 581 } 582 583 bool isAISrc_512V2B16() const { 584 return isAISrc_512B16(); 585 } 586 587 bool isAISrc_512F32() const { 588 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 589 } 590 591 bool isAISrc_512F16() const { 592 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 593 } 594 595 bool isAISrc_512V2F16() const { 596 return isAISrc_512F16() || isAISrc_512B32(); 597 } 598 599 bool isAISrc_1024B32() const { 600 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 601 } 602 603 bool isAISrc_1024B16() const { 604 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 605 } 606 607 bool isAISrc_1024V2B16() const { 608 return isAISrc_1024B16(); 609 } 610 611 bool isAISrc_1024F32() const { 612 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 613 } 614 615 bool isAISrc_1024F16() const { 616 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 617 } 618 619 bool isAISrc_1024V2F16() const { 620 return isAISrc_1024F16() || isAISrc_1024B32(); 621 } 622 623 bool isKImmFP32() const { 624 return isLiteralImm(MVT::f32); 625 } 626 627 bool isKImmFP16() const { 628 return isLiteralImm(MVT::f16); 629 } 630 631 bool isMem() const override { 632 return false; 633 } 634 635 bool isExpr() const { 636 return Kind == Expression; 637 } 638 639 bool isSoppBrTarget() const { 640 return isExpr() || isImm(); 641 } 642 643 bool isSWaitCnt() const; 644 bool isHwreg() const; 645 bool isSendMsg() const; 646 bool isSwizzle() const; 647 bool isSMRDOffset8() const; 648 bool isSMRDOffset20() const; 649 bool isSMRDLiteralOffset() const; 650 bool isDPP8() const; 651 bool isDPPCtrl() const; 652 bool isBLGP() const; 653 bool isCBSZ() const; 654 bool isABID() const; 655 bool isGPRIdxMode() const; 656 bool isS16Imm() const; 657 bool isU16Imm() const; 658 bool isEndpgm() const; 659 660 StringRef getExpressionAsToken() const { 661 assert(isExpr()); 662 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 663 return S->getSymbol().getName(); 664 } 665 666 StringRef getToken() const { 667 assert(isToken()); 668 669 if (Kind == Expression) 670 return getExpressionAsToken(); 671 672 return StringRef(Tok.Data, Tok.Length); 673 } 674 675 int64_t getImm() const { 676 assert(isImm()); 677 return Imm.Val; 678 } 679 680 ImmTy getImmTy() const { 681 assert(isImm()); 682 return Imm.Type; 683 } 684 685 unsigned getReg() const override { 686 assert(isRegKind()); 687 return Reg.RegNo; 688 } 689 690 SMLoc getStartLoc() const override { 691 return StartLoc; 692 } 693 694 SMLoc getEndLoc() const override { 695 return EndLoc; 696 } 697 698 SMRange getLocRange() const { 699 return SMRange(StartLoc, EndLoc); 700 } 701 702 Modifiers getModifiers() const { 703 assert(isRegKind() || isImmTy(ImmTyNone)); 704 return isRegKind() ? 
Reg.Mods : Imm.Mods; 705 } 706 707 void setModifiers(Modifiers Mods) { 708 assert(isRegKind() || isImmTy(ImmTyNone)); 709 if (isRegKind()) 710 Reg.Mods = Mods; 711 else 712 Imm.Mods = Mods; 713 } 714 715 bool hasModifiers() const { 716 return getModifiers().hasModifiers(); 717 } 718 719 bool hasFPModifiers() const { 720 return getModifiers().hasFPModifiers(); 721 } 722 723 bool hasIntModifiers() const { 724 return getModifiers().hasIntModifiers(); 725 } 726 727 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 728 729 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 730 731 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 732 733 template <unsigned Bitwidth> 734 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 735 736 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 737 addKImmFPOperands<16>(Inst, N); 738 } 739 740 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 741 addKImmFPOperands<32>(Inst, N); 742 } 743 744 void addRegOperands(MCInst &Inst, unsigned N) const; 745 746 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 747 addRegOperands(Inst, N); 748 } 749 750 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 751 if (isRegKind()) 752 addRegOperands(Inst, N); 753 else if (isExpr()) 754 Inst.addOperand(MCOperand::createExpr(Expr)); 755 else 756 addImmOperands(Inst, N); 757 } 758 759 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 760 Modifiers Mods = getModifiers(); 761 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 762 if (isRegKind()) { 763 addRegOperands(Inst, N); 764 } else { 765 addImmOperands(Inst, N, false); 766 } 767 } 768 769 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 770 assert(!hasIntModifiers()); 771 addRegOrImmWithInputModsOperands(Inst, N); 772 } 773 774 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 775 assert(!hasFPModifiers()); 776 addRegOrImmWithInputModsOperands(Inst, N); 777 } 778 779 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 780 Modifiers Mods = getModifiers(); 781 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 782 assert(isRegKind()); 783 addRegOperands(Inst, N); 784 } 785 786 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 787 assert(!hasIntModifiers()); 788 addRegWithInputModsOperands(Inst, N); 789 } 790 791 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 792 assert(!hasFPModifiers()); 793 addRegWithInputModsOperands(Inst, N); 794 } 795 796 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 797 if (isImm()) 798 addImmOperands(Inst, N); 799 else { 800 assert(isExpr()); 801 Inst.addOperand(MCOperand::createExpr(Expr)); 802 } 803 } 804 805 static void printImmTy(raw_ostream& OS, ImmTy Type) { 806 switch (Type) { 807 case ImmTyNone: OS << "None"; break; 808 case ImmTyGDS: OS << "GDS"; break; 809 case ImmTyLDS: OS << "LDS"; break; 810 case ImmTyOffen: OS << "Offen"; break; 811 case ImmTyIdxen: OS << "Idxen"; break; 812 case ImmTyAddr64: OS << "Addr64"; break; 813 case ImmTyOffset: OS << "Offset"; break; 814 case ImmTyInstOffset: OS << "InstOffset"; break; 815 case ImmTyOffset0: OS << "Offset0"; break; 816 case ImmTyOffset1: OS << "Offset1"; break; 817 case ImmTyDLC: OS << "DLC"; break; 818 case ImmTyGLC: OS << "GLC"; break; 819 case ImmTySLC: OS << "SLC"; break; 820 case ImmTyTFE: OS << "TFE"; break; 821 case ImmTyD16: OS << "D16"; break; 822 case 
ImmTyFORMAT: OS << "FORMAT"; break; 823 case ImmTyClampSI: OS << "ClampSI"; break; 824 case ImmTyOModSI: OS << "OModSI"; break; 825 case ImmTyDPP8: OS << "DPP8"; break; 826 case ImmTyDppCtrl: OS << "DppCtrl"; break; 827 case ImmTyDppRowMask: OS << "DppRowMask"; break; 828 case ImmTyDppBankMask: OS << "DppBankMask"; break; 829 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 830 case ImmTyDppFi: OS << "FI"; break; 831 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 832 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 833 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 834 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 835 case ImmTyDMask: OS << "DMask"; break; 836 case ImmTyDim: OS << "Dim"; break; 837 case ImmTyUNorm: OS << "UNorm"; break; 838 case ImmTyDA: OS << "DA"; break; 839 case ImmTyR128A16: OS << "R128A16"; break; 840 case ImmTyLWE: OS << "LWE"; break; 841 case ImmTyOff: OS << "Off"; break; 842 case ImmTyExpTgt: OS << "ExpTgt"; break; 843 case ImmTyExpCompr: OS << "ExpCompr"; break; 844 case ImmTyExpVM: OS << "ExpVM"; break; 845 case ImmTyHwreg: OS << "Hwreg"; break; 846 case ImmTySendMsg: OS << "SendMsg"; break; 847 case ImmTyInterpSlot: OS << "InterpSlot"; break; 848 case ImmTyInterpAttr: OS << "InterpAttr"; break; 849 case ImmTyAttrChan: OS << "AttrChan"; break; 850 case ImmTyOpSel: OS << "OpSel"; break; 851 case ImmTyOpSelHi: OS << "OpSelHi"; break; 852 case ImmTyNegLo: OS << "NegLo"; break; 853 case ImmTyNegHi: OS << "NegHi"; break; 854 case ImmTySwizzle: OS << "Swizzle"; break; 855 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 856 case ImmTyHigh: OS << "High"; break; 857 case ImmTyBLGP: OS << "BLGP"; break; 858 case ImmTyCBSZ: OS << "CBSZ"; break; 859 case ImmTyABID: OS << "ABID"; break; 860 case ImmTyEndpgm: OS << "Endpgm"; break; 861 } 862 } 863 864 void print(raw_ostream &OS) const override { 865 switch (Kind) { 866 case Register: 867 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 868 break; 869 case Immediate: 870 OS << '<' << getImm(); 871 if (getImmTy() != ImmTyNone) { 872 OS << " type: "; printImmTy(OS, getImmTy()); 873 } 874 OS << " mods: " << Imm.Mods << '>'; 875 break; 876 case Token: 877 OS << '\'' << getToken() << '\''; 878 break; 879 case Expression: 880 OS << "<expr " << *Expr << '>'; 881 break; 882 } 883 } 884 885 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 886 int64_t Val, SMLoc Loc, 887 ImmTy Type = ImmTyNone, 888 bool IsFPImm = false) { 889 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); 890 Op->Imm.Val = Val; 891 Op->Imm.IsFPImm = IsFPImm; 892 Op->Imm.Type = Type; 893 Op->Imm.Mods = Modifiers(); 894 Op->StartLoc = Loc; 895 Op->EndLoc = Loc; 896 return Op; 897 } 898 899 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 900 StringRef Str, SMLoc Loc, 901 bool HasExplicitEncodingSize = true) { 902 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser); 903 Res->Tok.Data = Str.data(); 904 Res->Tok.Length = Str.size(); 905 Res->StartLoc = Loc; 906 Res->EndLoc = Loc; 907 return Res; 908 } 909 910 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 911 unsigned RegNo, SMLoc S, 912 SMLoc E) { 913 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser); 914 Op->Reg.RegNo = RegNo; 915 Op->Reg.Mods = Modifiers(); 916 Op->StartLoc = S; 917 Op->EndLoc = E; 918 return Op; 919 } 920 921 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 922 const class MCExpr *Expr, SMLoc S) { 923 auto Op = 
llvm::make_unique<AMDGPUOperand>(Expression, AsmParser); 924 Op->Expr = Expr; 925 Op->StartLoc = S; 926 Op->EndLoc = S; 927 return Op; 928 } 929 }; 930 931 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 932 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 933 return OS; 934 } 935 936 //===----------------------------------------------------------------------===// 937 // AsmParser 938 //===----------------------------------------------------------------------===// 939 940 // Holds info related to the current kernel, e.g. count of SGPRs used. 941 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 942 // .amdgpu_hsa_kernel or at EOF. 943 class KernelScopeInfo { 944 int SgprIndexUnusedMin = -1; 945 int VgprIndexUnusedMin = -1; 946 MCContext *Ctx = nullptr; 947 948 void usesSgprAt(int i) { 949 if (i >= SgprIndexUnusedMin) { 950 SgprIndexUnusedMin = ++i; 951 if (Ctx) { 952 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 953 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 954 } 955 } 956 } 957 958 void usesVgprAt(int i) { 959 if (i >= VgprIndexUnusedMin) { 960 VgprIndexUnusedMin = ++i; 961 if (Ctx) { 962 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 963 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 964 } 965 } 966 } 967 968 public: 969 KernelScopeInfo() = default; 970 971 void initialize(MCContext &Context) { 972 Ctx = &Context; 973 usesSgprAt(SgprIndexUnusedMin = -1); 974 usesVgprAt(VgprIndexUnusedMin = -1); 975 } 976 977 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 978 switch (RegKind) { 979 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 980 case IS_AGPR: // fall through 981 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 982 default: break; 983 } 984 } 985 }; 986 987 class AMDGPUAsmParser : public MCTargetAsmParser { 988 MCAsmParser &Parser; 989 990 // Number of extra operands parsed after the first optional operand. 991 // This may be necessary to skip hardcoded mandatory operands. 992 static const unsigned MAX_OPR_LOOKAHEAD = 8; 993 994 unsigned ForcedEncodingSize = 0; 995 bool ForcedDPP = false; 996 bool ForcedSDWA = false; 997 KernelScopeInfo KernelScope; 998 999 /// @name Auto-generated Match Functions 1000 /// { 1001 1002 #define GET_ASSEMBLER_HEADER 1003 #include "AMDGPUGenAsmMatcher.inc" 1004 1005 /// } 1006 1007 private: 1008 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1009 bool OutOfRangeError(SMRange Range); 1010 /// Calculate VGPR/SGPR blocks required for given target, reserved 1011 /// registers, and user-specified NextFreeXGPR values. 1012 /// 1013 /// \param Features [in] Target features, used for bug corrections. 1014 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1015 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1016 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1017 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1018 /// descriptor field, if valid. 1019 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1020 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1021 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1022 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1023 /// \param VGPRBlocks [out] Result VGPR block count. 
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
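      // The pre-defined symbols created below (.amdgcn.gfx_generation_* or
      // .option.machine_version_*) expose the target ISA version to the
      // assembly source.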
1099 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1100 MCContext &Ctx = getContext(); 1101 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1102 MCSymbol *Sym = 1103 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1104 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1105 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1106 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1107 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1108 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1109 } else { 1110 MCSymbol *Sym = 1111 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1112 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1113 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1114 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1115 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1116 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1117 } 1118 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1119 initializeGprCountSymbol(IS_VGPR); 1120 initializeGprCountSymbol(IS_SGPR); 1121 } else 1122 KernelScope.initialize(getContext()); 1123 } 1124 } 1125 1126 bool hasXNACK() const { 1127 return AMDGPU::hasXNACK(getSTI()); 1128 } 1129 1130 bool hasMIMG_R128() const { 1131 return AMDGPU::hasMIMG_R128(getSTI()); 1132 } 1133 1134 bool hasPackedD16() const { 1135 return AMDGPU::hasPackedD16(getSTI()); 1136 } 1137 1138 bool isSI() const { 1139 return AMDGPU::isSI(getSTI()); 1140 } 1141 1142 bool isCI() const { 1143 return AMDGPU::isCI(getSTI()); 1144 } 1145 1146 bool isVI() const { 1147 return AMDGPU::isVI(getSTI()); 1148 } 1149 1150 bool isGFX9() const { 1151 return AMDGPU::isGFX9(getSTI()); 1152 } 1153 1154 bool isGFX10() const { 1155 return AMDGPU::isGFX10(getSTI()); 1156 } 1157 1158 bool hasInv2PiInlineImm() const { 1159 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1160 } 1161 1162 bool hasFlatOffsets() const { 1163 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1164 } 1165 1166 bool hasSGPR102_SGPR103() const { 1167 return !isVI() && !isGFX9(); 1168 } 1169 1170 bool hasSGPR104_SGPR105() const { 1171 return isGFX10(); 1172 } 1173 1174 bool hasIntClamp() const { 1175 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1176 } 1177 1178 AMDGPUTargetStreamer &getTargetStreamer() { 1179 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1180 return static_cast<AMDGPUTargetStreamer &>(TS); 1181 } 1182 1183 const MCRegisterInfo *getMRI() const { 1184 // We need this const_cast because for some reason getContext() is not const 1185 // in MCAsmParser. 
1186 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1187 } 1188 1189 const MCInstrInfo *getMII() const { 1190 return &MII; 1191 } 1192 1193 const FeatureBitset &getFeatureBits() const { 1194 return getSTI().getFeatureBits(); 1195 } 1196 1197 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1198 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1199 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1200 1201 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1202 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1203 bool isForcedDPP() const { return ForcedDPP; } 1204 bool isForcedSDWA() const { return ForcedSDWA; } 1205 ArrayRef<unsigned> getMatchedVariants() const; 1206 1207 std::unique_ptr<AMDGPUOperand> parseRegister(); 1208 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1209 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1210 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1211 unsigned Kind) override; 1212 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1213 OperandVector &Operands, MCStreamer &Out, 1214 uint64_t &ErrorInfo, 1215 bool MatchingInlineAsm) override; 1216 bool ParseDirective(AsmToken DirectiveID) override; 1217 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1218 OperandMode Mode = OperandMode_Default); 1219 StringRef parseMnemonicSuffix(StringRef Name); 1220 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1221 SMLoc NameLoc, OperandVector &Operands) override; 1222 //bool ProcessInstruction(MCInst &Inst); 1223 1224 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1225 1226 OperandMatchResultTy 1227 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1228 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1229 bool (*ConvertResult)(int64_t &) = nullptr); 1230 1231 OperandMatchResultTy 1232 parseOperandArrayWithPrefix(const char *Prefix, 1233 OperandVector &Operands, 1234 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1235 bool (*ConvertResult)(int64_t&) = nullptr); 1236 1237 OperandMatchResultTy 1238 parseNamedBit(const char *Name, OperandVector &Operands, 1239 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1240 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1241 StringRef &Value); 1242 1243 bool isModifier(); 1244 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1245 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1246 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1247 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1248 bool parseSP3NegModifier(); 1249 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1250 OperandMatchResultTy parseReg(OperandVector &Operands); 1251 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1252 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1253 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1254 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1255 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1256 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1257 OperandMatchResultTy 
parseDfmtNfmt(OperandVector &Operands); 1258 1259 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1260 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1261 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1262 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1263 1264 bool parseCnt(int64_t &IntVal); 1265 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1266 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1267 1268 private: 1269 struct OperandInfoTy { 1270 int64_t Id; 1271 bool IsSymbolic = false; 1272 bool IsDefined = false; 1273 1274 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1275 }; 1276 1277 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1278 bool validateSendMsg(const OperandInfoTy &Msg, 1279 const OperandInfoTy &Op, 1280 const OperandInfoTy &Stream, 1281 const SMLoc Loc); 1282 1283 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1284 bool validateHwreg(const OperandInfoTy &HwReg, 1285 const int64_t Offset, 1286 const int64_t Width, 1287 const SMLoc Loc); 1288 1289 void errorExpTgt(); 1290 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1291 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1292 1293 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1294 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1295 bool validateSOPLiteral(const MCInst &Inst) const; 1296 bool validateConstantBusLimitations(const MCInst &Inst); 1297 bool validateEarlyClobberLimitations(const MCInst &Inst); 1298 bool validateIntClampSupported(const MCInst &Inst); 1299 bool validateMIMGAtomicDMask(const MCInst &Inst); 1300 bool validateMIMGGatherDMask(const MCInst &Inst); 1301 bool validateMIMGDataSize(const MCInst &Inst); 1302 bool validateMIMGAddrSize(const MCInst &Inst); 1303 bool validateMIMGD16(const MCInst &Inst); 1304 bool validateMIMGDim(const MCInst &Inst); 1305 bool validateLdsDirect(const MCInst &Inst); 1306 bool validateOpSel(const MCInst &Inst); 1307 bool validateVccOperand(unsigned Reg) const; 1308 bool validateVOP3Literal(const MCInst &Inst) const; 1309 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1310 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1311 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1312 1313 bool isId(const StringRef Id) const; 1314 bool isId(const AsmToken &Token, const StringRef Id) const; 1315 bool isToken(const AsmToken::TokenKind Kind) const; 1316 bool trySkipId(const StringRef Id); 1317 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1318 bool trySkipToken(const AsmToken::TokenKind Kind); 1319 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1320 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1321 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1322 AsmToken::TokenKind getTokenKind() const; 1323 bool parseExpr(int64_t &Imm); 1324 StringRef getTokenStr() const; 1325 AsmToken peekToken(); 1326 AsmToken getToken() const; 1327 SMLoc getLoc() const; 1328 void lex(); 1329 1330 public: 1331 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1332 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1333 1334 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1335 OperandMatchResultTy parseSendMsgOp(OperandVector 
&Operands); 1336 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1337 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1338 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1339 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1340 1341 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1342 const unsigned MinVal, 1343 const unsigned MaxVal, 1344 const StringRef ErrMsg); 1345 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1346 bool parseSwizzleOffset(int64_t &Imm); 1347 bool parseSwizzleMacro(int64_t &Imm); 1348 bool parseSwizzleQuadPerm(int64_t &Imm); 1349 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1350 bool parseSwizzleBroadcast(int64_t &Imm); 1351 bool parseSwizzleSwap(int64_t &Imm); 1352 bool parseSwizzleReverse(int64_t &Imm); 1353 1354 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1355 int64_t parseGPRIdxMacro(); 1356 1357 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1358 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1359 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1360 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1361 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1362 1363 AMDGPUOperand::Ptr defaultDLC() const; 1364 AMDGPUOperand::Ptr defaultGLC() const; 1365 AMDGPUOperand::Ptr defaultSLC() const; 1366 1367 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1368 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1369 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1370 AMDGPUOperand::Ptr defaultFlatOffset() const; 1371 1372 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1373 1374 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1375 OptionalImmIndexMap &OptionalIdx); 1376 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1377 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1378 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1379 1380 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1381 1382 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1383 bool IsAtomic = false); 1384 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1385 1386 OperandMatchResultTy parseDim(OperandVector &Operands); 1387 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1388 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1389 AMDGPUOperand::Ptr defaultRowMask() const; 1390 AMDGPUOperand::Ptr defaultBankMask() const; 1391 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1392 AMDGPUOperand::Ptr defaultFI() const; 1393 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1394 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1395 1396 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1397 AMDGPUOperand::ImmTy Type); 1398 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1399 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1400 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1401 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1402 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1403 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1404 uint64_t 
               BasicInstType, bool skipVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not).
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but such literals
    // are accepted.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)?
MVT::i16 : type; 1604 1605 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1606 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1607 } 1608 1609 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1610 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1611 } 1612 1613 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1614 if (AsmParser->isVI()) 1615 return isVReg32(); 1616 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1617 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1618 else 1619 return false; 1620 } 1621 1622 bool AMDGPUOperand::isSDWAFP16Operand() const { 1623 return isSDWAOperand(MVT::f16); 1624 } 1625 1626 bool AMDGPUOperand::isSDWAFP32Operand() const { 1627 return isSDWAOperand(MVT::f32); 1628 } 1629 1630 bool AMDGPUOperand::isSDWAInt16Operand() const { 1631 return isSDWAOperand(MVT::i16); 1632 } 1633 1634 bool AMDGPUOperand::isSDWAInt32Operand() const { 1635 return isSDWAOperand(MVT::i32); 1636 } 1637 1638 bool AMDGPUOperand::isBoolReg() const { 1639 return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ? 1640 isSCSrcB64() : isSCSrcB32(); 1641 } 1642 1643 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1644 { 1645 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1646 assert(Size == 2 || Size == 4 || Size == 8); 1647 1648 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1649 1650 if (Imm.Mods.Abs) { 1651 Val &= ~FpSignMask; 1652 } 1653 if (Imm.Mods.Neg) { 1654 Val ^= FpSignMask; 1655 } 1656 1657 return Val; 1658 } 1659 1660 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1661 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1662 Inst.getNumOperands())) { 1663 addLiteralImmOperand(Inst, Imm.Val, 1664 ApplyModifiers & 1665 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1666 } else { 1667 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1668 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1669 } 1670 } 1671 1672 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1673 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1674 auto OpNum = Inst.getNumOperands(); 1675 // Check that this operand accepts literals 1676 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1677 1678 if (ApplyModifiers) { 1679 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1680 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 1681 Val = applyInputFPModifiers(Val, Size); 1682 } 1683 1684 APInt Literal(64, Val); 1685 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1686 1687 if (Imm.IsFPImm) { // We got fp literal token 1688 switch (OpTy) { 1689 case AMDGPU::OPERAND_REG_IMM_INT64: 1690 case AMDGPU::OPERAND_REG_IMM_FP64: 1691 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1692 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1693 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1694 AsmParser->hasInv2PiInlineImm())) { 1695 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1696 return; 1697 } 1698 1699 // Non-inlineable 1700 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1701 // For fp operands we check if low 32 bits are zeros 1702 if (Literal.getLoBits(32) != 0) { 1703 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1704 "Can't encode literal as exact 64-bit floating-point operand. 
" 1705 "Low 32-bits will be set to zero"); 1706 } 1707 1708 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1709 return; 1710 } 1711 1712 // We don't allow fp literals in 64-bit integer instructions. It is 1713 // unclear how we should encode them. This case should be checked earlier 1714 // in predicate methods (isLiteralImm()) 1715 llvm_unreachable("fp literal in 64-bit integer instruction."); 1716 1717 case AMDGPU::OPERAND_REG_IMM_INT32: 1718 case AMDGPU::OPERAND_REG_IMM_FP32: 1719 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1720 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1721 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1722 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1723 case AMDGPU::OPERAND_REG_IMM_INT16: 1724 case AMDGPU::OPERAND_REG_IMM_FP16: 1725 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1726 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1727 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1728 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1729 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1730 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1731 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1732 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1733 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1734 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1735 bool lost; 1736 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1737 // Convert literal to single precision 1738 FPLiteral.convert(*getOpFltSemantics(OpTy), 1739 APFloat::rmNearestTiesToEven, &lost); 1740 // We allow precision lost but not overflow or underflow. This should be 1741 // checked earlier in isLiteralImm() 1742 1743 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1744 Inst.addOperand(MCOperand::createImm(ImmVal)); 1745 return; 1746 } 1747 default: 1748 llvm_unreachable("invalid operand size"); 1749 } 1750 1751 return; 1752 } 1753 1754 // We got int literal token. 1755 // Only sign extend inline immediates. 
1756 switch (OpTy) { 1757 case AMDGPU::OPERAND_REG_IMM_INT32: 1758 case AMDGPU::OPERAND_REG_IMM_FP32: 1759 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1760 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1761 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1762 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1763 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1764 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1765 if (isSafeTruncation(Val, 32) && 1766 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1767 AsmParser->hasInv2PiInlineImm())) { 1768 Inst.addOperand(MCOperand::createImm(Val)); 1769 return; 1770 } 1771 1772 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1773 return; 1774 1775 case AMDGPU::OPERAND_REG_IMM_INT64: 1776 case AMDGPU::OPERAND_REG_IMM_FP64: 1777 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1778 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1779 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1780 Inst.addOperand(MCOperand::createImm(Val)); 1781 return; 1782 } 1783 1784 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1785 return; 1786 1787 case AMDGPU::OPERAND_REG_IMM_INT16: 1788 case AMDGPU::OPERAND_REG_IMM_FP16: 1789 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1790 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1791 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1792 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1793 if (isSafeTruncation(Val, 16) && 1794 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1795 AsmParser->hasInv2PiInlineImm())) { 1796 Inst.addOperand(MCOperand::createImm(Val)); 1797 return; 1798 } 1799 1800 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1801 return; 1802 1803 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1804 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1805 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1806 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1807 assert(isSafeTruncation(Val, 16)); 1808 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1809 AsmParser->hasInv2PiInlineImm())); 1810 1811 Inst.addOperand(MCOperand::createImm(Val)); 1812 return; 1813 } 1814 default: 1815 llvm_unreachable("invalid operand size"); 1816 } 1817 } 1818 1819 template <unsigned Bitwidth> 1820 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1821 APInt Literal(64, Imm.Val); 1822 1823 if (!Imm.IsFPImm) { 1824 // We got int literal token. 
1825 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1826 return; 1827 } 1828 1829 bool Lost; 1830 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1831 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1832 APFloat::rmNearestTiesToEven, &Lost); 1833 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1834 } 1835 1836 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1837 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1838 } 1839 1840 static bool isInlineValue(unsigned Reg) { 1841 switch (Reg) { 1842 case AMDGPU::SRC_SHARED_BASE: 1843 case AMDGPU::SRC_SHARED_LIMIT: 1844 case AMDGPU::SRC_PRIVATE_BASE: 1845 case AMDGPU::SRC_PRIVATE_LIMIT: 1846 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1847 return true; 1848 case AMDGPU::SRC_VCCZ: 1849 case AMDGPU::SRC_EXECZ: 1850 case AMDGPU::SRC_SCC: 1851 return true; 1852 default: 1853 return false; 1854 } 1855 } 1856 1857 bool AMDGPUOperand::isInlineValue() const { 1858 return isRegKind() && ::isInlineValue(getReg()); 1859 } 1860 1861 //===----------------------------------------------------------------------===// 1862 // AsmParser 1863 //===----------------------------------------------------------------------===// 1864 1865 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1866 if (Is == IS_VGPR) { 1867 switch (RegWidth) { 1868 default: return -1; 1869 case 1: return AMDGPU::VGPR_32RegClassID; 1870 case 2: return AMDGPU::VReg_64RegClassID; 1871 case 3: return AMDGPU::VReg_96RegClassID; 1872 case 4: return AMDGPU::VReg_128RegClassID; 1873 case 8: return AMDGPU::VReg_256RegClassID; 1874 case 16: return AMDGPU::VReg_512RegClassID; 1875 } 1876 } else if (Is == IS_TTMP) { 1877 switch (RegWidth) { 1878 default: return -1; 1879 case 1: return AMDGPU::TTMP_32RegClassID; 1880 case 2: return AMDGPU::TTMP_64RegClassID; 1881 case 4: return AMDGPU::TTMP_128RegClassID; 1882 case 8: return AMDGPU::TTMP_256RegClassID; 1883 case 16: return AMDGPU::TTMP_512RegClassID; 1884 } 1885 } else if (Is == IS_SGPR) { 1886 switch (RegWidth) { 1887 default: return -1; 1888 case 1: return AMDGPU::SGPR_32RegClassID; 1889 case 2: return AMDGPU::SGPR_64RegClassID; 1890 case 4: return AMDGPU::SGPR_128RegClassID; 1891 case 8: return AMDGPU::SGPR_256RegClassID; 1892 case 16: return AMDGPU::SGPR_512RegClassID; 1893 } 1894 } else if (Is == IS_AGPR) { 1895 switch (RegWidth) { 1896 default: return -1; 1897 case 1: return AMDGPU::AGPR_32RegClassID; 1898 case 2: return AMDGPU::AReg_64RegClassID; 1899 case 4: return AMDGPU::AReg_128RegClassID; 1900 case 16: return AMDGPU::AReg_512RegClassID; 1901 case 32: return AMDGPU::AReg_1024RegClassID; 1902 } 1903 } 1904 return -1; 1905 } 1906 1907 static unsigned getSpecialRegForName(StringRef RegName) { 1908 return StringSwitch<unsigned>(RegName) 1909 .Case("exec", AMDGPU::EXEC) 1910 .Case("vcc", AMDGPU::VCC) 1911 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1912 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1913 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1914 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1915 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1916 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1917 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1918 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1919 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1920 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1921 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1922 
.Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1923 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1924 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1925 .Case("m0", AMDGPU::M0) 1926 .Case("vccz", AMDGPU::SRC_VCCZ) 1927 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1928 .Case("execz", AMDGPU::SRC_EXECZ) 1929 .Case("src_execz", AMDGPU::SRC_EXECZ) 1930 .Case("scc", AMDGPU::SRC_SCC) 1931 .Case("src_scc", AMDGPU::SRC_SCC) 1932 .Case("tba", AMDGPU::TBA) 1933 .Case("tma", AMDGPU::TMA) 1934 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1935 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1936 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1937 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1938 .Case("vcc_lo", AMDGPU::VCC_LO) 1939 .Case("vcc_hi", AMDGPU::VCC_HI) 1940 .Case("exec_lo", AMDGPU::EXEC_LO) 1941 .Case("exec_hi", AMDGPU::EXEC_HI) 1942 .Case("tma_lo", AMDGPU::TMA_LO) 1943 .Case("tma_hi", AMDGPU::TMA_HI) 1944 .Case("tba_lo", AMDGPU::TBA_LO) 1945 .Case("tba_hi", AMDGPU::TBA_HI) 1946 .Case("null", AMDGPU::SGPR_NULL) 1947 .Default(0); 1948 } 1949 1950 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1951 SMLoc &EndLoc) { 1952 auto R = parseRegister(); 1953 if (!R) return true; 1954 assert(R->isReg()); 1955 RegNo = R->getReg(); 1956 StartLoc = R->getStartLoc(); 1957 EndLoc = R->getEndLoc(); 1958 return false; 1959 } 1960 1961 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1962 RegisterKind RegKind, unsigned Reg1, 1963 unsigned RegNum) { 1964 switch (RegKind) { 1965 case IS_SPECIAL: 1966 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1967 Reg = AMDGPU::EXEC; 1968 RegWidth = 2; 1969 return true; 1970 } 1971 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1972 Reg = AMDGPU::FLAT_SCR; 1973 RegWidth = 2; 1974 return true; 1975 } 1976 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1977 Reg = AMDGPU::XNACK_MASK; 1978 RegWidth = 2; 1979 return true; 1980 } 1981 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1982 Reg = AMDGPU::VCC; 1983 RegWidth = 2; 1984 return true; 1985 } 1986 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1987 Reg = AMDGPU::TBA; 1988 RegWidth = 2; 1989 return true; 1990 } 1991 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1992 Reg = AMDGPU::TMA; 1993 RegWidth = 2; 1994 return true; 1995 } 1996 return false; 1997 case IS_VGPR: 1998 case IS_SGPR: 1999 case IS_AGPR: 2000 case IS_TTMP: 2001 if (Reg1 != Reg + RegWidth) { 2002 return false; 2003 } 2004 RegWidth++; 2005 return true; 2006 default: 2007 llvm_unreachable("unexpected register kind"); 2008 } 2009 } 2010 2011 static const StringRef Registers[] = { 2012 { "v" }, 2013 { "s" }, 2014 { "ttmp" }, 2015 { "acc" }, 2016 { "a" }, 2017 }; 2018 2019 bool 2020 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2021 const AsmToken &NextToken) const { 2022 2023 // A list of consecutive registers: [s0,s1,s2,s3] 2024 if (Token.is(AsmToken::LBrac)) 2025 return true; 2026 2027 if (!Token.is(AsmToken::Identifier)) 2028 return false; 2029 2030 // A single register like s0 or a range of registers like s[0:1] 2031 2032 StringRef RegName = Token.getString(); 2033 2034 for (StringRef Reg : Registers) { 2035 if (RegName.startswith(Reg)) { 2036 if (Reg.size() < RegName.size()) { 2037 unsigned RegNum; 2038 // A single register with an index: rXX 2039 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) 2040 return true; 2041 } else { 2042 // A range of registers: r[XX:YY]. 
2043 if (NextToken.is(AsmToken::LBrac)) 2044 return true; 2045 } 2046 } 2047 } 2048 2049 return getSpecialRegForName(RegName); 2050 } 2051 2052 bool 2053 AMDGPUAsmParser::isRegister() 2054 { 2055 return isRegister(getToken(), peekToken()); 2056 } 2057 2058 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2059 unsigned &RegNum, unsigned &RegWidth, 2060 unsigned *DwordRegIndex) { 2061 if (DwordRegIndex) { *DwordRegIndex = 0; } 2062 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2063 if (getLexer().is(AsmToken::Identifier)) { 2064 StringRef RegName = Parser.getTok().getString(); 2065 if ((Reg = getSpecialRegForName(RegName))) { 2066 Parser.Lex(); 2067 RegKind = IS_SPECIAL; 2068 } else { 2069 unsigned RegNumIndex = 0; 2070 if (RegName[0] == 'v') { 2071 RegNumIndex = 1; 2072 RegKind = IS_VGPR; 2073 } else if (RegName[0] == 's') { 2074 RegNumIndex = 1; 2075 RegKind = IS_SGPR; 2076 } else if (RegName[0] == 'a') { 2077 RegNumIndex = RegName.startswith("acc") ? 3 : 1; 2078 RegKind = IS_AGPR; 2079 } else if (RegName.startswith("ttmp")) { 2080 RegNumIndex = strlen("ttmp"); 2081 RegKind = IS_TTMP; 2082 } else { 2083 return false; 2084 } 2085 if (RegName.size() > RegNumIndex) { 2086 // Single 32-bit register: vXX. 2087 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 2088 return false; 2089 Parser.Lex(); 2090 RegWidth = 1; 2091 } else { 2092 // Range of registers: v[XX:YY]. ":YY" is optional. 2093 Parser.Lex(); 2094 int64_t RegLo, RegHi; 2095 if (getLexer().isNot(AsmToken::LBrac)) 2096 return false; 2097 Parser.Lex(); 2098 2099 if (getParser().parseAbsoluteExpression(RegLo)) 2100 return false; 2101 2102 const bool isRBrace = getLexer().is(AsmToken::RBrac); 2103 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 2104 return false; 2105 Parser.Lex(); 2106 2107 if (isRBrace) { 2108 RegHi = RegLo; 2109 } else { 2110 if (getParser().parseAbsoluteExpression(RegHi)) 2111 return false; 2112 2113 if (getLexer().isNot(AsmToken::RBrac)) 2114 return false; 2115 Parser.Lex(); 2116 } 2117 RegNum = (unsigned) RegLo; 2118 RegWidth = (RegHi - RegLo) + 1; 2119 } 2120 } 2121 } else if (getLexer().is(AsmToken::LBrac)) { 2122 // List of consecutive registers: [s0,s1,s2,s3] 2123 Parser.Lex(); 2124 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 2125 return false; 2126 if (RegWidth != 1) 2127 return false; 2128 RegisterKind RegKind1; 2129 unsigned Reg1, RegNum1, RegWidth1; 2130 do { 2131 if (getLexer().is(AsmToken::Comma)) { 2132 Parser.Lex(); 2133 } else if (getLexer().is(AsmToken::RBrac)) { 2134 Parser.Lex(); 2135 break; 2136 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 2137 if (RegWidth1 != 1) { 2138 return false; 2139 } 2140 if (RegKind1 != RegKind) { 2141 return false; 2142 } 2143 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 2144 return false; 2145 } 2146 } else { 2147 return false; 2148 } 2149 } while (true); 2150 } else { 2151 return false; 2152 } 2153 switch (RegKind) { 2154 case IS_SPECIAL: 2155 RegNum = 0; 2156 RegWidth = 1; 2157 break; 2158 case IS_VGPR: 2159 case IS_SGPR: 2160 case IS_AGPR: 2161 case IS_TTMP: 2162 { 2163 unsigned Size = 1; 2164 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2165 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
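      // Illustration: s[2:3] satisfies the alignment check below because the
      // first register index (2) is a multiple of the 2-dword size, while
      // s[3:4] would be rejected; for widths of 4 dwords or more the required
      // alignment is capped at 4.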
2166 Size = std::min(RegWidth, 4u); 2167 } 2168 if (RegNum % Size != 0) 2169 return false; 2170 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 2171 RegNum = RegNum / Size; 2172 int RCID = getRegClass(RegKind, RegWidth); 2173 if (RCID == -1) 2174 return false; 2175 const MCRegisterClass RC = TRI->getRegClass(RCID); 2176 if (RegNum >= RC.getNumRegs()) 2177 return false; 2178 Reg = RC.getRegister(RegNum); 2179 break; 2180 } 2181 2182 default: 2183 llvm_unreachable("unexpected register kind"); 2184 } 2185 2186 if (!subtargetHasRegister(*TRI, Reg)) 2187 return false; 2188 return true; 2189 } 2190 2191 Optional<StringRef> 2192 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2193 switch (RegKind) { 2194 case IS_VGPR: 2195 return StringRef(".amdgcn.next_free_vgpr"); 2196 case IS_SGPR: 2197 return StringRef(".amdgcn.next_free_sgpr"); 2198 default: 2199 return None; 2200 } 2201 } 2202 2203 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2204 auto SymbolName = getGprCountSymbolName(RegKind); 2205 assert(SymbolName && "initializing invalid register kind"); 2206 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2207 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2208 } 2209 2210 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2211 unsigned DwordRegIndex, 2212 unsigned RegWidth) { 2213 // Symbols are only defined for GCN targets 2214 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2215 return true; 2216 2217 auto SymbolName = getGprCountSymbolName(RegKind); 2218 if (!SymbolName) 2219 return true; 2220 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2221 2222 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2223 int64_t OldCount; 2224 2225 if (!Sym->isVariable()) 2226 return !Error(getParser().getTok().getLoc(), 2227 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2228 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2229 return !Error( 2230 getParser().getTok().getLoc(), 2231 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2232 2233 if (OldCount <= NewMax) 2234 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2235 2236 return true; 2237 } 2238 2239 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2240 const auto &Tok = Parser.getTok(); 2241 SMLoc StartLoc = Tok.getLoc(); 2242 SMLoc EndLoc = Tok.getEndLoc(); 2243 RegisterKind RegKind; 2244 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 2245 2246 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 2247 //FIXME: improve error messages (bug 41303). 
2248 Error(StartLoc, "not a valid operand."); 2249 return nullptr; 2250 } 2251 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2252 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 2253 return nullptr; 2254 } else 2255 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2256 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2257 } 2258 2259 OperandMatchResultTy 2260 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2261 // TODO: add syntactic sugar for 1/(2*PI) 2262 2263 assert(!isRegister()); 2264 assert(!isModifier()); 2265 2266 const auto& Tok = getToken(); 2267 const auto& NextTok = peekToken(); 2268 bool IsReal = Tok.is(AsmToken::Real); 2269 SMLoc S = getLoc(); 2270 bool Negate = false; 2271 2272 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2273 lex(); 2274 IsReal = true; 2275 Negate = true; 2276 } 2277 2278 if (IsReal) { 2279 // Floating-point expressions are not supported. 2280 // Can only allow floating-point literals with an 2281 // optional sign. 2282 2283 StringRef Num = getTokenStr(); 2284 lex(); 2285 2286 APFloat RealVal(APFloat::IEEEdouble()); 2287 auto roundMode = APFloat::rmNearestTiesToEven; 2288 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2289 return MatchOperand_ParseFail; 2290 } 2291 if (Negate) 2292 RealVal.changeSign(); 2293 2294 Operands.push_back( 2295 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2296 AMDGPUOperand::ImmTyNone, true)); 2297 2298 return MatchOperand_Success; 2299 2300 } else { 2301 int64_t IntVal; 2302 const MCExpr *Expr; 2303 SMLoc S = getLoc(); 2304 2305 if (HasSP3AbsModifier) { 2306 // This is a workaround for handling expressions 2307 // as arguments of SP3 'abs' modifier, for example: 2308 // |1.0| 2309 // |-1| 2310 // |1+x| 2311 // This syntax is not compatible with syntax of standard 2312 // MC expressions (due to the trailing '|'). 
2313 SMLoc EndLoc; 2314 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2315 return MatchOperand_ParseFail; 2316 } else { 2317 if (Parser.parseExpression(Expr)) 2318 return MatchOperand_ParseFail; 2319 } 2320 2321 if (Expr->evaluateAsAbsolute(IntVal)) { 2322 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2323 } else { 2324 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2325 } 2326 2327 return MatchOperand_Success; 2328 } 2329 2330 return MatchOperand_NoMatch; 2331 } 2332 2333 OperandMatchResultTy 2334 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2335 if (!isRegister()) 2336 return MatchOperand_NoMatch; 2337 2338 if (auto R = parseRegister()) { 2339 assert(R->isReg()); 2340 Operands.push_back(std::move(R)); 2341 return MatchOperand_Success; 2342 } 2343 return MatchOperand_ParseFail; 2344 } 2345 2346 OperandMatchResultTy 2347 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2348 auto res = parseReg(Operands); 2349 if (res != MatchOperand_NoMatch) { 2350 return res; 2351 } else if (isModifier()) { 2352 return MatchOperand_NoMatch; 2353 } else { 2354 return parseImm(Operands, HasSP3AbsMod); 2355 } 2356 } 2357 2358 bool 2359 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2360 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2361 const auto &str = Token.getString(); 2362 return str == "abs" || str == "neg" || str == "sext"; 2363 } 2364 return false; 2365 } 2366 2367 bool 2368 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2369 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2370 } 2371 2372 bool 2373 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2374 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2375 } 2376 2377 bool 2378 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2379 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2380 } 2381 2382 // Check if this is an operand modifier or an opcode modifier 2383 // which may look like an expression but it is not. We should 2384 // avoid parsing these modifiers as expressions. Currently 2385 // recognized sequences are: 2386 // |...| 2387 // abs(...) 2388 // neg(...) 2389 // sext(...) 2390 // -reg 2391 // -|...| 2392 // -abs(...) 2393 // name:... 2394 // Note that simple opcode modifiers like 'gds' may be parsed as 2395 // expressions; this is a special case. See getExpressionAsToken. 2396 // 2397 bool 2398 AMDGPUAsmParser::isModifier() { 2399 2400 AsmToken Tok = getToken(); 2401 AsmToken NextToken[2]; 2402 peekTokens(NextToken); 2403 2404 return isOperandModifier(Tok, NextToken[0]) || 2405 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2406 isOpcodeModifierWithVal(Tok, NextToken[0]); 2407 } 2408 2409 // Check if the current token is an SP3 'neg' modifier. 2410 // Currently this modifier is allowed in the following context: 2411 // 2412 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2413 // 2. Before an 'abs' modifier: -abs(...) 2414 // 3. Before an SP3 'abs' modifier: -|...| 2415 // 2416 // In all other cases "-" is handled as a part 2417 // of an expression that follows the sign. 
2418 // 2419 // Note: When "-" is followed by an integer literal, 2420 // this is interpreted as integer negation rather 2421 // than a floating-point NEG modifier applied to the literal. 2422 // Besides being counter-intuitive, such use of a floating-point 2423 // NEG modifier would have resulted in different meanings 2424 // of integer literals used with VOP1/2/C and VOP3, 2425 // for example: 2426 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2427 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2428 // Negative fp literals with preceding "-" are 2429 // handled likewise for uniformity 2430 // 2431 bool 2432 AMDGPUAsmParser::parseSP3NegModifier() { 2433 2434 AsmToken NextToken[2]; 2435 peekTokens(NextToken); 2436 2437 if (isToken(AsmToken::Minus) && 2438 (isRegister(NextToken[0], NextToken[1]) || 2439 NextToken[0].is(AsmToken::Pipe) || 2440 isId(NextToken[0], "abs"))) { 2441 lex(); 2442 return true; 2443 } 2444 2445 return false; 2446 } 2447 2448 OperandMatchResultTy 2449 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2450 bool AllowImm) { 2451 bool Neg, SP3Neg; 2452 bool Abs, SP3Abs; 2453 SMLoc Loc; 2454 2455 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2456 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2457 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2458 return MatchOperand_ParseFail; 2459 } 2460 2461 SP3Neg = parseSP3NegModifier(); 2462 2463 Loc = getLoc(); 2464 Neg = trySkipId("neg"); 2465 if (Neg && SP3Neg) { 2466 Error(Loc, "expected register or immediate"); 2467 return MatchOperand_ParseFail; 2468 } 2469 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2470 return MatchOperand_ParseFail; 2471 2472 Abs = trySkipId("abs"); 2473 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2474 return MatchOperand_ParseFail; 2475 2476 Loc = getLoc(); 2477 SP3Abs = trySkipToken(AsmToken::Pipe); 2478 if (Abs && SP3Abs) { 2479 Error(Loc, "expected register or immediate"); 2480 return MatchOperand_ParseFail; 2481 } 2482 2483 OperandMatchResultTy Res; 2484 if (AllowImm) { 2485 Res = parseRegOrImm(Operands, SP3Abs); 2486 } else { 2487 Res = parseReg(Operands); 2488 } 2489 if (Res != MatchOperand_Success) { 2490 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2491 } 2492 2493 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2494 return MatchOperand_ParseFail; 2495 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2496 return MatchOperand_ParseFail; 2497 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2498 return MatchOperand_ParseFail; 2499 2500 AMDGPUOperand::Modifiers Mods; 2501 Mods.Abs = Abs || SP3Abs; 2502 Mods.Neg = Neg || SP3Neg; 2503 2504 if (Mods.hasFPModifiers()) { 2505 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2506 if (Op.isExpr()) { 2507 Error(Op.getStartLoc(), "expected an absolute expression"); 2508 return MatchOperand_ParseFail; 2509 } 2510 Op.setModifiers(Mods); 2511 } 2512 return MatchOperand_Success; 2513 } 2514 2515 OperandMatchResultTy 2516 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2517 bool AllowImm) { 2518 bool Sext = trySkipId("sext"); 2519 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2520 return MatchOperand_ParseFail; 2521 2522 OperandMatchResultTy Res; 2523 if (AllowImm) { 2524 Res = parseRegOrImm(Operands); 2525 } else { 2526 Res = parseReg(Operands); 2527 } 2528 if (Res != MatchOperand_Success) { 2529 return Sext? MatchOperand_ParseFail : Res; 2530 } 2531 2532 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2533 return MatchOperand_ParseFail; 2534 2535 AMDGPUOperand::Modifiers Mods; 2536 Mods.Sext = Sext; 2537 2538 if (Mods.hasIntModifiers()) { 2539 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2540 if (Op.isExpr()) { 2541 Error(Op.getStartLoc(), "expected an absolute expression"); 2542 return MatchOperand_ParseFail; 2543 } 2544 Op.setModifiers(Mods); 2545 } 2546 2547 return MatchOperand_Success; 2548 } 2549 2550 OperandMatchResultTy 2551 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2552 return parseRegOrImmWithFPInputMods(Operands, false); 2553 } 2554 2555 OperandMatchResultTy 2556 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2557 return parseRegOrImmWithIntInputMods(Operands, false); 2558 } 2559 2560 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2561 auto Loc = getLoc(); 2562 if (trySkipId("off")) { 2563 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2564 AMDGPUOperand::ImmTyOff, false)); 2565 return MatchOperand_Success; 2566 } 2567 2568 if (!isRegister()) 2569 return MatchOperand_NoMatch; 2570 2571 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2572 if (Reg) { 2573 Operands.push_back(std::move(Reg)); 2574 return MatchOperand_Success; 2575 } 2576 2577 return MatchOperand_ParseFail; 2578 2579 } 2580 2581 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2582 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2583 2584 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2585 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2586 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2587 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2588 return Match_InvalidOperand; 2589 2590 if ((TSFlags & SIInstrFlags::VOP3) && 2591 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2592 getForcedEncodingSize() != 64) 2593 return Match_PreferE32; 2594 2595 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2596 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2597 // v_mac_f32/16 allow only dst_sel == DWORD; 2598 auto OpNum = 2599 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2600 const auto &Op = Inst.getOperand(OpNum); 2601 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2602 return Match_InvalidOperand; 2603 } 2604 } 2605 2606 return Match_Success; 2607 } 2608 2609 // Which asm variants we should check 2610 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2611 if (getForcedEncodingSize() == 32) { 2612 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2613 return makeArrayRef(Variants); 2614 } 2615 2616 if (isForcedVOP3()) { 2617 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2618 return makeArrayRef(Variants); 2619 } 2620 2621 if (isForcedSDWA()) { 2622 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2623 AMDGPUAsmVariants::SDWA9}; 2624 return makeArrayRef(Variants); 2625 } 2626 2627 if (isForcedDPP()) { 2628 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2629 return makeArrayRef(Variants); 2630 } 2631 2632 static const unsigned Variants[] = { 2633 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2634 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2635 }; 2636 2637 return makeArrayRef(Variants); 2638 } 2639 2640 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2641 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2642 const unsigned Num = Desc.getNumImplicitUses(); 2643 for (unsigned i = 0; i < Num; ++i) { 2644 unsigned Reg = Desc.ImplicitUses[i]; 2645 switch (Reg) { 2646 case AMDGPU::FLAT_SCR: 2647 case AMDGPU::VCC: 2648 case AMDGPU::VCC_LO: 2649 case AMDGPU::VCC_HI: 2650 case AMDGPU::M0: 2651 case AMDGPU::SGPR_NULL: 2652 return Reg; 2653 default: 2654 break; 2655 } 2656 } 2657 return AMDGPU::NoRegister; 2658 } 2659 2660 // NB: This code is correct only when used to check constant 2661 // bus limitations because GFX7 supports no f16 inline constants. 2662 // Note that there are no cases when a GFX7 opcode violates 2663 // constant bus limitations due to the use of an f16 constant.
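// Illustration: for a 2-byte operand a value such as 0x3C00 (1.0 in f16) is
// accepted as an inline constant below, while an arbitrary 16-bit literal is
// not; 4- and 8-byte operands are checked with isInlinableLiteral32/64.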
2664 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2665 unsigned OpIdx) const { 2666 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2667 2668 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2669 return false; 2670 } 2671 2672 const MCOperand &MO = Inst.getOperand(OpIdx); 2673 2674 int64_t Val = MO.getImm(); 2675 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2676 2677 switch (OpSize) { // expected operand size 2678 case 8: 2679 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2680 case 4: 2681 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2682 case 2: { 2683 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2684 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2685 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2686 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2687 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2688 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2689 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2690 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2691 } else { 2692 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2693 } 2694 } 2695 default: 2696 llvm_unreachable("invalid operand size"); 2697 } 2698 } 2699 2700 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2701 const MCOperand &MO = Inst.getOperand(OpIdx); 2702 if (MO.isImm()) { 2703 return !isInlineConstant(Inst, OpIdx); 2704 } 2705 return !MO.isReg() || 2706 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2707 } 2708 2709 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2710 const unsigned Opcode = Inst.getOpcode(); 2711 const MCInstrDesc &Desc = MII.get(Opcode); 2712 unsigned ConstantBusUseCount = 0; 2713 unsigned NumLiterals = 0; 2714 unsigned LiteralSize; 2715 2716 if (Desc.TSFlags & 2717 (SIInstrFlags::VOPC | 2718 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2719 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2720 SIInstrFlags::SDWA)) { 2721 // Check special imm operands (used by madmk, etc) 2722 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2723 ++ConstantBusUseCount; 2724 } 2725 2726 SmallDenseSet<unsigned> SGPRsUsed; 2727 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2728 if (SGPRUsed != AMDGPU::NoRegister) { 2729 SGPRsUsed.insert(SGPRUsed); 2730 ++ConstantBusUseCount; 2731 } 2732 2733 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2734 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2735 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2736 2737 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2738 2739 for (int OpIdx : OpIndices) { 2740 if (OpIdx == -1) break; 2741 2742 const MCOperand &MO = Inst.getOperand(OpIdx); 2743 if (usesConstantBus(Inst, OpIdx)) { 2744 if (MO.isReg()) { 2745 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2746 // Pairs of registers with a partial intersections like these 2747 // s0, s[0:1] 2748 // flat_scratch_lo, flat_scratch 2749 // flat_scratch_lo, flat_scratch_hi 2750 // are theoretically valid but they are disabled anyway. 
2751 // Note that this code mimics SIInstrInfo::verifyInstruction 2752 if (!SGPRsUsed.count(Reg)) { 2753 SGPRsUsed.insert(Reg); 2754 ++ConstantBusUseCount; 2755 } 2756 } else { // Expression or a literal 2757 2758 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2759 continue; // special operand like VINTERP attr_chan 2760 2761 // An instruction may use only one literal. 2762 // This has been validated on the previous step. 2763 // See validateVOP3Literal. 2764 // This literal may be used as more than one operand. 2765 // If all these operands are of the same size, 2766 // this literal counts as one scalar value. 2767 // Otherwise it counts as 2 scalar values. 2768 // See "GFX10 Shader Programming", section 3.6.2.3. 2769 2770 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2771 if (Size < 4) Size = 4; 2772 2773 if (NumLiterals == 0) { 2774 NumLiterals = 1; 2775 LiteralSize = Size; 2776 } else if (LiteralSize != Size) { 2777 NumLiterals = 2; 2778 } 2779 } 2780 } 2781 } 2782 } 2783 ConstantBusUseCount += NumLiterals; 2784 2785 if (isGFX10()) 2786 return ConstantBusUseCount <= 2; 2787 2788 return ConstantBusUseCount <= 1; 2789 } 2790 2791 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2792 const unsigned Opcode = Inst.getOpcode(); 2793 const MCInstrDesc &Desc = MII.get(Opcode); 2794 2795 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2796 if (DstIdx == -1 || 2797 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2798 return true; 2799 } 2800 2801 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2802 2803 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2804 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2805 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2806 2807 assert(DstIdx != -1); 2808 const MCOperand &Dst = Inst.getOperand(DstIdx); 2809 assert(Dst.isReg()); 2810 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2811 2812 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2813 2814 for (int SrcIdx : SrcIndices) { 2815 if (SrcIdx == -1) break; 2816 const MCOperand &Src = Inst.getOperand(SrcIdx); 2817 if (Src.isReg()) { 2818 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2819 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2820 return false; 2821 } 2822 } 2823 } 2824 2825 return true; 2826 } 2827 2828 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2829 2830 const unsigned Opc = Inst.getOpcode(); 2831 const MCInstrDesc &Desc = MII.get(Opc); 2832 2833 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2834 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2835 assert(ClampIdx != -1); 2836 return Inst.getOperand(ClampIdx).getImm() == 0; 2837 } 2838 2839 return true; 2840 } 2841 2842 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2843 2844 const unsigned Opc = Inst.getOpcode(); 2845 const MCInstrDesc &Desc = MII.get(Opc); 2846 2847 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2848 return true; 2849 2850 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2851 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2852 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2853 2854 assert(VDataIdx != -1); 2855 assert(DMaskIdx != -1); 2856 assert(TFEIdx != -1); 2857 2858 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2859 unsigned TFESize = 
Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2860 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2861 if (DMask == 0) 2862 DMask = 1; 2863 2864 unsigned DataSize = 2865 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 2866 if (hasPackedD16()) { 2867 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2868 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2869 DataSize = (DataSize + 1) / 2; 2870 } 2871 2872 return (VDataSize / 4) == DataSize + TFESize; 2873 } 2874 2875 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2876 const unsigned Opc = Inst.getOpcode(); 2877 const MCInstrDesc &Desc = MII.get(Opc); 2878 2879 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2880 return true; 2881 2882 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2883 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2884 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2885 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2886 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2887 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2888 2889 assert(VAddr0Idx != -1); 2890 assert(SrsrcIdx != -1); 2891 assert(DimIdx != -1); 2892 assert(SrsrcIdx > VAddr0Idx); 2893 2894 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2895 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2896 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2897 unsigned VAddrSize = 2898 IsNSA ? SrsrcIdx - VAddr0Idx 2899 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2900 2901 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2902 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2903 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2904 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 2905 if (!IsNSA) { 2906 if (AddrSize > 8) 2907 AddrSize = 16; 2908 else if (AddrSize > 4) 2909 AddrSize = 8; 2910 } 2911 2912 return VAddrSize == AddrSize; 2913 } 2914 2915 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2916 2917 const unsigned Opc = Inst.getOpcode(); 2918 const MCInstrDesc &Desc = MII.get(Opc); 2919 2920 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2921 return true; 2922 if (!Desc.mayLoad() || !Desc.mayStore()) 2923 return true; // Not atomic 2924 2925 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2926 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2927 2928 // This is an incomplete check because image_atomic_cmpswap 2929 // may only use 0x3 and 0xf while other atomic operations 2930 // may use 0x1 and 0x3. However these limitations are 2931 // verified when we check that dmask matches dst size. 2932 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2933 } 2934 2935 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2936 2937 const unsigned Opc = Inst.getOpcode(); 2938 const MCInstrDesc &Desc = MII.get(Opc); 2939 2940 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2941 return true; 2942 2943 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2944 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2945 2946 // GATHER4 instructions use dmask in a different fashion compared to 2947 // other MIMG instructions. The only useful DMASK values are 2948 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2949 // (red,red,red,red) etc.) The ISA document doesn't mention 2950 // this. 
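  // Illustration: dmask == 0x4 (blue) is accepted below, while dmask == 0x3,
  // although legal for some other MIMG operations, is rejected for gather4.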
2951 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2952 } 2953 2954 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2955 2956 const unsigned Opc = Inst.getOpcode(); 2957 const MCInstrDesc &Desc = MII.get(Opc); 2958 2959 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2960 return true; 2961 2962 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2963 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2964 if (isCI() || isSI()) 2965 return false; 2966 } 2967 2968 return true; 2969 } 2970 2971 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 2972 const unsigned Opc = Inst.getOpcode(); 2973 const MCInstrDesc &Desc = MII.get(Opc); 2974 2975 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2976 return true; 2977 2978 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2979 if (DimIdx < 0) 2980 return true; 2981 2982 long Imm = Inst.getOperand(DimIdx).getImm(); 2983 if (Imm < 0 || Imm >= 8) 2984 return false; 2985 2986 return true; 2987 } 2988 2989 static bool IsRevOpcode(const unsigned Opcode) 2990 { 2991 switch (Opcode) { 2992 case AMDGPU::V_SUBREV_F32_e32: 2993 case AMDGPU::V_SUBREV_F32_e64: 2994 case AMDGPU::V_SUBREV_F32_e32_gfx10: 2995 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 2996 case AMDGPU::V_SUBREV_F32_e32_vi: 2997 case AMDGPU::V_SUBREV_F32_e64_gfx10: 2998 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 2999 case AMDGPU::V_SUBREV_F32_e64_vi: 3000 3001 case AMDGPU::V_SUBREV_I32_e32: 3002 case AMDGPU::V_SUBREV_I32_e64: 3003 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3004 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3005 3006 case AMDGPU::V_SUBBREV_U32_e32: 3007 case AMDGPU::V_SUBBREV_U32_e64: 3008 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3009 case AMDGPU::V_SUBBREV_U32_e32_vi: 3010 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3011 case AMDGPU::V_SUBBREV_U32_e64_vi: 3012 3013 case AMDGPU::V_SUBREV_U32_e32: 3014 case AMDGPU::V_SUBREV_U32_e64: 3015 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3016 case AMDGPU::V_SUBREV_U32_e32_vi: 3017 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3018 case AMDGPU::V_SUBREV_U32_e64_vi: 3019 3020 case AMDGPU::V_SUBREV_F16_e32: 3021 case AMDGPU::V_SUBREV_F16_e64: 3022 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3023 case AMDGPU::V_SUBREV_F16_e32_vi: 3024 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3025 case AMDGPU::V_SUBREV_F16_e64_vi: 3026 3027 case AMDGPU::V_SUBREV_U16_e32: 3028 case AMDGPU::V_SUBREV_U16_e64: 3029 case AMDGPU::V_SUBREV_U16_e32_vi: 3030 case AMDGPU::V_SUBREV_U16_e64_vi: 3031 3032 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3033 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3034 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3035 3036 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3037 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3038 3039 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3040 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3041 3042 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3043 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3044 3045 case AMDGPU::V_LSHRREV_B32_e32: 3046 case AMDGPU::V_LSHRREV_B32_e64: 3047 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3048 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3049 case AMDGPU::V_LSHRREV_B32_e32_vi: 3050 case AMDGPU::V_LSHRREV_B32_e64_vi: 3051 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3052 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3053 3054 case AMDGPU::V_ASHRREV_I32_e32: 3055 case AMDGPU::V_ASHRREV_I32_e64: 3056 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3057 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3058 case AMDGPU::V_ASHRREV_I32_e32_vi: 3059 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3060 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 
3061 case AMDGPU::V_ASHRREV_I32_e64_vi: 3062 3063 case AMDGPU::V_LSHLREV_B32_e32: 3064 case AMDGPU::V_LSHLREV_B32_e64: 3065 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3066 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3067 case AMDGPU::V_LSHLREV_B32_e32_vi: 3068 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3069 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3070 case AMDGPU::V_LSHLREV_B32_e64_vi: 3071 3072 case AMDGPU::V_LSHLREV_B16_e32: 3073 case AMDGPU::V_LSHLREV_B16_e64: 3074 case AMDGPU::V_LSHLREV_B16_e32_vi: 3075 case AMDGPU::V_LSHLREV_B16_e64_vi: 3076 case AMDGPU::V_LSHLREV_B16_gfx10: 3077 3078 case AMDGPU::V_LSHRREV_B16_e32: 3079 case AMDGPU::V_LSHRREV_B16_e64: 3080 case AMDGPU::V_LSHRREV_B16_e32_vi: 3081 case AMDGPU::V_LSHRREV_B16_e64_vi: 3082 case AMDGPU::V_LSHRREV_B16_gfx10: 3083 3084 case AMDGPU::V_ASHRREV_I16_e32: 3085 case AMDGPU::V_ASHRREV_I16_e64: 3086 case AMDGPU::V_ASHRREV_I16_e32_vi: 3087 case AMDGPU::V_ASHRREV_I16_e64_vi: 3088 case AMDGPU::V_ASHRREV_I16_gfx10: 3089 3090 case AMDGPU::V_LSHLREV_B64: 3091 case AMDGPU::V_LSHLREV_B64_gfx10: 3092 case AMDGPU::V_LSHLREV_B64_vi: 3093 3094 case AMDGPU::V_LSHRREV_B64: 3095 case AMDGPU::V_LSHRREV_B64_gfx10: 3096 case AMDGPU::V_LSHRREV_B64_vi: 3097 3098 case AMDGPU::V_ASHRREV_I64: 3099 case AMDGPU::V_ASHRREV_I64_gfx10: 3100 case AMDGPU::V_ASHRREV_I64_vi: 3101 3102 case AMDGPU::V_PK_LSHLREV_B16: 3103 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3104 case AMDGPU::V_PK_LSHLREV_B16_vi: 3105 3106 case AMDGPU::V_PK_LSHRREV_B16: 3107 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3108 case AMDGPU::V_PK_LSHRREV_B16_vi: 3109 case AMDGPU::V_PK_ASHRREV_I16: 3110 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3111 case AMDGPU::V_PK_ASHRREV_I16_vi: 3112 return true; 3113 default: 3114 return false; 3115 } 3116 } 3117 3118 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3119 3120 using namespace SIInstrFlags; 3121 const unsigned Opcode = Inst.getOpcode(); 3122 const MCInstrDesc &Desc = MII.get(Opcode); 3123 3124 // lds_direct register is defined so that it can be used 3125 // with 9-bit operands only. Ignore encodings which do not accept these. 3126 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3127 return true; 3128 3129 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3130 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3131 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3132 3133 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3134 3135 // lds_direct cannot be specified as either src1 or src2. 3136 for (int SrcIdx : SrcIndices) { 3137 if (SrcIdx == -1) break; 3138 const MCOperand &Src = Inst.getOperand(SrcIdx); 3139 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3140 return false; 3141 } 3142 } 3143 3144 if (Src0Idx == -1) 3145 return true; 3146 3147 const MCOperand &Src = Inst.getOperand(Src0Idx); 3148 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3149 return true; 3150 3151 // lds_direct is specified as src0. Check additional limitations. 
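  // Illustration: "v_mov_b32 v0, lds_direct" passes this check, while SDWA
  // encodings and *rev opcodes such as v_ashrrev_i32 with lds_direct as src0
  // are rejected below.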
3152 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3153 } 3154 3155 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3156 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3157 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3158 if (Op.isFlatOffset()) 3159 return Op.getStartLoc(); 3160 } 3161 return getLoc(); 3162 } 3163 3164 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3165 const OperandVector &Operands) { 3166 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3167 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3168 return true; 3169 3170 auto Opcode = Inst.getOpcode(); 3171 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3172 assert(OpNum != -1); 3173 3174 const auto &Op = Inst.getOperand(OpNum); 3175 if (!hasFlatOffsets() && Op.getImm() != 0) { 3176 Error(getFlatOffsetLoc(Operands), 3177 "flat offset modifier is not supported on this GPU"); 3178 return false; 3179 } 3180 3181 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3182 // For FLAT segment the offset must be positive; 3183 // MSB is ignored and forced to zero. 3184 unsigned OffsetSize = isGFX9() ? 13 : 12; 3185 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3186 if (!isIntN(OffsetSize, Op.getImm())) { 3187 Error(getFlatOffsetLoc(Operands), 3188 isGFX9() ? "expected a 13-bit signed offset" : 3189 "expected a 12-bit signed offset"); 3190 return false; 3191 } 3192 } else { 3193 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3194 Error(getFlatOffsetLoc(Operands), 3195 isGFX9() ? "expected a 12-bit unsigned offset" : 3196 "expected an 11-bit unsigned offset"); 3197 return false; 3198 } 3199 } 3200 3201 return true; 3202 } 3203 3204 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3205 unsigned Opcode = Inst.getOpcode(); 3206 const MCInstrDesc &Desc = MII.get(Opcode); 3207 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3208 return true; 3209 3210 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3211 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3212 3213 const int OpIndices[] = { Src0Idx, Src1Idx }; 3214 3215 unsigned NumLiterals = 0; 3216 uint32_t LiteralValue; 3217 3218 for (int OpIdx : OpIndices) { 3219 if (OpIdx == -1) break; 3220 3221 const MCOperand &MO = Inst.getOperand(OpIdx); 3222 if (MO.isImm() && 3223 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3224 AMDGPU::isSISrcOperand(Desc, OpIdx) && 3225 !isInlineConstant(Inst, OpIdx)) { 3226 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3227 if (NumLiterals == 0 || LiteralValue != Value) { 3228 LiteralValue = Value; 3229 ++NumLiterals; 3230 } 3231 } 3232 } 3233 3234 return NumLiterals <= 1; 3235 } 3236 3237 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3238 const unsigned Opc = Inst.getOpcode(); 3239 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3240 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3241 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3242 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3243 3244 if (OpSel & ~3) 3245 return false; 3246 } 3247 return true; 3248 } 3249 3250 // Check if VCC register matches wavefront size 3251 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3252 auto FB = getFeatureBits(); 3253 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3254 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3255 } 3256 3257 // VOP3 literal 
is only allowed in GFX10+ and only one can be used 3258 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3259 unsigned Opcode = Inst.getOpcode(); 3260 const MCInstrDesc &Desc = MII.get(Opcode); 3261 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3262 return true; 3263 3264 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3265 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3266 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3267 3268 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3269 3270 unsigned NumLiterals = 0; 3271 uint32_t LiteralValue; 3272 3273 for (int OpIdx : OpIndices) { 3274 if (OpIdx == -1) break; 3275 3276 const MCOperand &MO = Inst.getOperand(OpIdx); 3277 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) 3278 continue; 3279 3280 if (!isInlineConstant(Inst, OpIdx)) { 3281 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3282 if (NumLiterals == 0 || LiteralValue != Value) { 3283 LiteralValue = Value; 3284 ++NumLiterals; 3285 } 3286 } 3287 } 3288 3289 return !NumLiterals || 3290 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3291 } 3292 3293 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3294 const SMLoc &IDLoc, 3295 const OperandVector &Operands) { 3296 if (!validateLdsDirect(Inst)) { 3297 Error(IDLoc, 3298 "invalid use of lds_direct"); 3299 return false; 3300 } 3301 if (!validateSOPLiteral(Inst)) { 3302 Error(IDLoc, 3303 "only one literal operand is allowed"); 3304 return false; 3305 } 3306 if (!validateVOP3Literal(Inst)) { 3307 Error(IDLoc, 3308 "invalid literal operand"); 3309 return false; 3310 } 3311 if (!validateConstantBusLimitations(Inst)) { 3312 Error(IDLoc, 3313 "invalid operand (violates constant bus restrictions)"); 3314 return false; 3315 } 3316 if (!validateEarlyClobberLimitations(Inst)) { 3317 Error(IDLoc, 3318 "destination must be different than all sources"); 3319 return false; 3320 } 3321 if (!validateIntClampSupported(Inst)) { 3322 Error(IDLoc, 3323 "integer clamping is not supported on this GPU"); 3324 return false; 3325 } 3326 if (!validateOpSel(Inst)) { 3327 Error(IDLoc, 3328 "invalid op_sel operand"); 3329 return false; 3330 } 3331 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
3332 if (!validateMIMGD16(Inst)) { 3333 Error(IDLoc, 3334 "d16 modifier is not supported on this GPU"); 3335 return false; 3336 } 3337 if (!validateMIMGDim(Inst)) { 3338 Error(IDLoc, "dim modifier is required on this GPU"); 3339 return false; 3340 } 3341 if (!validateMIMGDataSize(Inst)) { 3342 Error(IDLoc, 3343 "image data size does not match dmask and tfe"); 3344 return false; 3345 } 3346 if (!validateMIMGAddrSize(Inst)) { 3347 Error(IDLoc, 3348 "image address size does not match dim and a16"); 3349 return false; 3350 } 3351 if (!validateMIMGAtomicDMask(Inst)) { 3352 Error(IDLoc, 3353 "invalid atomic image dmask"); 3354 return false; 3355 } 3356 if (!validateMIMGGatherDMask(Inst)) { 3357 Error(IDLoc, 3358 "invalid image_gather dmask: only one bit must be set"); 3359 return false; 3360 } 3361 if (!validateFlatOffset(Inst, Operands)) { 3362 return false; 3363 } 3364 3365 return true; 3366 } 3367 3368 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3369 const FeatureBitset &FBS, 3370 unsigned VariantID = 0); 3371 3372 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3373 OperandVector &Operands, 3374 MCStreamer &Out, 3375 uint64_t &ErrorInfo, 3376 bool MatchingInlineAsm) { 3377 MCInst Inst; 3378 unsigned Result = Match_Success; 3379 for (auto Variant : getMatchedVariants()) { 3380 uint64_t EI; 3381 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3382 Variant); 3383 // We order match statuses from least to most specific. We use most specific 3384 // status as resulting 3385 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3386 if ((R == Match_Success) || 3387 (R == Match_PreferE32) || 3388 (R == Match_MissingFeature && Result != Match_PreferE32) || 3389 (R == Match_InvalidOperand && Result != Match_MissingFeature 3390 && Result != Match_PreferE32) || 3391 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3392 && Result != Match_MissingFeature 3393 && Result != Match_PreferE32)) { 3394 Result = R; 3395 ErrorInfo = EI; 3396 } 3397 if (R == Match_Success) 3398 break; 3399 } 3400 3401 switch (Result) { 3402 default: break; 3403 case Match_Success: 3404 if (!validateInstruction(Inst, IDLoc, Operands)) { 3405 return true; 3406 } 3407 Inst.setLoc(IDLoc); 3408 Out.EmitInstruction(Inst, getSTI()); 3409 return false; 3410 3411 case Match_MissingFeature: 3412 return Error(IDLoc, "instruction not supported on this GPU"); 3413 3414 case Match_MnemonicFail: { 3415 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3416 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3417 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3418 return Error(IDLoc, "invalid instruction" + Suggestion, 3419 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3420 } 3421 3422 case Match_InvalidOperand: { 3423 SMLoc ErrorLoc = IDLoc; 3424 if (ErrorInfo != ~0ULL) { 3425 if (ErrorInfo >= Operands.size()) { 3426 return Error(IDLoc, "too few operands for instruction"); 3427 } 3428 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3429 if (ErrorLoc == SMLoc()) 3430 ErrorLoc = IDLoc; 3431 } 3432 return Error(ErrorLoc, "invalid operand for instruction"); 3433 } 3434 3435 case Match_PreferE32: 3436 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3437 "should be encoded as e32"); 3438 } 3439 llvm_unreachable("Implement any new match types added!"); 3440 } 3441 3442 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3443 int64_t Tmp = -1; 3444 if 
(getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3445 return true; 3446 } 3447 if (getParser().parseAbsoluteExpression(Tmp)) { 3448 return true; 3449 } 3450 Ret = static_cast<uint32_t>(Tmp); 3451 return false; 3452 } 3453 3454 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3455 uint32_t &Minor) { 3456 if (ParseAsAbsoluteExpression(Major)) 3457 return TokError("invalid major version"); 3458 3459 if (getLexer().isNot(AsmToken::Comma)) 3460 return TokError("minor version number required, comma expected"); 3461 Lex(); 3462 3463 if (ParseAsAbsoluteExpression(Minor)) 3464 return TokError("invalid minor version"); 3465 3466 return false; 3467 } 3468 3469 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3470 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3471 return TokError("directive only supported for amdgcn architecture"); 3472 3473 std::string Target; 3474 3475 SMLoc TargetStart = getTok().getLoc(); 3476 if (getParser().parseEscapedString(Target)) 3477 return true; 3478 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3479 3480 std::string ExpectedTarget; 3481 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3482 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3483 3484 if (Target != ExpectedTargetOS.str()) 3485 return getParser().Error(TargetRange.Start, "target must match options", 3486 TargetRange); 3487 3488 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3489 return false; 3490 } 3491 3492 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3493 return getParser().Error(Range.Start, "value out of range", Range); 3494 } 3495 3496 bool AMDGPUAsmParser::calculateGPRBlocks( 3497 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3498 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3499 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3500 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3501 // TODO(scott.linder): These calculations are duplicated from 3502 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
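  // Rough illustration (granule sizes are target-dependent and provided by
  // IsaInfo): the register counts computed below are rounded up to the
  // hardware allocation granule and converted to block counts, e.g. 13 VGPRs
  // with a 4-register granule occupy 4 blocks.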
3503 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3504 3505 unsigned NumVGPRs = NextFreeVGPR; 3506 unsigned NumSGPRs = NextFreeSGPR; 3507 3508 if (Version.Major >= 10) 3509 NumSGPRs = 0; 3510 else { 3511 unsigned MaxAddressableNumSGPRs = 3512 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3513 3514 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3515 NumSGPRs > MaxAddressableNumSGPRs) 3516 return OutOfRangeError(SGPRRange); 3517 3518 NumSGPRs += 3519 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3520 3521 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3522 NumSGPRs > MaxAddressableNumSGPRs) 3523 return OutOfRangeError(SGPRRange); 3524 3525 if (Features.test(FeatureSGPRInitBug)) 3526 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3527 } 3528 3529 VGPRBlocks = 3530 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3531 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3532 3533 return false; 3534 } 3535 3536 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3537 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3538 return TokError("directive only supported for amdgcn architecture"); 3539 3540 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3541 return TokError("directive only supported for amdhsa OS"); 3542 3543 StringRef KernelName; 3544 if (getParser().parseIdentifier(KernelName)) 3545 return true; 3546 3547 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3548 3549 StringSet<> Seen; 3550 3551 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3552 3553 SMRange VGPRRange; 3554 uint64_t NextFreeVGPR = 0; 3555 SMRange SGPRRange; 3556 uint64_t NextFreeSGPR = 0; 3557 unsigned UserSGPRCount = 0; 3558 bool ReserveVCC = true; 3559 bool ReserveFlatScr = true; 3560 bool ReserveXNACK = hasXNACK(); 3561 Optional<bool> EnableWavefrontSize32; 3562 3563 while (true) { 3564 while (getLexer().is(AsmToken::EndOfStatement)) 3565 Lex(); 3566 3567 if (getLexer().isNot(AsmToken::Identifier)) 3568 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3569 3570 StringRef ID = getTok().getIdentifier(); 3571 SMRange IDRange = getTok().getLocRange(); 3572 Lex(); 3573 3574 if (ID == ".end_amdhsa_kernel") 3575 break; 3576 3577 if (Seen.find(ID) != Seen.end()) 3578 return TokError(".amdhsa_ directives cannot be repeated"); 3579 Seen.insert(ID); 3580 3581 SMLoc ValStart = getTok().getLoc(); 3582 int64_t IVal; 3583 if (getParser().parseAbsoluteExpression(IVal)) 3584 return true; 3585 SMLoc ValEnd = getTok().getLoc(); 3586 SMRange ValRange = SMRange(ValStart, ValEnd); 3587 3588 if (IVal < 0) 3589 return OutOfRangeError(ValRange); 3590 3591 uint64_t Val = IVal; 3592 3593 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3594 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3595 return OutOfRangeError(RANGE); \ 3596 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3597 3598 if (ID == ".amdhsa_group_segment_fixed_size") { 3599 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3600 return OutOfRangeError(ValRange); 3601 KD.group_segment_fixed_size = Val; 3602 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3603 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3604 return OutOfRangeError(ValRange); 3605 KD.private_segment_fixed_size = Val; 3606 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3607 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3608 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3609 Val, ValRange); 
3610 UserSGPRCount += 4; 3611 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3612 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3613 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3614 ValRange); 3615 UserSGPRCount += 2; 3616 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3617 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3618 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3619 ValRange); 3620 UserSGPRCount += 2; 3621 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3622 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3623 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3624 Val, ValRange); 3625 UserSGPRCount += 2; 3626 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3627 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3628 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3629 ValRange); 3630 UserSGPRCount += 2; 3631 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3632 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3633 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3634 ValRange); 3635 UserSGPRCount += 2; 3636 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3637 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3638 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3639 Val, ValRange); 3640 UserSGPRCount += 1; 3641 } else if (ID == ".amdhsa_wavefront_size32") { 3642 if (IVersion.Major < 10) 3643 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3644 IDRange); 3645 EnableWavefrontSize32 = Val; 3646 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3647 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3648 Val, ValRange); 3649 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3650 PARSE_BITS_ENTRY( 3651 KD.compute_pgm_rsrc2, 3652 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3653 ValRange); 3654 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3655 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3656 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3657 ValRange); 3658 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3659 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3660 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3661 ValRange); 3662 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3663 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3664 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3665 ValRange); 3666 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3667 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3668 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3669 ValRange); 3670 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3671 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3672 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3673 ValRange); 3674 } else if (ID == ".amdhsa_next_free_vgpr") { 3675 VGPRRange = ValRange; 3676 NextFreeVGPR = Val; 3677 } else if (ID == ".amdhsa_next_free_sgpr") { 3678 SGPRRange = ValRange; 3679 NextFreeSGPR = Val; 3680 } else if (ID == ".amdhsa_reserve_vcc") { 3681 if (!isUInt<1>(Val)) 3682 return OutOfRangeError(ValRange); 3683 ReserveVCC = Val; 3684 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3685 if (IVersion.Major < 7) 3686 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3687 IDRange); 3688 if (!isUInt<1>(Val)) 3689 return OutOfRangeError(ValRange); 3690 ReserveFlatScr = Val; 3691 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3692 if (IVersion.Major < 8) 3693 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3694 IDRange); 3695 if (!isUInt<1>(Val)) 3696 return 
OutOfRangeError(ValRange); 3697 ReserveXNACK = Val; 3698 } else if (ID == ".amdhsa_float_round_mode_32") { 3699 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3700 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3701 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3702 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3703 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3704 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3705 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3706 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3707 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3708 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3709 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3710 ValRange); 3711 } else if (ID == ".amdhsa_dx10_clamp") { 3712 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3713 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3714 } else if (ID == ".amdhsa_ieee_mode") { 3715 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3716 Val, ValRange); 3717 } else if (ID == ".amdhsa_fp16_overflow") { 3718 if (IVersion.Major < 9) 3719 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3720 IDRange); 3721 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3722 ValRange); 3723 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3724 if (IVersion.Major < 10) 3725 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3726 IDRange); 3727 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3728 ValRange); 3729 } else if (ID == ".amdhsa_memory_ordered") { 3730 if (IVersion.Major < 10) 3731 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3732 IDRange); 3733 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3734 ValRange); 3735 } else if (ID == ".amdhsa_forward_progress") { 3736 if (IVersion.Major < 10) 3737 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3738 IDRange); 3739 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3740 ValRange); 3741 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3742 PARSE_BITS_ENTRY( 3743 KD.compute_pgm_rsrc2, 3744 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3745 ValRange); 3746 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3747 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3748 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3749 Val, ValRange); 3750 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3751 PARSE_BITS_ENTRY( 3752 KD.compute_pgm_rsrc2, 3753 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3754 ValRange); 3755 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3756 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3757 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3758 Val, ValRange); 3759 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3760 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3761 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3762 Val, ValRange); 3763 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3764 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3765 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3766 Val, ValRange); 3767 } else if (ID == ".amdhsa_exception_int_div_zero") { 3768 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3769 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3770 Val, ValRange); 3771 } else { 3772 return getParser().Error(IDRange.Start, 3773 "unknown .amdhsa_kernel directive", IDRange); 3774 } 3775 3776 #undef 
PARSE_BITS_ENTRY 3777 } 3778 3779 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3780 return TokError(".amdhsa_next_free_vgpr directive is required"); 3781 3782 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3783 return TokError(".amdhsa_next_free_sgpr directive is required"); 3784 3785 unsigned VGPRBlocks; 3786 unsigned SGPRBlocks; 3787 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3788 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3789 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3790 SGPRBlocks)) 3791 return true; 3792 3793 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3794 VGPRBlocks)) 3795 return OutOfRangeError(VGPRRange); 3796 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3797 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3798 3799 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3800 SGPRBlocks)) 3801 return OutOfRangeError(SGPRRange); 3802 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3803 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3804 SGPRBlocks); 3805 3806 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3807 return TokError("too many user SGPRs enabled"); 3808 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3809 UserSGPRCount); 3810 3811 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3812 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3813 ReserveFlatScr, ReserveXNACK); 3814 return false; 3815 } 3816 3817 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3818 uint32_t Major; 3819 uint32_t Minor; 3820 3821 if (ParseDirectiveMajorMinor(Major, Minor)) 3822 return true; 3823 3824 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3825 return false; 3826 } 3827 3828 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3829 uint32_t Major; 3830 uint32_t Minor; 3831 uint32_t Stepping; 3832 StringRef VendorName; 3833 StringRef ArchName; 3834 3835 // If this directive has no arguments, then use the ISA version for the 3836 // targeted GPU. 3837 if (getLexer().is(AsmToken::EndOfStatement)) { 3838 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3839 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3840 ISA.Stepping, 3841 "AMD", "AMDGPU"); 3842 return false; 3843 } 3844 3845 if (ParseDirectiveMajorMinor(Major, Minor)) 3846 return true; 3847 3848 if (getLexer().isNot(AsmToken::Comma)) 3849 return TokError("stepping version number required, comma expected"); 3850 Lex(); 3851 3852 if (ParseAsAbsoluteExpression(Stepping)) 3853 return TokError("invalid stepping version"); 3854 3855 if (getLexer().isNot(AsmToken::Comma)) 3856 return TokError("vendor name required, comma expected"); 3857 Lex(); 3858 3859 if (getLexer().isNot(AsmToken::String)) 3860 return TokError("invalid vendor name"); 3861 3862 VendorName = getLexer().getTok().getStringContents(); 3863 Lex(); 3864 3865 if (getLexer().isNot(AsmToken::Comma)) 3866 return TokError("arch name required, comma expected"); 3867 Lex(); 3868 3869 if (getLexer().isNot(AsmToken::String)) 3870 return TokError("invalid arch name"); 3871 3872 ArchName = getLexer().getTok().getStringContents(); 3873 Lex(); 3874 3875 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3876 VendorName, ArchName); 3877 return false; 3878 } 3879 3880 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3881 amd_kernel_code_t &Header) { 3882 // max_scratch_backing_memory_byte_size is deprecated. 
Ignore it while parsing 3883 // assembly for backwards compatibility. 3884 if (ID == "max_scratch_backing_memory_byte_size") { 3885 Parser.eatToEndOfStatement(); 3886 return false; 3887 } 3888 3889 SmallString<40> ErrStr; 3890 raw_svector_ostream Err(ErrStr); 3891 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3892 return TokError(Err.str()); 3893 } 3894 Lex(); 3895 3896 if (ID == "enable_wavefront_size32") { 3897 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 3898 if (!isGFX10()) 3899 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 3900 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3901 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 3902 } else { 3903 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3904 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 3905 } 3906 } 3907 3908 if (ID == "wavefront_size") { 3909 if (Header.wavefront_size == 5) { 3910 if (!isGFX10()) 3911 return TokError("wavefront_size=5 is only allowed on GFX10+"); 3912 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3913 return TokError("wavefront_size=5 requires +WavefrontSize32"); 3914 } else if (Header.wavefront_size == 6) { 3915 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3916 return TokError("wavefront_size=6 requires +WavefrontSize64"); 3917 } 3918 } 3919 3920 if (ID == "enable_wgp_mode") { 3921 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 3922 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 3923 } 3924 3925 if (ID == "enable_mem_ordered") { 3926 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 3927 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 3928 } 3929 3930 if (ID == "enable_fwd_progress") { 3931 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 3932 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 3933 } 3934 3935 return false; 3936 } 3937 3938 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3939 amd_kernel_code_t Header; 3940 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3941 3942 while (true) { 3943 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3944 // will set the current token to EndOfStatement. 
3945 while(getLexer().is(AsmToken::EndOfStatement)) 3946 Lex(); 3947 3948 if (getLexer().isNot(AsmToken::Identifier)) 3949 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3950 3951 StringRef ID = getLexer().getTok().getIdentifier(); 3952 Lex(); 3953 3954 if (ID == ".end_amd_kernel_code_t") 3955 break; 3956 3957 if (ParseAMDKernelCodeTValue(ID, Header)) 3958 return true; 3959 } 3960 3961 getTargetStreamer().EmitAMDKernelCodeT(Header); 3962 3963 return false; 3964 } 3965 3966 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3967 if (getLexer().isNot(AsmToken::Identifier)) 3968 return TokError("expected symbol name"); 3969 3970 StringRef KernelName = Parser.getTok().getString(); 3971 3972 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3973 ELF::STT_AMDGPU_HSA_KERNEL); 3974 Lex(); 3975 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3976 KernelScope.initialize(getContext()); 3977 return false; 3978 } 3979 3980 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3981 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3982 return Error(getParser().getTok().getLoc(), 3983 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3984 "architectures"); 3985 } 3986 3987 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3988 3989 std::string ISAVersionStringFromSTI; 3990 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3991 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3992 3993 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3994 return Error(getParser().getTok().getLoc(), 3995 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3996 "arguments specified through the command line"); 3997 } 3998 3999 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4000 Lex(); 4001 4002 return false; 4003 } 4004 4005 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4006 const char *AssemblerDirectiveBegin; 4007 const char *AssemblerDirectiveEnd; 4008 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4009 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4010 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4011 HSAMD::V3::AssemblerDirectiveEnd) 4012 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4013 HSAMD::AssemblerDirectiveEnd); 4014 4015 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4016 return Error(getParser().getTok().getLoc(), 4017 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4018 "not available on non-amdhsa OSes")).str()); 4019 } 4020 4021 std::string HSAMetadataString; 4022 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4023 HSAMetadataString)) 4024 return true; 4025 4026 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4027 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4028 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4029 } else { 4030 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4031 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4032 } 4033 4034 return false; 4035 } 4036 4037 /// Common code to parse out a block of text (typically YAML) between start and 4038 /// end directives. 
4039 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4040 const char *AssemblerDirectiveEnd, 4041 std::string &CollectString) { 4042 4043 raw_string_ostream CollectStream(CollectString); 4044 4045 getLexer().setSkipSpace(false); 4046 4047 bool FoundEnd = false; 4048 while (!getLexer().is(AsmToken::Eof)) { 4049 while (getLexer().is(AsmToken::Space)) { 4050 CollectStream << getLexer().getTok().getString(); 4051 Lex(); 4052 } 4053 4054 if (getLexer().is(AsmToken::Identifier)) { 4055 StringRef ID = getLexer().getTok().getIdentifier(); 4056 if (ID == AssemblerDirectiveEnd) { 4057 Lex(); 4058 FoundEnd = true; 4059 break; 4060 } 4061 } 4062 4063 CollectStream << Parser.parseStringToEndOfStatement() 4064 << getContext().getAsmInfo()->getSeparatorString(); 4065 4066 Parser.eatToEndOfStatement(); 4067 } 4068 4069 getLexer().setSkipSpace(true); 4070 4071 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4072 return TokError(Twine("expected directive ") + 4073 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4074 } 4075 4076 CollectStream.flush(); 4077 return false; 4078 } 4079 4080 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4081 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4082 std::string String; 4083 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4084 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4085 return true; 4086 4087 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4088 if (!PALMetadata->setFromString(String)) 4089 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4090 return false; 4091 } 4092 4093 /// Parse the assembler directive for old linear-format PAL metadata. 4094 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4095 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4096 return Error(getParser().getTok().getLoc(), 4097 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4098 "not available on non-amdpal OSes")).str()); 4099 } 4100 4101 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4102 PALMetadata->setLegacy(); 4103 for (;;) { 4104 uint32_t Key, Value; 4105 if (ParseAsAbsoluteExpression(Key)) { 4106 return TokError(Twine("invalid value in ") + 4107 Twine(PALMD::AssemblerDirective)); 4108 } 4109 if (getLexer().isNot(AsmToken::Comma)) { 4110 return TokError(Twine("expected an even number of values in ") + 4111 Twine(PALMD::AssemblerDirective)); 4112 } 4113 Lex(); 4114 if (ParseAsAbsoluteExpression(Value)) { 4115 return TokError(Twine("invalid value in ") + 4116 Twine(PALMD::AssemblerDirective)); 4117 } 4118 PALMetadata->setRegister(Key, Value); 4119 if (getLexer().isNot(AsmToken::Comma)) 4120 break; 4121 Lex(); 4122 } 4123 return false; 4124 } 4125 4126 /// ParseDirectiveAMDGPULDS 4127 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4128 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4129 if (getParser().checkForValidSection()) 4130 return true; 4131 4132 StringRef Name; 4133 SMLoc NameLoc = getLexer().getLoc(); 4134 if (getParser().parseIdentifier(Name)) 4135 return TokError("expected identifier in directive"); 4136 4137 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4138 if (parseToken(AsmToken::Comma, "expected ','")) 4139 return true; 4140 4141 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4142 4143 int64_t Size; 4144 SMLoc SizeLoc = getLexer().getLoc(); 4145 if (getParser().parseAbsoluteExpression(Size)) 4146 return true; 4147 if (Size < 0) 4148 return 
Error(SizeLoc, "size must be non-negative"); 4149 if (Size > LocalMemorySize) 4150 return Error(SizeLoc, "size is too large"); 4151 4152 int64_t Align = 4; 4153 if (getLexer().is(AsmToken::Comma)) { 4154 Lex(); 4155 SMLoc AlignLoc = getLexer().getLoc(); 4156 if (getParser().parseAbsoluteExpression(Align)) 4157 return true; 4158 if (Align < 0 || !isPowerOf2_64(Align)) 4159 return Error(AlignLoc, "alignment must be a power of two"); 4160 4161 // Alignment larger than the size of LDS is possible in theory, as long 4162 // as the linker manages to place the symbol at address 0, but we do want 4163 // to make sure the alignment fits nicely into a 32-bit integer. 4164 if (Align >= 1u << 31) 4165 return Error(AlignLoc, "alignment is too large"); 4166 } 4167 4168 if (parseToken(AsmToken::EndOfStatement, 4169 "unexpected token in '.amdgpu_lds' directive")) 4170 return true; 4171 4172 Symbol->redefineIfPossible(); 4173 if (!Symbol->isUndefined()) 4174 return Error(NameLoc, "invalid symbol redefinition"); 4175 4176 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4177 return false; 4178 } 4179 4180 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4181 StringRef IDVal = DirectiveID.getString(); 4182 4183 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4184 if (IDVal == ".amdgcn_target") 4185 return ParseDirectiveAMDGCNTarget(); 4186 4187 if (IDVal == ".amdhsa_kernel") 4188 return ParseDirectiveAMDHSAKernel(); 4189 4190 // TODO: Restructure/combine with PAL metadata directive. 4191 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4192 return ParseDirectiveHSAMetadata(); 4193 } else { 4194 if (IDVal == ".hsa_code_object_version") 4195 return ParseDirectiveHSACodeObjectVersion(); 4196 4197 if (IDVal == ".hsa_code_object_isa") 4198 return ParseDirectiveHSACodeObjectISA(); 4199 4200 if (IDVal == ".amd_kernel_code_t") 4201 return ParseDirectiveAMDKernelCodeT(); 4202 4203 if (IDVal == ".amdgpu_hsa_kernel") 4204 return ParseDirectiveAMDGPUHsaKernel(); 4205 4206 if (IDVal == ".amd_amdgpu_isa") 4207 return ParseDirectiveISAVersion(); 4208 4209 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4210 return ParseDirectiveHSAMetadata(); 4211 } 4212 4213 if (IDVal == ".amdgpu_lds") 4214 return ParseDirectiveAMDGPULDS(); 4215 4216 if (IDVal == PALMD::AssemblerDirectiveBegin) 4217 return ParseDirectivePALMetadataBegin(); 4218 4219 if (IDVal == PALMD::AssemblerDirective) 4220 return ParseDirectivePALMetadata(); 4221 4222 return true; 4223 } 4224 4225 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4226 unsigned RegNo) const { 4227 4228 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4229 R.isValid(); ++R) { 4230 if (*R == RegNo) 4231 return isGFX9() || isGFX10(); 4232 } 4233 4234 // GFX10 has 2 more SGPRs 104 and 105.
4235 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4236 R.isValid(); ++R) { 4237 if (*R == RegNo) 4238 return hasSGPR104_SGPR105(); 4239 } 4240 4241 switch (RegNo) { 4242 case AMDGPU::SRC_SHARED_BASE: 4243 case AMDGPU::SRC_SHARED_LIMIT: 4244 case AMDGPU::SRC_PRIVATE_BASE: 4245 case AMDGPU::SRC_PRIVATE_LIMIT: 4246 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4247 return !isCI() && !isSI() && !isVI(); 4248 case AMDGPU::TBA: 4249 case AMDGPU::TBA_LO: 4250 case AMDGPU::TBA_HI: 4251 case AMDGPU::TMA: 4252 case AMDGPU::TMA_LO: 4253 case AMDGPU::TMA_HI: 4254 return !isGFX9() && !isGFX10(); 4255 case AMDGPU::XNACK_MASK: 4256 case AMDGPU::XNACK_MASK_LO: 4257 case AMDGPU::XNACK_MASK_HI: 4258 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4259 case AMDGPU::SGPR_NULL: 4260 return isGFX10(); 4261 default: 4262 break; 4263 } 4264 4265 if (isCI()) 4266 return true; 4267 4268 if (isSI() || isGFX10()) { 4269 // No flat_scr on SI. 4270 // On GFX10 flat scratch is not a valid register operand and can only be 4271 // accessed with s_setreg/s_getreg. 4272 switch (RegNo) { 4273 case AMDGPU::FLAT_SCR: 4274 case AMDGPU::FLAT_SCR_LO: 4275 case AMDGPU::FLAT_SCR_HI: 4276 return false; 4277 default: 4278 return true; 4279 } 4280 } 4281 4282 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4283 // SI/CI have. 4284 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4285 R.isValid(); ++R) { 4286 if (*R == RegNo) 4287 return hasSGPR102_SGPR103(); 4288 } 4289 4290 return true; 4291 } 4292 4293 OperandMatchResultTy 4294 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4295 OperandMode Mode) { 4296 // Try to parse with a custom parser 4297 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4298 4299 // If we successfully parsed the operand or if there was an error parsing, 4300 // we are done. 4301 // 4302 // If we are parsing after we reach EndOfStatement then this means we 4303 // are appending default values to the Operands list. This is only done 4304 // by a custom parser, so we shouldn't continue on to the generic parsing. 4305 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4306 getLexer().is(AsmToken::EndOfStatement)) 4307 return ResTy; 4308 4309 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4310 unsigned Prefix = Operands.size(); 4311 SMLoc LBraceLoc = getTok().getLoc(); 4312 Parser.Lex(); // eat the '[' 4313 4314 for (;;) { 4315 ResTy = parseReg(Operands); 4316 if (ResTy != MatchOperand_Success) 4317 return ResTy; 4318 4319 if (getLexer().is(AsmToken::RBrac)) 4320 break; 4321 4322 if (getLexer().isNot(AsmToken::Comma)) 4323 return MatchOperand_ParseFail; 4324 Parser.Lex(); 4325 } 4326 4327 if (Operands.size() - Prefix > 1) { 4328 Operands.insert(Operands.begin() + Prefix, 4329 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4330 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4331 getTok().getLoc())); 4332 } 4333 4334 Parser.Lex(); // eat the ']' 4335 return MatchOperand_Success; 4336 } 4337 4338 return parseRegOrImm(Operands); 4339 } 4340 4341 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4342 // Clear any forced encodings from the previous instruction.
4343 setForcedEncodingSize(0); 4344 setForcedDPP(false); 4345 setForcedSDWA(false); 4346 4347 if (Name.endswith("_e64")) { 4348 setForcedEncodingSize(64); 4349 return Name.substr(0, Name.size() - 4); 4350 } else if (Name.endswith("_e32")) { 4351 setForcedEncodingSize(32); 4352 return Name.substr(0, Name.size() - 4); 4353 } else if (Name.endswith("_dpp")) { 4354 setForcedDPP(true); 4355 return Name.substr(0, Name.size() - 4); 4356 } else if (Name.endswith("_sdwa")) { 4357 setForcedSDWA(true); 4358 return Name.substr(0, Name.size() - 5); 4359 } 4360 return Name; 4361 } 4362 4363 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4364 StringRef Name, 4365 SMLoc NameLoc, OperandVector &Operands) { 4366 // Add the instruction mnemonic 4367 Name = parseMnemonicSuffix(Name); 4368 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4369 4370 bool IsMIMG = Name.startswith("image_"); 4371 4372 while (!getLexer().is(AsmToken::EndOfStatement)) { 4373 OperandMode Mode = OperandMode_Default; 4374 if (IsMIMG && isGFX10() && Operands.size() == 2) 4375 Mode = OperandMode_NSA; 4376 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4377 4378 // Eat the comma or space if there is one. 4379 if (getLexer().is(AsmToken::Comma)) 4380 Parser.Lex(); 4381 4382 switch (Res) { 4383 case MatchOperand_Success: break; 4384 case MatchOperand_ParseFail: 4385 // FIXME: use real operand location rather than the current location. 4386 Error(getLexer().getLoc(), "failed parsing operand."); 4387 while (!getLexer().is(AsmToken::EndOfStatement)) { 4388 Parser.Lex(); 4389 } 4390 return true; 4391 case MatchOperand_NoMatch: 4392 // FIXME: use real operand location rather than the current location. 4393 Error(getLexer().getLoc(), "not a valid operand."); 4394 while (!getLexer().is(AsmToken::EndOfStatement)) { 4395 Parser.Lex(); 4396 } 4397 return true; 4398 } 4399 } 4400 4401 return false; 4402 } 4403 4404 //===----------------------------------------------------------------------===// 4405 // Utility functions 4406 //===----------------------------------------------------------------------===// 4407 4408 OperandMatchResultTy 4409 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4410 4411 if (!trySkipId(Prefix, AsmToken::Colon)) 4412 return MatchOperand_NoMatch; 4413 4414 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4415 } 4416 4417 OperandMatchResultTy 4418 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4419 AMDGPUOperand::ImmTy ImmTy, 4420 bool (*ConvertResult)(int64_t&)) { 4421 SMLoc S = getLoc(); 4422 int64_t Value = 0; 4423 4424 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4425 if (Res != MatchOperand_Success) 4426 return Res; 4427 4428 if (ConvertResult && !ConvertResult(Value)) { 4429 Error(S, "invalid " + StringRef(Prefix) + " value."); 4430 } 4431 4432 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4433 return MatchOperand_Success; 4434 } 4435 4436 OperandMatchResultTy 4437 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4438 OperandVector &Operands, 4439 AMDGPUOperand::ImmTy ImmTy, 4440 bool (*ConvertResult)(int64_t&)) { 4441 SMLoc S = getLoc(); 4442 if (!trySkipId(Prefix, AsmToken::Colon)) 4443 return MatchOperand_NoMatch; 4444 4445 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4446 return MatchOperand_ParseFail; 4447 4448 unsigned Val = 0; 4449 const unsigned MaxSize = 4; 4450 4451 // FIXME: How to verify the number of elements matches the number of src 4452 // operands? 4453 for (int I = 0; ; ++I) { 4454 int64_t Op; 4455 SMLoc Loc = getLoc(); 4456 if (!parseExpr(Op)) 4457 return MatchOperand_ParseFail; 4458 4459 if (Op != 0 && Op != 1) { 4460 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4461 return MatchOperand_ParseFail; 4462 } 4463 4464 Val |= (Op << I); 4465 4466 if (trySkipToken(AsmToken::RBrac)) 4467 break; 4468 4469 if (I + 1 == MaxSize) { 4470 Error(getLoc(), "expected a closing square bracket"); 4471 return MatchOperand_ParseFail; 4472 } 4473 4474 if (!skipToken(AsmToken::Comma, "expected a comma")) 4475 return MatchOperand_ParseFail; 4476 } 4477 4478 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4479 return MatchOperand_Success; 4480 } 4481 4482 OperandMatchResultTy 4483 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4484 AMDGPUOperand::ImmTy ImmTy) { 4485 int64_t Bit = 0; 4486 SMLoc S = Parser.getTok().getLoc(); 4487 4488 // We are at the end of the statement, and this is a default argument, so 4489 // use a default value. 
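// For illustration (an assumed example, not from this file): with Name == "glc",
// the bare token "glc" sets the bit, a matching "no"-prefixed token such as
// "noglc" clears it, and any other identifier is reported as no match.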
4490 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4491 switch(getLexer().getKind()) { 4492 case AsmToken::Identifier: { 4493 StringRef Tok = Parser.getTok().getString(); 4494 if (Tok == Name) { 4495 if (Tok == "r128" && isGFX9()) 4496 Error(S, "r128 modifier is not supported on this GPU"); 4497 if (Tok == "a16" && !isGFX9() && !isGFX10()) 4498 Error(S, "a16 modifier is not supported on this GPU"); 4499 Bit = 1; 4500 Parser.Lex(); 4501 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4502 Bit = 0; 4503 Parser.Lex(); 4504 } else { 4505 return MatchOperand_NoMatch; 4506 } 4507 break; 4508 } 4509 default: 4510 return MatchOperand_NoMatch; 4511 } 4512 } 4513 4514 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4515 return MatchOperand_ParseFail; 4516 4517 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4518 return MatchOperand_Success; 4519 } 4520 4521 static void addOptionalImmOperand( 4522 MCInst& Inst, const OperandVector& Operands, 4523 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4524 AMDGPUOperand::ImmTy ImmT, 4525 int64_t Default = 0) { 4526 auto i = OptionalIdx.find(ImmT); 4527 if (i != OptionalIdx.end()) { 4528 unsigned Idx = i->second; 4529 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4530 } else { 4531 Inst.addOperand(MCOperand::createImm(Default)); 4532 } 4533 } 4534 4535 OperandMatchResultTy 4536 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4537 if (getLexer().isNot(AsmToken::Identifier)) { 4538 return MatchOperand_NoMatch; 4539 } 4540 StringRef Tok = Parser.getTok().getString(); 4541 if (Tok != Prefix) { 4542 return MatchOperand_NoMatch; 4543 } 4544 4545 Parser.Lex(); 4546 if (getLexer().isNot(AsmToken::Colon)) { 4547 return MatchOperand_ParseFail; 4548 } 4549 4550 Parser.Lex(); 4551 if (getLexer().isNot(AsmToken::Identifier)) { 4552 return MatchOperand_ParseFail; 4553 } 4554 4555 Value = Parser.getTok().getString(); 4556 return MatchOperand_Success; 4557 } 4558 4559 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4560 // values to live in a joint format operand in the MCInst encoding. 4561 OperandMatchResultTy 4562 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4563 SMLoc S = Parser.getTok().getLoc(); 4564 int64_t Dfmt = 0, Nfmt = 0; 4565 // dfmt and nfmt can appear in either order, and each is optional. 
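// Illustrative operand fragments (a sketch, assuming the usual dfmt/nfmt
// spellings):
//   ... dfmt:1, nfmt:2 ...      or      ... nfmt:2, dfmt:1 ...
// An omitted prefix keeps its default of 0 in the combined format operand.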
4566 bool GotDfmt = false, GotNfmt = false; 4567 while (!GotDfmt || !GotNfmt) { 4568 if (!GotDfmt) { 4569 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4570 if (Res != MatchOperand_NoMatch) { 4571 if (Res != MatchOperand_Success) 4572 return Res; 4573 if (Dfmt >= 16) { 4574 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4575 return MatchOperand_ParseFail; 4576 } 4577 GotDfmt = true; 4578 Parser.Lex(); 4579 continue; 4580 } 4581 } 4582 if (!GotNfmt) { 4583 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4584 if (Res != MatchOperand_NoMatch) { 4585 if (Res != MatchOperand_Success) 4586 return Res; 4587 if (Nfmt >= 8) { 4588 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4589 return MatchOperand_ParseFail; 4590 } 4591 GotNfmt = true; 4592 Parser.Lex(); 4593 continue; 4594 } 4595 } 4596 break; 4597 } 4598 if (!GotDfmt && !GotNfmt) 4599 return MatchOperand_NoMatch; 4600 auto Format = Dfmt | Nfmt << 4; 4601 Operands.push_back( 4602 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4603 return MatchOperand_Success; 4604 } 4605 4606 //===----------------------------------------------------------------------===// 4607 // ds 4608 //===----------------------------------------------------------------------===// 4609 4610 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4611 const OperandVector &Operands) { 4612 OptionalImmIndexMap OptionalIdx; 4613 4614 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4615 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4616 4617 // Add the register arguments 4618 if (Op.isReg()) { 4619 Op.addRegOperands(Inst, 1); 4620 continue; 4621 } 4622 4623 // Handle optional arguments 4624 OptionalIdx[Op.getImmTy()] = i; 4625 } 4626 4627 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4628 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4629 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4630 4631 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4632 } 4633 4634 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4635 bool IsGdsHardcoded) { 4636 OptionalImmIndexMap OptionalIdx; 4637 4638 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4639 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4640 4641 // Add the register arguments 4642 if (Op.isReg()) { 4643 Op.addRegOperands(Inst, 1); 4644 continue; 4645 } 4646 4647 if (Op.isToken() && Op.getToken() == "gds") { 4648 IsGdsHardcoded = true; 4649 continue; 4650 } 4651 4652 // Handle optional arguments 4653 OptionalIdx[Op.getImmTy()] = i; 4654 } 4655 4656 AMDGPUOperand::ImmTy OffsetType = 4657 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4658 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4659 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4660 AMDGPUOperand::ImmTyOffset; 4661 4662 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4663 4664 if (!IsGdsHardcoded) { 4665 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4666 } 4667 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4668 } 4669 4670 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4671 OptionalImmIndexMap OptionalIdx; 4672 4673 unsigned OperandIdx[4]; 4674 unsigned EnMask = 0; 4675 int SrcIdx = 0; 4676 4677 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4678 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4679 4680 // Add the register arguments 4681 if (Op.isReg()) { 4682 assert(SrcIdx < 4); 4683 OperandIdx[SrcIdx] = Inst.size(); 4684 Op.addRegOperands(Inst, 1); 4685 ++SrcIdx; 4686 continue; 4687 } 4688 4689 if (Op.isOff()) { 4690 assert(SrcIdx < 4); 4691 OperandIdx[SrcIdx] = Inst.size(); 4692 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4693 ++SrcIdx; 4694 continue; 4695 } 4696 4697 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4698 Op.addImmOperands(Inst, 1); 4699 continue; 4700 } 4701 4702 if (Op.isToken() && Op.getToken() == "done") 4703 continue; 4704 4705 // Handle optional arguments 4706 OptionalIdx[Op.getImmTy()] = i; 4707 } 4708 4709 assert(SrcIdx == 4); 4710 4711 bool Compr = false; 4712 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4713 Compr = true; 4714 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4715 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4716 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4717 } 4718 4719 for (auto i = 0; i < SrcIdx; ++i) { 4720 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4721 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4722 } 4723 } 4724 4725 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4726 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4727 4728 Inst.addOperand(MCOperand::createImm(EnMask)); 4729 } 4730 4731 //===----------------------------------------------------------------------===// 4732 // s_waitcnt 4733 //===----------------------------------------------------------------------===// 4734 4735 static bool 4736 encodeCnt( 4737 const AMDGPU::IsaVersion ISA, 4738 int64_t &IntVal, 4739 int64_t CntVal, 4740 bool Saturate, 4741 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4742 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4743 { 4744 bool Failed = false; 4745 4746 IntVal = encode(ISA, IntVal, CntVal); 4747 if (CntVal != decode(ISA, IntVal)) { 4748 if (Saturate) { 4749 IntVal = encode(ISA, IntVal, -1); 4750 } else { 4751 Failed = true; 4752 } 4753 } 4754 return Failed; 4755 } 4756 4757 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4758 4759 SMLoc CntLoc = getLoc(); 4760 StringRef CntName = getTokenStr(); 4761 4762 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4763 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4764 return false; 4765 4766 int64_t CntVal; 4767 SMLoc ValLoc = getLoc(); 4768 if (!parseExpr(CntVal)) 4769 return false; 4770 4771 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4772 4773 bool Failed = true; 4774 bool Sat = CntName.endswith("_sat"); 4775 4776 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4777 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4778 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4779 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4780 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4781 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4782 } else { 4783 Error(CntLoc, "invalid counter name " + CntName); 4784 return false; 4785 } 4786 4787 if (Failed) { 4788 Error(ValLoc, "too large value for " + CntName); 4789 return false; 4790 } 4791 4792 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4793 return false; 4794 4795 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4796 if (isToken(AsmToken::EndOfStatement)) { 4797 Error(getLoc(), "expected a counter name"); 4798 return false; 4799 } 4800 } 4801 4802 return true; 4803 } 4804 4805 OperandMatchResultTy 4806 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4807 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4808 int64_t Waitcnt = getWaitcntBitMask(ISA); 4809 SMLoc S = getLoc(); 4810 4811 // If parse failed, do not return error code 4812 // to avoid excessive error messages. 
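// Illustrative forms accepted here (a sketch, assuming the usual s_waitcnt
// spellings):
//   s_waitcnt vmcnt(0) lgkmcnt(0)   -- symbolic counters, optionally separated
//                                      by '&' or ','
//   s_waitcnt 0                     -- a raw integer expression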
4813 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4814 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4815 } else { 4816 parseExpr(Waitcnt); 4817 } 4818 4819 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4820 return MatchOperand_Success; 4821 } 4822 4823 bool 4824 AMDGPUOperand::isSWaitCnt() const { 4825 return isImm(); 4826 } 4827 4828 //===----------------------------------------------------------------------===// 4829 // hwreg 4830 //===----------------------------------------------------------------------===// 4831 4832 bool 4833 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 4834 int64_t &Offset, 4835 int64_t &Width) { 4836 using namespace llvm::AMDGPU::Hwreg; 4837 4838 // The register may be specified by name or using a numeric code 4839 if (isToken(AsmToken::Identifier) && 4840 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 4841 HwReg.IsSymbolic = true; 4842 lex(); // skip message name 4843 } else if (!parseExpr(HwReg.Id)) { 4844 return false; 4845 } 4846 4847 if (trySkipToken(AsmToken::RParen)) 4848 return true; 4849 4850 // parse optional params 4851 return 4852 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 4853 parseExpr(Offset) && 4854 skipToken(AsmToken::Comma, "expected a comma") && 4855 parseExpr(Width) && 4856 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4857 } 4858 4859 bool 4860 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 4861 const int64_t Offset, 4862 const int64_t Width, 4863 const SMLoc Loc) { 4864 4865 using namespace llvm::AMDGPU::Hwreg; 4866 4867 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 4868 Error(Loc, "specified hardware register is not supported on this GPU"); 4869 return false; 4870 } else if (!isValidHwreg(HwReg.Id)) { 4871 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 4872 return false; 4873 } else if (!isValidHwregOffset(Offset)) { 4874 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 4875 return false; 4876 } else if (!isValidHwregWidth(Width)) { 4877 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 4878 return false; 4879 } 4880 return true; 4881 } 4882 4883 OperandMatchResultTy 4884 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4885 using namespace llvm::AMDGPU::Hwreg; 4886 4887 int64_t ImmVal = 0; 4888 SMLoc Loc = getLoc(); 4889 4890 // If parse failed, do not return error code 4891 // to avoid excessive error messages. 
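// Illustrative forms accepted here (a sketch, assuming the usual hwreg
// spellings):
//   hwreg(HW_REG_TRAPSTS)           -- symbolic id, default offset and width
//   hwreg(3, 0, 32)                 -- numeric id, explicit offset and width
//   a plain 16-bit immediate expression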
4892 if (trySkipId("hwreg", AsmToken::LParen)) { 4893 OperandInfoTy HwReg(ID_UNKNOWN_); 4894 int64_t Offset = OFFSET_DEFAULT_; 4895 int64_t Width = WIDTH_DEFAULT_; 4896 if (parseHwregBody(HwReg, Offset, Width) && 4897 validateHwreg(HwReg, Offset, Width, Loc)) { 4898 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 4899 } 4900 } else if (parseExpr(ImmVal)) { 4901 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 4902 Error(Loc, "invalid immediate: only 16-bit values are legal"); 4903 } 4904 4905 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 4906 return MatchOperand_Success; 4907 } 4908 4909 bool AMDGPUOperand::isHwreg() const { 4910 return isImmTy(ImmTyHwreg); 4911 } 4912 4913 //===----------------------------------------------------------------------===// 4914 // sendmsg 4915 //===----------------------------------------------------------------------===// 4916 4917 bool 4918 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 4919 OperandInfoTy &Op, 4920 OperandInfoTy &Stream) { 4921 using namespace llvm::AMDGPU::SendMsg; 4922 4923 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 4924 Msg.IsSymbolic = true; 4925 lex(); // skip message name 4926 } else if (!parseExpr(Msg.Id)) { 4927 return false; 4928 } 4929 4930 if (trySkipToken(AsmToken::Comma)) { 4931 Op.IsDefined = true; 4932 if (isToken(AsmToken::Identifier) && 4933 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 4934 lex(); // skip operation name 4935 } else if (!parseExpr(Op.Id)) { 4936 return false; 4937 } 4938 4939 if (trySkipToken(AsmToken::Comma)) { 4940 Stream.IsDefined = true; 4941 if (!parseExpr(Stream.Id)) 4942 return false; 4943 } 4944 } 4945 4946 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4947 } 4948 4949 bool 4950 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 4951 const OperandInfoTy &Op, 4952 const OperandInfoTy &Stream, 4953 const SMLoc S) { 4954 using namespace llvm::AMDGPU::SendMsg; 4955 4956 // Validation strictness depends on whether the message is specified 4957 // in a symbolic or in a numeric form. In the latter case, 4958 // only the possibility of encoding is checked. 4959 bool Strict = Msg.IsSymbolic; 4960 4961 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 4962 Error(S, "invalid message id"); 4963 return false; 4964 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 4965 Error(S, Op.IsDefined ? 4966 "message does not support operations" : 4967 "missing message operation"); 4968 return false; 4969 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 4970 Error(S, "invalid operation id"); 4971 return false; 4972 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 4973 Error(S, "message operation does not support streams"); 4974 return false; 4975 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 4976 Error(S, "invalid message stream id"); 4977 return false; 4978 } 4979 return true; 4980 } 4981 4982 OperandMatchResultTy 4983 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4984 using namespace llvm::AMDGPU::SendMsg; 4985 4986 int64_t ImmVal = 0; 4987 SMLoc Loc = getLoc(); 4988 4989 // If parse failed, do not return error code 4990 // to avoid excessive error messages. 
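// Illustrative forms accepted here (a sketch, assuming the usual sendmsg
// spellings):
//   sendmsg(MSG_GS, GS_OP_EMIT, 0)  -- symbolic message, operation and stream
//   sendmsg(2, 1)                   -- numeric message and operation ids
//   a plain 16-bit immediate expression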
4991 if (trySkipId("sendmsg", AsmToken::LParen)) { 4992 OperandInfoTy Msg(ID_UNKNOWN_); 4993 OperandInfoTy Op(OP_NONE_); 4994 OperandInfoTy Stream(STREAM_ID_NONE_); 4995 if (parseSendMsgBody(Msg, Op, Stream) && 4996 validateSendMsg(Msg, Op, Stream, Loc)) { 4997 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 4998 } 4999 } else if (parseExpr(ImmVal)) { 5000 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5001 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5002 } 5003 5004 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5005 return MatchOperand_Success; 5006 } 5007 5008 bool AMDGPUOperand::isSendMsg() const { 5009 return isImmTy(ImmTySendMsg); 5010 } 5011 5012 //===----------------------------------------------------------------------===// 5013 // v_interp 5014 //===----------------------------------------------------------------------===// 5015 5016 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5017 if (getLexer().getKind() != AsmToken::Identifier) 5018 return MatchOperand_NoMatch; 5019 5020 StringRef Str = Parser.getTok().getString(); 5021 int Slot = StringSwitch<int>(Str) 5022 .Case("p10", 0) 5023 .Case("p20", 1) 5024 .Case("p0", 2) 5025 .Default(-1); 5026 5027 SMLoc S = Parser.getTok().getLoc(); 5028 if (Slot == -1) 5029 return MatchOperand_ParseFail; 5030 5031 Parser.Lex(); 5032 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5033 AMDGPUOperand::ImmTyInterpSlot)); 5034 return MatchOperand_Success; 5035 } 5036 5037 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5038 if (getLexer().getKind() != AsmToken::Identifier) 5039 return MatchOperand_NoMatch; 5040 5041 StringRef Str = Parser.getTok().getString(); 5042 if (!Str.startswith("attr")) 5043 return MatchOperand_NoMatch; 5044 5045 StringRef Chan = Str.take_back(2); 5046 int AttrChan = StringSwitch<int>(Chan) 5047 .Case(".x", 0) 5048 .Case(".y", 1) 5049 .Case(".z", 2) 5050 .Case(".w", 3) 5051 .Default(-1); 5052 if (AttrChan == -1) 5053 return MatchOperand_ParseFail; 5054 5055 Str = Str.drop_back(2).drop_front(4); 5056 5057 uint8_t Attr; 5058 if (Str.getAsInteger(10, Attr)) 5059 return MatchOperand_ParseFail; 5060 5061 SMLoc S = Parser.getTok().getLoc(); 5062 Parser.Lex(); 5063 if (Attr > 63) { 5064 Error(S, "out of bounds attr"); 5065 return MatchOperand_Success; 5066 } 5067 5068 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5069 5070 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5071 AMDGPUOperand::ImmTyInterpAttr)); 5072 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5073 AMDGPUOperand::ImmTyAttrChan)); 5074 return MatchOperand_Success; 5075 } 5076 5077 //===----------------------------------------------------------------------===// 5078 // exp 5079 //===----------------------------------------------------------------------===// 5080 5081 void AMDGPUAsmParser::errorExpTgt() { 5082 Error(Parser.getTok().getLoc(), "invalid exp target"); 5083 } 5084 5085 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5086 uint8_t &Val) { 5087 if (Str == "null") { 5088 Val = 9; 5089 return MatchOperand_Success; 5090 } 5091 5092 if (Str.startswith("mrt")) { 5093 Str = Str.drop_front(3); 5094 if (Str == "z") { // == mrtz 5095 Val = 8; 5096 return MatchOperand_Success; 5097 } 5098 5099 if (Str.getAsInteger(10, Val)) 5100 return MatchOperand_ParseFail; 5101 5102 if (Val > 7) 5103 errorExpTgt(); 5104 5105 return MatchOperand_Success; 5106 } 5107 5108 if (Str.startswith("pos")) 
{ 5109 Str = Str.drop_front(3); 5110 if (Str.getAsInteger(10, Val)) 5111 return MatchOperand_ParseFail; 5112 5113 if (Val > 4 || (Val == 4 && !isGFX10())) 5114 errorExpTgt(); 5115 5116 Val += 12; 5117 return MatchOperand_Success; 5118 } 5119 5120 if (isGFX10() && Str == "prim") { 5121 Val = 20; 5122 return MatchOperand_Success; 5123 } 5124 5125 if (Str.startswith("param")) { 5126 Str = Str.drop_front(5); 5127 if (Str.getAsInteger(10, Val)) 5128 return MatchOperand_ParseFail; 5129 5130 if (Val >= 32) 5131 errorExpTgt(); 5132 5133 Val += 32; 5134 return MatchOperand_Success; 5135 } 5136 5137 if (Str.startswith("invalid_target_")) { 5138 Str = Str.drop_front(15); 5139 if (Str.getAsInteger(10, Val)) 5140 return MatchOperand_ParseFail; 5141 5142 errorExpTgt(); 5143 return MatchOperand_Success; 5144 } 5145 5146 return MatchOperand_NoMatch; 5147 } 5148 5149 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5150 uint8_t Val; 5151 StringRef Str = Parser.getTok().getString(); 5152 5153 auto Res = parseExpTgtImpl(Str, Val); 5154 if (Res != MatchOperand_Success) 5155 return Res; 5156 5157 SMLoc S = Parser.getTok().getLoc(); 5158 Parser.Lex(); 5159 5160 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5161 AMDGPUOperand::ImmTyExpTgt)); 5162 return MatchOperand_Success; 5163 } 5164 5165 //===----------------------------------------------------------------------===// 5166 // parser helpers 5167 //===----------------------------------------------------------------------===// 5168 5169 bool 5170 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5171 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5172 } 5173 5174 bool 5175 AMDGPUAsmParser::isId(const StringRef Id) const { 5176 return isId(getToken(), Id); 5177 } 5178 5179 bool 5180 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5181 return getTokenKind() == Kind; 5182 } 5183 5184 bool 5185 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5186 if (isId(Id)) { 5187 lex(); 5188 return true; 5189 } 5190 return false; 5191 } 5192 5193 bool 5194 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5195 if (isId(Id) && peekToken().is(Kind)) { 5196 lex(); 5197 lex(); 5198 return true; 5199 } 5200 return false; 5201 } 5202 5203 bool 5204 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5205 if (isToken(Kind)) { 5206 lex(); 5207 return true; 5208 } 5209 return false; 5210 } 5211 5212 bool 5213 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5214 const StringRef ErrMsg) { 5215 if (!trySkipToken(Kind)) { 5216 Error(getLoc(), ErrMsg); 5217 return false; 5218 } 5219 return true; 5220 } 5221 5222 bool 5223 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5224 return !getParser().parseAbsoluteExpression(Imm); 5225 } 5226 5227 bool 5228 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5229 if (isToken(AsmToken::String)) { 5230 Val = getToken().getStringContents(); 5231 lex(); 5232 return true; 5233 } else { 5234 Error(getLoc(), ErrMsg); 5235 return false; 5236 } 5237 } 5238 5239 AsmToken 5240 AMDGPUAsmParser::getToken() const { 5241 return Parser.getTok(); 5242 } 5243 5244 AsmToken 5245 AMDGPUAsmParser::peekToken() { 5246 return getLexer().peekTok(); 5247 } 5248 5249 void 5250 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5251 auto TokCount = getLexer().peekTokens(Tokens); 5252 5253 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5254 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5255 } 
5256 5257 AsmToken::TokenKind 5258 AMDGPUAsmParser::getTokenKind() const { 5259 return getLexer().getKind(); 5260 } 5261 5262 SMLoc 5263 AMDGPUAsmParser::getLoc() const { 5264 return getToken().getLoc(); 5265 } 5266 5267 StringRef 5268 AMDGPUAsmParser::getTokenStr() const { 5269 return getToken().getString(); 5270 } 5271 5272 void 5273 AMDGPUAsmParser::lex() { 5274 Parser.Lex(); 5275 } 5276 5277 //===----------------------------------------------------------------------===// 5278 // swizzle 5279 //===----------------------------------------------------------------------===// 5280 5281 LLVM_READNONE 5282 static unsigned 5283 encodeBitmaskPerm(const unsigned AndMask, 5284 const unsigned OrMask, 5285 const unsigned XorMask) { 5286 using namespace llvm::AMDGPU::Swizzle; 5287 5288 return BITMASK_PERM_ENC | 5289 (AndMask << BITMASK_AND_SHIFT) | 5290 (OrMask << BITMASK_OR_SHIFT) | 5291 (XorMask << BITMASK_XOR_SHIFT); 5292 } 5293 5294 bool 5295 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5296 const unsigned MinVal, 5297 const unsigned MaxVal, 5298 const StringRef ErrMsg) { 5299 for (unsigned i = 0; i < OpNum; ++i) { 5300 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5301 return false; 5302 } 5303 SMLoc ExprLoc = Parser.getTok().getLoc(); 5304 if (!parseExpr(Op[i])) { 5305 return false; 5306 } 5307 if (Op[i] < MinVal || Op[i] > MaxVal) { 5308 Error(ExprLoc, ErrMsg); 5309 return false; 5310 } 5311 } 5312 5313 return true; 5314 } 5315 5316 bool 5317 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5318 using namespace llvm::AMDGPU::Swizzle; 5319 5320 int64_t Lane[LANE_NUM]; 5321 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5322 "expected a 2-bit lane id")) { 5323 Imm = QUAD_PERM_ENC; 5324 for (unsigned I = 0; I < LANE_NUM; ++I) { 5325 Imm |= Lane[I] << (LANE_SHIFT * I); 5326 } 5327 return true; 5328 } 5329 return false; 5330 } 5331 5332 bool 5333 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5334 using namespace llvm::AMDGPU::Swizzle; 5335 5336 SMLoc S = Parser.getTok().getLoc(); 5337 int64_t GroupSize; 5338 int64_t LaneIdx; 5339 5340 if (!parseSwizzleOperands(1, &GroupSize, 5341 2, 32, 5342 "group size must be in the interval [2,32]")) { 5343 return false; 5344 } 5345 if (!isPowerOf2_64(GroupSize)) { 5346 Error(S, "group size must be a power of two"); 5347 return false; 5348 } 5349 if (parseSwizzleOperands(1, &LaneIdx, 5350 0, GroupSize - 1, 5351 "lane id must be in the interval [0,group size - 1]")) { 5352 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5353 return true; 5354 } 5355 return false; 5356 } 5357 5358 bool 5359 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5360 using namespace llvm::AMDGPU::Swizzle; 5361 5362 SMLoc S = Parser.getTok().getLoc(); 5363 int64_t GroupSize; 5364 5365 if (!parseSwizzleOperands(1, &GroupSize, 5366 2, 32, "group size must be in the interval [2,32]")) { 5367 return false; 5368 } 5369 if (!isPowerOf2_64(GroupSize)) { 5370 Error(S, "group size must be a power of two"); 5371 return false; 5372 } 5373 5374 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 5375 return true; 5376 } 5377 5378 bool 5379 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5380 using namespace llvm::AMDGPU::Swizzle; 5381 5382 SMLoc S = Parser.getTok().getLoc(); 5383 int64_t GroupSize; 5384 5385 if (!parseSwizzleOperands(1, &GroupSize, 5386 1, 16, "group size must be in the interval [1,16]")) { 5387 return false; 5388 } 5389 if (!isPowerOf2_64(GroupSize)) { 5390 Error(S, "group size must be a power of 
two"); 5391 return false; 5392 } 5393 5394 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5395 return true; 5396 } 5397 5398 bool 5399 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5400 using namespace llvm::AMDGPU::Swizzle; 5401 5402 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5403 return false; 5404 } 5405 5406 StringRef Ctl; 5407 SMLoc StrLoc = Parser.getTok().getLoc(); 5408 if (!parseString(Ctl)) { 5409 return false; 5410 } 5411 if (Ctl.size() != BITMASK_WIDTH) { 5412 Error(StrLoc, "expected a 5-character mask"); 5413 return false; 5414 } 5415 5416 unsigned AndMask = 0; 5417 unsigned OrMask = 0; 5418 unsigned XorMask = 0; 5419 5420 for (size_t i = 0; i < Ctl.size(); ++i) { 5421 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5422 switch(Ctl[i]) { 5423 default: 5424 Error(StrLoc, "invalid mask"); 5425 return false; 5426 case '0': 5427 break; 5428 case '1': 5429 OrMask |= Mask; 5430 break; 5431 case 'p': 5432 AndMask |= Mask; 5433 break; 5434 case 'i': 5435 AndMask |= Mask; 5436 XorMask |= Mask; 5437 break; 5438 } 5439 } 5440 5441 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5442 return true; 5443 } 5444 5445 bool 5446 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5447 5448 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5449 5450 if (!parseExpr(Imm)) { 5451 return false; 5452 } 5453 if (!isUInt<16>(Imm)) { 5454 Error(OffsetLoc, "expected a 16-bit offset"); 5455 return false; 5456 } 5457 return true; 5458 } 5459 5460 bool 5461 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5462 using namespace llvm::AMDGPU::Swizzle; 5463 5464 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5465 5466 SMLoc ModeLoc = Parser.getTok().getLoc(); 5467 bool Ok = false; 5468 5469 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5470 Ok = parseSwizzleQuadPerm(Imm); 5471 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5472 Ok = parseSwizzleBitmaskPerm(Imm); 5473 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5474 Ok = parseSwizzleBroadcast(Imm); 5475 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5476 Ok = parseSwizzleSwap(Imm); 5477 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5478 Ok = parseSwizzleReverse(Imm); 5479 } else { 5480 Error(ModeLoc, "expected a swizzle mode"); 5481 } 5482 5483 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5484 } 5485 5486 return false; 5487 } 5488 5489 OperandMatchResultTy 5490 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5491 SMLoc S = Parser.getTok().getLoc(); 5492 int64_t Imm = 0; 5493 5494 if (trySkipId("offset")) { 5495 5496 bool Ok = false; 5497 if (skipToken(AsmToken::Colon, "expected a colon")) { 5498 if (trySkipId("swizzle")) { 5499 Ok = parseSwizzleMacro(Imm); 5500 } else { 5501 Ok = parseSwizzleOffset(Imm); 5502 } 5503 } 5504 5505 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5506 5507 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5508 } else { 5509 // Swizzle "offset" operand is optional. 5510 // If it is omitted, try parsing other optional operands. 
5511 return parseOptionalOpr(Operands); 5512 } 5513 } 5514 5515 bool 5516 AMDGPUOperand::isSwizzle() const { 5517 return isImmTy(ImmTySwizzle); 5518 } 5519 5520 //===----------------------------------------------------------------------===// 5521 // VGPR Index Mode 5522 //===----------------------------------------------------------------------===// 5523 5524 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5525 5526 using namespace llvm::AMDGPU::VGPRIndexMode; 5527 5528 if (trySkipToken(AsmToken::RParen)) { 5529 return OFF; 5530 } 5531 5532 int64_t Imm = 0; 5533 5534 while (true) { 5535 unsigned Mode = 0; 5536 SMLoc S = Parser.getTok().getLoc(); 5537 5538 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5539 if (trySkipId(IdSymbolic[ModeId])) { 5540 Mode = 1 << ModeId; 5541 break; 5542 } 5543 } 5544 5545 if (Mode == 0) { 5546 Error(S, (Imm == 0)? 5547 "expected a VGPR index mode or a closing parenthesis" : 5548 "expected a VGPR index mode"); 5549 break; 5550 } 5551 5552 if (Imm & Mode) { 5553 Error(S, "duplicate VGPR index mode"); 5554 break; 5555 } 5556 Imm |= Mode; 5557 5558 if (trySkipToken(AsmToken::RParen)) 5559 break; 5560 if (!skipToken(AsmToken::Comma, 5561 "expected a comma or a closing parenthesis")) 5562 break; 5563 } 5564 5565 return Imm; 5566 } 5567 5568 OperandMatchResultTy 5569 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5570 5571 int64_t Imm = 0; 5572 SMLoc S = Parser.getTok().getLoc(); 5573 5574 if (getLexer().getKind() == AsmToken::Identifier && 5575 Parser.getTok().getString() == "gpr_idx" && 5576 getLexer().peekTok().is(AsmToken::LParen)) { 5577 5578 Parser.Lex(); 5579 Parser.Lex(); 5580 5581 // If parse failed, trigger an error but do not return error code 5582 // to avoid excessive error messages. 5583 Imm = parseGPRIdxMacro(); 5584 5585 } else { 5586 if (getParser().parseAbsoluteExpression(Imm)) 5587 return MatchOperand_NoMatch; 5588 if (Imm < 0 || !isUInt<4>(Imm)) { 5589 Error(S, "invalid immediate: only 4-bit values are legal"); 5590 } 5591 } 5592 5593 Operands.push_back( 5594 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5595 return MatchOperand_Success; 5596 } 5597 5598 bool AMDGPUOperand::isGPRIdxMode() const { 5599 return isImmTy(ImmTyGprIdxMode); 5600 } 5601 5602 //===----------------------------------------------------------------------===// 5603 // sopp branch targets 5604 //===----------------------------------------------------------------------===// 5605 5606 OperandMatchResultTy 5607 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5608 SMLoc S = Parser.getTok().getLoc(); 5609 5610 switch (getLexer().getKind()) { 5611 default: return MatchOperand_ParseFail; 5612 case AsmToken::Integer: { 5613 int64_t Imm; 5614 if (getParser().parseAbsoluteExpression(Imm)) 5615 return MatchOperand_ParseFail; 5616 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 5617 return MatchOperand_Success; 5618 } 5619 5620 case AsmToken::Identifier: 5621 Operands.push_back(AMDGPUOperand::CreateExpr(this, 5622 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 5623 Parser.getTok().getString()), getContext()), S)); 5624 Parser.Lex(); 5625 return MatchOperand_Success; 5626 } 5627 } 5628 5629 //===----------------------------------------------------------------------===// 5630 // Boolean holding registers 5631 //===----------------------------------------------------------------------===// 5632 5633 OperandMatchResultTy 5634 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5635 return 
parseReg(Operands); 5636 } 5637 5638 //===----------------------------------------------------------------------===// 5639 // mubuf 5640 //===----------------------------------------------------------------------===// 5641 5642 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5643 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5644 } 5645 5646 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5647 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5648 } 5649 5650 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5651 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5652 } 5653 5654 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5655 const OperandVector &Operands, 5656 bool IsAtomic, 5657 bool IsAtomicReturn, 5658 bool IsLds) { 5659 bool IsLdsOpcode = IsLds; 5660 bool HasLdsModifier = false; 5661 OptionalImmIndexMap OptionalIdx; 5662 assert(IsAtomicReturn ? IsAtomic : true); 5663 unsigned FirstOperandIdx = 1; 5664 5665 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5666 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5667 5668 // Add the register arguments 5669 if (Op.isReg()) { 5670 Op.addRegOperands(Inst, 1); 5671 // Insert a tied src for atomic return dst. 5672 // This cannot be postponed as subsequent calls to 5673 // addImmOperands rely on correct number of MC operands. 5674 if (IsAtomicReturn && i == FirstOperandIdx) 5675 Op.addRegOperands(Inst, 1); 5676 continue; 5677 } 5678 5679 // Handle the case where soffset is an immediate 5680 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5681 Op.addImmOperands(Inst, 1); 5682 continue; 5683 } 5684 5685 HasLdsModifier |= Op.isLDS(); 5686 5687 // Handle tokens like 'offen' which are sometimes hard-coded into the 5688 // asm string. There are no MCInst operands for these. 5689 if (Op.isToken()) { 5690 continue; 5691 } 5692 assert(Op.isImm()); 5693 5694 // Handle optional arguments 5695 OptionalIdx[Op.getImmTy()] = i; 5696 } 5697 5698 // This is a workaround for an llvm quirk which may result in an 5699 // incorrect instruction selection. Lds and non-lds versions of 5700 // MUBUF instructions are identical except that lds versions 5701 // have mandatory 'lds' modifier. However this modifier follows 5702 // optional modifiers and llvm asm matcher regards this 'lds' 5703 // modifier as an optional one. As a result, an lds version 5704 // of opcode may be selected even if it has no 'lds' modifier. 5705 if (IsLdsOpcode && !HasLdsModifier) { 5706 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5707 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5708 Inst.setOpcode(NoLdsOpcode); 5709 IsLdsOpcode = false; 5710 } 5711 } 5712 5713 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5714 if (!IsAtomic) { // glc is hard-coded. 
5715 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5716 } 5717 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5718 5719 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5720 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5721 } 5722 5723 if (isGFX10()) 5724 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5725 } 5726 5727 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5728 OptionalImmIndexMap OptionalIdx; 5729 5730 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5731 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5732 5733 // Add the register arguments 5734 if (Op.isReg()) { 5735 Op.addRegOperands(Inst, 1); 5736 continue; 5737 } 5738 5739 // Handle the case where soffset is an immediate 5740 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5741 Op.addImmOperands(Inst, 1); 5742 continue; 5743 } 5744 5745 // Handle tokens like 'offen' which are sometimes hard-coded into the 5746 // asm string. There are no MCInst operands for these. 5747 if (Op.isToken()) { 5748 continue; 5749 } 5750 assert(Op.isImm()); 5751 5752 // Handle optional arguments 5753 OptionalIdx[Op.getImmTy()] = i; 5754 } 5755 5756 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5757 AMDGPUOperand::ImmTyOffset); 5758 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5759 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5760 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5761 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5762 5763 if (isGFX10()) 5764 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5765 } 5766 5767 //===----------------------------------------------------------------------===// 5768 // mimg 5769 //===----------------------------------------------------------------------===// 5770 5771 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5772 bool IsAtomic) { 5773 unsigned I = 1; 5774 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5775 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5776 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5777 } 5778 5779 if (IsAtomic) { 5780 // Add src, same as dst 5781 assert(Desc.getNumDefs() == 1); 5782 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5783 } 5784 5785 OptionalImmIndexMap OptionalIdx; 5786 5787 for (unsigned E = Operands.size(); I != E; ++I) { 5788 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5789 5790 // Add the register arguments 5791 if (Op.isReg()) { 5792 Op.addRegOperands(Inst, 1); 5793 } else if (Op.isImmModifier()) { 5794 OptionalIdx[Op.getImmTy()] = I; 5795 } else if (!Op.isToken()) { 5796 llvm_unreachable("unexpected operand type"); 5797 } 5798 } 5799 5800 bool IsGFX10 = isGFX10(); 5801 5802 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5803 if (IsGFX10) 5804 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 5805 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5806 if (IsGFX10) 5807 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5808 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5809 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5810 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5811 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5812 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5813 if (!IsGFX10) 5814 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5815 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5816 } 5817 5818 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5819 cvtMIMG(Inst, Operands, true); 5820 } 5821 5822 //===----------------------------------------------------------------------===// 5823 // smrd 5824 //===----------------------------------------------------------------------===// 5825 5826 bool AMDGPUOperand::isSMRDOffset8() const { 5827 return isImm() && isUInt<8>(getImm()); 5828 } 5829 5830 bool AMDGPUOperand::isSMRDOffset20() const { 5831 return isImm() && isUInt<20>(getImm()); 5832 } 5833 5834 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5835 // 32-bit literals are only supported on CI and we only want to use them 5836 // when the offset is > 8-bits. 5837 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5838 } 5839 5840 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5841 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5842 } 5843 5844 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5845 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5846 } 5847 5848 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5849 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5850 } 5851 5852 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 5853 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5854 } 5855 5856 //===----------------------------------------------------------------------===// 5857 // vop3 5858 //===----------------------------------------------------------------------===// 5859 5860 static bool ConvertOmodMul(int64_t &Mul) { 5861 if (Mul != 1 && Mul != 2 && Mul != 4) 5862 return false; 5863 5864 Mul >>= 1; 5865 return true; 5866 } 5867 5868 static bool ConvertOmodDiv(int64_t &Div) { 5869 if (Div == 1) { 5870 Div = 0; 5871 return true; 5872 } 5873 5874 if (Div == 2) { 5875 Div = 3; 5876 return true; 5877 } 5878 5879 return false; 5880 } 5881 5882 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5883 if (BoundCtrl == 0) { 5884 BoundCtrl = 1; 5885 return true; 5886 } 5887 5888 if (BoundCtrl == -1) { 5889 BoundCtrl = 0; 5890 return true; 5891 } 5892 5893 return false; 5894 } 5895 5896 // Note: the order in this table matches the order of operands in AsmString. 
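// Each entry is {Name, Type, IsBit, ConvertResult}: bit operands (IsBit == true)
// are parsed as bare named bits (e.g. "glc"), value operands are parsed as
// "name:value" pairs (e.g. "offset:16"), and ConvertResult, when non-null,
// post-processes the parsed value (see parseOptionalOpr below).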
5897 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5898 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5899 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5900 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5901 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5902 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5903 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5904 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5905 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5906 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5907 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 5908 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5909 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5910 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5911 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5912 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5913 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5914 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5915 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5916 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5917 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5918 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5919 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5920 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5921 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5922 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5923 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 5924 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5925 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5926 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5927 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 5928 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5929 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5930 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5931 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5932 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5933 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5934 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5935 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5936 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5937 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 5938 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 5939 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 5940 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 5941 }; 5942 5943 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5944 unsigned size = Operands.size(); 5945 assert(size > 0); 5946 5947 OperandMatchResultTy res = parseOptionalOpr(Operands); 5948 5949 // This is a hack to enable hardcoded mandatory operands which follow 5950 // optional operands. 5951 // 5952 // Current design assumes that all operands after the first optional operand 5953 // are also optional. However implementation of some instructions violates 5954 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5955 // 5956 // To alleviate this problem, we have to (implicitly) parse extra operands 5957 // to make sure autogenerated parser of custom operands never hit hardcoded 5958 // mandatory operands. 
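//
// For example (illustrative): a returning flat/global atomic has a literal
// "glc" in its asm string after optional modifiers such as "offset", so after
// the first optional operand we keep calling parseOptionalOpr until the
// mandatory trailing "glc" has been consumed here as well.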
5959 5960 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5961 5962 // We have parsed the first optional operand. 5963 // Parse as many operands as necessary to skip all mandatory operands. 5964 5965 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5966 if (res != MatchOperand_Success || 5967 getLexer().is(AsmToken::EndOfStatement)) break; 5968 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5969 res = parseOptionalOpr(Operands); 5970 } 5971 } 5972 5973 return res; 5974 } 5975 5976 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5977 OperandMatchResultTy res; 5978 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5979 // try to parse any optional operand here 5980 if (Op.IsBit) { 5981 res = parseNamedBit(Op.Name, Operands, Op.Type); 5982 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5983 res = parseOModOperand(Operands); 5984 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5985 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5986 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5987 res = parseSDWASel(Operands, Op.Name, Op.Type); 5988 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5989 res = parseSDWADstUnused(Operands); 5990 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5991 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5992 Op.Type == AMDGPUOperand::ImmTyNegLo || 5993 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5994 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5995 Op.ConvertResult); 5996 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 5997 res = parseDim(Operands); 5998 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 5999 res = parseDfmtNfmt(Operands); 6000 } else { 6001 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6002 } 6003 if (res != MatchOperand_NoMatch) { 6004 return res; 6005 } 6006 } 6007 return MatchOperand_NoMatch; 6008 } 6009 6010 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6011 StringRef Name = Parser.getTok().getString(); 6012 if (Name == "mul") { 6013 return parseIntWithPrefix("mul", Operands, 6014 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6015 } 6016 6017 if (Name == "div") { 6018 return parseIntWithPrefix("div", Operands, 6019 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6020 } 6021 6022 return MatchOperand_NoMatch; 6023 } 6024 6025 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6026 cvtVOP3P(Inst, Operands); 6027 6028 int Opc = Inst.getOpcode(); 6029 6030 int SrcNum; 6031 const int Ops[] = { AMDGPU::OpName::src0, 6032 AMDGPU::OpName::src1, 6033 AMDGPU::OpName::src2 }; 6034 for (SrcNum = 0; 6035 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6036 ++SrcNum); 6037 assert(SrcNum > 0); 6038 6039 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6040 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6041 6042 if ((OpSel & (1 << SrcNum)) != 0) { 6043 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6044 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6045 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6046 } 6047 } 6048 6049 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6050 // 1. This operand is input modifiers 6051 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6052 // 2. This is not last operand 6053 && Desc.NumOperands > (OpNum + 1) 6054 // 3. 
Next operand is register class 6055 && Desc.OpInfo[OpNum + 1].RegClass != -1 6056 // 4. Next register is not tied to any other operand 6057 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6058 } 6059 6060 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6061 { 6062 OptionalImmIndexMap OptionalIdx; 6063 unsigned Opc = Inst.getOpcode(); 6064 6065 unsigned I = 1; 6066 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6067 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6068 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6069 } 6070 6071 for (unsigned E = Operands.size(); I != E; ++I) { 6072 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6073 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6074 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6075 } else if (Op.isInterpSlot() || 6076 Op.isInterpAttr() || 6077 Op.isAttrChan()) { 6078 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6079 } else if (Op.isImmModifier()) { 6080 OptionalIdx[Op.getImmTy()] = I; 6081 } else { 6082 llvm_unreachable("unhandled operand type"); 6083 } 6084 } 6085 6086 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6087 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6088 } 6089 6090 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6091 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6092 } 6093 6094 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6095 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6096 } 6097 } 6098 6099 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6100 OptionalImmIndexMap &OptionalIdx) { 6101 unsigned Opc = Inst.getOpcode(); 6102 6103 unsigned I = 1; 6104 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6105 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6106 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6107 } 6108 6109 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6110 // This instruction has src modifiers 6111 for (unsigned E = Operands.size(); I != E; ++I) { 6112 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6113 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6114 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6115 } else if (Op.isImmModifier()) { 6116 OptionalIdx[Op.getImmTy()] = I; 6117 } else if (Op.isRegOrImm()) { 6118 Op.addRegOrImmOperands(Inst, 1); 6119 } else { 6120 llvm_unreachable("unhandled operand type"); 6121 } 6122 } 6123 } else { 6124 // No src modifiers 6125 for (unsigned E = Operands.size(); I != E; ++I) { 6126 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6127 if (Op.isMod()) { 6128 OptionalIdx[Op.getImmTy()] = I; 6129 } else { 6130 Op.addRegOrImmOperands(Inst, 1); 6131 } 6132 } 6133 } 6134 6135 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6136 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6137 } 6138 6139 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6140 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6141 } 6142 6143 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6144 // it has src2 register operand that is tied to dst operand 6145 // we don't allow modifiers for this operand in assembler so src2_modifiers 6146 // should be 0. 
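// The block below therefore inserts an immediate 0 at the src2_modifiers
// position and re-adds the dst register as src2, e.g. (illustrative):
//   v_mac_f32_e64 v0, v1, v2  becomes  vdst = v0, ..., src2_modifiers = 0, src2 = v0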
6147 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6148 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6149 Opc == AMDGPU::V_MAC_F32_e64_vi || 6150 Opc == AMDGPU::V_MAC_F16_e64_vi || 6151 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6152 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6153 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6154 auto it = Inst.begin(); 6155 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6156 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6157 ++it; 6158 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6159 } 6160 } 6161 6162 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6163 OptionalImmIndexMap OptionalIdx; 6164 cvtVOP3(Inst, Operands, OptionalIdx); 6165 } 6166 6167 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6168 const OperandVector &Operands) { 6169 OptionalImmIndexMap OptIdx; 6170 const int Opc = Inst.getOpcode(); 6171 const MCInstrDesc &Desc = MII.get(Opc); 6172 6173 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6174 6175 cvtVOP3(Inst, Operands, OptIdx); 6176 6177 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6178 assert(!IsPacked); 6179 Inst.addOperand(Inst.getOperand(0)); 6180 } 6181 6182 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6183 // instruction, and then figure out where to actually put the modifiers 6184 6185 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6186 6187 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6188 if (OpSelHiIdx != -1) { 6189 int DefaultVal = IsPacked ? -1 : 0; 6190 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6191 DefaultVal); 6192 } 6193 6194 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6195 if (NegLoIdx != -1) { 6196 assert(IsPacked); 6197 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6198 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6199 } 6200 6201 const int Ops[] = { AMDGPU::OpName::src0, 6202 AMDGPU::OpName::src1, 6203 AMDGPU::OpName::src2 }; 6204 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6205 AMDGPU::OpName::src1_modifiers, 6206 AMDGPU::OpName::src2_modifiers }; 6207 6208 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6209 6210 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6211 unsigned OpSelHi = 0; 6212 unsigned NegLo = 0; 6213 unsigned NegHi = 0; 6214 6215 if (OpSelHiIdx != -1) { 6216 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6217 } 6218 6219 if (NegLoIdx != -1) { 6220 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6221 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6222 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6223 } 6224 6225 for (int J = 0; J < 3; ++J) { 6226 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6227 if (OpIdx == -1) 6228 break; 6229 6230 uint32_t ModVal = 0; 6231 6232 if ((OpSel & (1 << J)) != 0) 6233 ModVal |= SISrcMods::OP_SEL_0; 6234 6235 if ((OpSelHi & (1 << J)) != 0) 6236 ModVal |= SISrcMods::OP_SEL_1; 6237 6238 if ((NegLo & (1 << J)) != 0) 6239 ModVal |= SISrcMods::NEG; 6240 6241 if ((NegHi & (1 << J)) != 0) 6242 ModVal |= SISrcMods::NEG_HI; 6243 6244 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6245 6246 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6247 } 6248 } 6249 6250 //===----------------------------------------------------------------------===// 6251 // dpp 6252 
//===----------------------------------------------------------------------===// 6253 6254 bool AMDGPUOperand::isDPP8() const { 6255 return isImmTy(ImmTyDPP8); 6256 } 6257 6258 bool AMDGPUOperand::isDPPCtrl() const { 6259 using namespace AMDGPU::DPP; 6260 6261 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6262 if (result) { 6263 int64_t Imm = getImm(); 6264 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6265 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6266 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6267 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6268 (Imm == DppCtrl::WAVE_SHL1) || 6269 (Imm == DppCtrl::WAVE_ROL1) || 6270 (Imm == DppCtrl::WAVE_SHR1) || 6271 (Imm == DppCtrl::WAVE_ROR1) || 6272 (Imm == DppCtrl::ROW_MIRROR) || 6273 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6274 (Imm == DppCtrl::BCAST15) || 6275 (Imm == DppCtrl::BCAST31) || 6276 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6277 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6278 } 6279 return false; 6280 } 6281 6282 //===----------------------------------------------------------------------===// 6283 // mAI 6284 //===----------------------------------------------------------------------===// 6285 6286 bool AMDGPUOperand::isBLGP() const { 6287 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6288 } 6289 6290 bool AMDGPUOperand::isCBSZ() const { 6291 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6292 } 6293 6294 bool AMDGPUOperand::isABID() const { 6295 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6296 } 6297 6298 bool AMDGPUOperand::isS16Imm() const { 6299 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6300 } 6301 6302 bool AMDGPUOperand::isU16Imm() const { 6303 return isImm() && isUInt<16>(getImm()); 6304 } 6305 6306 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6307 if (!isGFX10()) 6308 return MatchOperand_NoMatch; 6309 6310 SMLoc S = Parser.getTok().getLoc(); 6311 6312 if (getLexer().isNot(AsmToken::Identifier)) 6313 return MatchOperand_NoMatch; 6314 if (getLexer().getTok().getString() != "dim") 6315 return MatchOperand_NoMatch; 6316 6317 Parser.Lex(); 6318 if (getLexer().isNot(AsmToken::Colon)) 6319 return MatchOperand_ParseFail; 6320 6321 Parser.Lex(); 6322 6323 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6324 // integer. 
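// Re-assemble the suffix: take the integer token (if any), require that the
// following identifier begins immediately after it, and concatenate the two, so
// suffixes such as "1D", "2D_ARRAY" or "SQ_RSRC_IMG_2D" are accepted.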
6325 std::string Token; 6326 if (getLexer().is(AsmToken::Integer)) { 6327 SMLoc Loc = getLexer().getTok().getEndLoc(); 6328 Token = getLexer().getTok().getString(); 6329 Parser.Lex(); 6330 if (getLexer().getTok().getLoc() != Loc) 6331 return MatchOperand_ParseFail; 6332 } 6333 if (getLexer().isNot(AsmToken::Identifier)) 6334 return MatchOperand_ParseFail; 6335 Token += getLexer().getTok().getString(); 6336 6337 StringRef DimId = Token; 6338 if (DimId.startswith("SQ_RSRC_IMG_")) 6339 DimId = DimId.substr(12); 6340 6341 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6342 if (!DimInfo) 6343 return MatchOperand_ParseFail; 6344 6345 Parser.Lex(); 6346 6347 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6348 AMDGPUOperand::ImmTyDim)); 6349 return MatchOperand_Success; 6350 } 6351 6352 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6353 SMLoc S = Parser.getTok().getLoc(); 6354 StringRef Prefix; 6355 6356 if (getLexer().getKind() == AsmToken::Identifier) { 6357 Prefix = Parser.getTok().getString(); 6358 } else { 6359 return MatchOperand_NoMatch; 6360 } 6361 6362 if (Prefix != "dpp8") 6363 return parseDPPCtrl(Operands); 6364 if (!isGFX10()) 6365 return MatchOperand_NoMatch; 6366 6367 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6368 6369 int64_t Sels[8]; 6370 6371 Parser.Lex(); 6372 if (getLexer().isNot(AsmToken::Colon)) 6373 return MatchOperand_ParseFail; 6374 6375 Parser.Lex(); 6376 if (getLexer().isNot(AsmToken::LBrac)) 6377 return MatchOperand_ParseFail; 6378 6379 Parser.Lex(); 6380 if (getParser().parseAbsoluteExpression(Sels[0])) 6381 return MatchOperand_ParseFail; 6382 if (0 > Sels[0] || 7 < Sels[0]) 6383 return MatchOperand_ParseFail; 6384 6385 for (size_t i = 1; i < 8; ++i) { 6386 if (getLexer().isNot(AsmToken::Comma)) 6387 return MatchOperand_ParseFail; 6388 6389 Parser.Lex(); 6390 if (getParser().parseAbsoluteExpression(Sels[i])) 6391 return MatchOperand_ParseFail; 6392 if (0 > Sels[i] || 7 < Sels[i]) 6393 return MatchOperand_ParseFail; 6394 } 6395 6396 if (getLexer().isNot(AsmToken::RBrac)) 6397 return MatchOperand_ParseFail; 6398 Parser.Lex(); 6399 6400 unsigned DPP8 = 0; 6401 for (size_t i = 0; i < 8; ++i) 6402 DPP8 |= (Sels[i] << (i * 3)); 6403 6404 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6405 return MatchOperand_Success; 6406 } 6407 6408 OperandMatchResultTy 6409 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6410 using namespace AMDGPU::DPP; 6411 6412 SMLoc S = Parser.getTok().getLoc(); 6413 StringRef Prefix; 6414 int64_t Int; 6415 6416 if (getLexer().getKind() == AsmToken::Identifier) { 6417 Prefix = Parser.getTok().getString(); 6418 } else { 6419 return MatchOperand_NoMatch; 6420 } 6421 6422 if (Prefix == "row_mirror") { 6423 Int = DppCtrl::ROW_MIRROR; 6424 Parser.Lex(); 6425 } else if (Prefix == "row_half_mirror") { 6426 Int = DppCtrl::ROW_HALF_MIRROR; 6427 Parser.Lex(); 6428 } else { 6429 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6430 if (Prefix != "quad_perm" 6431 && Prefix != "row_shl" 6432 && Prefix != "row_shr" 6433 && Prefix != "row_ror" 6434 && Prefix != "wave_shl" 6435 && Prefix != "wave_rol" 6436 && Prefix != "wave_shr" 6437 && Prefix != "wave_ror" 6438 && Prefix != "row_bcast" 6439 && Prefix != "row_share" 6440 && Prefix != "row_xmask") { 6441 return MatchOperand_NoMatch; 6442 } 6443 6444 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6445 return MatchOperand_NoMatch; 6446 6447 if (!isVI() && 
!isGFX9() && 6448 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6449 Prefix == "wave_rol" || Prefix == "wave_ror" || 6450 Prefix == "row_bcast")) 6451 return MatchOperand_NoMatch; 6452 6453 Parser.Lex(); 6454 if (getLexer().isNot(AsmToken::Colon)) 6455 return MatchOperand_ParseFail; 6456 6457 if (Prefix == "quad_perm") { 6458 // quad_perm:[%d,%d,%d,%d] 6459 Parser.Lex(); 6460 if (getLexer().isNot(AsmToken::LBrac)) 6461 return MatchOperand_ParseFail; 6462 Parser.Lex(); 6463 6464 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6465 return MatchOperand_ParseFail; 6466 6467 for (int i = 0; i < 3; ++i) { 6468 if (getLexer().isNot(AsmToken::Comma)) 6469 return MatchOperand_ParseFail; 6470 Parser.Lex(); 6471 6472 int64_t Temp; 6473 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6474 return MatchOperand_ParseFail; 6475 const int shift = i*2 + 2; 6476 Int += (Temp << shift); 6477 } 6478 6479 if (getLexer().isNot(AsmToken::RBrac)) 6480 return MatchOperand_ParseFail; 6481 Parser.Lex(); 6482 } else { 6483 // sel:%d 6484 Parser.Lex(); 6485 if (getParser().parseAbsoluteExpression(Int)) 6486 return MatchOperand_ParseFail; 6487 6488 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6489 Int |= DppCtrl::ROW_SHL0; 6490 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6491 Int |= DppCtrl::ROW_SHR0; 6492 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6493 Int |= DppCtrl::ROW_ROR0; 6494 } else if (Prefix == "wave_shl" && 1 == Int) { 6495 Int = DppCtrl::WAVE_SHL1; 6496 } else if (Prefix == "wave_rol" && 1 == Int) { 6497 Int = DppCtrl::WAVE_ROL1; 6498 } else if (Prefix == "wave_shr" && 1 == Int) { 6499 Int = DppCtrl::WAVE_SHR1; 6500 } else if (Prefix == "wave_ror" && 1 == Int) { 6501 Int = DppCtrl::WAVE_ROR1; 6502 } else if (Prefix == "row_bcast") { 6503 if (Int == 15) { 6504 Int = DppCtrl::BCAST15; 6505 } else if (Int == 31) { 6506 Int = DppCtrl::BCAST31; 6507 } else { 6508 return MatchOperand_ParseFail; 6509 } 6510 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6511 Int |= DppCtrl::ROW_SHARE_FIRST; 6512 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6513 Int |= DppCtrl::ROW_XMASK_FIRST; 6514 } else { 6515 return MatchOperand_ParseFail; 6516 } 6517 } 6518 } 6519 6520 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6521 return MatchOperand_Success; 6522 } 6523 6524 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6525 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6526 } 6527 6528 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6529 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6530 } 6531 6532 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6533 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6534 } 6535 6536 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6537 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6538 } 6539 6540 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6541 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6542 } 6543 6544 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6545 OptionalImmIndexMap OptionalIdx; 6546 6547 unsigned I = 1; 6548 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6549 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6550 ((AMDGPUOperand 
&)*Operands[I++]).addRegOperands(Inst, 1); 6551 } 6552 6553 int Fi = 0; 6554 for (unsigned E = Operands.size(); I != E; ++I) { 6555 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6556 MCOI::TIED_TO); 6557 if (TiedTo != -1) { 6558 assert((unsigned)TiedTo < Inst.getNumOperands()); 6559 // handle tied old or src2 for MAC instructions 6560 Inst.addOperand(Inst.getOperand(TiedTo)); 6561 } 6562 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6563 // Add the register arguments 6564 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6565 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6566 // Skip it. 6567 continue; 6568 } 6569 6570 if (IsDPP8) { 6571 if (Op.isDPP8()) { 6572 Op.addImmOperands(Inst, 1); 6573 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6574 Op.addRegWithFPInputModsOperands(Inst, 2); 6575 } else if (Op.isFI()) { 6576 Fi = Op.getImm(); 6577 } else if (Op.isReg()) { 6578 Op.addRegOperands(Inst, 1); 6579 } else { 6580 llvm_unreachable("Invalid operand type"); 6581 } 6582 } else { 6583 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6584 Op.addRegWithFPInputModsOperands(Inst, 2); 6585 } else if (Op.isDPPCtrl()) { 6586 Op.addImmOperands(Inst, 1); 6587 } else if (Op.isImm()) { 6588 // Handle optional arguments 6589 OptionalIdx[Op.getImmTy()] = I; 6590 } else { 6591 llvm_unreachable("Invalid operand type"); 6592 } 6593 } 6594 } 6595 6596 if (IsDPP8) { 6597 using namespace llvm::AMDGPU::DPP; 6598 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6599 } else { 6600 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6601 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6602 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6603 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6604 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6605 } 6606 } 6607 } 6608 6609 //===----------------------------------------------------------------------===// 6610 // sdwa 6611 //===----------------------------------------------------------------------===// 6612 6613 OperandMatchResultTy 6614 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6615 AMDGPUOperand::ImmTy Type) { 6616 using namespace llvm::AMDGPU::SDWA; 6617 6618 SMLoc S = Parser.getTok().getLoc(); 6619 StringRef Value; 6620 OperandMatchResultTy res; 6621 6622 res = parseStringWithPrefix(Prefix, Value); 6623 if (res != MatchOperand_Success) { 6624 return res; 6625 } 6626 6627 int64_t Int; 6628 Int = StringSwitch<int64_t>(Value) 6629 .Case("BYTE_0", SdwaSel::BYTE_0) 6630 .Case("BYTE_1", SdwaSel::BYTE_1) 6631 .Case("BYTE_2", SdwaSel::BYTE_2) 6632 .Case("BYTE_3", SdwaSel::BYTE_3) 6633 .Case("WORD_0", SdwaSel::WORD_0) 6634 .Case("WORD_1", SdwaSel::WORD_1) 6635 .Case("DWORD", SdwaSel::DWORD) 6636 .Default(0xffffffff); 6637 Parser.Lex(); // eat last token 6638 6639 if (Int == 0xffffffff) { 6640 return MatchOperand_ParseFail; 6641 } 6642 6643 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6644 return MatchOperand_Success; 6645 } 6646 6647 OperandMatchResultTy 6648 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6649 using namespace llvm::AMDGPU::SDWA; 6650 6651 SMLoc S = Parser.getTok().getLoc(); 6652 StringRef Value; 6653 OperandMatchResultTy res; 6654 6655 res = parseStringWithPrefix("dst_unused", Value); 6656 if (res != MatchOperand_Success) { 6657 
return res; 6658 } 6659 6660 int64_t Int; 6661 Int = StringSwitch<int64_t>(Value) 6662 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6663 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6664 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6665 .Default(0xffffffff); 6666 Parser.Lex(); // eat last token 6667 6668 if (Int == 0xffffffff) { 6669 return MatchOperand_ParseFail; 6670 } 6671 6672 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6673 return MatchOperand_Success; 6674 } 6675 6676 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6677 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6678 } 6679 6680 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6681 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6682 } 6683 6684 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6685 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 6686 } 6687 6688 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6689 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6690 } 6691 6692 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6693 uint64_t BasicInstType, bool skipVcc) { 6694 using namespace llvm::AMDGPU::SDWA; 6695 6696 OptionalImmIndexMap OptionalIdx; 6697 bool skippedVcc = false; 6698 6699 unsigned I = 1; 6700 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6701 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6702 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6703 } 6704 6705 for (unsigned E = Operands.size(); I != E; ++I) { 6706 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6707 if (skipVcc && !skippedVcc && Op.isReg() && 6708 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6709 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6710 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6711 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6712 // Skip VCC only if we didn't skip it on previous iteration. 
6713 if (BasicInstType == SIInstrFlags::VOP2 && 6714 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 6715 skippedVcc = true; 6716 continue; 6717 } else if (BasicInstType == SIInstrFlags::VOPC && 6718 Inst.getNumOperands() == 0) { 6719 skippedVcc = true; 6720 continue; 6721 } 6722 } 6723 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6724 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6725 } else if (Op.isImm()) { 6726 // Handle optional arguments 6727 OptionalIdx[Op.getImmTy()] = I; 6728 } else { 6729 llvm_unreachable("Invalid operand type"); 6730 } 6731 skippedVcc = false; 6732 } 6733 6734 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6735 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6736 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6737 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 6738 switch (BasicInstType) { 6739 case SIInstrFlags::VOP1: 6740 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6741 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6742 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6743 } 6744 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6745 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6746 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6747 break; 6748 6749 case SIInstrFlags::VOP2: 6750 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6751 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6752 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6753 } 6754 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6755 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6756 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6757 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6758 break; 6759 6760 case SIInstrFlags::VOPC: 6761 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 6762 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6763 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6764 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6765 break; 6766 6767 default: 6768 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 6769 } 6770 } 6771 6772 // special case v_mac_{f16, f32}: 6773 // it has src2 register operand that is tied to dst operand 6774 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 6775 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 6776 auto it = Inst.begin(); 6777 std::advance( 6778 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 6779 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6780 } 6781 } 6782 6783 //===----------------------------------------------------------------------===// 6784 // mAI 6785 //===----------------------------------------------------------------------===// 6786 6787 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 6788 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 6789 } 6790 6791 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 6792 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 6793 } 6794 6795 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 6796 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 6797 } 6798 6799 /// Force static initialization. 6800 extern "C" void LLVMInitializeAMDGPUAsmParser() { 6801 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 6802 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 6803 } 6804 6805 #define GET_REGISTER_MATCHER 6806 #define GET_MATCHER_IMPLEMENTATION 6807 #define GET_MNEMONIC_SPELL_CHECKER 6808 #include "AMDGPUGenAsmMatcher.inc" 6809 6810 // This fuction should be defined after auto-generated include so that we have 6811 // MatchClassKind enum defined 6812 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 6813 unsigned Kind) { 6814 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 6815 // But MatchInstructionImpl() expects to meet token and fails to validate 6816 // operand. This method checks if we are given immediate operand but expect to 6817 // get corresponding token. 6818 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 6819 switch (Kind) { 6820 case MCK_addr64: 6821 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 6822 case MCK_gds: 6823 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 6824 case MCK_lds: 6825 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 6826 case MCK_glc: 6827 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 6828 case MCK_idxen: 6829 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 6830 case MCK_offen: 6831 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 6832 case MCK_SSrcB32: 6833 // When operands have expression values, they will return true for isToken, 6834 // because it is not possible to distinguish between a token and an 6835 // expression at parse time. MatchInstructionImpl() will always try to 6836 // match an operand as a token, when isToken returns true, and when the 6837 // name of the expression is not a valid token, the match will fail, 6838 // so we need to handle it here. 6839 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 6840 case MCK_SSrcF32: 6841 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 6842 case MCK_SoppBrTarget: 6843 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 6844 case MCK_VReg32OrOff: 6845 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 6846 case MCK_InterpSlot: 6847 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 6848 case MCK_Attr: 6849 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 6850 case MCK_AttrChan: 6851 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 6852 default: 6853 return Match_InvalidOperand; 6854 } 6855 } 6856 6857 //===----------------------------------------------------------------------===// 6858 // endpgm 6859 //===----------------------------------------------------------------------===// 6860 6861 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 6862 SMLoc S = Parser.getTok().getLoc(); 6863 int64_t Imm = 0; 6864 6865 if (!parseExpr(Imm)) { 6866 // The operand is optional, if not present default to 0 6867 Imm = 0; 6868 } 6869 6870 if (!isUInt<16>(Imm)) { 6871 Error(S, "expected a 16-bit value"); 6872 return MatchOperand_ParseFail; 6873 } 6874 6875 Operands.push_back( 6876 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 6877 return MatchOperand_Success; 6878 } 6879 6880 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 6881