//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
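    // Illustration: a trailing "gds" modifier on a DS instruction may have been
    // lexed as an expression referring to a symbol named "gds"; getToken() then
    // falls back to that symbol's name so the matcher can still accept it.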
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: // fall through
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth);
  unsigned ParseRegularReg(RegisterKind &RegKind,
                           unsigned &RegNum,
                           unsigned &RegWidth);
  unsigned ParseSpecialReg(RegisterKind &RegKind,
                           unsigned &RegNum,
                           unsigned &RegWidth);
  unsigned ParseRegList(RegisterKind &RegKind,
                        unsigned &RegNum,
                        unsigned &RegWidth);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept these literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ?
      sizeof(double) : getOperandSize(InstDesc, OpNum);
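    // Note: fp literals are carried as 64-bit doubles at this point, so abs/neg
    // operate on the double's sign bit; int literals use the operand's encoded size.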
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
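  // Inline constants are added unchanged (the encoder recognizes them by their
  // sign-extended value); anything else falls through and is truncated to the
  // operand's width below.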
1784 switch (OpTy) { 1785 case AMDGPU::OPERAND_REG_IMM_INT32: 1786 case AMDGPU::OPERAND_REG_IMM_FP32: 1787 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1788 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1789 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1790 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1791 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1792 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1793 if (isSafeTruncation(Val, 32) && 1794 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1795 AsmParser->hasInv2PiInlineImm())) { 1796 Inst.addOperand(MCOperand::createImm(Val)); 1797 return; 1798 } 1799 1800 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1801 return; 1802 1803 case AMDGPU::OPERAND_REG_IMM_INT64: 1804 case AMDGPU::OPERAND_REG_IMM_FP64: 1805 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1806 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1807 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1808 Inst.addOperand(MCOperand::createImm(Val)); 1809 return; 1810 } 1811 1812 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1813 return; 1814 1815 case AMDGPU::OPERAND_REG_IMM_INT16: 1816 case AMDGPU::OPERAND_REG_IMM_FP16: 1817 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1818 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1819 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1820 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1821 if (isSafeTruncation(Val, 16) && 1822 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1823 AsmParser->hasInv2PiInlineImm())) { 1824 Inst.addOperand(MCOperand::createImm(Val)); 1825 return; 1826 } 1827 1828 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1829 return; 1830 1831 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1832 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1833 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1834 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1835 assert(isSafeTruncation(Val, 16)); 1836 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1837 AsmParser->hasInv2PiInlineImm())); 1838 1839 Inst.addOperand(MCOperand::createImm(Val)); 1840 return; 1841 } 1842 default: 1843 llvm_unreachable("invalid operand size"); 1844 } 1845 } 1846 1847 template <unsigned Bitwidth> 1848 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1849 APInt Literal(64, Imm.Val); 1850 1851 if (!Imm.IsFPImm) { 1852 // We got int literal token. 
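// For example (hypothetical token): with Bitwidth == 16 an integer token of
// 0x1ABCD keeps only its low 16 bits and is emitted as 0xABCD; the same
// getLoBits() truncation applies to the 32-bit KImm form.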
1853 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1854 return; 1855 } 1856 1857 bool Lost; 1858 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1859 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1860 APFloat::rmNearestTiesToEven, &Lost); 1861 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1862 } 1863 1864 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1865 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1866 } 1867 1868 static bool isInlineValue(unsigned Reg) { 1869 switch (Reg) { 1870 case AMDGPU::SRC_SHARED_BASE: 1871 case AMDGPU::SRC_SHARED_LIMIT: 1872 case AMDGPU::SRC_PRIVATE_BASE: 1873 case AMDGPU::SRC_PRIVATE_LIMIT: 1874 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1875 return true; 1876 case AMDGPU::SRC_VCCZ: 1877 case AMDGPU::SRC_EXECZ: 1878 case AMDGPU::SRC_SCC: 1879 return true; 1880 case AMDGPU::SGPR_NULL: 1881 return true; 1882 default: 1883 return false; 1884 } 1885 } 1886 1887 bool AMDGPUOperand::isInlineValue() const { 1888 return isRegKind() && ::isInlineValue(getReg()); 1889 } 1890 1891 //===----------------------------------------------------------------------===// 1892 // AsmParser 1893 //===----------------------------------------------------------------------===// 1894 1895 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1896 if (Is == IS_VGPR) { 1897 switch (RegWidth) { 1898 default: return -1; 1899 case 1: return AMDGPU::VGPR_32RegClassID; 1900 case 2: return AMDGPU::VReg_64RegClassID; 1901 case 3: return AMDGPU::VReg_96RegClassID; 1902 case 4: return AMDGPU::VReg_128RegClassID; 1903 case 5: return AMDGPU::VReg_160RegClassID; 1904 case 8: return AMDGPU::VReg_256RegClassID; 1905 case 16: return AMDGPU::VReg_512RegClassID; 1906 case 32: return AMDGPU::VReg_1024RegClassID; 1907 } 1908 } else if (Is == IS_TTMP) { 1909 switch (RegWidth) { 1910 default: return -1; 1911 case 1: return AMDGPU::TTMP_32RegClassID; 1912 case 2: return AMDGPU::TTMP_64RegClassID; 1913 case 4: return AMDGPU::TTMP_128RegClassID; 1914 case 8: return AMDGPU::TTMP_256RegClassID; 1915 case 16: return AMDGPU::TTMP_512RegClassID; 1916 } 1917 } else if (Is == IS_SGPR) { 1918 switch (RegWidth) { 1919 default: return -1; 1920 case 1: return AMDGPU::SGPR_32RegClassID; 1921 case 2: return AMDGPU::SGPR_64RegClassID; 1922 case 4: return AMDGPU::SGPR_128RegClassID; 1923 case 8: return AMDGPU::SGPR_256RegClassID; 1924 case 16: return AMDGPU::SGPR_512RegClassID; 1925 } 1926 } else if (Is == IS_AGPR) { 1927 switch (RegWidth) { 1928 default: return -1; 1929 case 1: return AMDGPU::AGPR_32RegClassID; 1930 case 2: return AMDGPU::AReg_64RegClassID; 1931 case 4: return AMDGPU::AReg_128RegClassID; 1932 case 16: return AMDGPU::AReg_512RegClassID; 1933 case 32: return AMDGPU::AReg_1024RegClassID; 1934 } 1935 } 1936 return -1; 1937 } 1938 1939 static unsigned getSpecialRegForName(StringRef RegName) { 1940 return StringSwitch<unsigned>(RegName) 1941 .Case("exec", AMDGPU::EXEC) 1942 .Case("vcc", AMDGPU::VCC) 1943 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1944 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1945 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1946 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1947 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1948 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1949 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1950 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1951 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1952 
.Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1953 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1954 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1955 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1956 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1957 .Case("m0", AMDGPU::M0) 1958 .Case("vccz", AMDGPU::SRC_VCCZ) 1959 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1960 .Case("execz", AMDGPU::SRC_EXECZ) 1961 .Case("src_execz", AMDGPU::SRC_EXECZ) 1962 .Case("scc", AMDGPU::SRC_SCC) 1963 .Case("src_scc", AMDGPU::SRC_SCC) 1964 .Case("tba", AMDGPU::TBA) 1965 .Case("tma", AMDGPU::TMA) 1966 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1967 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1968 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1969 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1970 .Case("vcc_lo", AMDGPU::VCC_LO) 1971 .Case("vcc_hi", AMDGPU::VCC_HI) 1972 .Case("exec_lo", AMDGPU::EXEC_LO) 1973 .Case("exec_hi", AMDGPU::EXEC_HI) 1974 .Case("tma_lo", AMDGPU::TMA_LO) 1975 .Case("tma_hi", AMDGPU::TMA_HI) 1976 .Case("tba_lo", AMDGPU::TBA_LO) 1977 .Case("tba_hi", AMDGPU::TBA_HI) 1978 .Case("null", AMDGPU::SGPR_NULL) 1979 .Default(AMDGPU::NoRegister); 1980 } 1981 1982 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1983 SMLoc &EndLoc) { 1984 auto R = parseRegister(); 1985 if (!R) return true; 1986 assert(R->isReg()); 1987 RegNo = R->getReg(); 1988 StartLoc = R->getStartLoc(); 1989 EndLoc = R->getEndLoc(); 1990 return false; 1991 } 1992 1993 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1994 RegisterKind RegKind, unsigned Reg1) { 1995 switch (RegKind) { 1996 case IS_SPECIAL: 1997 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1998 Reg = AMDGPU::EXEC; 1999 RegWidth = 2; 2000 return true; 2001 } 2002 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2003 Reg = AMDGPU::FLAT_SCR; 2004 RegWidth = 2; 2005 return true; 2006 } 2007 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2008 Reg = AMDGPU::XNACK_MASK; 2009 RegWidth = 2; 2010 return true; 2011 } 2012 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2013 Reg = AMDGPU::VCC; 2014 RegWidth = 2; 2015 return true; 2016 } 2017 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2018 Reg = AMDGPU::TBA; 2019 RegWidth = 2; 2020 return true; 2021 } 2022 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2023 Reg = AMDGPU::TMA; 2024 RegWidth = 2; 2025 return true; 2026 } 2027 return false; 2028 case IS_VGPR: 2029 case IS_SGPR: 2030 case IS_AGPR: 2031 case IS_TTMP: 2032 if (Reg1 != Reg + RegWidth) { 2033 return false; 2034 } 2035 RegWidth++; 2036 return true; 2037 default: 2038 llvm_unreachable("unexpected register kind"); 2039 } 2040 } 2041 2042 struct RegInfo { 2043 StringLiteral Name; 2044 RegisterKind Kind; 2045 }; 2046 2047 static constexpr RegInfo RegularRegisters[] = { 2048 {{"v"}, IS_VGPR}, 2049 {{"s"}, IS_SGPR}, 2050 {{"ttmp"}, IS_TTMP}, 2051 {{"acc"}, IS_AGPR}, 2052 {{"a"}, IS_AGPR}, 2053 }; 2054 2055 static bool isRegularReg(RegisterKind Kind) { 2056 return Kind == IS_VGPR || 2057 Kind == IS_SGPR || 2058 Kind == IS_TTMP || 2059 Kind == IS_AGPR; 2060 } 2061 2062 static const RegInfo* getRegularRegInfo(StringRef Str) { 2063 for (const RegInfo &Reg : RegularRegisters) 2064 if (Str.startswith(Reg.Name)) 2065 return &Reg; 2066 return nullptr; 2067 } 2068 2069 static bool getRegNum(StringRef Str, unsigned& Num) { 2070 return !Str.getAsInteger(10, Num); 2071 } 2072 2073 bool 2074 AMDGPUAsmParser::isRegister(const 
AsmToken &Token, 2075 const AsmToken &NextToken) const { 2076 2077 // A list of consecutive registers: [s0,s1,s2,s3] 2078 if (Token.is(AsmToken::LBrac)) 2079 return true; 2080 2081 if (!Token.is(AsmToken::Identifier)) 2082 return false; 2083 2084 // A single register like s0 or a range of registers like s[0:1] 2085 2086 StringRef Str = Token.getString(); 2087 const RegInfo *Reg = getRegularRegInfo(Str); 2088 if (Reg) { 2089 StringRef RegName = Reg->Name; 2090 StringRef RegSuffix = Str.substr(RegName.size()); 2091 if (!RegSuffix.empty()) { 2092 unsigned Num; 2093 // A single register with an index: rXX 2094 if (getRegNum(RegSuffix, Num)) 2095 return true; 2096 } else { 2097 // A range of registers: r[XX:YY]. 2098 if (NextToken.is(AsmToken::LBrac)) 2099 return true; 2100 } 2101 } 2102 2103 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2104 } 2105 2106 bool 2107 AMDGPUAsmParser::isRegister() 2108 { 2109 return isRegister(getToken(), peekToken()); 2110 } 2111 2112 unsigned 2113 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2114 unsigned RegNum, 2115 unsigned RegWidth) { 2116 2117 assert(isRegularReg(RegKind)); 2118 2119 unsigned AlignSize = 1; 2120 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2121 // SGPR and TTMP registers must be aligned. 2122 // Max required alignment is 4 dwords. 2123 AlignSize = std::min(RegWidth, 4u); 2124 } 2125 2126 if (RegNum % AlignSize != 0) 2127 return AMDGPU::NoRegister; 2128 2129 unsigned RegIdx = RegNum / AlignSize; 2130 int RCID = getRegClass(RegKind, RegWidth); 2131 if (RCID == -1) 2132 return AMDGPU::NoRegister; 2133 2134 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2135 const MCRegisterClass RC = TRI->getRegClass(RCID); 2136 if (RegIdx >= RC.getNumRegs()) 2137 return AMDGPU::NoRegister; 2138 2139 return RC.getRegister(RegIdx); 2140 } 2141 2142 bool 2143 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2144 int64_t RegLo, RegHi; 2145 if (!trySkipToken(AsmToken::LBrac)) 2146 return false; 2147 2148 if (!parseExpr(RegLo)) 2149 return false; 2150 2151 if (trySkipToken(AsmToken::Colon)) { 2152 if (!parseExpr(RegHi)) 2153 return false; 2154 } else { 2155 RegHi = RegLo; 2156 } 2157 2158 if (!trySkipToken(AsmToken::RBrac)) 2159 return false; 2160 2161 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2162 return false; 2163 2164 Num = static_cast<unsigned>(RegLo); 2165 Width = (RegHi - RegLo) + 1; 2166 return true; 2167 } 2168 2169 unsigned 2170 AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2171 unsigned &RegNum, 2172 unsigned &RegWidth) { 2173 assert(isToken(AsmToken::Identifier)); 2174 unsigned Reg = getSpecialRegForName(getTokenStr()); 2175 if (Reg) { 2176 RegNum = 0; 2177 RegWidth = 1; 2178 RegKind = IS_SPECIAL; 2179 lex(); // skip register name 2180 } 2181 return Reg; 2182 } 2183 2184 unsigned 2185 AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2186 unsigned &RegNum, 2187 unsigned &RegWidth) { 2188 assert(isToken(AsmToken::Identifier)); 2189 StringRef RegName = getTokenStr(); 2190 2191 const RegInfo *RI = getRegularRegInfo(RegName); 2192 if (!RI) 2193 return AMDGPU::NoRegister; 2194 lex(); // skip register name 2195 2196 RegKind = RI->Kind; 2197 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2198 if (!RegSuffix.empty()) { 2199 // Single 32-bit register: vXX. 2200 if (!getRegNum(RegSuffix, RegNum)) 2201 return AMDGPU::NoRegister; 2202 RegWidth = 1; 2203 } else { 2204 // Range of registers: v[XX:YY]. ":YY" is optional. 
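// For example (illustrative inputs): "v[8:11]" yields RegNum = 8 and
// RegWidth = 4, while "v[5]" yields RegNum = 5 and RegWidth = 1.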
2205 if (!ParseRegRange(RegNum, RegWidth)) 2206 return AMDGPU::NoRegister; 2207 } 2208 2209 return getRegularReg(RegKind, RegNum, RegWidth); 2210 } 2211 2212 unsigned 2213 AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, 2214 unsigned &RegNum, 2215 unsigned &RegWidth) { 2216 unsigned Reg = AMDGPU::NoRegister; 2217 2218 if (!trySkipToken(AsmToken::LBrac)) 2219 return AMDGPU::NoRegister; 2220 2221 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2222 2223 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2224 return AMDGPU::NoRegister; 2225 if (RegWidth != 1) 2226 return AMDGPU::NoRegister; 2227 2228 for (; trySkipToken(AsmToken::Comma); ) { 2229 RegisterKind NextRegKind; 2230 unsigned NextReg, NextRegNum, NextRegWidth; 2231 2232 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth)) 2233 return AMDGPU::NoRegister; 2234 if (NextRegWidth != 1) 2235 return AMDGPU::NoRegister; 2236 if (NextRegKind != RegKind) 2237 return AMDGPU::NoRegister; 2238 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2239 return AMDGPU::NoRegister; 2240 } 2241 2242 if (!trySkipToken(AsmToken::RBrac)) 2243 return AMDGPU::NoRegister; 2244 2245 if (isRegularReg(RegKind)) 2246 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2247 2248 return Reg; 2249 } 2250 2251 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, 2252 unsigned &Reg, 2253 unsigned &RegNum, 2254 unsigned &RegWidth) { 2255 Reg = AMDGPU::NoRegister; 2256 2257 if (isToken(AsmToken::Identifier)) { 2258 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth); 2259 if (Reg == AMDGPU::NoRegister) 2260 Reg = ParseRegularReg(RegKind, RegNum, RegWidth); 2261 } else { 2262 Reg = ParseRegList(RegKind, RegNum, RegWidth); 2263 } 2264 2265 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2266 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2267 } 2268 2269 Optional<StringRef> 2270 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2271 switch (RegKind) { 2272 case IS_VGPR: 2273 return StringRef(".amdgcn.next_free_vgpr"); 2274 case IS_SGPR: 2275 return StringRef(".amdgcn.next_free_sgpr"); 2276 default: 2277 return None; 2278 } 2279 } 2280 2281 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2282 auto SymbolName = getGprCountSymbolName(RegKind); 2283 assert(SymbolName && "initializing invalid register kind"); 2284 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2285 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2286 } 2287 2288 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2289 unsigned DwordRegIndex, 2290 unsigned RegWidth) { 2291 // Symbols are only defined for GCN targets 2292 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2293 return true; 2294 2295 auto SymbolName = getGprCountSymbolName(RegKind); 2296 if (!SymbolName) 2297 return true; 2298 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2299 2300 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2301 int64_t OldCount; 2302 2303 if (!Sym->isVariable()) 2304 return !Error(getParser().getTok().getLoc(), 2305 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2306 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2307 return !Error( 2308 getParser().getTok().getLoc(), 2309 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2310 2311 if (OldCount <= NewMax) 2312 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2313 2314 return true; 2315 } 2316 2317 
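// Usage sketch for the GPR-count tracking above (register indices are
// illustrative): parsing v7 gives DwordRegIndex = 7 and RegWidth = 1, so
// .amdgcn.next_free_vgpr is raised to at least 8; parsing s[4:5] gives
// DwordRegIndex = 4 and RegWidth = 2, so .amdgcn.next_free_sgpr is raised to
// at least 6. Each symbol holds one past the highest register index seen.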
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2318 const auto &Tok = Parser.getTok(); 2319 SMLoc StartLoc = Tok.getLoc(); 2320 SMLoc EndLoc = Tok.getEndLoc(); 2321 RegisterKind RegKind; 2322 unsigned Reg, RegNum, RegWidth; 2323 2324 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2325 //FIXME: improve error messages (bug 41303). 2326 Error(StartLoc, "not a valid operand."); 2327 return nullptr; 2328 } 2329 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2330 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2331 return nullptr; 2332 } else 2333 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2334 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2335 } 2336 2337 OperandMatchResultTy 2338 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2339 // TODO: add syntactic sugar for 1/(2*PI) 2340 2341 assert(!isRegister()); 2342 assert(!isModifier()); 2343 2344 const auto& Tok = getToken(); 2345 const auto& NextTok = peekToken(); 2346 bool IsReal = Tok.is(AsmToken::Real); 2347 SMLoc S = getLoc(); 2348 bool Negate = false; 2349 2350 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2351 lex(); 2352 IsReal = true; 2353 Negate = true; 2354 } 2355 2356 if (IsReal) { 2357 // Floating-point expressions are not supported. 2358 // Can only allow floating-point literals with an 2359 // optional sign. 2360 2361 StringRef Num = getTokenStr(); 2362 lex(); 2363 2364 APFloat RealVal(APFloat::IEEEdouble()); 2365 auto roundMode = APFloat::rmNearestTiesToEven; 2366 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2367 return MatchOperand_ParseFail; 2368 } 2369 if (Negate) 2370 RealVal.changeSign(); 2371 2372 Operands.push_back( 2373 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2374 AMDGPUOperand::ImmTyNone, true)); 2375 2376 return MatchOperand_Success; 2377 2378 } else { 2379 int64_t IntVal; 2380 const MCExpr *Expr; 2381 SMLoc S = getLoc(); 2382 2383 if (HasSP3AbsModifier) { 2384 // This is a workaround for handling expressions 2385 // as arguments of SP3 'abs' modifier, for example: 2386 // |1.0| 2387 // |-1| 2388 // |1+x| 2389 // This syntax is not compatible with syntax of standard 2390 // MC expressions (due to the trailing '|'). 
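// For example, with the SP3 form |-1| the leading bar has already been
// consumed by the caller, so the remaining input here is "-1|". A full
// parseExpression() could try to treat the trailing '|' as a bitwise-or
// operator, so only a primary expression is parsed and the closing bar is
// left for parseRegOrImmWithFPInputMods to skip (a sketch of the intent,
// not an exhaustive description of MC expression parsing).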
2391 SMLoc EndLoc; 2392 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2393 return MatchOperand_ParseFail; 2394 } else { 2395 if (Parser.parseExpression(Expr)) 2396 return MatchOperand_ParseFail; 2397 } 2398 2399 if (Expr->evaluateAsAbsolute(IntVal)) { 2400 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2401 } else { 2402 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2403 } 2404 2405 return MatchOperand_Success; 2406 } 2407 2408 return MatchOperand_NoMatch; 2409 } 2410 2411 OperandMatchResultTy 2412 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2413 if (!isRegister()) 2414 return MatchOperand_NoMatch; 2415 2416 if (auto R = parseRegister()) { 2417 assert(R->isReg()); 2418 Operands.push_back(std::move(R)); 2419 return MatchOperand_Success; 2420 } 2421 return MatchOperand_ParseFail; 2422 } 2423 2424 OperandMatchResultTy 2425 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2426 auto res = parseReg(Operands); 2427 if (res != MatchOperand_NoMatch) { 2428 return res; 2429 } else if (isModifier()) { 2430 return MatchOperand_NoMatch; 2431 } else { 2432 return parseImm(Operands, HasSP3AbsMod); 2433 } 2434 } 2435 2436 bool 2437 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2438 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2439 const auto &str = Token.getString(); 2440 return str == "abs" || str == "neg" || str == "sext"; 2441 } 2442 return false; 2443 } 2444 2445 bool 2446 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2447 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2448 } 2449 2450 bool 2451 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2452 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2453 } 2454 2455 bool 2456 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2457 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2458 } 2459 2460 // Check if this is an operand modifier or an opcode modifier 2461 // which may look like an expression but it is not. We should 2462 // avoid parsing these modifiers as expressions. Currently 2463 // recognized sequences are: 2464 // |...| 2465 // abs(...) 2466 // neg(...) 2467 // sext(...) 2468 // -reg 2469 // -|...| 2470 // -abs(...) 2471 // name:... 2472 // Note that simple opcode modifiers like 'gds' may be parsed as 2473 // expressions; this is a special case. See getExpressionAsToken. 2474 // 2475 bool 2476 AMDGPUAsmParser::isModifier() { 2477 2478 AsmToken Tok = getToken(); 2479 AsmToken NextToken[2]; 2480 peekTokens(NextToken); 2481 2482 return isOperandModifier(Tok, NextToken[0]) || 2483 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2484 isOpcodeModifierWithVal(Tok, NextToken[0]); 2485 } 2486 2487 // Check if the current token is an SP3 'neg' modifier. 2488 // Currently this modifier is allowed in the following context: 2489 // 2490 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2491 // 2. Before an 'abs' modifier: -abs(...) 2492 // 3. Before an SP3 'abs' modifier: -|...| 2493 // 2494 // In all other cases "-" is handled as a part 2495 // of an expression that follows the sign. 
2496 // 2497 // Note: When "-" is followed by an integer literal, 2498 // this is interpreted as integer negation rather 2499 // than a floating-point NEG modifier applied to N. 2500 // Besides being counter-intuitive, such use of floating-point 2501 // NEG modifier would have resulted in a different meaning 2502 // of integer literals used with VOP1/2/C and VOP3, 2503 // for example: 2504 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2505 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2506 // Negative fp literals with preceding "-" are 2507 // handled likewise for uniformity 2508 // 2509 bool 2510 AMDGPUAsmParser::parseSP3NegModifier() { 2511 2512 AsmToken NextToken[2]; 2513 peekTokens(NextToken); 2514 2515 if (isToken(AsmToken::Minus) && 2516 (isRegister(NextToken[0], NextToken[1]) || 2517 NextToken[0].is(AsmToken::Pipe) || 2518 isId(NextToken[0], "abs"))) { 2519 lex(); 2520 return true; 2521 } 2522 2523 return false; 2524 } 2525 2526 OperandMatchResultTy 2527 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2528 bool AllowImm) { 2529 bool Neg, SP3Neg; 2530 bool Abs, SP3Abs; 2531 SMLoc Loc; 2532 2533 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2534 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2535 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2536 return MatchOperand_ParseFail; 2537 } 2538 2539 SP3Neg = parseSP3NegModifier(); 2540 2541 Loc = getLoc(); 2542 Neg = trySkipId("neg"); 2543 if (Neg && SP3Neg) { 2544 Error(Loc, "expected register or immediate"); 2545 return MatchOperand_ParseFail; 2546 } 2547 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2548 return MatchOperand_ParseFail; 2549 2550 Abs = trySkipId("abs"); 2551 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2552 return MatchOperand_ParseFail; 2553 2554 Loc = getLoc(); 2555 SP3Abs = trySkipToken(AsmToken::Pipe); 2556 if (Abs && SP3Abs) { 2557 Error(Loc, "expected register or immediate"); 2558 return MatchOperand_ParseFail; 2559 } 2560 2561 OperandMatchResultTy Res; 2562 if (AllowImm) { 2563 Res = parseRegOrImm(Operands, SP3Abs); 2564 } else { 2565 Res = parseReg(Operands); 2566 } 2567 if (Res != MatchOperand_Success) { 2568 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2569 } 2570 2571 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2572 return MatchOperand_ParseFail; 2573 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2574 return MatchOperand_ParseFail; 2575 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2576 return MatchOperand_ParseFail; 2577 2578 AMDGPUOperand::Modifiers Mods; 2579 Mods.Abs = Abs || SP3Abs; 2580 Mods.Neg = Neg || SP3Neg; 2581 2582 if (Mods.hasFPModifiers()) { 2583 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2584 if (Op.isExpr()) { 2585 Error(Op.getStartLoc(), "expected an absolute expression"); 2586 return MatchOperand_ParseFail; 2587 } 2588 Op.setModifiers(Mods); 2589 } 2590 return MatchOperand_Success; 2591 } 2592 2593 OperandMatchResultTy 2594 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2595 bool AllowImm) { 2596 bool Sext = trySkipId("sext"); 2597 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2598 return MatchOperand_ParseFail; 2599 2600 OperandMatchResultTy Res; 2601 if (AllowImm) { 2602 Res = parseRegOrImm(Operands); 2603 } else { 2604 Res = parseReg(Operands); 2605 } 2606 if (Res != MatchOperand_Success) { 2607 return Sext? MatchOperand_ParseFail : Res; 2608 } 2609 2610 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2611 return MatchOperand_ParseFail; 2612 2613 AMDGPUOperand::Modifiers Mods; 2614 Mods.Sext = Sext; 2615 2616 if (Mods.hasIntModifiers()) { 2617 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2618 if (Op.isExpr()) { 2619 Error(Op.getStartLoc(), "expected an absolute expression"); 2620 return MatchOperand_ParseFail; 2621 } 2622 Op.setModifiers(Mods); 2623 } 2624 2625 return MatchOperand_Success; 2626 } 2627 2628 OperandMatchResultTy 2629 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2630 return parseRegOrImmWithFPInputMods(Operands, false); 2631 } 2632 2633 OperandMatchResultTy 2634 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2635 return parseRegOrImmWithIntInputMods(Operands, false); 2636 } 2637 2638 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2639 auto Loc = getLoc(); 2640 if (trySkipId("off")) { 2641 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2642 AMDGPUOperand::ImmTyOff, false)); 2643 return MatchOperand_Success; 2644 } 2645 2646 if (!isRegister()) 2647 return MatchOperand_NoMatch; 2648 2649 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2650 if (Reg) { 2651 Operands.push_back(std::move(Reg)); 2652 return MatchOperand_Success; 2653 } 2654 2655 return MatchOperand_ParseFail; 2656 2657 } 2658 2659 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2660 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2661 2662 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2663 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2664 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2665 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2666 return Match_InvalidOperand; 2667 2668 if ((TSFlags & SIInstrFlags::VOP3) && 2669 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2670 getForcedEncodingSize() != 64) 2671 return Match_PreferE32; 2672 2673 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2674 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2675 // v_mac_f32/16 allow only dst_sel == DWORD; 2676 auto OpNum = 2677 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2678 const auto &Op = Inst.getOperand(OpNum); 2679 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2680 return Match_InvalidOperand; 2681 } 2682 } 2683 2684 return Match_Success; 2685 } 2686 2687 // What asm variants we should check 2688 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2689 if (getForcedEncodingSize() == 32) { 2690 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2691 return makeArrayRef(Variants); 2692 } 2693 2694 if (isForcedVOP3()) { 2695 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2696 return makeArrayRef(Variants); 2697 } 2698 2699 if (isForcedSDWA()) { 2700 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2701 AMDGPUAsmVariants::SDWA9}; 2702 return makeArrayRef(Variants); 2703 } 2704 2705 if (isForcedDPP()) { 2706 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2707 return makeArrayRef(Variants); 2708 } 2709 2710 static const unsigned Variants[] = { 2711 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2712 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2713 }; 2714 2715 return makeArrayRef(Variants); 2716 } 2717 2718 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2719 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2720 const unsigned Num = Desc.getNumImplicitUses(); 2721 for (unsigned i = 0; i < Num; ++i) { 2722 unsigned Reg = Desc.ImplicitUses[i]; 2723 switch (Reg) { 2724 case AMDGPU::FLAT_SCR: 2725 case AMDGPU::VCC: 2726 case AMDGPU::VCC_LO: 2727 case AMDGPU::VCC_HI: 2728 case AMDGPU::M0: 2729 return Reg; 2730 default: 2731 break; 2732 } 2733 } 2734 return AMDGPU::NoRegister; 2735 } 2736 2737 // NB: This code is correct only when used to check constant 2738 // bus limitations because GFX7 supports no f16 inline constants. 2739 // Note that there are no cases when a GFX7 opcode violates 2740 // constant bus limitations due to the use of an f16 constant.
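// A sketch of typical outcomes of the size-based checks below (values are
// illustrative): for a 4-byte operand, integers in -16..64 and a few fp
// constants such as 0.5 are inline; for a 2-byte f16 operand, 1.0 (0x3C00)
// is inline while an arbitrary bit pattern is not. The authoritative sets
// are defined by AMDGPU::isInlinableLiteral{64,32,16,V216}.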
2741 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2742 unsigned OpIdx) const { 2743 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2744 2745 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2746 return false; 2747 } 2748 2749 const MCOperand &MO = Inst.getOperand(OpIdx); 2750 2751 int64_t Val = MO.getImm(); 2752 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2753 2754 switch (OpSize) { // expected operand size 2755 case 8: 2756 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2757 case 4: 2758 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2759 case 2: { 2760 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2761 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2762 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2763 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2764 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2765 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2766 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2767 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2768 } else { 2769 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2770 } 2771 } 2772 default: 2773 llvm_unreachable("invalid operand size"); 2774 } 2775 } 2776 2777 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2778 if (!isGFX10()) 2779 return 1; 2780 2781 switch (Opcode) { 2782 // 64-bit shift instructions can use only one scalar value input 2783 case AMDGPU::V_LSHLREV_B64: 2784 case AMDGPU::V_LSHLREV_B64_gfx10: 2785 case AMDGPU::V_LSHL_B64: 2786 case AMDGPU::V_LSHRREV_B64: 2787 case AMDGPU::V_LSHRREV_B64_gfx10: 2788 case AMDGPU::V_LSHR_B64: 2789 case AMDGPU::V_ASHRREV_I64: 2790 case AMDGPU::V_ASHRREV_I64_gfx10: 2791 case AMDGPU::V_ASHR_I64: 2792 return 1; 2793 default: 2794 return 2; 2795 } 2796 } 2797 2798 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2799 const MCOperand &MO = Inst.getOperand(OpIdx); 2800 if (MO.isImm()) { 2801 return !isInlineConstant(Inst, OpIdx); 2802 } else if (MO.isReg()) { 2803 auto Reg = MO.getReg(); 2804 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2805 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2806 } else { 2807 return true; 2808 } 2809 } 2810 2811 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2812 const unsigned Opcode = Inst.getOpcode(); 2813 const MCInstrDesc &Desc = MII.get(Opcode); 2814 unsigned ConstantBusUseCount = 0; 2815 unsigned NumLiterals = 0; 2816 unsigned LiteralSize; 2817 2818 if (Desc.TSFlags & 2819 (SIInstrFlags::VOPC | 2820 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2821 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2822 SIInstrFlags::SDWA)) { 2823 // Check special imm operands (used by madmk, etc) 2824 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2825 ++ConstantBusUseCount; 2826 } 2827 2828 SmallDenseSet<unsigned> SGPRsUsed; 2829 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2830 if (SGPRUsed != AMDGPU::NoRegister) { 2831 SGPRsUsed.insert(SGPRUsed); 2832 ++ConstantBusUseCount; 2833 } 2834 2835 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2836 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2837 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2838 2839 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2840 2841 for (int OpIdx : OpIndices) { 2842 if (OpIdx == -1) break; 2843 2844 const MCOperand &MO = 
Inst.getOperand(OpIdx); 2845 if (usesConstantBus(Inst, OpIdx)) { 2846 if (MO.isReg()) { 2847 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2848 // Pairs of registers with a partial intersection like these 2849 // s0, s[0:1] 2850 // flat_scratch_lo, flat_scratch 2851 // flat_scratch_lo, flat_scratch_hi 2852 // are theoretically valid but they are disabled anyway. 2853 // Note that this code mimics SIInstrInfo::verifyInstruction 2854 if (!SGPRsUsed.count(Reg)) { 2855 SGPRsUsed.insert(Reg); 2856 ++ConstantBusUseCount; 2857 } 2858 } else { // Expression or a literal 2859 2860 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2861 continue; // special operand like VINTERP attr_chan 2862 2863 // An instruction may use only one literal. 2864 // This has been validated in a previous step. 2865 // See validateVOP3Literal. 2866 // This literal may be used as more than one operand. 2867 // If all these operands are of the same size, 2868 // this literal counts as one scalar value. 2869 // Otherwise it counts as 2 scalar values. 2870 // See "GFX10 Shader Programming", section 3.6.2.3. 2871 2872 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2873 if (Size < 4) Size = 4; 2874 2875 if (NumLiterals == 0) { 2876 NumLiterals = 1; 2877 LiteralSize = Size; 2878 } else if (LiteralSize != Size) { 2879 NumLiterals = 2; 2880 } 2881 } 2882 } 2883 } 2884 } 2885 ConstantBusUseCount += NumLiterals; 2886 2887 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2888 } 2889 2890 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2891 const unsigned Opcode = Inst.getOpcode(); 2892 const MCInstrDesc &Desc = MII.get(Opcode); 2893 2894 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2895 if (DstIdx == -1 || 2896 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2897 return true; 2898 } 2899 2900 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2901 2902 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2903 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2904 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2905 2906 assert(DstIdx != -1); 2907 const MCOperand &Dst = Inst.getOperand(DstIdx); 2908 assert(Dst.isReg()); 2909 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2910 2911 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2912 2913 for (int SrcIdx : SrcIndices) { 2914 if (SrcIdx == -1) break; 2915 const MCOperand &Src = Inst.getOperand(SrcIdx); 2916 if (Src.isReg()) { 2917 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2918 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2919 return false; 2920 } 2921 } 2922 } 2923 2924 return true; 2925 } 2926 2927 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2928 2929 const unsigned Opc = Inst.getOpcode(); 2930 const MCInstrDesc &Desc = MII.get(Opc); 2931 2932 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2933 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2934 assert(ClampIdx != -1); 2935 return Inst.getOperand(ClampIdx).getImm() == 0; 2936 } 2937 2938 return true; 2939 } 2940 2941 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2942 2943 const unsigned Opc = Inst.getOpcode(); 2944 const MCInstrDesc &Desc = MII.get(Opc); 2945 2946 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2947 return true; 2948 2949 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2950 int DMaskIdx
= AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2951 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2952 2953 assert(VDataIdx != -1); 2954 assert(DMaskIdx != -1); 2955 assert(TFEIdx != -1); 2956 2957 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2958 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2959 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2960 if (DMask == 0) 2961 DMask = 1; 2962 2963 unsigned DataSize = 2964 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 2965 if (hasPackedD16()) { 2966 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2967 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2968 DataSize = (DataSize + 1) / 2; 2969 } 2970 2971 return (VDataSize / 4) == DataSize + TFESize; 2972 } 2973 2974 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2975 const unsigned Opc = Inst.getOpcode(); 2976 const MCInstrDesc &Desc = MII.get(Opc); 2977 2978 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2979 return true; 2980 2981 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2982 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2983 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2984 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2985 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2986 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2987 2988 assert(VAddr0Idx != -1); 2989 assert(SrsrcIdx != -1); 2990 assert(DimIdx != -1); 2991 assert(SrsrcIdx > VAddr0Idx); 2992 2993 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2994 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2995 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2996 unsigned VAddrSize = 2997 IsNSA ? SrsrcIdx - VAddr0Idx 2998 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2999 3000 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3001 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3002 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3003 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3004 if (!IsNSA) { 3005 if (AddrSize > 8) 3006 AddrSize = 16; 3007 else if (AddrSize > 4) 3008 AddrSize = 8; 3009 } 3010 3011 return VAddrSize == AddrSize; 3012 } 3013 3014 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3015 3016 const unsigned Opc = Inst.getOpcode(); 3017 const MCInstrDesc &Desc = MII.get(Opc); 3018 3019 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3020 return true; 3021 if (!Desc.mayLoad() || !Desc.mayStore()) 3022 return true; // Not atomic 3023 3024 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3025 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3026 3027 // This is an incomplete check because image_atomic_cmpswap 3028 // may only use 0x3 and 0xf while other atomic operations 3029 // may use 0x1 and 0x3. However these limitations are 3030 // verified when we check that dmask matches dst size. 
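// For example (illustrative pairings): a 32-bit image_atomic_add uses dmask
// 0x1 and its 64-bit form uses 0x3, while image_atomic_cmpswap needs 0x3 (or
// 0xf for the 64-bit form) because it carries both the compare and the swap
// values.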
3031 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3032 } 3033 3034 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3035 3036 const unsigned Opc = Inst.getOpcode(); 3037 const MCInstrDesc &Desc = MII.get(Opc); 3038 3039 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3040 return true; 3041 3042 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3043 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3044 3045 // GATHER4 instructions use dmask in a different fashion compared to 3046 // other MIMG instructions. The only useful DMASK values are 3047 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3048 // (red,red,red,red) etc.) The ISA document doesn't mention 3049 // this. 3050 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3051 } 3052 3053 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3054 { 3055 switch (Opcode) { 3056 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3057 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3058 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3059 return true; 3060 default: 3061 return false; 3062 } 3063 } 3064 3065 // movrels* opcodes should only allow VGPRS as src0. 3066 // This is specified in .td description for vop1/vop3, 3067 // but sdwa is handled differently. See isSDWAOperand. 3068 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3069 3070 const unsigned Opc = Inst.getOpcode(); 3071 const MCInstrDesc &Desc = MII.get(Opc); 3072 3073 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3074 return true; 3075 3076 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3077 assert(Src0Idx != -1); 3078 3079 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3080 if (!Src0.isReg()) 3081 return false; 3082 3083 auto Reg = Src0.getReg(); 3084 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3085 return !isSGPR(mc2PseudoReg(Reg), TRI); 3086 } 3087 3088 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3089 3090 const unsigned Opc = Inst.getOpcode(); 3091 const MCInstrDesc &Desc = MII.get(Opc); 3092 3093 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3094 return true; 3095 3096 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3097 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3098 if (isCI() || isSI()) 3099 return false; 3100 } 3101 3102 return true; 3103 } 3104 3105 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3106 const unsigned Opc = Inst.getOpcode(); 3107 const MCInstrDesc &Desc = MII.get(Opc); 3108 3109 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3110 return true; 3111 3112 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3113 if (DimIdx < 0) 3114 return true; 3115 3116 long Imm = Inst.getOperand(DimIdx).getImm(); 3117 if (Imm < 0 || Imm >= 8) 3118 return false; 3119 3120 return true; 3121 } 3122 3123 static bool IsRevOpcode(const unsigned Opcode) 3124 { 3125 switch (Opcode) { 3126 case AMDGPU::V_SUBREV_F32_e32: 3127 case AMDGPU::V_SUBREV_F32_e64: 3128 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3129 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3130 case AMDGPU::V_SUBREV_F32_e32_vi: 3131 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3132 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3133 case AMDGPU::V_SUBREV_F32_e64_vi: 3134 3135 case AMDGPU::V_SUBREV_I32_e32: 3136 case AMDGPU::V_SUBREV_I32_e64: 3137 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3138 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3139 3140 case AMDGPU::V_SUBBREV_U32_e32: 3141 case AMDGPU::V_SUBBREV_U32_e64: 3142 case 
AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3143 case AMDGPU::V_SUBBREV_U32_e32_vi: 3144 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3145 case AMDGPU::V_SUBBREV_U32_e64_vi: 3146 3147 case AMDGPU::V_SUBREV_U32_e32: 3148 case AMDGPU::V_SUBREV_U32_e64: 3149 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3150 case AMDGPU::V_SUBREV_U32_e32_vi: 3151 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3152 case AMDGPU::V_SUBREV_U32_e64_vi: 3153 3154 case AMDGPU::V_SUBREV_F16_e32: 3155 case AMDGPU::V_SUBREV_F16_e64: 3156 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3157 case AMDGPU::V_SUBREV_F16_e32_vi: 3158 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3159 case AMDGPU::V_SUBREV_F16_e64_vi: 3160 3161 case AMDGPU::V_SUBREV_U16_e32: 3162 case AMDGPU::V_SUBREV_U16_e64: 3163 case AMDGPU::V_SUBREV_U16_e32_vi: 3164 case AMDGPU::V_SUBREV_U16_e64_vi: 3165 3166 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3167 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3168 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3169 3170 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3171 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3172 3173 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3174 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3175 3176 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3177 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3178 3179 case AMDGPU::V_LSHRREV_B32_e32: 3180 case AMDGPU::V_LSHRREV_B32_e64: 3181 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3182 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3183 case AMDGPU::V_LSHRREV_B32_e32_vi: 3184 case AMDGPU::V_LSHRREV_B32_e64_vi: 3185 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3186 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3187 3188 case AMDGPU::V_ASHRREV_I32_e32: 3189 case AMDGPU::V_ASHRREV_I32_e64: 3190 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3191 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3192 case AMDGPU::V_ASHRREV_I32_e32_vi: 3193 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3194 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3195 case AMDGPU::V_ASHRREV_I32_e64_vi: 3196 3197 case AMDGPU::V_LSHLREV_B32_e32: 3198 case AMDGPU::V_LSHLREV_B32_e64: 3199 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3200 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3201 case AMDGPU::V_LSHLREV_B32_e32_vi: 3202 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3203 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3204 case AMDGPU::V_LSHLREV_B32_e64_vi: 3205 3206 case AMDGPU::V_LSHLREV_B16_e32: 3207 case AMDGPU::V_LSHLREV_B16_e64: 3208 case AMDGPU::V_LSHLREV_B16_e32_vi: 3209 case AMDGPU::V_LSHLREV_B16_e64_vi: 3210 case AMDGPU::V_LSHLREV_B16_gfx10: 3211 3212 case AMDGPU::V_LSHRREV_B16_e32: 3213 case AMDGPU::V_LSHRREV_B16_e64: 3214 case AMDGPU::V_LSHRREV_B16_e32_vi: 3215 case AMDGPU::V_LSHRREV_B16_e64_vi: 3216 case AMDGPU::V_LSHRREV_B16_gfx10: 3217 3218 case AMDGPU::V_ASHRREV_I16_e32: 3219 case AMDGPU::V_ASHRREV_I16_e64: 3220 case AMDGPU::V_ASHRREV_I16_e32_vi: 3221 case AMDGPU::V_ASHRREV_I16_e64_vi: 3222 case AMDGPU::V_ASHRREV_I16_gfx10: 3223 3224 case AMDGPU::V_LSHLREV_B64: 3225 case AMDGPU::V_LSHLREV_B64_gfx10: 3226 case AMDGPU::V_LSHLREV_B64_vi: 3227 3228 case AMDGPU::V_LSHRREV_B64: 3229 case AMDGPU::V_LSHRREV_B64_gfx10: 3230 case AMDGPU::V_LSHRREV_B64_vi: 3231 3232 case AMDGPU::V_ASHRREV_I64: 3233 case AMDGPU::V_ASHRREV_I64_gfx10: 3234 case AMDGPU::V_ASHRREV_I64_vi: 3235 3236 case AMDGPU::V_PK_LSHLREV_B16: 3237 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3238 case AMDGPU::V_PK_LSHLREV_B16_vi: 3239 3240 case AMDGPU::V_PK_LSHRREV_B16: 3241 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3242 case AMDGPU::V_PK_LSHRREV_B16_vi: 3243 case AMDGPU::V_PK_ASHRREV_I16: 3244 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3245 case AMDGPU::V_PK_ASHRREV_I16_vi: 3246 return true; 
3247 default: 3248 return false; 3249 } 3250 } 3251 3252 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3253 3254 using namespace SIInstrFlags; 3255 const unsigned Opcode = Inst.getOpcode(); 3256 const MCInstrDesc &Desc = MII.get(Opcode); 3257 3258 // lds_direct register is defined so that it can be used 3259 // with 9-bit operands only. Ignore encodings which do not accept these. 3260 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3261 return true; 3262 3263 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3264 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3265 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3266 3267 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3268 3269 // lds_direct cannot be specified as either src1 or src2. 3270 for (int SrcIdx : SrcIndices) { 3271 if (SrcIdx == -1) break; 3272 const MCOperand &Src = Inst.getOperand(SrcIdx); 3273 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3274 return false; 3275 } 3276 } 3277 3278 if (Src0Idx == -1) 3279 return true; 3280 3281 const MCOperand &Src = Inst.getOperand(Src0Idx); 3282 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3283 return true; 3284 3285 // lds_direct is specified as src0. Check additional limitations. 3286 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3287 } 3288 3289 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3290 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3291 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3292 if (Op.isFlatOffset()) 3293 return Op.getStartLoc(); 3294 } 3295 return getLoc(); 3296 } 3297 3298 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3299 const OperandVector &Operands) { 3300 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3301 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3302 return true; 3303 3304 auto Opcode = Inst.getOpcode(); 3305 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3306 assert(OpNum != -1); 3307 3308 const auto &Op = Inst.getOperand(OpNum); 3309 if (!hasFlatOffsets() && Op.getImm() != 0) { 3310 Error(getFlatOffsetLoc(Operands), 3311 "flat offset modifier is not supported on this GPU"); 3312 return false; 3313 } 3314 3315 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3316 // For FLAT segment the offset must be positive; 3317 // MSB is ignored and forced to zero. 3318 unsigned OffsetSize = isGFX9() ? 13 : 12; 3319 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3320 if (!isIntN(OffsetSize, Op.getImm())) { 3321 Error(getFlatOffsetLoc(Operands), 3322 isGFX9() ? "expected a 13-bit signed offset" : 3323 "expected a 12-bit signed offset"); 3324 return false; 3325 } 3326 } else { 3327 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3328 Error(getFlatOffsetLoc(Operands), 3329 isGFX9() ? 
"expected a 12-bit unsigned offset" : 3330 "expected an 11-bit unsigned offset"); 3331 return false; 3332 } 3333 } 3334 3335 return true; 3336 } 3337 3338 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3339 unsigned Opcode = Inst.getOpcode(); 3340 const MCInstrDesc &Desc = MII.get(Opcode); 3341 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3342 return true; 3343 3344 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3345 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3346 3347 const int OpIndices[] = { Src0Idx, Src1Idx }; 3348 3349 unsigned NumExprs = 0; 3350 unsigned NumLiterals = 0; 3351 uint32_t LiteralValue; 3352 3353 for (int OpIdx : OpIndices) { 3354 if (OpIdx == -1) break; 3355 3356 const MCOperand &MO = Inst.getOperand(OpIdx); 3357 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3358 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3359 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3360 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3361 if (NumLiterals == 0 || LiteralValue != Value) { 3362 LiteralValue = Value; 3363 ++NumLiterals; 3364 } 3365 } else if (MO.isExpr()) { 3366 ++NumExprs; 3367 } 3368 } 3369 } 3370 3371 return NumLiterals + NumExprs <= 1; 3372 } 3373 3374 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3375 const unsigned Opc = Inst.getOpcode(); 3376 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3377 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3378 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3379 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3380 3381 if (OpSel & ~3) 3382 return false; 3383 } 3384 return true; 3385 } 3386 3387 // Check if VCC register matches wavefront size 3388 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3389 auto FB = getFeatureBits(); 3390 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3391 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3392 } 3393 3394 // VOP3 literal is only allowed in GFX10+ and only one can be used 3395 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3396 unsigned Opcode = Inst.getOpcode(); 3397 const MCInstrDesc &Desc = MII.get(Opcode); 3398 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3399 return true; 3400 3401 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3402 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3403 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3404 3405 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3406 3407 unsigned NumExprs = 0; 3408 unsigned NumLiterals = 0; 3409 uint32_t LiteralValue; 3410 3411 for (int OpIdx : OpIndices) { 3412 if (OpIdx == -1) break; 3413 3414 const MCOperand &MO = Inst.getOperand(OpIdx); 3415 if (!MO.isImm() && !MO.isExpr()) 3416 continue; 3417 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3418 continue; 3419 3420 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3421 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3422 return false; 3423 3424 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3425 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3426 if (NumLiterals == 0 || LiteralValue != Value) { 3427 LiteralValue = Value; 3428 ++NumLiterals; 3429 } 3430 } else if (MO.isExpr()) { 3431 ++NumExprs; 3432 } 3433 } 3434 NumLiterals += NumExprs; 3435 3436 return !NumLiterals || 3437 (NumLiterals == 1 && 
getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3438 } 3439 3440 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3441 const SMLoc &IDLoc, 3442 const OperandVector &Operands) { 3443 if (!validateLdsDirect(Inst)) { 3444 Error(IDLoc, 3445 "invalid use of lds_direct"); 3446 return false; 3447 } 3448 if (!validateSOPLiteral(Inst)) { 3449 Error(IDLoc, 3450 "only one literal operand is allowed"); 3451 return false; 3452 } 3453 if (!validateVOP3Literal(Inst)) { 3454 Error(IDLoc, 3455 "invalid literal operand"); 3456 return false; 3457 } 3458 if (!validateConstantBusLimitations(Inst)) { 3459 Error(IDLoc, 3460 "invalid operand (violates constant bus restrictions)"); 3461 return false; 3462 } 3463 if (!validateEarlyClobberLimitations(Inst)) { 3464 Error(IDLoc, 3465 "destination must be different than all sources"); 3466 return false; 3467 } 3468 if (!validateIntClampSupported(Inst)) { 3469 Error(IDLoc, 3470 "integer clamping is not supported on this GPU"); 3471 return false; 3472 } 3473 if (!validateOpSel(Inst)) { 3474 Error(IDLoc, 3475 "invalid op_sel operand"); 3476 return false; 3477 } 3478 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3479 if (!validateMIMGD16(Inst)) { 3480 Error(IDLoc, 3481 "d16 modifier is not supported on this GPU"); 3482 return false; 3483 } 3484 if (!validateMIMGDim(Inst)) { 3485 Error(IDLoc, "dim modifier is required on this GPU"); 3486 return false; 3487 } 3488 if (!validateMIMGDataSize(Inst)) { 3489 Error(IDLoc, 3490 "image data size does not match dmask and tfe"); 3491 return false; 3492 } 3493 if (!validateMIMGAddrSize(Inst)) { 3494 Error(IDLoc, 3495 "image address size does not match dim and a16"); 3496 return false; 3497 } 3498 if (!validateMIMGAtomicDMask(Inst)) { 3499 Error(IDLoc, 3500 "invalid atomic image dmask"); 3501 return false; 3502 } 3503 if (!validateMIMGGatherDMask(Inst)) { 3504 Error(IDLoc, 3505 "invalid image_gather dmask: only one bit must be set"); 3506 return false; 3507 } 3508 if (!validateMovrels(Inst)) { 3509 Error(IDLoc, "source operand must be a VGPR"); 3510 return false; 3511 } 3512 if (!validateFlatOffset(Inst, Operands)) { 3513 return false; 3514 } 3515 3516 return true; 3517 } 3518 3519 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3520 const FeatureBitset &FBS, 3521 unsigned VariantID = 0); 3522 3523 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3524 OperandVector &Operands, 3525 MCStreamer &Out, 3526 uint64_t &ErrorInfo, 3527 bool MatchingInlineAsm) { 3528 MCInst Inst; 3529 unsigned Result = Match_Success; 3530 for (auto Variant : getMatchedVariants()) { 3531 uint64_t EI; 3532 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3533 Variant); 3534 // We order match statuses from least to most specific. 
We use most specific 3535 // status as resulting 3536 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3537 if ((R == Match_Success) || 3538 (R == Match_PreferE32) || 3539 (R == Match_MissingFeature && Result != Match_PreferE32) || 3540 (R == Match_InvalidOperand && Result != Match_MissingFeature 3541 && Result != Match_PreferE32) || 3542 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3543 && Result != Match_MissingFeature 3544 && Result != Match_PreferE32)) { 3545 Result = R; 3546 ErrorInfo = EI; 3547 } 3548 if (R == Match_Success) 3549 break; 3550 } 3551 3552 switch (Result) { 3553 default: break; 3554 case Match_Success: 3555 if (!validateInstruction(Inst, IDLoc, Operands)) { 3556 return true; 3557 } 3558 Inst.setLoc(IDLoc); 3559 Out.EmitInstruction(Inst, getSTI()); 3560 return false; 3561 3562 case Match_MissingFeature: 3563 return Error(IDLoc, "instruction not supported on this GPU"); 3564 3565 case Match_MnemonicFail: { 3566 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3567 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3568 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3569 return Error(IDLoc, "invalid instruction" + Suggestion, 3570 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3571 } 3572 3573 case Match_InvalidOperand: { 3574 SMLoc ErrorLoc = IDLoc; 3575 if (ErrorInfo != ~0ULL) { 3576 if (ErrorInfo >= Operands.size()) { 3577 return Error(IDLoc, "too few operands for instruction"); 3578 } 3579 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3580 if (ErrorLoc == SMLoc()) 3581 ErrorLoc = IDLoc; 3582 } 3583 return Error(ErrorLoc, "invalid operand for instruction"); 3584 } 3585 3586 case Match_PreferE32: 3587 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3588 "should be encoded as e32"); 3589 } 3590 llvm_unreachable("Implement any new match types added!"); 3591 } 3592 3593 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3594 int64_t Tmp = -1; 3595 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3596 return true; 3597 } 3598 if (getParser().parseAbsoluteExpression(Tmp)) { 3599 return true; 3600 } 3601 Ret = static_cast<uint32_t>(Tmp); 3602 return false; 3603 } 3604 3605 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3606 uint32_t &Minor) { 3607 if (ParseAsAbsoluteExpression(Major)) 3608 return TokError("invalid major version"); 3609 3610 if (getLexer().isNot(AsmToken::Comma)) 3611 return TokError("minor version number required, comma expected"); 3612 Lex(); 3613 3614 if (ParseAsAbsoluteExpression(Minor)) 3615 return TokError("invalid minor version"); 3616 3617 return false; 3618 } 3619 3620 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3621 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3622 return TokError("directive only supported for amdgcn architecture"); 3623 3624 std::string Target; 3625 3626 SMLoc TargetStart = getTok().getLoc(); 3627 if (getParser().parseEscapedString(Target)) 3628 return true; 3629 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3630 3631 std::string ExpectedTarget; 3632 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3633 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3634 3635 if (Target != ExpectedTargetOS.str()) 3636 return getParser().Error(TargetRange.Start, "target must match options", 3637 TargetRange); 3638 3639 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3640 return false; 3641 } 3642 3643 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3644 return getParser().Error(Range.Start, "value out of range", Range); 3645 } 3646 3647 bool AMDGPUAsmParser::calculateGPRBlocks( 3648 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3649 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3650 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3651 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3652 // TODO(scott.linder): These calculations are duplicated from 3653 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3654 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3655 3656 unsigned NumVGPRs = NextFreeVGPR; 3657 unsigned NumSGPRs = NextFreeSGPR; 3658 3659 if (Version.Major >= 10) 3660 NumSGPRs = 0; 3661 else { 3662 unsigned MaxAddressableNumSGPRs = 3663 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3664 3665 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3666 NumSGPRs > MaxAddressableNumSGPRs) 3667 return OutOfRangeError(SGPRRange); 3668 3669 NumSGPRs += 3670 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3671 3672 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3673 NumSGPRs > MaxAddressableNumSGPRs) 3674 return OutOfRangeError(SGPRRange); 3675 3676 if (Features.test(FeatureSGPRInitBug)) 3677 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3678 } 3679 3680 VGPRBlocks = 3681 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3682 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3683 3684 return false; 3685 } 3686 3687 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3688 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3689 return TokError("directive only supported for amdgcn architecture"); 3690 3691 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3692 return TokError("directive only supported for amdhsa OS"); 3693 3694 StringRef KernelName; 3695 if (getParser().parseIdentifier(KernelName)) 3696 return true; 3697 3698 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3699 3700 StringSet<> Seen; 3701 3702 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3703 3704 SMRange VGPRRange; 3705 uint64_t NextFreeVGPR = 0; 3706 SMRange SGPRRange; 3707 uint64_t NextFreeSGPR = 0; 3708 unsigned UserSGPRCount = 0; 3709 bool ReserveVCC = true; 3710 bool ReserveFlatScr = true; 3711 bool ReserveXNACK = hasXNACK(); 3712 Optional<bool> EnableWavefrontSize32; 3713 3714 while (true) { 3715 while (getLexer().is(AsmToken::EndOfStatement)) 3716 Lex(); 3717 3718 if (getLexer().isNot(AsmToken::Identifier)) 3719 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3720 3721 StringRef ID = getTok().getIdentifier(); 3722 SMRange IDRange = getTok().getLocRange(); 3723 Lex(); 3724 3725 if (ID == ".end_amdhsa_kernel") 3726 break; 3727 3728 if (Seen.find(ID) != Seen.end()) 3729 return TokError(".amdhsa_ directives cannot be repeated"); 3730 Seen.insert(ID); 3731 3732 SMLoc ValStart = getTok().getLoc(); 3733 int64_t IVal; 3734 if (getParser().parseAbsoluteExpression(IVal)) 3735 return true; 3736 SMLoc ValEnd = getTok().getLoc(); 3737 SMRange ValRange = SMRange(ValStart, ValEnd); 3738 3739 if (IVal < 0) 3740 return OutOfRangeError(ValRange); 3741 3742 uint64_t Val = IVal; 3743 3744 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3745 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3746 return OutOfRangeError(RANGE); \ 3747 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3748 3749 if (ID == 
".amdhsa_group_segment_fixed_size") { 3750 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3751 return OutOfRangeError(ValRange); 3752 KD.group_segment_fixed_size = Val; 3753 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3754 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3755 return OutOfRangeError(ValRange); 3756 KD.private_segment_fixed_size = Val; 3757 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3758 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3759 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3760 Val, ValRange); 3761 if (Val) 3762 UserSGPRCount += 4; 3763 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3764 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3765 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3766 ValRange); 3767 if (Val) 3768 UserSGPRCount += 2; 3769 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3770 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3771 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3772 ValRange); 3773 if (Val) 3774 UserSGPRCount += 2; 3775 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3776 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3777 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3778 Val, ValRange); 3779 if (Val) 3780 UserSGPRCount += 2; 3781 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3782 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3783 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3784 ValRange); 3785 if (Val) 3786 UserSGPRCount += 2; 3787 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3788 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3789 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3790 ValRange); 3791 if (Val) 3792 UserSGPRCount += 2; 3793 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3794 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3795 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3796 Val, ValRange); 3797 if (Val) 3798 UserSGPRCount += 1; 3799 } else if (ID == ".amdhsa_wavefront_size32") { 3800 if (IVersion.Major < 10) 3801 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3802 IDRange); 3803 EnableWavefrontSize32 = Val; 3804 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3805 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3806 Val, ValRange); 3807 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3808 PARSE_BITS_ENTRY( 3809 KD.compute_pgm_rsrc2, 3810 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3811 ValRange); 3812 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3813 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3814 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3815 ValRange); 3816 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3817 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3818 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3819 ValRange); 3820 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3821 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3822 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3823 ValRange); 3824 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3825 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3826 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3827 ValRange); 3828 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3829 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3830 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3831 ValRange); 3832 } else if (ID == ".amdhsa_next_free_vgpr") { 3833 VGPRRange = ValRange; 3834 NextFreeVGPR = Val; 3835 } else if (ID == 
".amdhsa_next_free_sgpr") { 3836 SGPRRange = ValRange; 3837 NextFreeSGPR = Val; 3838 } else if (ID == ".amdhsa_reserve_vcc") { 3839 if (!isUInt<1>(Val)) 3840 return OutOfRangeError(ValRange); 3841 ReserveVCC = Val; 3842 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3843 if (IVersion.Major < 7) 3844 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3845 IDRange); 3846 if (!isUInt<1>(Val)) 3847 return OutOfRangeError(ValRange); 3848 ReserveFlatScr = Val; 3849 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3850 if (IVersion.Major < 8) 3851 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3852 IDRange); 3853 if (!isUInt<1>(Val)) 3854 return OutOfRangeError(ValRange); 3855 ReserveXNACK = Val; 3856 } else if (ID == ".amdhsa_float_round_mode_32") { 3857 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3858 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3859 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3860 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3861 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3862 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3863 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3864 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3865 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3866 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3867 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3868 ValRange); 3869 } else if (ID == ".amdhsa_dx10_clamp") { 3870 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3871 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3872 } else if (ID == ".amdhsa_ieee_mode") { 3873 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3874 Val, ValRange); 3875 } else if (ID == ".amdhsa_fp16_overflow") { 3876 if (IVersion.Major < 9) 3877 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3878 IDRange); 3879 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3880 ValRange); 3881 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3882 if (IVersion.Major < 10) 3883 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3884 IDRange); 3885 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3886 ValRange); 3887 } else if (ID == ".amdhsa_memory_ordered") { 3888 if (IVersion.Major < 10) 3889 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3890 IDRange); 3891 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3892 ValRange); 3893 } else if (ID == ".amdhsa_forward_progress") { 3894 if (IVersion.Major < 10) 3895 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3896 IDRange); 3897 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3898 ValRange); 3899 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3900 PARSE_BITS_ENTRY( 3901 KD.compute_pgm_rsrc2, 3902 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3903 ValRange); 3904 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3905 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3906 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3907 Val, ValRange); 3908 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3909 PARSE_BITS_ENTRY( 3910 KD.compute_pgm_rsrc2, 3911 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3912 ValRange); 3913 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3914 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3915 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3916 Val, ValRange); 3917 } else if 
(ID == ".amdhsa_exception_fp_ieee_underflow") { 3918 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3919 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3920 Val, ValRange); 3921 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3922 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3923 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3924 Val, ValRange); 3925 } else if (ID == ".amdhsa_exception_int_div_zero") { 3926 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3927 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3928 Val, ValRange); 3929 } else { 3930 return getParser().Error(IDRange.Start, 3931 "unknown .amdhsa_kernel directive", IDRange); 3932 } 3933 3934 #undef PARSE_BITS_ENTRY 3935 } 3936 3937 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3938 return TokError(".amdhsa_next_free_vgpr directive is required"); 3939 3940 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3941 return TokError(".amdhsa_next_free_sgpr directive is required"); 3942 3943 unsigned VGPRBlocks; 3944 unsigned SGPRBlocks; 3945 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3946 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3947 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3948 SGPRBlocks)) 3949 return true; 3950 3951 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3952 VGPRBlocks)) 3953 return OutOfRangeError(VGPRRange); 3954 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3955 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3956 3957 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3958 SGPRBlocks)) 3959 return OutOfRangeError(SGPRRange); 3960 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3961 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3962 SGPRBlocks); 3963 3964 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3965 return TokError("too many user SGPRs enabled"); 3966 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3967 UserSGPRCount); 3968 3969 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3970 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3971 ReserveFlatScr, ReserveXNACK); 3972 return false; 3973 } 3974 3975 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3976 uint32_t Major; 3977 uint32_t Minor; 3978 3979 if (ParseDirectiveMajorMinor(Major, Minor)) 3980 return true; 3981 3982 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3983 return false; 3984 } 3985 3986 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3987 uint32_t Major; 3988 uint32_t Minor; 3989 uint32_t Stepping; 3990 StringRef VendorName; 3991 StringRef ArchName; 3992 3993 // If this directive has no arguments, then use the ISA version for the 3994 // targeted GPU. 
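  // As an illustrative sketch (the version numbers below are examples, not
  // requirements), both of these forms are accepted:
  //   .hsa_code_object_isa
  //   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"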
3995 if (getLexer().is(AsmToken::EndOfStatement)) { 3996 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3997 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3998 ISA.Stepping, 3999 "AMD", "AMDGPU"); 4000 return false; 4001 } 4002 4003 if (ParseDirectiveMajorMinor(Major, Minor)) 4004 return true; 4005 4006 if (getLexer().isNot(AsmToken::Comma)) 4007 return TokError("stepping version number required, comma expected"); 4008 Lex(); 4009 4010 if (ParseAsAbsoluteExpression(Stepping)) 4011 return TokError("invalid stepping version"); 4012 4013 if (getLexer().isNot(AsmToken::Comma)) 4014 return TokError("vendor name required, comma expected"); 4015 Lex(); 4016 4017 if (getLexer().isNot(AsmToken::String)) 4018 return TokError("invalid vendor name"); 4019 4020 VendorName = getLexer().getTok().getStringContents(); 4021 Lex(); 4022 4023 if (getLexer().isNot(AsmToken::Comma)) 4024 return TokError("arch name required, comma expected"); 4025 Lex(); 4026 4027 if (getLexer().isNot(AsmToken::String)) 4028 return TokError("invalid arch name"); 4029 4030 ArchName = getLexer().getTok().getStringContents(); 4031 Lex(); 4032 4033 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4034 VendorName, ArchName); 4035 return false; 4036 } 4037 4038 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4039 amd_kernel_code_t &Header) { 4040 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4041 // assembly for backwards compatibility. 4042 if (ID == "max_scratch_backing_memory_byte_size") { 4043 Parser.eatToEndOfStatement(); 4044 return false; 4045 } 4046 4047 SmallString<40> ErrStr; 4048 raw_svector_ostream Err(ErrStr); 4049 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4050 return TokError(Err.str()); 4051 } 4052 Lex(); 4053 4054 if (ID == "enable_wavefront_size32") { 4055 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4056 if (!isGFX10()) 4057 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4058 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4059 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4060 } else { 4061 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4062 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4063 } 4064 } 4065 4066 if (ID == "wavefront_size") { 4067 if (Header.wavefront_size == 5) { 4068 if (!isGFX10()) 4069 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4070 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4071 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4072 } else if (Header.wavefront_size == 6) { 4073 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4074 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4075 } 4076 } 4077 4078 if (ID == "enable_wgp_mode") { 4079 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4080 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4081 } 4082 4083 if (ID == "enable_mem_ordered") { 4084 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4085 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4086 } 4087 4088 if (ID == "enable_fwd_progress") { 4089 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4090 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4091 } 4092 4093 return false; 4094 } 4095 4096 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4097 amd_kernel_code_t Header; 4098 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4099 4100 while (true) { 4101 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4102 // will set the current token to EndOfStatement. 4103 while(getLexer().is(AsmToken::EndOfStatement)) 4104 Lex(); 4105 4106 if (getLexer().isNot(AsmToken::Identifier)) 4107 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4108 4109 StringRef ID = getLexer().getTok().getIdentifier(); 4110 Lex(); 4111 4112 if (ID == ".end_amd_kernel_code_t") 4113 break; 4114 4115 if (ParseAMDKernelCodeTValue(ID, Header)) 4116 return true; 4117 } 4118 4119 getTargetStreamer().EmitAMDKernelCodeT(Header); 4120 4121 return false; 4122 } 4123 4124 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4125 if (getLexer().isNot(AsmToken::Identifier)) 4126 return TokError("expected symbol name"); 4127 4128 StringRef KernelName = Parser.getTok().getString(); 4129 4130 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4131 ELF::STT_AMDGPU_HSA_KERNEL); 4132 Lex(); 4133 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4134 KernelScope.initialize(getContext()); 4135 return false; 4136 } 4137 4138 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4139 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4140 return Error(getParser().getTok().getLoc(), 4141 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4142 "architectures"); 4143 } 4144 4145 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4146 4147 std::string ISAVersionStringFromSTI; 4148 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4149 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4150 4151 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4152 return Error(getParser().getTok().getLoc(), 4153 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4154 "arguments specified through the command line"); 4155 } 4156 4157 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4158 Lex(); 4159 4160 return false; 4161 } 4162 4163 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4164 const char *AssemblerDirectiveBegin; 4165 const char *AssemblerDirectiveEnd; 4166 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4167 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4168 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4169 HSAMD::V3::AssemblerDirectiveEnd) 4170 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4171 HSAMD::AssemblerDirectiveEnd); 4172 4173 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4174 return Error(getParser().getTok().getLoc(), 4175 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4176 "not available on non-amdhsa OSes")).str()); 4177 } 4178 4179 std::string HSAMetadataString; 4180 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4181 HSAMetadataString)) 4182 return true; 4183 4184 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4185 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4186 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4187 } else { 4188 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4189 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4190 } 4191 4192 return false; 4193 } 4194 4195 /// Common code to parse out a block of text (typically YAML) between start and 4196 /// end directives. 
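/// As a rough sketch of the expected input (the directive names are supplied
/// by the caller, e.g. the HSA or PAL metadata directives):
///   <begin directive>
///     ... text collected verbatim (typically YAML) ...
///   <end directive>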
4197 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4198 const char *AssemblerDirectiveEnd, 4199 std::string &CollectString) { 4200 4201 raw_string_ostream CollectStream(CollectString); 4202 4203 getLexer().setSkipSpace(false); 4204 4205 bool FoundEnd = false; 4206 while (!getLexer().is(AsmToken::Eof)) { 4207 while (getLexer().is(AsmToken::Space)) { 4208 CollectStream << getLexer().getTok().getString(); 4209 Lex(); 4210 } 4211 4212 if (getLexer().is(AsmToken::Identifier)) { 4213 StringRef ID = getLexer().getTok().getIdentifier(); 4214 if (ID == AssemblerDirectiveEnd) { 4215 Lex(); 4216 FoundEnd = true; 4217 break; 4218 } 4219 } 4220 4221 CollectStream << Parser.parseStringToEndOfStatement() 4222 << getContext().getAsmInfo()->getSeparatorString(); 4223 4224 Parser.eatToEndOfStatement(); 4225 } 4226 4227 getLexer().setSkipSpace(true); 4228 4229 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4230 return TokError(Twine("expected directive ") + 4231 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4232 } 4233 4234 CollectStream.flush(); 4235 return false; 4236 } 4237 4238 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4239 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4240 std::string String; 4241 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4242 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4243 return true; 4244 4245 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4246 if (!PALMetadata->setFromString(String)) 4247 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4248 return false; 4249 } 4250 4251 /// Parse the assembler directive for old linear-format PAL metadata. 4252 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4253 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4254 return Error(getParser().getTok().getLoc(), 4255 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4256 "not available on non-amdpal OSes")).str()); 4257 } 4258 4259 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4260 PALMetadata->setLegacy(); 4261 for (;;) { 4262 uint32_t Key, Value; 4263 if (ParseAsAbsoluteExpression(Key)) { 4264 return TokError(Twine("invalid value in ") + 4265 Twine(PALMD::AssemblerDirective)); 4266 } 4267 if (getLexer().isNot(AsmToken::Comma)) { 4268 return TokError(Twine("expected an even number of values in ") + 4269 Twine(PALMD::AssemblerDirective)); 4270 } 4271 Lex(); 4272 if (ParseAsAbsoluteExpression(Value)) { 4273 return TokError(Twine("invalid value in ") + 4274 Twine(PALMD::AssemblerDirective)); 4275 } 4276 PALMetadata->setRegister(Key, Value); 4277 if (getLexer().isNot(AsmToken::Comma)) 4278 break; 4279 Lex(); 4280 } 4281 return false; 4282 } 4283 4284 /// ParseDirectiveAMDGPULDS 4285 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4286 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4287 if (getParser().checkForValidSection()) 4288 return true; 4289 4290 StringRef Name; 4291 SMLoc NameLoc = getLexer().getLoc(); 4292 if (getParser().parseIdentifier(Name)) 4293 return TokError("expected identifier in directive"); 4294 4295 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4296 if (parseToken(AsmToken::Comma, "expected ','")) 4297 return true; 4298 4299 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4300 4301 int64_t Size; 4302 SMLoc SizeLoc = getLexer().getLoc(); 4303 if (getParser().parseAbsoluteExpression(Size)) 4304 return true; 4305 if (Size < 0) 4306 return 
Error(SizeLoc, "size must be non-negative"); 4307 if (Size > LocalMemorySize) 4308 return Error(SizeLoc, "size is too large"); 4309 4310 int64_t Align = 4; 4311 if (getLexer().is(AsmToken::Comma)) { 4312 Lex(); 4313 SMLoc AlignLoc = getLexer().getLoc(); 4314 if (getParser().parseAbsoluteExpression(Align)) 4315 return true; 4316 if (Align < 0 || !isPowerOf2_64(Align)) 4317 return Error(AlignLoc, "alignment must be a power of two"); 4318 4319 // Alignment larger than the size of LDS is possible in theory, as long 4320 // as the linker manages to place the symbol at address 0, but we do want 4321 // to make sure the alignment fits nicely into a 32-bit integer. 4322 if (Align >= 1u << 31) 4323 return Error(AlignLoc, "alignment is too large"); 4324 } 4325 4326 if (parseToken(AsmToken::EndOfStatement, 4327 "unexpected token in '.amdgpu_lds' directive")) 4328 return true; 4329 4330 Symbol->redefineIfPossible(); 4331 if (!Symbol->isUndefined()) 4332 return Error(NameLoc, "invalid symbol redefinition"); 4333 4334 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4335 return false; 4336 } 4337 4338 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4339 StringRef IDVal = DirectiveID.getString(); 4340 4341 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4342 if (IDVal == ".amdgcn_target") 4343 return ParseDirectiveAMDGCNTarget(); 4344 4345 if (IDVal == ".amdhsa_kernel") 4346 return ParseDirectiveAMDHSAKernel(); 4347 4348 // TODO: Restructure/combine with PAL metadata directive. 4349 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4350 return ParseDirectiveHSAMetadata(); 4351 } else { 4352 if (IDVal == ".hsa_code_object_version") 4353 return ParseDirectiveHSACodeObjectVersion(); 4354 4355 if (IDVal == ".hsa_code_object_isa") 4356 return ParseDirectiveHSACodeObjectISA(); 4357 4358 if (IDVal == ".amd_kernel_code_t") 4359 return ParseDirectiveAMDKernelCodeT(); 4360 4361 if (IDVal == ".amdgpu_hsa_kernel") 4362 return ParseDirectiveAMDGPUHsaKernel(); 4363 4364 if (IDVal == ".amd_amdgpu_isa") 4365 return ParseDirectiveISAVersion(); 4366 4367 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4368 return ParseDirectiveHSAMetadata(); 4369 } 4370 4371 if (IDVal == ".amdgpu_lds") 4372 return ParseDirectiveAMDGPULDS(); 4373 4374 if (IDVal == PALMD::AssemblerDirectiveBegin) 4375 return ParseDirectivePALMetadataBegin(); 4376 4377 if (IDVal == PALMD::AssemblerDirective) 4378 return ParseDirectivePALMetadata(); 4379 4380 return true; 4381 } 4382 4383 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4384 unsigned RegNo) const { 4385 4386 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4387 R.isValid(); ++R) { 4388 if (*R == RegNo) 4389 return isGFX9() || isGFX10(); 4390 } 4391 4392 // GFX10 has 2 more SGPRs 104 and 105.
4393 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4394 R.isValid(); ++R) { 4395 if (*R == RegNo) 4396 return hasSGPR104_SGPR105(); 4397 } 4398 4399 switch (RegNo) { 4400 case AMDGPU::SRC_SHARED_BASE: 4401 case AMDGPU::SRC_SHARED_LIMIT: 4402 case AMDGPU::SRC_PRIVATE_BASE: 4403 case AMDGPU::SRC_PRIVATE_LIMIT: 4404 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4405 return !isCI() && !isSI() && !isVI(); 4406 case AMDGPU::TBA: 4407 case AMDGPU::TBA_LO: 4408 case AMDGPU::TBA_HI: 4409 case AMDGPU::TMA: 4410 case AMDGPU::TMA_LO: 4411 case AMDGPU::TMA_HI: 4412 return !isGFX9() && !isGFX10(); 4413 case AMDGPU::XNACK_MASK: 4414 case AMDGPU::XNACK_MASK_LO: 4415 case AMDGPU::XNACK_MASK_HI: 4416 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4417 case AMDGPU::SGPR_NULL: 4418 return isGFX10(); 4419 default: 4420 break; 4421 } 4422 4423 if (isCI()) 4424 return true; 4425 4426 if (isSI() || isGFX10()) { 4427 // No flat_scr on SI. 4428 // On GFX10 flat scratch is not a valid register operand and can only be 4429 // accessed with s_setreg/s_getreg. 4430 switch (RegNo) { 4431 case AMDGPU::FLAT_SCR: 4432 case AMDGPU::FLAT_SCR_LO: 4433 case AMDGPU::FLAT_SCR_HI: 4434 return false; 4435 default: 4436 return true; 4437 } 4438 } 4439 4440 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4441 // SI/CI have. 4442 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4443 R.isValid(); ++R) { 4444 if (*R == RegNo) 4445 return hasSGPR102_SGPR103(); 4446 } 4447 4448 return true; 4449 } 4450 4451 OperandMatchResultTy 4452 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4453 OperandMode Mode) { 4454 // Try to parse with a custom parser 4455 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4456 4457 // If we successfully parsed the operand or if there was an error parsing, 4458 // we are done. 4459 // 4460 // If we are parsing after we reach EndOfStatement then this means we 4461 // are appending default values to the Operands list. This is only done 4462 // by a custom parser, so we shouldn't continue on to the generic parsing. 4463 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4464 getLexer().is(AsmToken::EndOfStatement)) 4465 return ResTy; 4466 4467 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4468 unsigned Prefix = Operands.size(); 4469 SMLoc LBraceLoc = getTok().getLoc(); 4470 Parser.Lex(); // eat the '[' 4471 4472 for (;;) { 4473 ResTy = parseReg(Operands); 4474 if (ResTy != MatchOperand_Success) 4475 return ResTy; 4476 4477 if (getLexer().is(AsmToken::RBrac)) 4478 break; 4479 4480 if (getLexer().isNot(AsmToken::Comma)) 4481 return MatchOperand_ParseFail; 4482 Parser.Lex(); 4483 } 4484 4485 if (Operands.size() - Prefix > 1) { 4486 Operands.insert(Operands.begin() + Prefix, 4487 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4488 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4489 getTok().getLoc())); 4490 } 4491 4492 Parser.Lex(); // eat the ']' 4493 return MatchOperand_Success; 4494 } 4495 4496 return parseRegOrImm(Operands); 4497 } 4498 4499 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4500 // Clear any forced encodings from the previous instruction.
4501 setForcedEncodingSize(0); 4502 setForcedDPP(false); 4503 setForcedSDWA(false); 4504 4505 if (Name.endswith("_e64")) { 4506 setForcedEncodingSize(64); 4507 return Name.substr(0, Name.size() - 4); 4508 } else if (Name.endswith("_e32")) { 4509 setForcedEncodingSize(32); 4510 return Name.substr(0, Name.size() - 4); 4511 } else if (Name.endswith("_dpp")) { 4512 setForcedDPP(true); 4513 return Name.substr(0, Name.size() - 4); 4514 } else if (Name.endswith("_sdwa")) { 4515 setForcedSDWA(true); 4516 return Name.substr(0, Name.size() - 5); 4517 } 4518 return Name; 4519 } 4520 4521 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4522 StringRef Name, 4523 SMLoc NameLoc, OperandVector &Operands) { 4524 // Add the instruction mnemonic 4525 Name = parseMnemonicSuffix(Name); 4526 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4527 4528 bool IsMIMG = Name.startswith("image_"); 4529 4530 while (!getLexer().is(AsmToken::EndOfStatement)) { 4531 OperandMode Mode = OperandMode_Default; 4532 if (IsMIMG && isGFX10() && Operands.size() == 2) 4533 Mode = OperandMode_NSA; 4534 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4535 4536 // Eat the comma or space if there is one. 4537 if (getLexer().is(AsmToken::Comma)) 4538 Parser.Lex(); 4539 4540 switch (Res) { 4541 case MatchOperand_Success: break; 4542 case MatchOperand_ParseFail: 4543 // FIXME: use real operand location rather than the current location. 4544 Error(getLexer().getLoc(), "failed parsing operand."); 4545 while (!getLexer().is(AsmToken::EndOfStatement)) { 4546 Parser.Lex(); 4547 } 4548 return true; 4549 case MatchOperand_NoMatch: 4550 // FIXME: use real operand location rather than the current location. 4551 Error(getLexer().getLoc(), "not a valid operand."); 4552 while (!getLexer().is(AsmToken::EndOfStatement)) { 4553 Parser.Lex(); 4554 } 4555 return true; 4556 } 4557 } 4558 4559 return false; 4560 } 4561 4562 //===----------------------------------------------------------------------===// 4563 // Utility functions 4564 //===----------------------------------------------------------------------===// 4565 4566 OperandMatchResultTy 4567 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4568 4569 if (!trySkipId(Prefix, AsmToken::Colon)) 4570 return MatchOperand_NoMatch; 4571 4572 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4573 } 4574 4575 OperandMatchResultTy 4576 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4577 AMDGPUOperand::ImmTy ImmTy, 4578 bool (*ConvertResult)(int64_t&)) { 4579 SMLoc S = getLoc(); 4580 int64_t Value = 0; 4581 4582 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4583 if (Res != MatchOperand_Success) 4584 return Res; 4585 4586 if (ConvertResult && !ConvertResult(Value)) { 4587 Error(S, "invalid " + StringRef(Prefix) + " value."); 4588 } 4589 4590 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4591 return MatchOperand_Success; 4592 } 4593 4594 OperandMatchResultTy 4595 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4596 OperandVector &Operands, 4597 AMDGPUOperand::ImmTy ImmTy, 4598 bool (*ConvertResult)(int64_t&)) { 4599 SMLoc S = getLoc(); 4600 if (!trySkipId(Prefix, AsmToken::Colon)) 4601 return MatchOperand_NoMatch; 4602 4603 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4604 return MatchOperand_ParseFail; 4605 4606 unsigned Val = 0; 4607 const unsigned MaxSize = 4; 4608 4609 // FIXME: How to verify the number of elements matches the number of src 4610 // operands? 4611 for (int I = 0; ; ++I) { 4612 int64_t Op; 4613 SMLoc Loc = getLoc(); 4614 if (!parseExpr(Op)) 4615 return MatchOperand_ParseFail; 4616 4617 if (Op != 0 && Op != 1) { 4618 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4619 return MatchOperand_ParseFail; 4620 } 4621 4622 Val |= (Op << I); 4623 4624 if (trySkipToken(AsmToken::RBrac)) 4625 break; 4626 4627 if (I + 1 == MaxSize) { 4628 Error(getLoc(), "expected a closing square bracket"); 4629 return MatchOperand_ParseFail; 4630 } 4631 4632 if (!skipToken(AsmToken::Comma, "expected a comma")) 4633 return MatchOperand_ParseFail; 4634 } 4635 4636 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4637 return MatchOperand_Success; 4638 } 4639 4640 OperandMatchResultTy 4641 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4642 AMDGPUOperand::ImmTy ImmTy) { 4643 int64_t Bit = 0; 4644 SMLoc S = Parser.getTok().getLoc(); 4645 4646 // We are at the end of the statement, and this is a default argument, so 4647 // use a default value. 
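  // Illustrative example (assuming Name were "glc"): the token "glc" sets the
  // bit, "noglc" clears it, and omitting the operand keeps the default below.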
4648 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4649 switch(getLexer().getKind()) { 4650 case AsmToken::Identifier: { 4651 StringRef Tok = Parser.getTok().getString(); 4652 if (Tok == Name) { 4653 if (Tok == "r128" && isGFX9()) 4654 Error(S, "r128 modifier is not supported on this GPU"); 4655 if (Tok == "a16" && !isGFX9() && !isGFX10()) 4656 Error(S, "a16 modifier is not supported on this GPU"); 4657 Bit = 1; 4658 Parser.Lex(); 4659 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4660 Bit = 0; 4661 Parser.Lex(); 4662 } else { 4663 return MatchOperand_NoMatch; 4664 } 4665 break; 4666 } 4667 default: 4668 return MatchOperand_NoMatch; 4669 } 4670 } 4671 4672 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4673 return MatchOperand_ParseFail; 4674 4675 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4676 return MatchOperand_Success; 4677 } 4678 4679 static void addOptionalImmOperand( 4680 MCInst& Inst, const OperandVector& Operands, 4681 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4682 AMDGPUOperand::ImmTy ImmT, 4683 int64_t Default = 0) { 4684 auto i = OptionalIdx.find(ImmT); 4685 if (i != OptionalIdx.end()) { 4686 unsigned Idx = i->second; 4687 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4688 } else { 4689 Inst.addOperand(MCOperand::createImm(Default)); 4690 } 4691 } 4692 4693 OperandMatchResultTy 4694 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4695 if (getLexer().isNot(AsmToken::Identifier)) { 4696 return MatchOperand_NoMatch; 4697 } 4698 StringRef Tok = Parser.getTok().getString(); 4699 if (Tok != Prefix) { 4700 return MatchOperand_NoMatch; 4701 } 4702 4703 Parser.Lex(); 4704 if (getLexer().isNot(AsmToken::Colon)) { 4705 return MatchOperand_ParseFail; 4706 } 4707 4708 Parser.Lex(); 4709 if (getLexer().isNot(AsmToken::Identifier)) { 4710 return MatchOperand_ParseFail; 4711 } 4712 4713 Value = Parser.getTok().getString(); 4714 return MatchOperand_Success; 4715 } 4716 4717 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4718 // values to live in a joint format operand in the MCInst encoding. 4719 OperandMatchResultTy 4720 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4721 SMLoc S = Parser.getTok().getLoc(); 4722 int64_t Dfmt = 0, Nfmt = 0; 4723 // dfmt and nfmt can appear in either order, and each is optional. 
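  // For example (values are illustrative only), both "dfmt:15, nfmt:2" and
  // "nfmt:2, dfmt:15" are accepted here, and either field may be omitted.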
4724 bool GotDfmt = false, GotNfmt = false; 4725 while (!GotDfmt || !GotNfmt) { 4726 if (!GotDfmt) { 4727 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4728 if (Res != MatchOperand_NoMatch) { 4729 if (Res != MatchOperand_Success) 4730 return Res; 4731 if (Dfmt >= 16) { 4732 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4733 return MatchOperand_ParseFail; 4734 } 4735 GotDfmt = true; 4736 Parser.Lex(); 4737 continue; 4738 } 4739 } 4740 if (!GotNfmt) { 4741 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4742 if (Res != MatchOperand_NoMatch) { 4743 if (Res != MatchOperand_Success) 4744 return Res; 4745 if (Nfmt >= 8) { 4746 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4747 return MatchOperand_ParseFail; 4748 } 4749 GotNfmt = true; 4750 Parser.Lex(); 4751 continue; 4752 } 4753 } 4754 break; 4755 } 4756 if (!GotDfmt && !GotNfmt) 4757 return MatchOperand_NoMatch; 4758 auto Format = Dfmt | Nfmt << 4; 4759 Operands.push_back( 4760 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4761 return MatchOperand_Success; 4762 } 4763 4764 //===----------------------------------------------------------------------===// 4765 // ds 4766 //===----------------------------------------------------------------------===// 4767 4768 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4769 const OperandVector &Operands) { 4770 OptionalImmIndexMap OptionalIdx; 4771 4772 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4773 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4774 4775 // Add the register arguments 4776 if (Op.isReg()) { 4777 Op.addRegOperands(Inst, 1); 4778 continue; 4779 } 4780 4781 // Handle optional arguments 4782 OptionalIdx[Op.getImmTy()] = i; 4783 } 4784 4785 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4788 4789 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4790 } 4791 4792 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4793 bool IsGdsHardcoded) { 4794 OptionalImmIndexMap OptionalIdx; 4795 4796 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4797 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4798 4799 // Add the register arguments 4800 if (Op.isReg()) { 4801 Op.addRegOperands(Inst, 1); 4802 continue; 4803 } 4804 4805 if (Op.isToken() && Op.getToken() == "gds") { 4806 IsGdsHardcoded = true; 4807 continue; 4808 } 4809 4810 // Handle optional arguments 4811 OptionalIdx[Op.getImmTy()] = i; 4812 } 4813 4814 AMDGPUOperand::ImmTy OffsetType = 4815 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4816 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4817 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4818 AMDGPUOperand::ImmTyOffset; 4819 4820 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4821 4822 if (!IsGdsHardcoded) { 4823 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4824 } 4825 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4826 } 4827 4828 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4829 OptionalImmIndexMap OptionalIdx; 4830 4831 unsigned OperandIdx[4]; 4832 unsigned EnMask = 0; 4833 int SrcIdx = 0; 4834 4835 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4836 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4837 4838 // Add the register arguments 4839 if (Op.isReg()) { 4840 assert(SrcIdx < 4); 4841 OperandIdx[SrcIdx] = Inst.size(); 4842 Op.addRegOperands(Inst, 1); 4843 ++SrcIdx; 4844 continue; 4845 } 4846 4847 if (Op.isOff()) { 4848 assert(SrcIdx < 4); 4849 OperandIdx[SrcIdx] = Inst.size(); 4850 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4851 ++SrcIdx; 4852 continue; 4853 } 4854 4855 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4856 Op.addImmOperands(Inst, 1); 4857 continue; 4858 } 4859 4860 if (Op.isToken() && Op.getToken() == "done") 4861 continue; 4862 4863 // Handle optional arguments 4864 OptionalIdx[Op.getImmTy()] = i; 4865 } 4866 4867 assert(SrcIdx == 4); 4868 4869 bool Compr = false; 4870 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4871 Compr = true; 4872 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4873 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4874 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4875 } 4876 4877 for (auto i = 0; i < SrcIdx; ++i) { 4878 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4879 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4880 } 4881 } 4882 4883 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4884 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4885 4886 Inst.addOperand(MCOperand::createImm(EnMask)); 4887 } 4888 4889 //===----------------------------------------------------------------------===// 4890 // s_waitcnt 4891 //===----------------------------------------------------------------------===// 4892 4893 static bool 4894 encodeCnt( 4895 const AMDGPU::IsaVersion ISA, 4896 int64_t &IntVal, 4897 int64_t CntVal, 4898 bool Saturate, 4899 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4900 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4901 { 4902 bool Failed = false; 4903 4904 IntVal = encode(ISA, IntVal, CntVal); 4905 if (CntVal != decode(ISA, IntVal)) { 4906 if (Saturate) { 4907 IntVal = encode(ISA, IntVal, -1); 4908 } else { 4909 Failed = true; 4910 } 4911 } 4912 return Failed; 4913 } 4914 4915 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4916 4917 SMLoc CntLoc = getLoc(); 4918 StringRef CntName = getTokenStr(); 4919 4920 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4921 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4922 return false; 4923 4924 int64_t CntVal; 4925 SMLoc ValLoc = getLoc(); 4926 if (!parseExpr(CntVal)) 4927 return false; 4928 4929 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4930 4931 bool Failed = true; 4932 bool Sat = CntName.endswith("_sat"); 4933 4934 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4935 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4936 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4937 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4938 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4939 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4940 } else { 4941 Error(CntLoc, "invalid counter name " + CntName); 4942 return false; 4943 } 4944 4945 if (Failed) { 4946 Error(ValLoc, "too large value for " + CntName); 4947 return false; 4948 } 4949 4950 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4951 return false; 4952 4953 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4954 if (isToken(AsmToken::EndOfStatement)) { 4955 Error(getLoc(), "expected a counter name"); 4956 return false; 4957 } 4958 } 4959 4960 return true; 4961 } 4962 4963 OperandMatchResultTy 4964 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4965 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4966 int64_t Waitcnt = getWaitcntBitMask(ISA); 4967 SMLoc S = getLoc(); 4968 4969 // If parse failed, do not return error code 4970 // to avoid excessive error messages. 
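  // For illustration only, both operand forms are handled below, e.g.
  //   s_waitcnt vmcnt(0) lgkmcnt(0)   (symbolic counters)
  //   s_waitcnt 0                     (raw immediate expression)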
4971 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4972 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4973 } else { 4974 parseExpr(Waitcnt); 4975 } 4976 4977 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4978 return MatchOperand_Success; 4979 } 4980 4981 bool 4982 AMDGPUOperand::isSWaitCnt() const { 4983 return isImm(); 4984 } 4985 4986 //===----------------------------------------------------------------------===// 4987 // hwreg 4988 //===----------------------------------------------------------------------===// 4989 4990 bool 4991 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 4992 int64_t &Offset, 4993 int64_t &Width) { 4994 using namespace llvm::AMDGPU::Hwreg; 4995 4996 // The register may be specified by name or using a numeric code 4997 if (isToken(AsmToken::Identifier) && 4998 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 4999 HwReg.IsSymbolic = true; 5000 lex(); // skip message name 5001 } else if (!parseExpr(HwReg.Id)) { 5002 return false; 5003 } 5004 5005 if (trySkipToken(AsmToken::RParen)) 5006 return true; 5007 5008 // parse optional params 5009 return 5010 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5011 parseExpr(Offset) && 5012 skipToken(AsmToken::Comma, "expected a comma") && 5013 parseExpr(Width) && 5014 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5015 } 5016 5017 bool 5018 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5019 const int64_t Offset, 5020 const int64_t Width, 5021 const SMLoc Loc) { 5022 5023 using namespace llvm::AMDGPU::Hwreg; 5024 5025 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5026 Error(Loc, "specified hardware register is not supported on this GPU"); 5027 return false; 5028 } else if (!isValidHwreg(HwReg.Id)) { 5029 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5030 return false; 5031 } else if (!isValidHwregOffset(Offset)) { 5032 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5033 return false; 5034 } else if (!isValidHwregWidth(Width)) { 5035 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5036 return false; 5037 } 5038 return true; 5039 } 5040 5041 OperandMatchResultTy 5042 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5043 using namespace llvm::AMDGPU::Hwreg; 5044 5045 int64_t ImmVal = 0; 5046 SMLoc Loc = getLoc(); 5047 5048 // If parse failed, do not return error code 5049 // to avoid excessive error messages. 
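  // For illustration only (register name and values are an example), either a
  // symbolic form such as hwreg(HW_REG_TRAPSTS, 0, 32) or a plain 16-bit
  // immediate is accepted below.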
5050 if (trySkipId("hwreg", AsmToken::LParen)) { 5051 OperandInfoTy HwReg(ID_UNKNOWN_); 5052 int64_t Offset = OFFSET_DEFAULT_; 5053 int64_t Width = WIDTH_DEFAULT_; 5054 if (parseHwregBody(HwReg, Offset, Width) && 5055 validateHwreg(HwReg, Offset, Width, Loc)) { 5056 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5057 } 5058 } else if (parseExpr(ImmVal)) { 5059 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5060 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5061 } 5062 5063 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5064 return MatchOperand_Success; 5065 } 5066 5067 bool AMDGPUOperand::isHwreg() const { 5068 return isImmTy(ImmTyHwreg); 5069 } 5070 5071 //===----------------------------------------------------------------------===// 5072 // sendmsg 5073 //===----------------------------------------------------------------------===// 5074 5075 bool 5076 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5077 OperandInfoTy &Op, 5078 OperandInfoTy &Stream) { 5079 using namespace llvm::AMDGPU::SendMsg; 5080 5081 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5082 Msg.IsSymbolic = true; 5083 lex(); // skip message name 5084 } else if (!parseExpr(Msg.Id)) { 5085 return false; 5086 } 5087 5088 if (trySkipToken(AsmToken::Comma)) { 5089 Op.IsDefined = true; 5090 if (isToken(AsmToken::Identifier) && 5091 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5092 lex(); // skip operation name 5093 } else if (!parseExpr(Op.Id)) { 5094 return false; 5095 } 5096 5097 if (trySkipToken(AsmToken::Comma)) { 5098 Stream.IsDefined = true; 5099 if (!parseExpr(Stream.Id)) 5100 return false; 5101 } 5102 } 5103 5104 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5105 } 5106 5107 bool 5108 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5109 const OperandInfoTy &Op, 5110 const OperandInfoTy &Stream, 5111 const SMLoc S) { 5112 using namespace llvm::AMDGPU::SendMsg; 5113 5114 // Validation strictness depends on whether the message is specified 5115 // in a symbolic or in a numeric form. In the latter case 5116 // only the encoding possibility is checked. 5117 bool Strict = Msg.IsSymbolic; 5118 5119 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5120 Error(S, "invalid message id"); 5121 return false; 5122 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5123 Error(S, Op.IsDefined ? 5124 "message does not support operations" : 5125 "missing message operation"); 5126 return false; 5127 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5128 Error(S, "invalid operation id"); 5129 return false; 5130 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5131 Error(S, "message operation does not support streams"); 5132 return false; 5133 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5134 Error(S, "invalid message stream id"); 5135 return false; 5136 } 5137 return true; 5138 } 5139 5140 OperandMatchResultTy 5141 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5142 using namespace llvm::AMDGPU::SendMsg; 5143 5144 int64_t ImmVal = 0; 5145 SMLoc Loc = getLoc(); 5146 5147 // If parse failed, do not return error code 5148 // to avoid excessive error messages.
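  // For illustration only (message and operation names are an example), either
  // a symbolic form such as sendmsg(MSG_GS, GS_OP_EMIT, 0) or a plain 16-bit
  // immediate is accepted below.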
5149 if (trySkipId("sendmsg", AsmToken::LParen)) { 5150 OperandInfoTy Msg(ID_UNKNOWN_); 5151 OperandInfoTy Op(OP_NONE_); 5152 OperandInfoTy Stream(STREAM_ID_NONE_); 5153 if (parseSendMsgBody(Msg, Op, Stream) && 5154 validateSendMsg(Msg, Op, Stream, Loc)) { 5155 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5156 } 5157 } else if (parseExpr(ImmVal)) { 5158 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5159 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5160 } 5161 5162 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5163 return MatchOperand_Success; 5164 } 5165 5166 bool AMDGPUOperand::isSendMsg() const { 5167 return isImmTy(ImmTySendMsg); 5168 } 5169 5170 //===----------------------------------------------------------------------===// 5171 // v_interp 5172 //===----------------------------------------------------------------------===// 5173 5174 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5175 if (getLexer().getKind() != AsmToken::Identifier) 5176 return MatchOperand_NoMatch; 5177 5178 StringRef Str = Parser.getTok().getString(); 5179 int Slot = StringSwitch<int>(Str) 5180 .Case("p10", 0) 5181 .Case("p20", 1) 5182 .Case("p0", 2) 5183 .Default(-1); 5184 5185 SMLoc S = Parser.getTok().getLoc(); 5186 if (Slot == -1) 5187 return MatchOperand_ParseFail; 5188 5189 Parser.Lex(); 5190 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5191 AMDGPUOperand::ImmTyInterpSlot)); 5192 return MatchOperand_Success; 5193 } 5194 5195 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5196 if (getLexer().getKind() != AsmToken::Identifier) 5197 return MatchOperand_NoMatch; 5198 5199 StringRef Str = Parser.getTok().getString(); 5200 if (!Str.startswith("attr")) 5201 return MatchOperand_NoMatch; 5202 5203 StringRef Chan = Str.take_back(2); 5204 int AttrChan = StringSwitch<int>(Chan) 5205 .Case(".x", 0) 5206 .Case(".y", 1) 5207 .Case(".z", 2) 5208 .Case(".w", 3) 5209 .Default(-1); 5210 if (AttrChan == -1) 5211 return MatchOperand_ParseFail; 5212 5213 Str = Str.drop_back(2).drop_front(4); 5214 5215 uint8_t Attr; 5216 if (Str.getAsInteger(10, Attr)) 5217 return MatchOperand_ParseFail; 5218 5219 SMLoc S = Parser.getTok().getLoc(); 5220 Parser.Lex(); 5221 if (Attr > 63) { 5222 Error(S, "out of bounds attr"); 5223 return MatchOperand_Success; 5224 } 5225 5226 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5227 5228 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5229 AMDGPUOperand::ImmTyInterpAttr)); 5230 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5231 AMDGPUOperand::ImmTyAttrChan)); 5232 return MatchOperand_Success; 5233 } 5234 5235 //===----------------------------------------------------------------------===// 5236 // exp 5237 //===----------------------------------------------------------------------===// 5238 5239 void AMDGPUAsmParser::errorExpTgt() { 5240 Error(Parser.getTok().getLoc(), "invalid exp target"); 5241 } 5242 5243 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5244 uint8_t &Val) { 5245 if (Str == "null") { 5246 Val = 9; 5247 return MatchOperand_Success; 5248 } 5249 5250 if (Str.startswith("mrt")) { 5251 Str = Str.drop_front(3); 5252 if (Str == "z") { // == mrtz 5253 Val = 8; 5254 return MatchOperand_Success; 5255 } 5256 5257 if (Str.getAsInteger(10, Val)) 5258 return MatchOperand_ParseFail; 5259 5260 if (Val > 7) 5261 errorExpTgt(); 5262 5263 return MatchOperand_Success; 5264 } 5265 5266 if (Str.startswith("pos")) 
{ 5267 Str = Str.drop_front(3); 5268 if (Str.getAsInteger(10, Val)) 5269 return MatchOperand_ParseFail; 5270 5271 if (Val > 4 || (Val == 4 && !isGFX10())) 5272 errorExpTgt(); 5273 5274 Val += 12; 5275 return MatchOperand_Success; 5276 } 5277 5278 if (isGFX10() && Str == "prim") { 5279 Val = 20; 5280 return MatchOperand_Success; 5281 } 5282 5283 if (Str.startswith("param")) { 5284 Str = Str.drop_front(5); 5285 if (Str.getAsInteger(10, Val)) 5286 return MatchOperand_ParseFail; 5287 5288 if (Val >= 32) 5289 errorExpTgt(); 5290 5291 Val += 32; 5292 return MatchOperand_Success; 5293 } 5294 5295 if (Str.startswith("invalid_target_")) { 5296 Str = Str.drop_front(15); 5297 if (Str.getAsInteger(10, Val)) 5298 return MatchOperand_ParseFail; 5299 5300 errorExpTgt(); 5301 return MatchOperand_Success; 5302 } 5303 5304 return MatchOperand_NoMatch; 5305 } 5306 5307 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5308 uint8_t Val; 5309 StringRef Str = Parser.getTok().getString(); 5310 5311 auto Res = parseExpTgtImpl(Str, Val); 5312 if (Res != MatchOperand_Success) 5313 return Res; 5314 5315 SMLoc S = Parser.getTok().getLoc(); 5316 Parser.Lex(); 5317 5318 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5319 AMDGPUOperand::ImmTyExpTgt)); 5320 return MatchOperand_Success; 5321 } 5322 5323 //===----------------------------------------------------------------------===// 5324 // parser helpers 5325 //===----------------------------------------------------------------------===// 5326 5327 bool 5328 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5329 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5330 } 5331 5332 bool 5333 AMDGPUAsmParser::isId(const StringRef Id) const { 5334 return isId(getToken(), Id); 5335 } 5336 5337 bool 5338 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5339 return getTokenKind() == Kind; 5340 } 5341 5342 bool 5343 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5344 if (isId(Id)) { 5345 lex(); 5346 return true; 5347 } 5348 return false; 5349 } 5350 5351 bool 5352 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5353 if (isId(Id) && peekToken().is(Kind)) { 5354 lex(); 5355 lex(); 5356 return true; 5357 } 5358 return false; 5359 } 5360 5361 bool 5362 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5363 if (isToken(Kind)) { 5364 lex(); 5365 return true; 5366 } 5367 return false; 5368 } 5369 5370 bool 5371 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5372 const StringRef ErrMsg) { 5373 if (!trySkipToken(Kind)) { 5374 Error(getLoc(), ErrMsg); 5375 return false; 5376 } 5377 return true; 5378 } 5379 5380 bool 5381 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5382 return !getParser().parseAbsoluteExpression(Imm); 5383 } 5384 5385 bool 5386 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5387 SMLoc S = getLoc(); 5388 5389 const MCExpr *Expr; 5390 if (Parser.parseExpression(Expr)) 5391 return false; 5392 5393 int64_t IntVal; 5394 if (Expr->evaluateAsAbsolute(IntVal)) { 5395 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5396 } else { 5397 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5398 } 5399 return true; 5400 } 5401 5402 bool 5403 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5404 if (isToken(AsmToken::String)) { 5405 Val = getToken().getStringContents(); 5406 lex(); 5407 return true; 5408 } else { 5409 Error(getLoc(), ErrMsg); 5410 return false; 5411 } 5412 } 5413 5414 
AsmToken 5415 AMDGPUAsmParser::getToken() const { 5416 return Parser.getTok(); 5417 } 5418 5419 AsmToken 5420 AMDGPUAsmParser::peekToken() { 5421 return getLexer().peekTok(); 5422 } 5423 5424 void 5425 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5426 auto TokCount = getLexer().peekTokens(Tokens); 5427 5428 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5429 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5430 } 5431 5432 AsmToken::TokenKind 5433 AMDGPUAsmParser::getTokenKind() const { 5434 return getLexer().getKind(); 5435 } 5436 5437 SMLoc 5438 AMDGPUAsmParser::getLoc() const { 5439 return getToken().getLoc(); 5440 } 5441 5442 StringRef 5443 AMDGPUAsmParser::getTokenStr() const { 5444 return getToken().getString(); 5445 } 5446 5447 void 5448 AMDGPUAsmParser::lex() { 5449 Parser.Lex(); 5450 } 5451 5452 //===----------------------------------------------------------------------===// 5453 // swizzle 5454 //===----------------------------------------------------------------------===// 5455 5456 LLVM_READNONE 5457 static unsigned 5458 encodeBitmaskPerm(const unsigned AndMask, 5459 const unsigned OrMask, 5460 const unsigned XorMask) { 5461 using namespace llvm::AMDGPU::Swizzle; 5462 5463 return BITMASK_PERM_ENC | 5464 (AndMask << BITMASK_AND_SHIFT) | 5465 (OrMask << BITMASK_OR_SHIFT) | 5466 (XorMask << BITMASK_XOR_SHIFT); 5467 } 5468 5469 bool 5470 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5471 const unsigned MinVal, 5472 const unsigned MaxVal, 5473 const StringRef ErrMsg) { 5474 for (unsigned i = 0; i < OpNum; ++i) { 5475 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5476 return false; 5477 } 5478 SMLoc ExprLoc = Parser.getTok().getLoc(); 5479 if (!parseExpr(Op[i])) { 5480 return false; 5481 } 5482 if (Op[i] < MinVal || Op[i] > MaxVal) { 5483 Error(ExprLoc, ErrMsg); 5484 return false; 5485 } 5486 } 5487 5488 return true; 5489 } 5490 5491 bool 5492 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5493 using namespace llvm::AMDGPU::Swizzle; 5494 5495 int64_t Lane[LANE_NUM]; 5496 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5497 "expected a 2-bit lane id")) { 5498 Imm = QUAD_PERM_ENC; 5499 for (unsigned I = 0; I < LANE_NUM; ++I) { 5500 Imm |= Lane[I] << (LANE_SHIFT * I); 5501 } 5502 return true; 5503 } 5504 return false; 5505 } 5506 5507 bool 5508 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5509 using namespace llvm::AMDGPU::Swizzle; 5510 5511 SMLoc S = Parser.getTok().getLoc(); 5512 int64_t GroupSize; 5513 int64_t LaneIdx; 5514 5515 if (!parseSwizzleOperands(1, &GroupSize, 5516 2, 32, 5517 "group size must be in the interval [2,32]")) { 5518 return false; 5519 } 5520 if (!isPowerOf2_64(GroupSize)) { 5521 Error(S, "group size must be a power of two"); 5522 return false; 5523 } 5524 if (parseSwizzleOperands(1, &LaneIdx, 5525 0, GroupSize - 1, 5526 "lane id must be in the interval [0,group size - 1]")) { 5527 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5528 return true; 5529 } 5530 return false; 5531 } 5532 5533 bool 5534 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5535 using namespace llvm::AMDGPU::Swizzle; 5536 5537 SMLoc S = Parser.getTok().getLoc(); 5538 int64_t GroupSize; 5539 5540 if (!parseSwizzleOperands(1, &GroupSize, 5541 2, 32, "group size must be in the interval [2,32]")) { 5542 return false; 5543 } 5544 if (!isPowerOf2_64(GroupSize)) { 5545 Error(S, "group size must be a power of two"); 5546 return false; 5547 } 5548 5549 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1); 5550 return true; 5551 } 5552 5553 bool 5554 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5555 using namespace llvm::AMDGPU::Swizzle; 5556 5557 SMLoc S = Parser.getTok().getLoc(); 5558 int64_t GroupSize; 5559 5560 if (!parseSwizzleOperands(1, &GroupSize, 5561 1, 16, "group size must be in the interval [1,16]")) { 5562 return false; 5563 } 5564 if (!isPowerOf2_64(GroupSize)) { 5565 Error(S, "group size must be a power of two"); 5566 return false; 5567 } 5568 5569 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5570 return true; 5571 } 5572 5573 bool 5574 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5575 using namespace llvm::AMDGPU::Swizzle; 5576 5577 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5578 return false; 5579 } 5580 5581 StringRef Ctl; 5582 SMLoc StrLoc = Parser.getTok().getLoc(); 5583 if (!parseString(Ctl)) { 5584 return false; 5585 } 5586 if (Ctl.size() != BITMASK_WIDTH) { 5587 Error(StrLoc, "expected a 5-character mask"); 5588 return false; 5589 } 5590 5591 unsigned AndMask = 0; 5592 unsigned OrMask = 0; 5593 unsigned XorMask = 0; 5594 5595 for (size_t i = 0; i < Ctl.size(); ++i) { 5596 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5597 switch(Ctl[i]) { 5598 default: 5599 Error(StrLoc, "invalid mask"); 5600 return false; 5601 case '0': 5602 break; 5603 case '1': 5604 OrMask |= Mask; 5605 break; 5606 case 'p': 5607 AndMask |= Mask; 5608 break; 5609 case 'i': 5610 AndMask |= Mask; 5611 XorMask |= Mask; 5612 break; 5613 } 5614 } 5615 5616 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5617 return true; 5618 } 5619 5620 bool 5621 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5622 5623 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5624 5625 if (!parseExpr(Imm)) { 5626 return false; 5627 } 5628 if (!isUInt<16>(Imm)) { 5629 Error(OffsetLoc, "expected a 16-bit offset"); 5630 return false; 5631 } 5632 return true; 5633 } 5634 5635 bool 5636 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5637 using namespace llvm::AMDGPU::Swizzle; 5638 5639 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5640 5641 SMLoc ModeLoc = Parser.getTok().getLoc(); 5642 bool Ok = false; 5643 5644 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5645 Ok = parseSwizzleQuadPerm(Imm); 5646 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5647 Ok = parseSwizzleBitmaskPerm(Imm); 5648 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5649 Ok = parseSwizzleBroadcast(Imm); 5650 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5651 Ok = parseSwizzleSwap(Imm); 5652 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5653 Ok = parseSwizzleReverse(Imm); 5654 } else { 5655 Error(ModeLoc, "expected a swizzle mode"); 5656 } 5657 5658 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5659 } 5660 5661 return false; 5662 } 5663 5664 OperandMatchResultTy 5665 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5666 SMLoc S = Parser.getTok().getLoc(); 5667 int64_t Imm = 0; 5668 5669 if (trySkipId("offset")) { 5670 5671 bool Ok = false; 5672 if (skipToken(AsmToken::Colon, "expected a colon")) { 5673 if (trySkipId("swizzle")) { 5674 Ok = parseSwizzleMacro(Imm); 5675 } else { 5676 Ok = parseSwizzleOffset(Imm); 5677 } 5678 } 5679 5680 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5681 5682 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5683 } else { 5684 // Swizzle "offset" operand is optional. 5685 // If it is omitted, try parsing other optional operands. 
5686 return parseOptionalOpr(Operands); 5687 } 5688 } 5689 5690 bool 5691 AMDGPUOperand::isSwizzle() const { 5692 return isImmTy(ImmTySwizzle); 5693 } 5694 5695 //===----------------------------------------------------------------------===// 5696 // VGPR Index Mode 5697 //===----------------------------------------------------------------------===// 5698 5699 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5700 5701 using namespace llvm::AMDGPU::VGPRIndexMode; 5702 5703 if (trySkipToken(AsmToken::RParen)) { 5704 return OFF; 5705 } 5706 5707 int64_t Imm = 0; 5708 5709 while (true) { 5710 unsigned Mode = 0; 5711 SMLoc S = Parser.getTok().getLoc(); 5712 5713 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5714 if (trySkipId(IdSymbolic[ModeId])) { 5715 Mode = 1 << ModeId; 5716 break; 5717 } 5718 } 5719 5720 if (Mode == 0) { 5721 Error(S, (Imm == 0)? 5722 "expected a VGPR index mode or a closing parenthesis" : 5723 "expected a VGPR index mode"); 5724 break; 5725 } 5726 5727 if (Imm & Mode) { 5728 Error(S, "duplicate VGPR index mode"); 5729 break; 5730 } 5731 Imm |= Mode; 5732 5733 if (trySkipToken(AsmToken::RParen)) 5734 break; 5735 if (!skipToken(AsmToken::Comma, 5736 "expected a comma or a closing parenthesis")) 5737 break; 5738 } 5739 5740 return Imm; 5741 } 5742 5743 OperandMatchResultTy 5744 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5745 5746 int64_t Imm = 0; 5747 SMLoc S = Parser.getTok().getLoc(); 5748 5749 if (getLexer().getKind() == AsmToken::Identifier && 5750 Parser.getTok().getString() == "gpr_idx" && 5751 getLexer().peekTok().is(AsmToken::LParen)) { 5752 5753 Parser.Lex(); 5754 Parser.Lex(); 5755 5756 // If parse failed, trigger an error but do not return error code 5757 // to avoid excessive error messages. 5758 Imm = parseGPRIdxMacro(); 5759 5760 } else { 5761 if (getParser().parseAbsoluteExpression(Imm)) 5762 return MatchOperand_NoMatch; 5763 if (Imm < 0 || !isUInt<4>(Imm)) { 5764 Error(S, "invalid immediate: only 4-bit values are legal"); 5765 } 5766 } 5767 5768 Operands.push_back( 5769 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5770 return MatchOperand_Success; 5771 } 5772 5773 bool AMDGPUOperand::isGPRIdxMode() const { 5774 return isImmTy(ImmTyGprIdxMode); 5775 } 5776 5777 //===----------------------------------------------------------------------===// 5778 // sopp branch targets 5779 //===----------------------------------------------------------------------===// 5780 5781 OperandMatchResultTy 5782 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5783 5784 // Make sure we are not parsing something 5785 // that looks like a label or an expression but is not. 5786 // This will improve error messages. 5787 if (isRegister() || isModifier()) 5788 return MatchOperand_NoMatch; 5789 5790 if (parseExpr(Operands)) { 5791 5792 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5793 assert(Opr.isImm() || Opr.isExpr()); 5794 SMLoc Loc = Opr.getStartLoc(); 5795 5796 // Currently we do not support arbitrary expressions as branch targets. 5797 // Only labels and absolute expressions are accepted. 
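    // For example (illustrative): "s_branch loop_end" and "s_branch 8" pass the
    // checks below, while a composite expression such as "loop_end+4" does not,
    // since it neither evaluates to an absolute value nor is a plain symbol
    // reference.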
5798 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5799 Error(Loc, "expected an absolute expression or a label"); 5800 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5801 Error(Loc, "expected a 16-bit signed jump offset"); 5802 } 5803 } 5804 5805 return MatchOperand_Success; // avoid excessive error messages 5806 } 5807 5808 //===----------------------------------------------------------------------===// 5809 // Boolean holding registers 5810 //===----------------------------------------------------------------------===// 5811 5812 OperandMatchResultTy 5813 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5814 return parseReg(Operands); 5815 } 5816 5817 //===----------------------------------------------------------------------===// 5818 // mubuf 5819 //===----------------------------------------------------------------------===// 5820 5821 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5822 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5823 } 5824 5825 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5826 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5827 } 5828 5829 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5830 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5831 } 5832 5833 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5834 const OperandVector &Operands, 5835 bool IsAtomic, 5836 bool IsAtomicReturn, 5837 bool IsLds) { 5838 bool IsLdsOpcode = IsLds; 5839 bool HasLdsModifier = false; 5840 OptionalImmIndexMap OptionalIdx; 5841 assert(IsAtomicReturn ? IsAtomic : true); 5842 unsigned FirstOperandIdx = 1; 5843 5844 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5845 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5846 5847 // Add the register arguments 5848 if (Op.isReg()) { 5849 Op.addRegOperands(Inst, 1); 5850 // Insert a tied src for atomic return dst. 5851 // This cannot be postponed as subsequent calls to 5852 // addImmOperands rely on correct number of MC operands. 5853 if (IsAtomicReturn && i == FirstOperandIdx) 5854 Op.addRegOperands(Inst, 1); 5855 continue; 5856 } 5857 5858 // Handle the case where soffset is an immediate 5859 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5860 Op.addImmOperands(Inst, 1); 5861 continue; 5862 } 5863 5864 HasLdsModifier |= Op.isLDS(); 5865 5866 // Handle tokens like 'offen' which are sometimes hard-coded into the 5867 // asm string. There are no MCInst operands for these. 5868 if (Op.isToken()) { 5869 continue; 5870 } 5871 assert(Op.isImm()); 5872 5873 // Handle optional arguments 5874 OptionalIdx[Op.getImmTy()] = i; 5875 } 5876 5877 // This is a workaround for an llvm quirk which may result in an 5878 // incorrect instruction selection. Lds and non-lds versions of 5879 // MUBUF instructions are identical except that lds versions 5880 // have mandatory 'lds' modifier. However this modifier follows 5881 // optional modifiers and llvm asm matcher regards this 'lds' 5882 // modifier as an optional one. As a result, an lds version 5883 // of opcode may be selected even if it has no 'lds' modifier. 5884 if (IsLdsOpcode && !HasLdsModifier) { 5885 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5886 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5887 Inst.setOpcode(NoLdsOpcode); 5888 IsLdsOpcode = false; 5889 } 5890 } 5891 5892 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5893 if (!IsAtomic) { // glc is hard-coded. 
5894 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5895 } 5896 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5897 5898 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5899 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5900 } 5901 5902 if (isGFX10()) 5903 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5904 } 5905 5906 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5907 OptionalImmIndexMap OptionalIdx; 5908 5909 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5910 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5911 5912 // Add the register arguments 5913 if (Op.isReg()) { 5914 Op.addRegOperands(Inst, 1); 5915 continue; 5916 } 5917 5918 // Handle the case where soffset is an immediate 5919 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5920 Op.addImmOperands(Inst, 1); 5921 continue; 5922 } 5923 5924 // Handle tokens like 'offen' which are sometimes hard-coded into the 5925 // asm string. There are no MCInst operands for these. 5926 if (Op.isToken()) { 5927 continue; 5928 } 5929 assert(Op.isImm()); 5930 5931 // Handle optional arguments 5932 OptionalIdx[Op.getImmTy()] = i; 5933 } 5934 5935 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5936 AMDGPUOperand::ImmTyOffset); 5937 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5939 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5940 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5941 5942 if (isGFX10()) 5943 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5944 } 5945 5946 //===----------------------------------------------------------------------===// 5947 // mimg 5948 //===----------------------------------------------------------------------===// 5949 5950 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5951 bool IsAtomic) { 5952 unsigned I = 1; 5953 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5954 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5955 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5956 } 5957 5958 if (IsAtomic) { 5959 // Add src, same as dst 5960 assert(Desc.getNumDefs() == 1); 5961 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5962 } 5963 5964 OptionalImmIndexMap OptionalIdx; 5965 5966 for (unsigned E = Operands.size(); I != E; ++I) { 5967 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5968 5969 // Add the register arguments 5970 if (Op.isReg()) { 5971 Op.addRegOperands(Inst, 1); 5972 } else if (Op.isImmModifier()) { 5973 OptionalIdx[Op.getImmTy()] = I; 5974 } else if (!Op.isToken()) { 5975 llvm_unreachable("unexpected operand type"); 5976 } 5977 } 5978 5979 bool IsGFX10 = isGFX10(); 5980 5981 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5982 if (IsGFX10) 5983 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 5984 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5985 if (IsGFX10) 5986 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5987 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5988 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5989 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5990 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5991 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5992 if (!IsGFX10) 5993 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5994 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5995 } 5996 5997 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5998 cvtMIMG(Inst, Operands, true); 5999 } 6000 6001 //===----------------------------------------------------------------------===// 6002 // smrd 6003 //===----------------------------------------------------------------------===// 6004 6005 bool AMDGPUOperand::isSMRDOffset8() const { 6006 return isImm() && isUInt<8>(getImm()); 6007 } 6008 6009 bool AMDGPUOperand::isSMRDOffset20() const { 6010 return isImm() && isUInt<20>(getImm()); 6011 } 6012 6013 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6014 // 32-bit literals are only supported on CI and we only want to use them 6015 // when the offset is > 8-bits. 6016 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6017 } 6018 6019 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6020 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6021 } 6022 6023 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 6024 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6025 } 6026 6027 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6028 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6029 } 6030 6031 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6032 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6033 } 6034 6035 //===----------------------------------------------------------------------===// 6036 // vop3 6037 //===----------------------------------------------------------------------===// 6038 6039 static bool ConvertOmodMul(int64_t &Mul) { 6040 if (Mul != 1 && Mul != 2 && Mul != 4) 6041 return false; 6042 6043 Mul >>= 1; 6044 return true; 6045 } 6046 6047 static bool ConvertOmodDiv(int64_t &Div) { 6048 if (Div == 1) { 6049 Div = 0; 6050 return true; 6051 } 6052 6053 if (Div == 2) { 6054 Div = 3; 6055 return true; 6056 } 6057 6058 return false; 6059 } 6060 6061 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6062 if (BoundCtrl == 0) { 6063 BoundCtrl = 1; 6064 return true; 6065 } 6066 6067 if (BoundCtrl == -1) { 6068 BoundCtrl = 0; 6069 return true; 6070 } 6071 6072 return false; 6073 } 6074 6075 // Note: the order in this table matches the order of operands in AsmString. 
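// Illustrative reading of an entry (fields: name, immediate type, is-a-bare-bit,
// optional converter): {"omod", ImmTyOModSI, false, ConvertOmodMul} names an
// operand that takes a value post-processed by ConvertOmodMul, whereas
// {"gds", ImmTyGDS, true, nullptr} is a bare flag whose presence alone matters.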
6076 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6077 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6078 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6079 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6080 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6081 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6082 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6083 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6084 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6085 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6086 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6087 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 6088 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6089 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6090 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6091 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6092 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6093 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6094 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6095 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6096 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6097 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6098 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6099 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6100 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6101 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6102 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6103 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6104 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6105 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6106 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6107 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6108 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6109 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6110 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6111 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6112 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6113 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6114 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6115 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6116 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6117 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6118 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6119 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6120 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6121 }; 6122 6123 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6124 6125 OperandMatchResultTy res = parseOptionalOpr(Operands); 6126 6127 // This is a hack to enable hardcoded mandatory operands which follow 6128 // optional operands. 6129 // 6130 // Current design assumes that all operands after the first optional operand 6131 // are also optional. However implementation of some instructions violates 6132 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6133 // 6134 // To alleviate this problem, we have to (implicitly) parse extra operands 6135 // to make sure autogenerated parser of custom operands never hit hardcoded 6136 // mandatory operands. 
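  // Illustrative sketch of the issue (exact operand order simplified): in a
  // global/flat atomic with return, the hardcoded "glc" may come after an
  // optional "offset:..." operand, so up to MAX_OPR_LOOKAHEAD extra optional
  // operands are consumed below to make sure the hardcoded one is reached.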
6137 6138 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6139 if (res != MatchOperand_Success || 6140 isToken(AsmToken::EndOfStatement)) 6141 break; 6142 6143 trySkipToken(AsmToken::Comma); 6144 res = parseOptionalOpr(Operands); 6145 } 6146 6147 return res; 6148 } 6149 6150 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6151 OperandMatchResultTy res; 6152 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6153 // try to parse any optional operand here 6154 if (Op.IsBit) { 6155 res = parseNamedBit(Op.Name, Operands, Op.Type); 6156 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6157 res = parseOModOperand(Operands); 6158 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6159 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6160 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6161 res = parseSDWASel(Operands, Op.Name, Op.Type); 6162 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6163 res = parseSDWADstUnused(Operands); 6164 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6165 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6166 Op.Type == AMDGPUOperand::ImmTyNegLo || 6167 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6168 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6169 Op.ConvertResult); 6170 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6171 res = parseDim(Operands); 6172 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6173 res = parseDfmtNfmt(Operands); 6174 } else { 6175 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6176 } 6177 if (res != MatchOperand_NoMatch) { 6178 return res; 6179 } 6180 } 6181 return MatchOperand_NoMatch; 6182 } 6183 6184 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6185 StringRef Name = Parser.getTok().getString(); 6186 if (Name == "mul") { 6187 return parseIntWithPrefix("mul", Operands, 6188 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6189 } 6190 6191 if (Name == "div") { 6192 return parseIntWithPrefix("div", Operands, 6193 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6194 } 6195 6196 return MatchOperand_NoMatch; 6197 } 6198 6199 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6200 cvtVOP3P(Inst, Operands); 6201 6202 int Opc = Inst.getOpcode(); 6203 6204 int SrcNum; 6205 const int Ops[] = { AMDGPU::OpName::src0, 6206 AMDGPU::OpName::src1, 6207 AMDGPU::OpName::src2 }; 6208 for (SrcNum = 0; 6209 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6210 ++SrcNum); 6211 assert(SrcNum > 0); 6212 6213 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6214 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6215 6216 if ((OpSel & (1 << SrcNum)) != 0) { 6217 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6218 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6219 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6220 } 6221 } 6222 6223 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6224 // 1. This operand is input modifiers 6225 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6226 // 2. This is not last operand 6227 && Desc.NumOperands > (OpNum + 1) 6228 // 3. Next operand is register class 6229 && Desc.OpInfo[OpNum + 1].RegClass != -1 6230 // 4. 
Next register is not tied to any other operand 6231 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6232 } 6233 6234 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6235 { 6236 OptionalImmIndexMap OptionalIdx; 6237 unsigned Opc = Inst.getOpcode(); 6238 6239 unsigned I = 1; 6240 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6241 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6242 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6243 } 6244 6245 for (unsigned E = Operands.size(); I != E; ++I) { 6246 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6247 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6248 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6249 } else if (Op.isInterpSlot() || 6250 Op.isInterpAttr() || 6251 Op.isAttrChan()) { 6252 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6253 } else if (Op.isImmModifier()) { 6254 OptionalIdx[Op.getImmTy()] = I; 6255 } else { 6256 llvm_unreachable("unhandled operand type"); 6257 } 6258 } 6259 6260 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6261 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6262 } 6263 6264 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6265 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6266 } 6267 6268 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6269 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6270 } 6271 } 6272 6273 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6274 OptionalImmIndexMap &OptionalIdx) { 6275 unsigned Opc = Inst.getOpcode(); 6276 6277 unsigned I = 1; 6278 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6279 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6280 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6281 } 6282 6283 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6284 // This instruction has src modifiers 6285 for (unsigned E = Operands.size(); I != E; ++I) { 6286 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6287 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6288 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6289 } else if (Op.isImmModifier()) { 6290 OptionalIdx[Op.getImmTy()] = I; 6291 } else if (Op.isRegOrImm()) { 6292 Op.addRegOrImmOperands(Inst, 1); 6293 } else { 6294 llvm_unreachable("unhandled operand type"); 6295 } 6296 } 6297 } else { 6298 // No src modifiers 6299 for (unsigned E = Operands.size(); I != E; ++I) { 6300 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6301 if (Op.isMod()) { 6302 OptionalIdx[Op.getImmTy()] = I; 6303 } else { 6304 Op.addRegOrImmOperands(Inst, 1); 6305 } 6306 } 6307 } 6308 6309 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6310 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6311 } 6312 6313 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6314 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6315 } 6316 6317 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6318 // it has src2 register operand that is tied to dst operand 6319 // we don't allow modifiers for this operand in assembler so src2_modifiers 6320 // should be 0. 
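  // For example (illustrative): for "v_mac_f32 v0, v1, v2" only dst, src0 and
  // src1 (plus their modifiers) have been emitted above; the code below inserts
  // src2_modifiers = 0 and a src2 operand that duplicates the dst register.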
6321 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6322 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6323 Opc == AMDGPU::V_MAC_F32_e64_vi || 6324 Opc == AMDGPU::V_MAC_F16_e64_vi || 6325 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6326 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6327 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6328 auto it = Inst.begin(); 6329 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6330 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6331 ++it; 6332 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6333 } 6334 } 6335 6336 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6337 OptionalImmIndexMap OptionalIdx; 6338 cvtVOP3(Inst, Operands, OptionalIdx); 6339 } 6340 6341 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6342 const OperandVector &Operands) { 6343 OptionalImmIndexMap OptIdx; 6344 const int Opc = Inst.getOpcode(); 6345 const MCInstrDesc &Desc = MII.get(Opc); 6346 6347 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6348 6349 cvtVOP3(Inst, Operands, OptIdx); 6350 6351 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6352 assert(!IsPacked); 6353 Inst.addOperand(Inst.getOperand(0)); 6354 } 6355 6356 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6357 // instruction, and then figure out where to actually put the modifiers 6358 6359 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6360 6361 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6362 if (OpSelHiIdx != -1) { 6363 int DefaultVal = IsPacked ? -1 : 0; 6364 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6365 DefaultVal); 6366 } 6367 6368 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6369 if (NegLoIdx != -1) { 6370 assert(IsPacked); 6371 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6372 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6373 } 6374 6375 const int Ops[] = { AMDGPU::OpName::src0, 6376 AMDGPU::OpName::src1, 6377 AMDGPU::OpName::src2 }; 6378 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6379 AMDGPU::OpName::src1_modifiers, 6380 AMDGPU::OpName::src2_modifiers }; 6381 6382 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6383 6384 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6385 unsigned OpSelHi = 0; 6386 unsigned NegLo = 0; 6387 unsigned NegHi = 0; 6388 6389 if (OpSelHiIdx != -1) { 6390 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6391 } 6392 6393 if (NegLoIdx != -1) { 6394 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6395 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6396 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6397 } 6398 6399 for (int J = 0; J < 3; ++J) { 6400 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6401 if (OpIdx == -1) 6402 break; 6403 6404 uint32_t ModVal = 0; 6405 6406 if ((OpSel & (1 << J)) != 0) 6407 ModVal |= SISrcMods::OP_SEL_0; 6408 6409 if ((OpSelHi & (1 << J)) != 0) 6410 ModVal |= SISrcMods::OP_SEL_1; 6411 6412 if ((NegLo & (1 << J)) != 0) 6413 ModVal |= SISrcMods::NEG; 6414 6415 if ((NegHi & (1 << J)) != 0) 6416 ModVal |= SISrcMods::NEG_HI; 6417 6418 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6419 6420 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6421 } 6422 } 6423 6424 //===----------------------------------------------------------------------===// 6425 // dpp 6426 
//===----------------------------------------------------------------------===// 6427 6428 bool AMDGPUOperand::isDPP8() const { 6429 return isImmTy(ImmTyDPP8); 6430 } 6431 6432 bool AMDGPUOperand::isDPPCtrl() const { 6433 using namespace AMDGPU::DPP; 6434 6435 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6436 if (result) { 6437 int64_t Imm = getImm(); 6438 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6439 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6440 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6441 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6442 (Imm == DppCtrl::WAVE_SHL1) || 6443 (Imm == DppCtrl::WAVE_ROL1) || 6444 (Imm == DppCtrl::WAVE_SHR1) || 6445 (Imm == DppCtrl::WAVE_ROR1) || 6446 (Imm == DppCtrl::ROW_MIRROR) || 6447 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6448 (Imm == DppCtrl::BCAST15) || 6449 (Imm == DppCtrl::BCAST31) || 6450 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6451 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6452 } 6453 return false; 6454 } 6455 6456 //===----------------------------------------------------------------------===// 6457 // mAI 6458 //===----------------------------------------------------------------------===// 6459 6460 bool AMDGPUOperand::isBLGP() const { 6461 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6462 } 6463 6464 bool AMDGPUOperand::isCBSZ() const { 6465 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6466 } 6467 6468 bool AMDGPUOperand::isABID() const { 6469 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6470 } 6471 6472 bool AMDGPUOperand::isS16Imm() const { 6473 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6474 } 6475 6476 bool AMDGPUOperand::isU16Imm() const { 6477 return isImm() && isUInt<16>(getImm()); 6478 } 6479 6480 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6481 if (!isGFX10()) 6482 return MatchOperand_NoMatch; 6483 6484 SMLoc S = Parser.getTok().getLoc(); 6485 6486 if (getLexer().isNot(AsmToken::Identifier)) 6487 return MatchOperand_NoMatch; 6488 if (getLexer().getTok().getString() != "dim") 6489 return MatchOperand_NoMatch; 6490 6491 Parser.Lex(); 6492 if (getLexer().isNot(AsmToken::Colon)) 6493 return MatchOperand_ParseFail; 6494 6495 Parser.Lex(); 6496 6497 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6498 // integer. 
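  // e.g. for "dim:2D" the lexer produces the integer "2" followed by the
  // identifier "D"; the two tokens are glued back together below (they must be
  // adjacent, hence the end-location check) before the optional "SQ_RSRC_IMG_"
  // prefix is stripped and the dim encoding is looked up.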
6499 std::string Token; 6500 if (getLexer().is(AsmToken::Integer)) { 6501 SMLoc Loc = getLexer().getTok().getEndLoc(); 6502 Token = getLexer().getTok().getString(); 6503 Parser.Lex(); 6504 if (getLexer().getTok().getLoc() != Loc) 6505 return MatchOperand_ParseFail; 6506 } 6507 if (getLexer().isNot(AsmToken::Identifier)) 6508 return MatchOperand_ParseFail; 6509 Token += getLexer().getTok().getString(); 6510 6511 StringRef DimId = Token; 6512 if (DimId.startswith("SQ_RSRC_IMG_")) 6513 DimId = DimId.substr(12); 6514 6515 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6516 if (!DimInfo) 6517 return MatchOperand_ParseFail; 6518 6519 Parser.Lex(); 6520 6521 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6522 AMDGPUOperand::ImmTyDim)); 6523 return MatchOperand_Success; 6524 } 6525 6526 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6527 SMLoc S = Parser.getTok().getLoc(); 6528 StringRef Prefix; 6529 6530 if (getLexer().getKind() == AsmToken::Identifier) { 6531 Prefix = Parser.getTok().getString(); 6532 } else { 6533 return MatchOperand_NoMatch; 6534 } 6535 6536 if (Prefix != "dpp8") 6537 return parseDPPCtrl(Operands); 6538 if (!isGFX10()) 6539 return MatchOperand_NoMatch; 6540 6541 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6542 6543 int64_t Sels[8]; 6544 6545 Parser.Lex(); 6546 if (getLexer().isNot(AsmToken::Colon)) 6547 return MatchOperand_ParseFail; 6548 6549 Parser.Lex(); 6550 if (getLexer().isNot(AsmToken::LBrac)) 6551 return MatchOperand_ParseFail; 6552 6553 Parser.Lex(); 6554 if (getParser().parseAbsoluteExpression(Sels[0])) 6555 return MatchOperand_ParseFail; 6556 if (0 > Sels[0] || 7 < Sels[0]) 6557 return MatchOperand_ParseFail; 6558 6559 for (size_t i = 1; i < 8; ++i) { 6560 if (getLexer().isNot(AsmToken::Comma)) 6561 return MatchOperand_ParseFail; 6562 6563 Parser.Lex(); 6564 if (getParser().parseAbsoluteExpression(Sels[i])) 6565 return MatchOperand_ParseFail; 6566 if (0 > Sels[i] || 7 < Sels[i]) 6567 return MatchOperand_ParseFail; 6568 } 6569 6570 if (getLexer().isNot(AsmToken::RBrac)) 6571 return MatchOperand_ParseFail; 6572 Parser.Lex(); 6573 6574 unsigned DPP8 = 0; 6575 for (size_t i = 0; i < 8; ++i) 6576 DPP8 |= (Sels[i] << (i * 3)); 6577 6578 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6579 return MatchOperand_Success; 6580 } 6581 6582 OperandMatchResultTy 6583 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6584 using namespace AMDGPU::DPP; 6585 6586 SMLoc S = Parser.getTok().getLoc(); 6587 StringRef Prefix; 6588 int64_t Int; 6589 6590 if (getLexer().getKind() == AsmToken::Identifier) { 6591 Prefix = Parser.getTok().getString(); 6592 } else { 6593 return MatchOperand_NoMatch; 6594 } 6595 6596 if (Prefix == "row_mirror") { 6597 Int = DppCtrl::ROW_MIRROR; 6598 Parser.Lex(); 6599 } else if (Prefix == "row_half_mirror") { 6600 Int = DppCtrl::ROW_HALF_MIRROR; 6601 Parser.Lex(); 6602 } else { 6603 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6604 if (Prefix != "quad_perm" 6605 && Prefix != "row_shl" 6606 && Prefix != "row_shr" 6607 && Prefix != "row_ror" 6608 && Prefix != "wave_shl" 6609 && Prefix != "wave_rol" 6610 && Prefix != "wave_shr" 6611 && Prefix != "wave_ror" 6612 && Prefix != "row_bcast" 6613 && Prefix != "row_share" 6614 && Prefix != "row_xmask") { 6615 return MatchOperand_NoMatch; 6616 } 6617 6618 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6619 return MatchOperand_NoMatch; 6620 6621 if (!isVI() && 
!isGFX9() && 6622 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6623 Prefix == "wave_rol" || Prefix == "wave_ror" || 6624 Prefix == "row_bcast")) 6625 return MatchOperand_NoMatch; 6626 6627 Parser.Lex(); 6628 if (getLexer().isNot(AsmToken::Colon)) 6629 return MatchOperand_ParseFail; 6630 6631 if (Prefix == "quad_perm") { 6632 // quad_perm:[%d,%d,%d,%d] 6633 Parser.Lex(); 6634 if (getLexer().isNot(AsmToken::LBrac)) 6635 return MatchOperand_ParseFail; 6636 Parser.Lex(); 6637 6638 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6639 return MatchOperand_ParseFail; 6640 6641 for (int i = 0; i < 3; ++i) { 6642 if (getLexer().isNot(AsmToken::Comma)) 6643 return MatchOperand_ParseFail; 6644 Parser.Lex(); 6645 6646 int64_t Temp; 6647 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6648 return MatchOperand_ParseFail; 6649 const int shift = i*2 + 2; 6650 Int += (Temp << shift); 6651 } 6652 6653 if (getLexer().isNot(AsmToken::RBrac)) 6654 return MatchOperand_ParseFail; 6655 Parser.Lex(); 6656 } else { 6657 // sel:%d 6658 Parser.Lex(); 6659 if (getParser().parseAbsoluteExpression(Int)) 6660 return MatchOperand_ParseFail; 6661 6662 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6663 Int |= DppCtrl::ROW_SHL0; 6664 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6665 Int |= DppCtrl::ROW_SHR0; 6666 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6667 Int |= DppCtrl::ROW_ROR0; 6668 } else if (Prefix == "wave_shl" && 1 == Int) { 6669 Int = DppCtrl::WAVE_SHL1; 6670 } else if (Prefix == "wave_rol" && 1 == Int) { 6671 Int = DppCtrl::WAVE_ROL1; 6672 } else if (Prefix == "wave_shr" && 1 == Int) { 6673 Int = DppCtrl::WAVE_SHR1; 6674 } else if (Prefix == "wave_ror" && 1 == Int) { 6675 Int = DppCtrl::WAVE_ROR1; 6676 } else if (Prefix == "row_bcast") { 6677 if (Int == 15) { 6678 Int = DppCtrl::BCAST15; 6679 } else if (Int == 31) { 6680 Int = DppCtrl::BCAST31; 6681 } else { 6682 return MatchOperand_ParseFail; 6683 } 6684 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6685 Int |= DppCtrl::ROW_SHARE_FIRST; 6686 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6687 Int |= DppCtrl::ROW_XMASK_FIRST; 6688 } else { 6689 return MatchOperand_ParseFail; 6690 } 6691 } 6692 } 6693 6694 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6695 return MatchOperand_Success; 6696 } 6697 6698 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6699 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6700 } 6701 6702 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6703 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6704 } 6705 6706 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6707 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6708 } 6709 6710 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6711 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6712 } 6713 6714 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6715 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6716 } 6717 6718 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6719 OptionalImmIndexMap OptionalIdx; 6720 6721 unsigned I = 1; 6722 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6723 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6724 ((AMDGPUOperand 
&)*Operands[I++]).addRegOperands(Inst, 1); 6725 } 6726 6727 int Fi = 0; 6728 for (unsigned E = Operands.size(); I != E; ++I) { 6729 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6730 MCOI::TIED_TO); 6731 if (TiedTo != -1) { 6732 assert((unsigned)TiedTo < Inst.getNumOperands()); 6733 // handle tied old or src2 for MAC instructions 6734 Inst.addOperand(Inst.getOperand(TiedTo)); 6735 } 6736 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6737 // Add the register arguments 6738 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6739 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6740 // Skip it. 6741 continue; 6742 } 6743 6744 if (IsDPP8) { 6745 if (Op.isDPP8()) { 6746 Op.addImmOperands(Inst, 1); 6747 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6748 Op.addRegWithFPInputModsOperands(Inst, 2); 6749 } else if (Op.isFI()) { 6750 Fi = Op.getImm(); 6751 } else if (Op.isReg()) { 6752 Op.addRegOperands(Inst, 1); 6753 } else { 6754 llvm_unreachable("Invalid operand type"); 6755 } 6756 } else { 6757 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6758 Op.addRegWithFPInputModsOperands(Inst, 2); 6759 } else if (Op.isDPPCtrl()) { 6760 Op.addImmOperands(Inst, 1); 6761 } else if (Op.isImm()) { 6762 // Handle optional arguments 6763 OptionalIdx[Op.getImmTy()] = I; 6764 } else { 6765 llvm_unreachable("Invalid operand type"); 6766 } 6767 } 6768 } 6769 6770 if (IsDPP8) { 6771 using namespace llvm::AMDGPU::DPP; 6772 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6773 } else { 6774 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6775 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6776 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6777 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6778 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6779 } 6780 } 6781 } 6782 6783 //===----------------------------------------------------------------------===// 6784 // sdwa 6785 //===----------------------------------------------------------------------===// 6786 6787 OperandMatchResultTy 6788 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6789 AMDGPUOperand::ImmTy Type) { 6790 using namespace llvm::AMDGPU::SDWA; 6791 6792 SMLoc S = Parser.getTok().getLoc(); 6793 StringRef Value; 6794 OperandMatchResultTy res; 6795 6796 res = parseStringWithPrefix(Prefix, Value); 6797 if (res != MatchOperand_Success) { 6798 return res; 6799 } 6800 6801 int64_t Int; 6802 Int = StringSwitch<int64_t>(Value) 6803 .Case("BYTE_0", SdwaSel::BYTE_0) 6804 .Case("BYTE_1", SdwaSel::BYTE_1) 6805 .Case("BYTE_2", SdwaSel::BYTE_2) 6806 .Case("BYTE_3", SdwaSel::BYTE_3) 6807 .Case("WORD_0", SdwaSel::WORD_0) 6808 .Case("WORD_1", SdwaSel::WORD_1) 6809 .Case("DWORD", SdwaSel::DWORD) 6810 .Default(0xffffffff); 6811 Parser.Lex(); // eat last token 6812 6813 if (Int == 0xffffffff) { 6814 return MatchOperand_ParseFail; 6815 } 6816 6817 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6818 return MatchOperand_Success; 6819 } 6820 6821 OperandMatchResultTy 6822 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6823 using namespace llvm::AMDGPU::SDWA; 6824 6825 SMLoc S = Parser.getTok().getLoc(); 6826 StringRef Value; 6827 OperandMatchResultTy res; 6828 6829 res = parseStringWithPrefix("dst_unused", Value); 6830 if (res != MatchOperand_Success) { 6831 
return res; 6832 } 6833 6834 int64_t Int; 6835 Int = StringSwitch<int64_t>(Value) 6836 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6837 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6838 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6839 .Default(0xffffffff); 6840 Parser.Lex(); // eat last token 6841 6842 if (Int == 0xffffffff) { 6843 return MatchOperand_ParseFail; 6844 } 6845 6846 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6847 return MatchOperand_Success; 6848 } 6849 6850 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6851 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6852 } 6853 6854 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6855 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6856 } 6857 6858 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6859 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 6860 } 6861 6862 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 6863 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 6864 } 6865 6866 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6867 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6868 } 6869 6870 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6871 uint64_t BasicInstType, 6872 bool SkipDstVcc, 6873 bool SkipSrcVcc) { 6874 using namespace llvm::AMDGPU::SDWA; 6875 6876 OptionalImmIndexMap OptionalIdx; 6877 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 6878 bool SkippedVcc = false; 6879 6880 unsigned I = 1; 6881 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6882 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6883 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6884 } 6885 6886 for (unsigned E = Operands.size(); I != E; ++I) { 6887 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6888 if (SkipVcc && !SkippedVcc && Op.isReg() && 6889 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6890 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6891 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6892 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6893 // Skip VCC only if we didn't skip it on previous iteration. 6894 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
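      // Illustrative MC operand counts for the checks below: at the "vcc"
      // written right after the vdst, only the vdst has been emitted
      // (count == 1); at the trailing "vcc" source of v_addc_*_sdwa, vdst plus
      // src0/src1 with their modifiers have been emitted (1 + 2 + 2 == 5).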
6895 if (BasicInstType == SIInstrFlags::VOP2 && 6896 ((SkipDstVcc && Inst.getNumOperands() == 1) || 6897 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 6898 SkippedVcc = true; 6899 continue; 6900 } else if (BasicInstType == SIInstrFlags::VOPC && 6901 Inst.getNumOperands() == 0) { 6902 SkippedVcc = true; 6903 continue; 6904 } 6905 } 6906 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6907 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6908 } else if (Op.isImm()) { 6909 // Handle optional arguments 6910 OptionalIdx[Op.getImmTy()] = I; 6911 } else { 6912 llvm_unreachable("Invalid operand type"); 6913 } 6914 SkippedVcc = false; 6915 } 6916 6917 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6918 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6919 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6920 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 6921 switch (BasicInstType) { 6922 case SIInstrFlags::VOP1: 6923 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6924 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6925 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6926 } 6927 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6928 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6929 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6930 break; 6931 6932 case SIInstrFlags::VOP2: 6933 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6934 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6935 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6936 } 6937 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6939 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6940 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6941 break; 6942 6943 case SIInstrFlags::VOPC: 6944 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 6945 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6946 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6947 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6948 break; 6949 6950 default: 6951 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 6952 } 6953 } 6954 6955 // special case v_mac_{f16, f32}: 6956 // it has src2 register operand that is tied to dst operand 6957 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 6958 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 6959 auto it = Inst.begin(); 6960 std::advance( 6961 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 6962 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6963 } 6964 } 6965 6966 //===----------------------------------------------------------------------===// 6967 // mAI 6968 //===----------------------------------------------------------------------===// 6969 6970 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 6971 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 6972 } 6973 6974 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 6975 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 6976 } 6977 6978 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 6979 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 6980 } 6981 6982 /// Force static initialization. 6983 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 6984 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 6985 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 6986 } 6987 6988 #define GET_REGISTER_MATCHER 6989 #define GET_MATCHER_IMPLEMENTATION 6990 #define GET_MNEMONIC_SPELL_CHECKER 6991 #include "AMDGPUGenAsmMatcher.inc" 6992 6993 // This fuction should be defined after auto-generated include so that we have 6994 // MatchClassKind enum defined 6995 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 6996 unsigned Kind) { 6997 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 6998 // But MatchInstructionImpl() expects to meet token and fails to validate 6999 // operand. This method checks if we are given immediate operand but expect to 7000 // get corresponding token. 7001 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 7002 switch (Kind) { 7003 case MCK_addr64: 7004 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 7005 case MCK_gds: 7006 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 7007 case MCK_lds: 7008 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 7009 case MCK_glc: 7010 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 7011 case MCK_idxen: 7012 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 7013 case MCK_offen: 7014 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 7015 case MCK_SSrcB32: 7016 // When operands have expression values, they will return true for isToken, 7017 // because it is not possible to distinguish between a token and an 7018 // expression at parse time. MatchInstructionImpl() will always try to 7019 // match an operand as a token, when isToken returns true, and when the 7020 // name of the expression is not a valid token, the match will fail, 7021 // so we need to handle it here. 7022 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 7023 case MCK_SSrcF32: 7024 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 7025 case MCK_SoppBrTarget: 7026 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 7027 case MCK_VReg32OrOff: 7028 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 7029 case MCK_InterpSlot: 7030 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 7031 case MCK_Attr: 7032 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 7033 case MCK_AttrChan: 7034 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 7035 case MCK_SReg_64: 7036 case MCK_SReg_64_XEXEC: 7037 // Null is defined as a 32-bit register but 7038 // it should also be enabled with 64-bit operands. 7039 // The following code enables it for SReg_64 operands 7040 // used as source and destination. Remaining source 7041 // operands are handled in isInlinableImm. 7042 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 7043 default: 7044 return Match_InvalidOperand; 7045 } 7046 } 7047 7048 //===----------------------------------------------------------------------===// 7049 // endpgm 7050 //===----------------------------------------------------------------------===// 7051 7052 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 7053 SMLoc S = Parser.getTok().getLoc(); 7054 int64_t Imm = 0; 7055 7056 if (!parseExpr(Imm)) { 7057 // The operand is optional, if not present default to 0 7058 Imm = 0; 7059 } 7060 7061 if (!isUInt<16>(Imm)) { 7062 Error(S, "expected a 16-bit value"); 7063 return MatchOperand_ParseFail; 7064 } 7065 7066 Operands.push_back( 7067 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 7068 return MatchOperand_Success; 7069 } 7070 7071 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 7072