//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

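// Illustrative only: a worked example of the bit-field helpers above, for a
// hypothetical 3-bit field at bit offset 4.
//
//   getBitMask(/*Shift=*/4, /*Width=*/3)                     == 0x70
//   packBits(/*Src=*/5, /*Dst=*/0, /*Shift=*/4, /*Width=*/3) == 0x50
//   unpackBits(/*Src=*/0x50, /*Shift=*/4, /*Width=*/3)       == 5
//
// packBits and unpackBits are inverses for values that fit in Width bits,
// and packBits preserves the bits of Dst outside the field.
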
/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns Loadcnt bit width.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns Xcnt bit width.
unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VaSdst bit width.
inline unsigned getVaSdstBitWidth() { return 3; }

/// \returns VaSdst bit shift.
inline unsigned getVaSdstBitShift() { return 9; }

/// \returns VmVsrc bit width.
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift.
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width.
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift.
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns VaVcc bit width.
inline unsigned getVaVccBitWidth() { return 1; }

/// \returns VaVcc bit shift.
inline unsigned getVaVccBitShift() { return 1; }

/// \returns SaSdst bit width.
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift.
inline unsigned getSaSdstBitShift() { return 0; }

/// \returns VaSsrc bit width.
inline unsigned getVaSsrcBitWidth() { return 1; }

/// \returns VaSsrc bit shift.
inline unsigned getVaSsrcBitShift() { return 8; }

/// \returns HoldCnt bit width.
inline unsigned getHoldCntWidth() { return 1; }

/// \returns HoldCnt bit shift.
inline unsigned getHoldCntBitShift() { return 7; }

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

/// \returns true if the target supports signed immediate offset for SMRD
/// instructions.
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

unsigned getAMDHSACodeObjectVersion(const Module &M) {
  if (auto *Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdhsa_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  return getDefaultAMDHSACodeObjectVersion();
}

unsigned getDefaultAMDHSACodeObjectVersion() {
  return DefaultAMDHSACodeObjectVersion;
}

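// Illustrative only: assuming IR that carries a module flag such as
//
//   !llvm.module.flags = !{!0}
//   !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
//
// getAMDHSACodeObjectVersion(M) above returns 500 / 100 == 5. Without the
// flag it falls back to the -amdhsa-code-object-version default.
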
unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
  switch (ABIVersion) {
  case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    return 4;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
    return 5;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
    return 6;
  default:
    return getDefaultAMDHSACodeObjectVersion();
  }
}

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
  if (T.getOS() != Triple::AMDHSA)
    return 0;

  switch (CodeObjectVersion) {
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  case 6:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
  default:
    report_fatal_error("Unsupported AMDHSA Code Object Version " +
                       Twine(CodeObjectVersion));
  }
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info =
      getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}

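// Illustrative only: a worked example of getAddrSizeMIMGOp, assuming a 2D
// dim (Dim->NumCoords == 2, Dim->NumGradients == 4) and a sample opcode with
// gradients, no extra arguments, and no LOD/clamp/mip:
//
//   IsA16 == false:                     2 coords + 4 gradients           == 6
//   IsA16, gradients packed to 16-bit:  divideCeil(2, 2) + alignTo<2>(2) == 3
//   IsA16, gradients left as 32-bit:    divideCeil(2, 2) + 4             == 5
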
struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
  bool tfe;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPCDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOP3CDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
  bool CanBeVOPD3X;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
  bool VOPD3;
};

struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

#define GET_FP4FP8DstByteSelTable_DECL
#define GET_FP4FP8DstByteSelTable_IMPL

struct DPMACCInstructionInfo {
  uint16_t Opcode;
  bool IsDPMACCInstruction;
};

struct FP4FP8DstByteSelInfo {
  uint16_t Opcode;
  bool HasFP8DstByteSel;
  bool HasFP4DstByteSel;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_True16D16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#define GET_getMFMA_F8F6F4_WithSize_DECL
#define GET_getMFMA_F8F6F4_WithSize_IMPL
#define GET_isMFMA_F8F6F4Table_IMPL
#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL

#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info =
      getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_vaddr;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_srsrc;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_soffset;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info =
      getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_vaddr;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_srsrc;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_soffset;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->IsBufferInv;
}

bool getMUBUFTfe(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->tfe;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info && Info->IsBuffer;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info && Info->is_dgemm;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info && Info->is_gfx940_xdl;
}

uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {
  switch (EncodingVal) {
  case MFMAScaleFormats::FP6_E2M3:
  case MFMAScaleFormats::FP6_E3M2:
    return 6;
  case MFMAScaleFormats::FP4_E2M1:
    return 4;
  case MFMAScaleFormats::FP8_E4M3:
  case MFMAScaleFormats::FP8_E5M2:
  default:
    return 8;
  }

  llvm_unreachable("covered switch over mfma scale formats");
}

const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
                                                      unsigned BLGP,
                                                      unsigned F8F8Opcode) {
  uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ);
  uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP);
  return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
}

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
  if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
    return SIEncodingFamily::GFX1250;
  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
    return SIEncodingFamily::GFX12;
  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
    return SIEncodingFamily::GFX11;
  llvm_unreachable("Subtarget generation does not support VOPD!");
}

CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info) {
    // Check that Opc can be used as VOPDY for this encoding. V_MOV_B32 as a
    // VOPDX is just a placeholder here; it is supported on all encodings.
    // TODO: This can be optimized by creating tables of supported VOPDY
    // opcodes per encoding.
    unsigned VOPDMov = AMDGPU::getVOPDOpcode(AMDGPU::V_MOV_B32_e32, VOPD3);
    bool CanBeVOPDY = getVOPDFull(VOPDMov, AMDGPU::getVOPDOpcode(Opc, VOPD3),
                                  EncodingFamily, VOPD3) != -1;
    return {VOPD3 ? Info->CanBeVOPD3X : Info->CanBeVOPDX, CanBeVOPDY};
  }

  return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isAsyncStore(unsigned Opc) {
  return false; // Placeholder until async stores are implemented.
}

bool isTensorStore(unsigned Opc) {
  return Opc == TENSOR_STORE_FROM_LDS_gfx1250 ||
         Opc == TENSOR_STORE_FROM_LDS_D2_gfx1250;
}

unsigned getTemporalHintType(const MCInstrDesc TID) {
  if (TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))
    return CPol::TH_TYPE_ATOMIC;
  unsigned Opc = TID.getOpcode();
  // Async and tensor stores should have the temporal hint type TH_TYPE_STORE.
  if (TID.mayStore() &&
      (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
    return CPol::TH_TYPE_STORE;

  // This will default to returning TH_TYPE_LOAD when neither the MayStore nor
  // the MayLoad flag is present, which is the case with instructions like
  // image_get_resinfo.
  return CPol::TH_TYPE_LOAD;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info && Info->IsTrue16;
}

FPType getFPDstSelType(unsigned Opc) {
  const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
  if (!Info)
    return FPType::None;
  if (Info->HasFP8DstByteSel)
    return FPType::FP8;
  if (Info->HasFP4DstByteSel)
    return FPType::FP4;

  return FPType::None;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for a Tablegen'd function. enum Subtarget is not defined in any
// header file, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

unsigned getBitOp2(unsigned Opc) {
  switch (Opc) {
  default:
    return 0;
  case AMDGPU::V_AND_B32_e32:
    return 0x40;
  case AMDGPU::V_OR_B32_e32:
    return 0x54;
  case AMDGPU::V_XOR_B32_e32:
    return 0x14;
  case AMDGPU::V_XNOR_B32_e32:
    return 0x41;
  }
}

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
                bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
  OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
  const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;
  Opcode = OpDesc.getOpcode();

  IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
  SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2) ? 3
                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm) ? 3
                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1) ? 2
                                                                           : 1;
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
      Opcode == AMDGPU::V_CNDMASK_B32_e64) {
    // CNDMASK is an awkward exception: it has FP modifiers, but not FP
    // operands.
    NumVOPD3Mods = 2;
    if (IsVOP3)
      SrcOperandsNum = 3;
  } else if (isSISrcFPOperand(OpDesc,
                              getNamedOperandIdx(Opcode, OpName::src0))) {
    // All FP VOPD instructions have Neg modifiers for all operands except
    // for tied src2.
    NumVOPD3Mods = SrcOperandsNum;
    if (HasSrc2Acc)
      --NumVOPD3Mods;
  }

  if (OpDesc.TSFlags & SIInstrFlags::VOP3)
    return;

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

int ComponentProps::getBitOp3OperandIdx() const {
  return getNamedOperandIdx(Opcode, OpName::bitop3);
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx,
    const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
    bool VOPD3) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx,
                               CompInfo[ComponentIndex::X].isVOP3());
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx,
                               CompInfo[ComponentIndex::Y].isVOP3());

  const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
                                   unsigned BanksMask) -> bool {
    MCRegister BaseX = MRI.getSubReg(X, AMDGPU::sub0);
    MCRegister BaseY = MRI.getSubReg(Y, AMDGPU::sub0);
    if (!BaseX)
      BaseX = X;
    if (!BaseY)
      BaseY = Y;
    if ((BaseX & BanksMask) == (BaseY & BanksMask))
      return true;
    if (BaseX != X /* This is a 64-bit register */ &&
        ((BaseX + 1) & BanksMask) == (BaseY & BanksMask))
      return true;
    if (BaseY != Y && (BaseX & BanksMask) == ((BaseY + 1) & BanksMask))
      return true;

    // If both are 64-bit registers, a bank conflict will already have been
    // detected while checking the first subreg.
    return false;
  };

  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
    unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
                                : VOPD_VGPR_BANK_MASKS[CompOprIdx];
    if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
      continue;

    if (SkipSrc && CompOprIdx >= Component::DST_NUM)
      continue;

    if (CompOprIdx < Component::DST_NUM) {
      // Even if we do not check vdst parity, vdst operands still must not
      // overlap.
      if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
        return CompOprIdx;
      if (VOPD3) // No need to check dst parity.
        continue;
    }

    if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
        (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
         OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices
InstInfo::getRegIndices(unsigned CompIdx,
                        std::function<unsigned(unsigned, unsigned)> GetRegIdx,
                        bool VOPD3) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx,
                        Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
            : 0;
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  bool VOPD3 = InstrInfo->get(VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X, VOPD3);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features we assume we must generate code that can
  // run in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting will remain "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting will remain
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.ends_with("-"))
    return TargetIDSetting::Off;
  if (FeatureString.ends_with("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.starts_with("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.starts_with("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName()
            << '-' << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: The following else branch is only needed because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is the same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
    // sramecc.
    if (getSramEccSetting() == TargetIDSetting::Off)
      Features += ":sramecc-";
    else if (getSramEccSetting() == TargetIDSetting::On)
      Features += ":sramecc+";
    // xnack.
    if (getXnackSetting() == TargetIDSetting::Off)
      Features += ":xnack-";
    else if (getXnackSetting() == TargetIDSetting::On)
      Features += ":xnack+";
  }

  StreamRep << Processor << Features;

  return StringRep;
}

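// Illustrative only: for a gfx90a AMDHSA target with sramecc enabled and
// xnack disabled, toString() above produces
//
//   amdgcn-amd-amdhsa--gfx90a:sramecc+:xnack-
//
// (the environment component of the triple is empty, hence the "--").
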
unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = getAddressableLocalMemorySize(STI);

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
    return 65536;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
    return 163840;
  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (!STI->getTargetTriple().isAMDGCN())
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}

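// Illustrative only: a worked example of getMaxWorkGroupsPerCU, assuming a
// gfx10.3 subtarget in WGP mode (FeatureCuMode off) with wave32 and
// FlatWorkGroupSize == 256:
//
//   N        = getWavesPerWorkGroup(STI, 256) = divideCeil(256, 32) = 8
//   MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI) = 16 * 4    = 64
//   result   = std::min(64 / 8, /*MaxBarriers=*/32)                 = 8
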
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; }

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; }

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; }

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

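// Illustrative only: a worked example of getMaxNumSGPRs, assuming a gfx8
// subtarget without FeatureTrapHandler and WavesPerEU == 10:
//
//   getMaxNumSGPRs(STI, 10, /*Addressable=*/false)
//     = std::min(alignDown(800 / 10, /*Granule=*/16), 112) = 80
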
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
                                               unsigned Granule) {
  return divideCeil(std::max(1u, NumRegs), Granule);
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  // SGPRBlocks is the actual number of SGPR blocks minus 1.
  return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
         1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             unsigned DynamicVGPRBlockSize,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  if (DynamicVGPRBlockSize != 0)
    return DynamicVGPRBlockSize;

  // Temporarily check the subtarget feature, until we fully switch to using
  // attributes.
  if (STI->getFeatureBits().test(FeatureDynamicVGPR))
    return STI->getFeatureBits().test(FeatureDynamicVGPRBlockSize32) ? 32 : 16;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getArchVGPRAllocGranule() { return 4; }

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
                                unsigned DynamicVGPRBlockSize) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;

  // Temporarily check the subtarget feature, until we fully switch to using
  // attributes.
  if (DynamicVGPRBlockSize != 0 ||
      STI->getFeatureBits().test(FeatureDynamicVGPR))
    // On GFX12 we can allocate at most 8 blocks of VGPRs.
    return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  return getAddressableNumArchVGPRs(STI);
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs,
                                      unsigned DynamicVGPRBlockSize) {
  return getNumWavesPerEUWithNumVGPRs(
      NumVGPRs, getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
      getMaxWavesPerEU(STI), getTotalNumVGPRs(STI));
}

unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs) {
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
}

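// Illustrative only: a worked example of getNumWavesPerEUWithNumVGPRs,
// assuming Granule == 16, TotalNumVGPRs == 1024 and MaxWaves == 16 (a wave32
// gfx10.3 subtarget without Feature1_5xVGPRs or dynamic VGPRs):
//
//   getNumWavesPerEUWithNumVGPRs(/*NumVGPRs=*/100, 16, 16, 1024)
//     = std::min(std::max(1024 / alignTo(100, 16), 1u), 16u)
//     = std::min(1024 / 112, 16u) = 9
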
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
                                  AMDGPUSubtarget::Generation Gen) {
  if (Gen >= AMDGPUSubtarget::GFX10)
    return MaxWaves;

  if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs,
                                                        DynamicVGPRBlockSize);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs =
      alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
  unsigned AddressableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                 std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
             NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
         1;
}

unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
                                   unsigned NumVGPRs,
                                   unsigned DynamicVGPRBlockSize,
                                   std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
      NumVGPRs,
      getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
}
} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  KernelCode.amd_kernel_code_version_major = 1;
  KernelCode.amd_kernel_code_version_minor = 2;
  KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  KernelCode.amd_machine_version_major = Version.Major;
  KernelCode.amd_machine_version_minor = Version.Minor;
  KernelCode.amd_machine_version_stepping = Version.Stepping;
  KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
  if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
    KernelCode.wavefront_size = 5;
    KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
  } else {
    KernelCode.wavefront_size = 6;
  }

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  KernelCode.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  KernelCode.kernarg_segment_alignment = 4;
  KernelCode.group_segment_alignment = 4;
  KernelCode.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    KernelCode.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1) | S_00B848_FWD_PROGRESS(1);
  }
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired) {
  if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
    return {Attr->first, Attr->second.value_or(Default.second)};
  return Default;
}

std::optional<std::pair<unsigned, std::optional<unsigned>>>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return std::nullopt;

  LLVMContext &Ctx = F.getContext();
  std::pair<unsigned, std::optional<unsigned>> Ints;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return std::nullopt;
  }
  unsigned Second = 0;
  if (Strs.second.trim().getAsInteger(0, Second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return std::nullopt;
    }
  } else {
    Ints.second = Second;
  }

  return Ints;
}

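// Illustrative only: for a function carrying
//
//   attributes #0 = { "amdgpu-waves-per-eu"="4,8" }
//
// getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {1, 10}) returns {4, 8}.
// With "amdgpu-waves-per-eu"="4" and OnlyFirstRequired == true it returns
// {4, 10}, taking the second element from the (hypothetical) default pair.
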
SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
                                             unsigned Size,
                                             unsigned DefaultVal) {
  std::optional<SmallVector<unsigned>> R =
      getIntegerVecAttribute(F, Name, Size);
  return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
}

std::optional<SmallVector<unsigned>>
getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size) {
  assert(Size > 2);
  LLVMContext &Ctx = F.getContext();

  Attribute A = F.getFnAttribute(Name);
  if (!A.isValid())
    return std::nullopt;
  if (!A.isStringAttribute()) {
    Ctx.emitError(Name + " is not a string attribute");
    return std::nullopt;
  }

  SmallVector<unsigned> Vals(Size);

  StringRef S = A.getValueAsString();
  unsigned i = 0;
  for (; !S.empty() && i < Size; i++) {
    std::pair<StringRef, StringRef> Strs = S.split(',');
    unsigned IntVal;
    if (Strs.first.trim().getAsInteger(0, IntVal)) {
      Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
                    Name);
      return std::nullopt;
    }
    Vals[i] = IntVal;
    S = Strs.second;
  }

  if (!S.empty() || i < Size) {
    Ctx.emitError("attribute " + Name +
                  " has incorrect number of integers; expected " +
                  llvm::utostr(Size));
    return std::nullopt;
  }
  return Vals;
}

unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

unsigned getLoadcntBitMask(const IsaVersion &Version) {
  return (1 << getLoadcntBitWidth(Version.Major)) - 1;
}

unsigned getSamplecntBitMask(const IsaVersion &Version) {
  return (1 << getSamplecntBitWidth(Version.Major)) - 1;
}

unsigned getBvhcntBitMask(const IsaVersion &Version) {
  return (1 << getBvhcntBitWidth(Version.Major)) - 1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getDscntBitMask(const IsaVersion &Version) {
  return (1 << getDscntBitWidth(Version.Major)) - 1;
}

unsigned getKmcntBitMask(const IsaVersion &Version) {
  return (1 << getKmcntBitWidth(Version.Major)) - 1;
}

unsigned getXcntBitMask(const IsaVersion &Version) {
  return (1 << getXcntBitWidth(Version.Major, Version.Minor)) - 1;
}

unsigned getStorecntBitMask(const IsaVersion &Version) {
  return (1 << getStorecntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
                   unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

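// Illustrative only: on gfx9, vmcnt occupies bits [3:0] (lo) and [15:14]
// (hi), expcnt bits [6:4], and lgkmcnt bits [11:8], so for the encoded value
// 0x8953 the decode helpers above yield
//
//   decodeVmcnt(V, 0x8953)   == 3 | (2 << 4) == 35
//   decodeExpcnt(V, 0x8953)  == 5
//   decodeLgkmcnt(V, 0x8953) == 9
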
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
                       unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
}

static unsigned getCombinedCountBitMask(const IsaVersion &Version,
                                        bool IsStore) {
  unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
                              getDscntBitWidth(Version.Major));
  if (IsStore) {
    unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                   getStorecntBitWidth(Version.Major));
    return Dscnt | Storecnt;
  }
  unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                getLoadcntBitWidth(Version.Major));
  return Dscnt | Loadcnt;
}

Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
  Waitcnt Decoded;
  Decoded.LoadCnt =
      unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getLoadcntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
  Waitcnt Decoded;
  Decoded.StoreCnt =
      unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getStorecntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

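// Illustrative only: on gfx12 the combined encoding places loadcnt in bits
// [13:8] and dscnt in bits [5:0], so encodeLoadcntDscnt below with
// Loadcnt == 4 and Dscnt == 2 computes
//
//   mask = 0x3f3f; packBits(4, 0x3f3f, 8, 6) == 0x043f;
//   packBits(2, 0x043f, 0, 6) == 0x0402
//
// and decodeLoadcntDscnt(V, 0x0402) above recovers LoadCnt == 4, DsCnt == 2.
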
1750 } 1751 1752 static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, 1753 unsigned Loadcnt) { 1754 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major), 1755 getLoadcntBitWidth(Version.Major)); 1756 } 1757 1758 static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, 1759 unsigned Storecnt) { 1760 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major), 1761 getStorecntBitWidth(Version.Major)); 1762 } 1763 1764 static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, 1765 unsigned Dscnt) { 1766 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major), 1767 getDscntBitWidth(Version.Major)); 1768 } 1769 1770 static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, 1771 unsigned Dscnt) { 1772 unsigned Waitcnt = getCombinedCountBitMask(Version, false); 1773 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt); 1774 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt); 1775 return Waitcnt; 1776 } 1777 1778 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) { 1779 return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt); 1780 } 1781 1782 static unsigned encodeStorecntDscnt(const IsaVersion &Version, 1783 unsigned Storecnt, unsigned Dscnt) { 1784 unsigned Waitcnt = getCombinedCountBitMask(Version, true); 1785 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt); 1786 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt); 1787 return Waitcnt; 1788 } 1789 1790 unsigned encodeStorecntDscnt(const IsaVersion &Version, 1791 const Waitcnt &Decoded) { 1792 return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt); 1793 } 1794 1795 //===----------------------------------------------------------------------===// 1796 // Custom Operand Values 1797 //===----------------------------------------------------------------------===// 1798 1799 static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, 1800 int Size, 1801 const MCSubtargetInfo &STI) { 1802 unsigned Enc = 0; 1803 for (int Idx = 0; Idx < Size; ++Idx) { 1804 const auto &Op = Opr[Idx]; 1805 if (Op.isSupported(STI)) 1806 Enc |= Op.encode(Op.Default); 1807 } 1808 return Enc; 1809 } 1810 1811 static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, 1812 int Size, unsigned Code, 1813 bool &HasNonDefaultVal, 1814 const MCSubtargetInfo &STI) { 1815 unsigned UsedOprMask = 0; 1816 HasNonDefaultVal = false; 1817 for (int Idx = 0; Idx < Size; ++Idx) { 1818 const auto &Op = Opr[Idx]; 1819 if (!Op.isSupported(STI)) 1820 continue; 1821 UsedOprMask |= Op.getMask(); 1822 unsigned Val = Op.decode(Code); 1823 if (!Op.isValid(Val)) 1824 return false; 1825 HasNonDefaultVal |= (Val != Op.Default); 1826 } 1827 return (Code & ~UsedOprMask) == 0; 1828 } 1829 1830 static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, 1831 unsigned Code, int &Idx, StringRef &Name, 1832 unsigned &Val, bool &IsDefault, 1833 const MCSubtargetInfo &STI) { 1834 while (Idx < Size) { 1835 const auto &Op = Opr[Idx++]; 1836 if (Op.isSupported(STI)) { 1837 Name = Op.Name; 1838 Val = Op.decode(Code); 1839 IsDefault = (Val == Op.Default); 1840 return true; 1841 } 1842 } 1843 1844 return false; 1845 } 1846 1847 static int encodeCustomOperandVal(const CustomOperandVal &Op, 1848 int64_t InputVal) { 1849 if (InputVal < 0 || InputVal > Op.Max) 1850 return OPR_VAL_INVALID; 1851 return Op.encode(InputVal); 1852 } 1853 1854 static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, 1855 
const StringRef Name, int64_t InputVal, 1856 unsigned &UsedOprMask, 1857 const MCSubtargetInfo &STI) { 1858 int InvalidId = OPR_ID_UNKNOWN; 1859 for (int Idx = 0; Idx < Size; ++Idx) { 1860 const auto &Op = Opr[Idx]; 1861 if (Op.Name == Name) { 1862 if (!Op.isSupported(STI)) { 1863 InvalidId = OPR_ID_UNSUPPORTED; 1864 continue; 1865 } 1866 auto OprMask = Op.getMask(); 1867 if (OprMask & UsedOprMask) 1868 return OPR_ID_DUPLICATE; 1869 UsedOprMask |= OprMask; 1870 return encodeCustomOperandVal(Op, InputVal); 1871 } 1872 } 1873 return InvalidId; 1874 } 1875 1876 //===----------------------------------------------------------------------===// 1877 // DepCtr 1878 //===----------------------------------------------------------------------===// 1879 1880 namespace DepCtr { 1881 1882 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) { 1883 static int Default = -1; 1884 if (Default == -1) 1885 Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI); 1886 return Default; 1887 } 1888 1889 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, 1890 const MCSubtargetInfo &STI) { 1891 return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code, 1892 HasNonDefaultVal, STI); 1893 } 1894 1895 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, 1896 bool &IsDefault, const MCSubtargetInfo &STI) { 1897 return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val, 1898 IsDefault, STI); 1899 } 1900 1901 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, 1902 const MCSubtargetInfo &STI) { 1903 return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask, 1904 STI); 1905 } 1906 1907 unsigned decodeFieldVmVsrc(unsigned Encoded) { 1908 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); 1909 } 1910 1911 unsigned decodeFieldVaVdst(unsigned Encoded) { 1912 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth()); 1913 } 1914 1915 unsigned decodeFieldSaSdst(unsigned Encoded) { 1916 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth()); 1917 } 1918 1919 unsigned decodeFieldVaSdst(unsigned Encoded) { 1920 return unpackBits(Encoded, getVaSdstBitShift(), getVaSdstBitWidth()); 1921 } 1922 1923 unsigned decodeFieldVaVcc(unsigned Encoded) { 1924 return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth()); 1925 } 1926 1927 unsigned decodeFieldVaSsrc(unsigned Encoded) { 1928 return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth()); 1929 } 1930 1931 unsigned decodeFieldHoldCnt(unsigned Encoded) { 1932 return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth()); 1933 } 1934 1935 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) { 1936 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); 1937 } 1938 1939 unsigned encodeFieldVmVsrc(unsigned VmVsrc) { 1940 return encodeFieldVmVsrc(0xffff, VmVsrc); 1941 } 1942 1943 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) { 1944 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth()); 1945 } 1946 1947 unsigned encodeFieldVaVdst(unsigned VaVdst) { 1948 return encodeFieldVaVdst(0xffff, VaVdst); 1949 } 1950 1951 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) { 1952 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth()); 1953 } 1954 1955 unsigned encodeFieldSaSdst(unsigned SaSdst) { 1956 return encodeFieldSaSdst(0xffff, SaSdst); 1957 } 1958 1959 unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) 
{ 1960 return packBits(VaSdst, Encoded, getVaSdstBitShift(), getVaSdstBitWidth()); 1961 } 1962 1963 unsigned encodeFieldVaSdst(unsigned VaSdst) { 1964 return encodeFieldVaSdst(0xffff, VaSdst); 1965 } 1966 1967 unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) { 1968 return packBits(VaVcc, Encoded, getVaVccBitShift(), getVaVccBitWidth()); 1969 } 1970 1971 unsigned encodeFieldVaVcc(unsigned VaVcc) { 1972 return encodeFieldVaVcc(0xffff, VaVcc); 1973 } 1974 1975 unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) { 1976 return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth()); 1977 } 1978 1979 unsigned encodeFieldVaSsrc(unsigned VaSsrc) { 1980 return encodeFieldVaSsrc(0xffff, VaSsrc); 1981 } 1982 1983 unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) { 1984 return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth()); 1985 } 1986 1987 unsigned encodeFieldHoldCnt(unsigned HoldCnt) { 1988 return encodeFieldHoldCnt(0xffff, HoldCnt); 1989 } 1990 1991 } // namespace DepCtr 1992 1993 //===----------------------------------------------------------------------===// 1994 // exp tgt 1995 //===----------------------------------------------------------------------===// 1996 1997 namespace Exp { 1998 1999 struct ExpTgt { 2000 StringLiteral Name; 2001 unsigned Tgt; 2002 unsigned MaxIndex; 2003 }; 2004 2005 // clang-format off 2006 static constexpr ExpTgt ExpTgtInfo[] = { 2007 {{"null"}, ET_NULL, ET_NULL_MAX_IDX}, 2008 {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX}, 2009 {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX}, 2010 {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX}, 2011 {{"pos"}, ET_POS0, ET_POS_MAX_IDX}, 2012 {{"dual_src_blend"},ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX}, 2013 {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX}, 2014 }; 2015 // clang-format on 2016 2017 bool getTgtName(unsigned Id, StringRef &Name, int &Index) { 2018 for (const ExpTgt &Val : ExpTgtInfo) { 2019 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) { 2020 Index = (Val.MaxIndex == 0) ? 
-1 : (Id - Val.Tgt); 2021 Name = Val.Name; 2022 return true; 2023 } 2024 } 2025 return false; 2026 } 2027 2028 unsigned getTgtId(const StringRef Name) { 2029 2030 for (const ExpTgt &Val : ExpTgtInfo) { 2031 if (Val.MaxIndex == 0 && Name == Val.Name) 2032 return Val.Tgt; 2033 2034 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) { 2035 StringRef Suffix = Name.drop_front(Val.Name.size()); 2036 2037 unsigned Id; 2038 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex) 2039 return ET_INVALID; 2040 2041 // Disable leading zeroes 2042 if (Suffix.size() > 1 && Suffix[0] == '0') 2043 return ET_INVALID; 2044 2045 return Val.Tgt + Id; 2046 } 2047 } 2048 return ET_INVALID; 2049 } 2050 2051 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) { 2052 switch (Id) { 2053 case ET_NULL: 2054 return !isGFX11Plus(STI); 2055 case ET_POS4: 2056 case ET_PRIM: 2057 return isGFX10Plus(STI); 2058 case ET_DUAL_SRC_BLEND0: 2059 case ET_DUAL_SRC_BLEND1: 2060 return isGFX11Plus(STI); 2061 default: 2062 if (Id >= ET_PARAM0 && Id <= ET_PARAM31) 2063 return !isGFX11Plus(STI); 2064 return true; 2065 } 2066 } 2067 2068 } // namespace Exp 2069 2070 //===----------------------------------------------------------------------===// 2071 // MTBUF Format 2072 //===----------------------------------------------------------------------===// 2073 2074 namespace MTBUFFormat { 2075 2076 int64_t getDfmt(const StringRef Name) { 2077 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) { 2078 if (Name == DfmtSymbolic[Id]) 2079 return Id; 2080 } 2081 return DFMT_UNDEF; 2082 } 2083 2084 StringRef getDfmtName(unsigned Id) { 2085 assert(Id <= DFMT_MAX); 2086 return DfmtSymbolic[Id]; 2087 } 2088 2089 static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) { 2090 if (isSI(STI) || isCI(STI)) 2091 return NfmtSymbolicSICI; 2092 if (isVI(STI) || isGFX9(STI)) 2093 return NfmtSymbolicVI; 2094 return NfmtSymbolicGFX10; 2095 } 2096 2097 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) { 2098 const auto *lookupTable = getNfmtLookupTable(STI); 2099 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) { 2100 if (Name == lookupTable[Id]) 2101 return Id; 2102 } 2103 return NFMT_UNDEF; 2104 } 2105 2106 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) { 2107 assert(Id <= NFMT_MAX); 2108 return getNfmtLookupTable(STI)[Id]; 2109 } 2110 2111 bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) { 2112 unsigned Dfmt; 2113 unsigned Nfmt; 2114 decodeDfmtNfmt(Id, Dfmt, Nfmt); 2115 return isValidNfmt(Nfmt, STI); 2116 } 2117 2118 bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) { 2119 return !getNfmtName(Id, STI).empty(); 2120 } 2121 2122 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) { 2123 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT); 2124 } 2125 2126 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) { 2127 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK; 2128 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK; 2129 } 2130 2131 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) { 2132 if (isGFX11Plus(STI)) { 2133 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { 2134 if (Name == UfmtSymbolicGFX11[Id]) 2135 return Id; 2136 } 2137 } else { 2138 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { 2139 if (Name == UfmtSymbolicGFX10[Id]) 2140 return Id; 2141 } 2142 } 2143 return UFMT_UNDEF; 2144 } 2145 2146 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) { 2147 if (isValidUnifiedFormat(Id, STI)) 
2148 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id]; 2149 return ""; 2150 } 2151 2152 bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) { 2153 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST; 2154 } 2155 2156 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, 2157 const MCSubtargetInfo &STI) { 2158 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt); 2159 if (isGFX11Plus(STI)) { 2160 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { 2161 if (Fmt == DfmtNfmt2UFmtGFX11[Id]) 2162 return Id; 2163 } 2164 } else { 2165 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { 2166 if (Fmt == DfmtNfmt2UFmtGFX10[Id]) 2167 return Id; 2168 } 2169 } 2170 return UFMT_UNDEF; 2171 } 2172 2173 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) { 2174 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX); 2175 } 2176 2177 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) { 2178 if (isGFX10Plus(STI)) 2179 return UFMT_DEFAULT; 2180 return DFMT_NFMT_DEFAULT; 2181 } 2182 2183 } // namespace MTBUFFormat 2184 2185 //===----------------------------------------------------------------------===// 2186 // SendMsg 2187 //===----------------------------------------------------------------------===// 2188 2189 namespace SendMsg { 2190 2191 static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) { 2192 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_; 2193 } 2194 2195 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) { 2196 return (MsgId & ~(getMsgIdMask(STI))) == 0; 2197 } 2198 2199 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, 2200 bool Strict) { 2201 assert(isValidMsgId(MsgId, STI)); 2202 2203 if (!Strict) 2204 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId); 2205 2206 if (msgRequiresOp(MsgId, STI)) { 2207 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP) 2208 return false; 2209 2210 return !getMsgOpName(MsgId, OpId, STI).empty(); 2211 } 2212 2213 return OpId == OP_NONE_; 2214 } 2215 2216 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, 2217 const MCSubtargetInfo &STI, bool Strict) { 2218 assert(isValidMsgOp(MsgId, OpId, STI, Strict)); 2219 2220 if (!Strict) 2221 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId); 2222 2223 if (!isGFX11Plus(STI)) { 2224 switch (MsgId) { 2225 case ID_GS_PreGFX11: 2226 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; 2227 case ID_GS_DONE_PreGFX11: 2228 return (OpId == OP_GS_NOP) 2229 ? 
(StreamId == STREAM_ID_NONE_) 2230 : (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); 2231 } 2232 } 2233 return StreamId == STREAM_ID_NONE_; 2234 } 2235 2236 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) { 2237 return MsgId == ID_SYSMSG || 2238 (!isGFX11Plus(STI) && 2239 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11)); 2240 } 2241 2242 bool msgSupportsStream(int64_t MsgId, int64_t OpId, 2243 const MCSubtargetInfo &STI) { 2244 return !isGFX11Plus(STI) && 2245 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) && 2246 OpId != OP_GS_NOP; 2247 } 2248 2249 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, 2250 uint16_t &StreamId, const MCSubtargetInfo &STI) { 2251 MsgId = Val & getMsgIdMask(STI); 2252 if (isGFX11Plus(STI)) { 2253 OpId = 0; 2254 StreamId = 0; 2255 } else { 2256 OpId = (Val & OP_MASK_) >> OP_SHIFT_; 2257 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; 2258 } 2259 } 2260 2261 uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) { 2262 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_); 2263 } 2264 2265 } // namespace SendMsg 2266 2267 //===----------------------------------------------------------------------===// 2268 // 2269 //===----------------------------------------------------------------------===// 2270 2271 unsigned getInitialPSInputAddr(const Function &F) { 2272 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0); 2273 } 2274 2275 bool getHasColorExport(const Function &F) { 2276 // As a safe default always respond as if PS has color exports. 2277 return F.getFnAttributeAsParsedInteger( 2278 "amdgpu-color-export", 2279 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0; 2280 } 2281 2282 bool getHasDepthExport(const Function &F) { 2283 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0; 2284 } 2285 2286 unsigned getDynamicVGPRBlockSize(const Function &F) { 2287 unsigned BlockSize = 2288 F.getFnAttributeAsParsedInteger("amdgpu-dynamic-vgpr-block-size", 0); 2289 2290 if (BlockSize == 16 || BlockSize == 32) 2291 return BlockSize; 2292 2293 return 0; 2294 } 2295 2296 bool hasXNACK(const MCSubtargetInfo &STI) { 2297 return STI.hasFeature(AMDGPU::FeatureXNACK); 2298 } 2299 2300 bool hasSRAMECC(const MCSubtargetInfo &STI) { 2301 return STI.hasFeature(AMDGPU::FeatureSRAMECC); 2302 } 2303 2304 bool hasMIMG_R128(const MCSubtargetInfo &STI) { 2305 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && 2306 !STI.hasFeature(AMDGPU::FeatureR128A16); 2307 } 2308 2309 bool hasA16(const MCSubtargetInfo &STI) { 2310 return STI.hasFeature(AMDGPU::FeatureA16); 2311 } 2312 2313 bool hasG16(const MCSubtargetInfo &STI) { 2314 return STI.hasFeature(AMDGPU::FeatureG16); 2315 } 2316 2317 bool hasPackedD16(const MCSubtargetInfo &STI) { 2318 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) && 2319 !isSI(STI); 2320 } 2321 2322 bool hasGDS(const MCSubtargetInfo &STI) { 2323 return STI.hasFeature(AMDGPU::FeatureGDS); 2324 } 2325 2326 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) { 2327 auto Version = getIsaVersion(STI.getCPU()); 2328 if (Version.Major == 10) 2329 return Version.Minor >= 3 ? 13 : 5; 2330 if (Version.Major == 11) 2331 return 5; 2332 if (Version.Major >= 12) 2333 return HasSampler ? 
4 : 5; 2334 return 0; 2335 } 2336 2337 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; } 2338 2339 bool isSI(const MCSubtargetInfo &STI) { 2340 return STI.hasFeature(AMDGPU::FeatureSouthernIslands); 2341 } 2342 2343 bool isCI(const MCSubtargetInfo &STI) { 2344 return STI.hasFeature(AMDGPU::FeatureSeaIslands); 2345 } 2346 2347 bool isVI(const MCSubtargetInfo &STI) { 2348 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands); 2349 } 2350 2351 bool isGFX9(const MCSubtargetInfo &STI) { 2352 return STI.hasFeature(AMDGPU::FeatureGFX9); 2353 } 2354 2355 bool isGFX9_GFX10(const MCSubtargetInfo &STI) { 2356 return isGFX9(STI) || isGFX10(STI); 2357 } 2358 2359 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) { 2360 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI); 2361 } 2362 2363 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) { 2364 return isVI(STI) || isGFX9(STI) || isGFX10(STI); 2365 } 2366 2367 bool isGFX8Plus(const MCSubtargetInfo &STI) { 2368 return isVI(STI) || isGFX9Plus(STI); 2369 } 2370 2371 bool isGFX9Plus(const MCSubtargetInfo &STI) { 2372 return isGFX9(STI) || isGFX10Plus(STI); 2373 } 2374 2375 bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); } 2376 2377 bool isGFX10(const MCSubtargetInfo &STI) { 2378 return STI.hasFeature(AMDGPU::FeatureGFX10); 2379 } 2380 2381 bool isGFX10_GFX11(const MCSubtargetInfo &STI) { 2382 return isGFX10(STI) || isGFX11(STI); 2383 } 2384 2385 bool isGFX10Plus(const MCSubtargetInfo &STI) { 2386 return isGFX10(STI) || isGFX11Plus(STI); 2387 } 2388 2389 bool isGFX11(const MCSubtargetInfo &STI) { 2390 return STI.hasFeature(AMDGPU::FeatureGFX11); 2391 } 2392 2393 bool isGFX11Plus(const MCSubtargetInfo &STI) { 2394 return isGFX11(STI) || isGFX12Plus(STI); 2395 } 2396 2397 bool isGFX12(const MCSubtargetInfo &STI) { 2398 return STI.getFeatureBits()[AMDGPU::FeatureGFX12]; 2399 } 2400 2401 bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); } 2402 2403 bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); } 2404 2405 bool isGFX1250(const MCSubtargetInfo &STI) { 2406 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts]; 2407 } 2408 2409 bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); } 2410 2411 bool isNotGFX10Plus(const MCSubtargetInfo &STI) { 2412 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI); 2413 } 2414 2415 bool isGFX10Before1030(const MCSubtargetInfo &STI) { 2416 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI); 2417 } 2418 2419 bool isGCN3Encoding(const MCSubtargetInfo &STI) { 2420 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding); 2421 } 2422 2423 bool isGFX10_AEncoding(const MCSubtargetInfo &STI) { 2424 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding); 2425 } 2426 2427 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) { 2428 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding); 2429 } 2430 2431 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) { 2432 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts); 2433 } 2434 2435 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) { 2436 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI); 2437 } 2438 2439 bool isGFX90A(const MCSubtargetInfo &STI) { 2440 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts); 2441 } 2442 2443 bool isGFX940(const MCSubtargetInfo &STI) { 2444 return STI.hasFeature(AMDGPU::FeatureGFX940Insts); 2445 } 2446 2447 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) { 2448 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch); 2449 } 2450 2451 
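// Usage sketch (illustrative, not part of the upstream file): the generation
// predicates above are meant to be composed at call sites. For example, a
// hypothetical gate for the split LOADcnt/STOREcnt/DScnt wait counters that
// arrived with GFX12 could be written as:
//
//   static bool hasSeparateWaitCounters(const MCSubtargetInfo &STI) {
//     return isGFX12Plus(STI); // GFX12 splits vmcnt/lgkmcnt into LOADcnt,
//                              // SAMPLEcnt, BVHcnt, DScnt, STOREcnt, KMcnt
//   }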
bool hasMAIInsts(const MCSubtargetInfo &STI) { 2452 return STI.hasFeature(AMDGPU::FeatureMAIInsts); 2453 } 2454 2455 bool hasVOPD(const MCSubtargetInfo &STI) { 2456 return STI.hasFeature(AMDGPU::FeatureVOPD); 2457 } 2458 2459 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) { 2460 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR); 2461 } 2462 2463 unsigned hasKernargPreload(const MCSubtargetInfo &STI) { 2464 return STI.hasFeature(AMDGPU::FeatureKernargPreload); 2465 } 2466 2467 int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, 2468 int32_t ArgNumVGPR) { 2469 if (has90AInsts && ArgNumAGPR) 2470 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR; 2471 return std::max(ArgNumVGPR, ArgNumAGPR); 2472 } 2473 2474 bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI) { 2475 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); 2476 const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0); 2477 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || 2478 Reg == AMDGPU::SCC; 2479 } 2480 2481 bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI) { 2482 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI16; 2483 } 2484 2485 #define MAP_REG2REG \ 2486 using namespace AMDGPU; \ 2487 switch (Reg.id()) { \ 2488 default: \ 2489 return Reg; \ 2490 CASE_CI_VI(FLAT_SCR) \ 2491 CASE_CI_VI(FLAT_SCR_LO) \ 2492 CASE_CI_VI(FLAT_SCR_HI) \ 2493 CASE_VI_GFX9PLUS(TTMP0) \ 2494 CASE_VI_GFX9PLUS(TTMP1) \ 2495 CASE_VI_GFX9PLUS(TTMP2) \ 2496 CASE_VI_GFX9PLUS(TTMP3) \ 2497 CASE_VI_GFX9PLUS(TTMP4) \ 2498 CASE_VI_GFX9PLUS(TTMP5) \ 2499 CASE_VI_GFX9PLUS(TTMP6) \ 2500 CASE_VI_GFX9PLUS(TTMP7) \ 2501 CASE_VI_GFX9PLUS(TTMP8) \ 2502 CASE_VI_GFX9PLUS(TTMP9) \ 2503 CASE_VI_GFX9PLUS(TTMP10) \ 2504 CASE_VI_GFX9PLUS(TTMP11) \ 2505 CASE_VI_GFX9PLUS(TTMP12) \ 2506 CASE_VI_GFX9PLUS(TTMP13) \ 2507 CASE_VI_GFX9PLUS(TTMP14) \ 2508 CASE_VI_GFX9PLUS(TTMP15) \ 2509 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \ 2510 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \ 2511 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \ 2512 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \ 2513 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \ 2514 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \ 2515 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \ 2516 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \ 2517 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \ 2518 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \ 2519 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \ 2520 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \ 2521 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \ 2522 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ 2523 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ 2524 CASE_VI_GFX9PLUS( \ 2525 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ 2526 CASE_GFXPRE11_GFX11PLUS(M0) \ 2527 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \ 2528 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \ 2529 } 2530 2531 #define CASE_CI_VI(node) \ 2532 assert(!isSI(STI)); \ 2533 case node: \ 2534 return isCI(STI) ? node##_ci : node##_vi; 2535 2536 #define CASE_VI_GFX9PLUS(node) \ 2537 case node: \ 2538 return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi; 2539 2540 #define CASE_GFXPRE11_GFX11PLUS(node) \ 2541 case node: \ 2542 return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11; 2543 2544 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \ 2545 case node: \ 2546 return isGFX11Plus(STI) ? 
result##_gfx11plus : result##_gfxpre11; 2547 2548 MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) { 2549 if (STI.getTargetTriple().getArch() == Triple::r600) 2550 return Reg; 2551 MAP_REG2REG 2552 } 2553 2554 #undef CASE_CI_VI 2555 #undef CASE_VI_GFX9PLUS 2556 #undef CASE_GFXPRE11_GFX11PLUS 2557 #undef CASE_GFXPRE11_GFX11PLUS_TO 2558 2559 #define CASE_CI_VI(node) \ 2560 case node##_ci: \ 2561 case node##_vi: \ 2562 return node; 2563 #define CASE_VI_GFX9PLUS(node) \ 2564 case node##_vi: \ 2565 case node##_gfx9plus: \ 2566 return node; 2567 #define CASE_GFXPRE11_GFX11PLUS(node) \ 2568 case node##_gfx11plus: \ 2569 case node##_gfxpre11: \ 2570 return node; 2571 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) 2572 2573 MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG } 2574 2575 bool isInlineValue(unsigned Reg) { 2576 switch (Reg) { 2577 case AMDGPU::SRC_SHARED_BASE_LO: 2578 case AMDGPU::SRC_SHARED_BASE: 2579 case AMDGPU::SRC_SHARED_LIMIT_LO: 2580 case AMDGPU::SRC_SHARED_LIMIT: 2581 case AMDGPU::SRC_PRIVATE_BASE_LO: 2582 case AMDGPU::SRC_PRIVATE_BASE: 2583 case AMDGPU::SRC_PRIVATE_LIMIT_LO: 2584 case AMDGPU::SRC_PRIVATE_LIMIT: 2585 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2586 return true; 2587 case AMDGPU::SRC_VCCZ: 2588 case AMDGPU::SRC_EXECZ: 2589 case AMDGPU::SRC_SCC: 2590 return true; 2591 case AMDGPU::SGPR_NULL: 2592 return true; 2593 default: 2594 return false; 2595 } 2596 } 2597 2598 #undef CASE_CI_VI 2599 #undef CASE_VI_GFX9PLUS 2600 #undef CASE_GFXPRE11_GFX11PLUS 2601 #undef CASE_GFXPRE11_GFX11PLUS_TO 2602 #undef MAP_REG2REG 2603 2604 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2605 assert(OpNo < Desc.NumOperands); 2606 unsigned OpType = Desc.operands()[OpNo].OperandType; 2607 return OpType >= AMDGPU::OPERAND_SRC_FIRST && 2608 OpType <= AMDGPU::OPERAND_SRC_LAST; 2609 } 2610 2611 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2612 assert(OpNo < Desc.NumOperands); 2613 unsigned OpType = Desc.operands()[OpNo].OperandType; 2614 return OpType >= AMDGPU::OPERAND_KIMM_FIRST && 2615 OpType <= AMDGPU::OPERAND_KIMM_LAST; 2616 } 2617 2618 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2619 assert(OpNo < Desc.NumOperands); 2620 unsigned OpType = Desc.operands()[OpNo].OperandType; 2621 switch (OpType) { 2622 case AMDGPU::OPERAND_REG_IMM_FP32: 2623 case AMDGPU::OPERAND_REG_IMM_FP64: 2624 case AMDGPU::OPERAND_REG_IMM_FP16: 2625 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2626 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2627 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2628 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2629 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2630 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2631 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2632 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2633 return true; 2634 default: 2635 return false; 2636 } 2637 } 2638 2639 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2640 assert(OpNo < Desc.NumOperands); 2641 unsigned OpType = Desc.operands()[OpNo].OperandType; 2642 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST && 2643 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) || 2644 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST && 2645 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST); 2646 } 2647 2648 // Avoid using MCRegisterClass::getSize, since that function will go away 2649 // (move from MC* level to Target* level). Return size in bits. 
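// For example (illustrative): getRegBitWidth(AMDGPU::VReg_64RegClassID) below
// returns 64, and getRegOperandSize() further down divides that by 8,
// reporting an operand size of 8 bytes for a 64-bit VGPR pair.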
2650 unsigned getRegBitWidth(unsigned RCID) { 2651 switch (RCID) { 2652 case AMDGPU::VGPR_16RegClassID: 2653 case AMDGPU::VGPR_16_Lo128RegClassID: 2654 case AMDGPU::SGPR_LO16RegClassID: 2655 case AMDGPU::AGPR_LO16RegClassID: 2656 return 16; 2657 case AMDGPU::SGPR_32RegClassID: 2658 case AMDGPU::VGPR_32RegClassID: 2659 case AMDGPU::VRegOrLds_32RegClassID: 2660 case AMDGPU::AGPR_32RegClassID: 2661 case AMDGPU::VS_32RegClassID: 2662 case AMDGPU::AV_32RegClassID: 2663 case AMDGPU::SReg_32RegClassID: 2664 case AMDGPU::SReg_32_XM0RegClassID: 2665 case AMDGPU::SRegOrLds_32RegClassID: 2666 return 32; 2667 case AMDGPU::SGPR_64RegClassID: 2668 case AMDGPU::VS_64RegClassID: 2669 case AMDGPU::SReg_64RegClassID: 2670 case AMDGPU::VReg_64RegClassID: 2671 case AMDGPU::AReg_64RegClassID: 2672 case AMDGPU::SReg_64_XEXECRegClassID: 2673 case AMDGPU::VReg_64_Align2RegClassID: 2674 case AMDGPU::AReg_64_Align2RegClassID: 2675 case AMDGPU::AV_64RegClassID: 2676 case AMDGPU::AV_64_Align2RegClassID: 2677 return 64; 2678 case AMDGPU::SGPR_96RegClassID: 2679 case AMDGPU::SReg_96RegClassID: 2680 case AMDGPU::VReg_96RegClassID: 2681 case AMDGPU::AReg_96RegClassID: 2682 case AMDGPU::VReg_96_Align2RegClassID: 2683 case AMDGPU::AReg_96_Align2RegClassID: 2684 case AMDGPU::AV_96RegClassID: 2685 case AMDGPU::AV_96_Align2RegClassID: 2686 return 96; 2687 case AMDGPU::SGPR_128RegClassID: 2688 case AMDGPU::SReg_128RegClassID: 2689 case AMDGPU::VReg_128RegClassID: 2690 case AMDGPU::AReg_128RegClassID: 2691 case AMDGPU::VReg_128_Align2RegClassID: 2692 case AMDGPU::AReg_128_Align2RegClassID: 2693 case AMDGPU::AV_128RegClassID: 2694 case AMDGPU::AV_128_Align2RegClassID: 2695 case AMDGPU::SReg_128_XNULLRegClassID: 2696 return 128; 2697 case AMDGPU::SGPR_160RegClassID: 2698 case AMDGPU::SReg_160RegClassID: 2699 case AMDGPU::VReg_160RegClassID: 2700 case AMDGPU::AReg_160RegClassID: 2701 case AMDGPU::VReg_160_Align2RegClassID: 2702 case AMDGPU::AReg_160_Align2RegClassID: 2703 case AMDGPU::AV_160RegClassID: 2704 case AMDGPU::AV_160_Align2RegClassID: 2705 return 160; 2706 case AMDGPU::SGPR_192RegClassID: 2707 case AMDGPU::SReg_192RegClassID: 2708 case AMDGPU::VReg_192RegClassID: 2709 case AMDGPU::AReg_192RegClassID: 2710 case AMDGPU::VReg_192_Align2RegClassID: 2711 case AMDGPU::AReg_192_Align2RegClassID: 2712 case AMDGPU::AV_192RegClassID: 2713 case AMDGPU::AV_192_Align2RegClassID: 2714 return 192; 2715 case AMDGPU::SGPR_224RegClassID: 2716 case AMDGPU::SReg_224RegClassID: 2717 case AMDGPU::VReg_224RegClassID: 2718 case AMDGPU::AReg_224RegClassID: 2719 case AMDGPU::VReg_224_Align2RegClassID: 2720 case AMDGPU::AReg_224_Align2RegClassID: 2721 case AMDGPU::AV_224RegClassID: 2722 case AMDGPU::AV_224_Align2RegClassID: 2723 return 224; 2724 case AMDGPU::SGPR_256RegClassID: 2725 case AMDGPU::SReg_256RegClassID: 2726 case AMDGPU::VReg_256RegClassID: 2727 case AMDGPU::AReg_256RegClassID: 2728 case AMDGPU::VReg_256_Align2RegClassID: 2729 case AMDGPU::AReg_256_Align2RegClassID: 2730 case AMDGPU::AV_256RegClassID: 2731 case AMDGPU::AV_256_Align2RegClassID: 2732 case AMDGPU::SReg_256_XNULLRegClassID: 2733 return 256; 2734 case AMDGPU::SGPR_288RegClassID: 2735 case AMDGPU::SReg_288RegClassID: 2736 case AMDGPU::VReg_288RegClassID: 2737 case AMDGPU::AReg_288RegClassID: 2738 case AMDGPU::VReg_288_Align2RegClassID: 2739 case AMDGPU::AReg_288_Align2RegClassID: 2740 case AMDGPU::AV_288RegClassID: 2741 case AMDGPU::AV_288_Align2RegClassID: 2742 return 288; 2743 case AMDGPU::SGPR_320RegClassID: 2744 case AMDGPU::SReg_320RegClassID: 2745 case 
AMDGPU::VReg_320RegClassID: 2746 case AMDGPU::AReg_320RegClassID: 2747 case AMDGPU::VReg_320_Align2RegClassID: 2748 case AMDGPU::AReg_320_Align2RegClassID: 2749 case AMDGPU::AV_320RegClassID: 2750 case AMDGPU::AV_320_Align2RegClassID: 2751 return 320; 2752 case AMDGPU::SGPR_352RegClassID: 2753 case AMDGPU::SReg_352RegClassID: 2754 case AMDGPU::VReg_352RegClassID: 2755 case AMDGPU::AReg_352RegClassID: 2756 case AMDGPU::VReg_352_Align2RegClassID: 2757 case AMDGPU::AReg_352_Align2RegClassID: 2758 case AMDGPU::AV_352RegClassID: 2759 case AMDGPU::AV_352_Align2RegClassID: 2760 return 352; 2761 case AMDGPU::SGPR_384RegClassID: 2762 case AMDGPU::SReg_384RegClassID: 2763 case AMDGPU::VReg_384RegClassID: 2764 case AMDGPU::AReg_384RegClassID: 2765 case AMDGPU::VReg_384_Align2RegClassID: 2766 case AMDGPU::AReg_384_Align2RegClassID: 2767 case AMDGPU::AV_384RegClassID: 2768 case AMDGPU::AV_384_Align2RegClassID: 2769 return 384; 2770 case AMDGPU::SGPR_512RegClassID: 2771 case AMDGPU::SReg_512RegClassID: 2772 case AMDGPU::VReg_512RegClassID: 2773 case AMDGPU::AReg_512RegClassID: 2774 case AMDGPU::VReg_512_Align2RegClassID: 2775 case AMDGPU::AReg_512_Align2RegClassID: 2776 case AMDGPU::AV_512RegClassID: 2777 case AMDGPU::AV_512_Align2RegClassID: 2778 return 512; 2779 case AMDGPU::SGPR_1024RegClassID: 2780 case AMDGPU::SReg_1024RegClassID: 2781 case AMDGPU::VReg_1024RegClassID: 2782 case AMDGPU::AReg_1024RegClassID: 2783 case AMDGPU::VReg_1024_Align2RegClassID: 2784 case AMDGPU::AReg_1024_Align2RegClassID: 2785 case AMDGPU::AV_1024RegClassID: 2786 case AMDGPU::AV_1024_Align2RegClassID: 2787 return 1024; 2788 default: 2789 llvm_unreachable("Unexpected register class"); 2790 } 2791 } 2792 2793 unsigned getRegBitWidth(const MCRegisterClass &RC) { 2794 return getRegBitWidth(RC.getID()); 2795 } 2796 2797 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 2798 unsigned OpNo) { 2799 assert(OpNo < Desc.NumOperands); 2800 unsigned RCID = Desc.operands()[OpNo].RegClass; 2801 return getRegBitWidth(RCID) / 8; 2802 } 2803 2804 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { 2805 if (isInlinableIntLiteral(Literal)) 2806 return true; 2807 2808 uint64_t Val = static_cast<uint64_t>(Literal); 2809 return (Val == llvm::bit_cast<uint64_t>(0.0)) || 2810 (Val == llvm::bit_cast<uint64_t>(1.0)) || 2811 (Val == llvm::bit_cast<uint64_t>(-1.0)) || 2812 (Val == llvm::bit_cast<uint64_t>(0.5)) || 2813 (Val == llvm::bit_cast<uint64_t>(-0.5)) || 2814 (Val == llvm::bit_cast<uint64_t>(2.0)) || 2815 (Val == llvm::bit_cast<uint64_t>(-2.0)) || 2816 (Val == llvm::bit_cast<uint64_t>(4.0)) || 2817 (Val == llvm::bit_cast<uint64_t>(-4.0)) || 2818 (Val == 0x3fc45f306dc9c882 && HasInv2Pi); 2819 } 2820 2821 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) { 2822 if (isInlinableIntLiteral(Literal)) 2823 return true; 2824 2825 // The actual type of the operand does not seem to matter as long 2826 // as the bits match one of the inline immediate values. For example: 2827 // 2828 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, 2829 // so it is a legal inline immediate. 2830 // 2831 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 2832 // floating-point, so it is a legal inline immediate. 
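  // As a concrete check (illustrative): llvm::bit_cast<uint32_t>(1.0f) is
  // 0x3f800000, i.e. 1065353216, so the bit-pattern comparisons below accept
  // that literal whether the operand was nominally integer or float.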
2833 2834 uint32_t Val = static_cast<uint32_t>(Literal); 2835 return (Val == llvm::bit_cast<uint32_t>(0.0f)) || 2836 (Val == llvm::bit_cast<uint32_t>(1.0f)) || 2837 (Val == llvm::bit_cast<uint32_t>(-1.0f)) || 2838 (Val == llvm::bit_cast<uint32_t>(0.5f)) || 2839 (Val == llvm::bit_cast<uint32_t>(-0.5f)) || 2840 (Val == llvm::bit_cast<uint32_t>(2.0f)) || 2841 (Val == llvm::bit_cast<uint32_t>(-2.0f)) || 2842 (Val == llvm::bit_cast<uint32_t>(4.0f)) || 2843 (Val == llvm::bit_cast<uint32_t>(-4.0f)) || 2844 (Val == 0x3e22f983 && HasInv2Pi); 2845 } 2846 2847 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) { 2848 if (!HasInv2Pi) 2849 return false; 2850 if (isInlinableIntLiteral(Literal)) 2851 return true; 2852 uint16_t Val = static_cast<uint16_t>(Literal); 2853 return Val == 0x3F00 || // 0.5 2854 Val == 0xBF00 || // -0.5 2855 Val == 0x3F80 || // 1.0 2856 Val == 0xBF80 || // -1.0 2857 Val == 0x4000 || // 2.0 2858 Val == 0xC000 || // -2.0 2859 Val == 0x4080 || // 4.0 2860 Val == 0xC080 || // -4.0 2861 Val == 0x3E22; // 1.0 / (2.0 * pi) 2862 } 2863 2864 bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) { 2865 return isInlinableLiteral32(Literal, HasInv2Pi); 2866 } 2867 2868 bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) { 2869 if (!HasInv2Pi) 2870 return false; 2871 if (isInlinableIntLiteral(Literal)) 2872 return true; 2873 uint16_t Val = static_cast<uint16_t>(Literal); 2874 return Val == 0x3C00 || // 1.0 2875 Val == 0xBC00 || // -1.0 2876 Val == 0x3800 || // 0.5 2877 Val == 0xB800 || // -0.5 2878 Val == 0x4000 || // 2.0 2879 Val == 0xC000 || // -2.0 2880 Val == 0x4400 || // 4.0 2881 Val == 0xC400 || // -4.0 2882 Val == 0x3118; // 1/2pi 2883 } 2884 2885 std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) { 2886 // Unfortunately, the Instruction Set Architecture Reference Guide is 2887 // misleading about how the inline operands work for (packed) 16-bit 2888 // instructions. In a nutshell, the actual HW behavior is: 2889 // 2890 // - integer encodings (-16 .. 
64) are always produced as sign-extended 2891 // 32-bit values 2892 // - float encodings are produced as: 2893 // - for F16 instructions: corresponding half-precision float values in 2894 // the LSBs, 0 in the MSBs 2895 // - for UI16 instructions: corresponding single-precision float value 2896 int32_t Signed = static_cast<int32_t>(Literal); 2897 if (Signed >= 0 && Signed <= 64) 2898 return 128 + Signed; 2899 2900 if (Signed >= -16 && Signed <= -1) 2901 return 192 + std::abs(Signed); 2902 2903 if (IsFloat) { 2904 // clang-format off 2905 switch (Literal) { 2906 case 0x3800: return 240; // 0.5 2907 case 0xB800: return 241; // -0.5 2908 case 0x3C00: return 242; // 1.0 2909 case 0xBC00: return 243; // -1.0 2910 case 0x4000: return 244; // 2.0 2911 case 0xC000: return 245; // -2.0 2912 case 0x4400: return 246; // 4.0 2913 case 0xC400: return 247; // -4.0 2914 case 0x3118: return 248; // 1.0 / (2.0 * pi) 2915 default: break; 2916 } 2917 // clang-format on 2918 } else { 2919 // clang-format off 2920 switch (Literal) { 2921 case 0x3F000000: return 240; // 0.5 2922 case 0xBF000000: return 241; // -0.5 2923 case 0x3F800000: return 242; // 1.0 2924 case 0xBF800000: return 243; // -1.0 2925 case 0x40000000: return 244; // 2.0 2926 case 0xC0000000: return 245; // -2.0 2927 case 0x40800000: return 246; // 4.0 2928 case 0xC0800000: return 247; // -4.0 2929 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi) 2930 default: break; 2931 } 2932 // clang-format on 2933 } 2934 2935 return {}; 2936 } 2937 2938 // Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction 2939 // or nullopt. 2940 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) { 2941 return getInlineEncodingV216(false, Literal); 2942 } 2943 2944 // Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction 2945 // or nullopt. 2946 std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) { 2947 int32_t Signed = static_cast<int32_t>(Literal); 2948 if (Signed >= 0 && Signed <= 64) 2949 return 128 + Signed; 2950 2951 if (Signed >= -16 && Signed <= -1) 2952 return 192 + std::abs(Signed); 2953 2954 // clang-format off 2955 switch (Literal) { 2956 case 0x3F00: return 240; // 0.5 2957 case 0xBF00: return 241; // -0.5 2958 case 0x3F80: return 242; // 1.0 2959 case 0xBF80: return 243; // -1.0 2960 case 0x4000: return 244; // 2.0 2961 case 0xC000: return 245; // -2.0 2962 case 0x4080: return 246; // 4.0 2963 case 0xC080: return 247; // -4.0 2964 case 0x3E22: return 248; // 1.0 / (2.0 * pi) 2965 default: break; 2966 } 2967 // clang-format on 2968 2969 return std::nullopt; 2970 } 2971 2972 // Encoding of the literal as an inline constant for a V_PK_*_F16 instruction 2973 // or nullopt. 2974 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) { 2975 return getInlineEncodingV216(true, Literal); 2976 } 2977 2978 // Whether the given literal can be inlined for a V_PK_* instruction. 
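// Usage sketch (illustrative): for a V_PK_*_F16 operand, the 32-bit literal
// 0x00003C00 (half 1.0 in the low 16 bits, 0 in the high bits) is inlinable:
// getInlineEncodingV2F16(0x3C00) yields encoding 242, while 0x3C01 matches no
// table entry and yields std::nullopt, so it must be emitted as a literal.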
2979 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) { 2980 switch (OpType) { 2981 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2982 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2983 return getInlineEncodingV216(false, Literal).has_value(); 2984 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2985 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2986 return getInlineEncodingV216(true, Literal).has_value(); 2987 case AMDGPU::OPERAND_REG_IMM_V2BF16: 2988 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: 2989 return isInlinableLiteralV2BF16(Literal); 2990 default: 2991 llvm_unreachable("bad packed operand type"); 2992 } 2993 } 2994 2995 // Whether the given literal can be inlined for a V_PK_*_IU16 instruction. 2996 bool isInlinableLiteralV2I16(uint32_t Literal) { 2997 return getInlineEncodingV2I16(Literal).has_value(); 2998 } 2999 3000 // Whether the given literal can be inlined for a V_PK_*_BF16 instruction. 3001 bool isInlinableLiteralV2BF16(uint32_t Literal) { 3002 return getInlineEncodingV2BF16(Literal).has_value(); 3003 } 3004 3005 // Whether the given literal can be inlined for a V_PK_*_F16 instruction. 3006 bool isInlinableLiteralV2F16(uint32_t Literal) { 3007 return getInlineEncodingV2F16(Literal).has_value(); 3008 } 3009 3010 bool isValid32BitLiteral(uint64_t Val, bool IsFP64) { 3011 if (IsFP64) 3012 return !Lo_32(Val); 3013 3014 return isUInt<32>(Val) || isInt<32>(Val); 3015 } 3016 3017 bool isArgPassedInSGPR(const Argument *A) { 3018 const Function *F = A->getParent(); 3019 3020 // Arguments to compute shaders are never a source of divergence. 3021 CallingConv::ID CC = F->getCallingConv(); 3022 switch (CC) { 3023 case CallingConv::AMDGPU_KERNEL: 3024 case CallingConv::SPIR_KERNEL: 3025 return true; 3026 case CallingConv::AMDGPU_VS: 3027 case CallingConv::AMDGPU_LS: 3028 case CallingConv::AMDGPU_HS: 3029 case CallingConv::AMDGPU_ES: 3030 case CallingConv::AMDGPU_GS: 3031 case CallingConv::AMDGPU_PS: 3032 case CallingConv::AMDGPU_CS: 3033 case CallingConv::AMDGPU_Gfx: 3034 case CallingConv::AMDGPU_CS_Chain: 3035 case CallingConv::AMDGPU_CS_ChainPreserve: 3036 // For non-compute shaders, SGPR inputs are marked with either inreg or 3037 // byval. Everything else is in VGPRs. 3038 return A->hasAttribute(Attribute::InReg) || 3039 A->hasAttribute(Attribute::ByVal); 3040 default: 3041 // TODO: treat i1 as divergent? 3042 return A->hasAttribute(Attribute::InReg); 3043 } 3044 } 3045 3046 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) { 3047 // Arguments to compute shaders are never a source of divergence. 3048 CallingConv::ID CC = CB->getCallingConv(); 3049 switch (CC) { 3050 case CallingConv::AMDGPU_KERNEL: 3051 case CallingConv::SPIR_KERNEL: 3052 return true; 3053 case CallingConv::AMDGPU_VS: 3054 case CallingConv::AMDGPU_LS: 3055 case CallingConv::AMDGPU_HS: 3056 case CallingConv::AMDGPU_ES: 3057 case CallingConv::AMDGPU_GS: 3058 case CallingConv::AMDGPU_PS: 3059 case CallingConv::AMDGPU_CS: 3060 case CallingConv::AMDGPU_Gfx: 3061 case CallingConv::AMDGPU_CS_Chain: 3062 case CallingConv::AMDGPU_CS_ChainPreserve: 3063 // For non-compute shaders, SGPR inputs are marked with either inreg or 3064 // byval. Everything else is in VGPRs. 
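    // For example (illustrative IR): under the amdgpu_ps calling convention,
    // the inreg argument below is a uniform SGPR input, while the plain
    // argument is a divergent VGPR input:
    //
    //   define amdgpu_ps float @f(i32 inreg %desc, float %v)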
3065     return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
3066            CB->paramHasAttr(ArgNo, Attribute::ByVal);
3067   default:
3068     return CB->paramHasAttr(ArgNo, Attribute::InReg);
3069   }
3070 }
3071
3072 static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
3073   return isGCN3Encoding(ST) || isGFX10Plus(ST);
3074 }
3075
3076 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
3077                                       int64_t EncodedOffset) {
3078   if (isGFX12Plus(ST))
3079     return isUInt<23>(EncodedOffset);
3080
3081   return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
3082                                : isUInt<8>(EncodedOffset);
3083 }
3084
3085 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
3086                                     int64_t EncodedOffset, bool IsBuffer) {
3087   if (isGFX12Plus(ST))
3088     return isInt<24>(EncodedOffset);
3089
3090   return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
3091 }
3092
3093 static bool isDwordAligned(uint64_t ByteOffset) {
3094   return (ByteOffset & 3) == 0;
3095 }
3096
3097 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
3098                                 uint64_t ByteOffset) {
3099   if (hasSMEMByteOffset(ST))
3100     return ByteOffset;
3101
3102   assert(isDwordAligned(ByteOffset));
3103   return ByteOffset >> 2;
3104 }
3105
3106 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
3107                                             int64_t ByteOffset, bool IsBuffer,
3108                                             bool HasSOffset) {
3109   // For unbuffered smem loads, it is illegal for the Immediate Offset to be
3110   // negative if the resulting (Offset + (M0 or SOffset or zero)) is negative.
3111   // Handle the case where SOffset is not present.
3112   if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
3113     return std::nullopt;
3114
3115   if (isGFX12Plus(ST)) // 24-bit signed offsets
3116     return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3117                                  : std::nullopt;
3118
3119   // The signed version is always a byte offset.
3120   if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
3121     assert(hasSMEMByteOffset(ST));
3122     return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3123                                  : std::nullopt;
3124   }
3125
3126   if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
3127     return std::nullopt;
3128
3129   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3130   return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
3131              ? std::optional<int64_t>(EncodedOffset)
3132              : std::nullopt;
3133 }
3134
3135 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
3136                                                      int64_t ByteOffset) {
3137   if (!isCI(ST) || !isDwordAligned(ByteOffset))
3138     return std::nullopt;
3139
3140   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3141   return isUInt<32>(EncodedOffset) ?
std::optional<int64_t>(EncodedOffset) 3142 : std::nullopt; 3143 } 3144 3145 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) { 3146 if (AMDGPU::isGFX10(ST)) 3147 return 12; 3148 3149 if (AMDGPU::isGFX12(ST)) 3150 return 24; 3151 return 13; 3152 } 3153 3154 namespace { 3155 3156 struct SourceOfDivergence { 3157 unsigned Intr; 3158 }; 3159 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr); 3160 3161 struct AlwaysUniform { 3162 unsigned Intr; 3163 }; 3164 const AlwaysUniform *lookupAlwaysUniform(unsigned Intr); 3165 3166 #define GET_SourcesOfDivergence_IMPL 3167 #define GET_UniformIntrinsics_IMPL 3168 #define GET_Gfx9BufferFormat_IMPL 3169 #define GET_Gfx10BufferFormat_IMPL 3170 #define GET_Gfx11PlusBufferFormat_IMPL 3171 3172 #include "AMDGPUGenSearchableTables.inc" 3173 3174 } // end anonymous namespace 3175 3176 bool isIntrinsicSourceOfDivergence(unsigned IntrID) { 3177 return lookupSourceOfDivergence(IntrID); 3178 } 3179 3180 bool isIntrinsicAlwaysUniform(unsigned IntrID) { 3181 return lookupAlwaysUniform(IntrID); 3182 } 3183 3184 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, 3185 uint8_t NumComponents, 3186 uint8_t NumFormat, 3187 const MCSubtargetInfo &STI) { 3188 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo( 3189 BitsPerComp, NumComponents, NumFormat) 3190 : isGFX10(STI) 3191 ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat) 3192 : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat); 3193 } 3194 3195 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, 3196 const MCSubtargetInfo &STI) { 3197 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format) 3198 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format) 3199 : getGfx9BufferFormatInfo(Format); 3200 } 3201 3202 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) { 3203 for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) { 3204 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName); 3205 if (Idx == -1) 3206 continue; 3207 3208 if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID || 3209 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID) 3210 return true; 3211 } 3212 3213 return false; 3214 } 3215 3216 bool isDPALU_DPP(const MCInstrDesc &OpDesc) { 3217 return hasAny64BitVGPROperands(OpDesc); 3218 } 3219 3220 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) { 3221 // Currently this is 128 for all subtargets 3222 return 128; 3223 } 3224 3225 } // namespace AMDGPU 3226 3227 raw_ostream &operator<<(raw_ostream &OS, 3228 const AMDGPU::IsaInfo::TargetIDSetting S) { 3229 switch (S) { 3230 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported): 3231 OS << "Unsupported"; 3232 break; 3233 case (AMDGPU::IsaInfo::TargetIDSetting::Any): 3234 OS << "Any"; 3235 break; 3236 case (AMDGPU::IsaInfo::TargetIDSetting::Off): 3237 OS << "Off"; 3238 break; 3239 case (AMDGPU::IsaInfo::TargetIDSetting::On): 3240 OS << "On"; 3241 break; 3242 } 3243 return OS; 3244 } 3245 3246 } // namespace llvm 3247
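// Usage sketch (illustrative): the raw_ostream printer above allows a
// TargetIDSetting to be streamed directly, e.g.
//
//   llvm::outs() << llvm::AMDGPU::IsaInfo::TargetIDSetting::Any; // "Any"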