1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H 10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H 11 12 #include "AMDGPUSubtarget.h" 13 #include "SIDefines.h" 14 #include "llvm/IR/CallingConv.h" 15 #include "llvm/IR/InstrTypes.h" 16 #include "llvm/IR/Module.h" 17 #include "llvm/Support/Alignment.h" 18 #include <array> 19 #include <functional> 20 #include <utility> 21 22 struct amd_kernel_code_t; 23 24 namespace llvm { 25 26 struct Align; 27 class Argument; 28 class Function; 29 class GlobalValue; 30 class MCInstrInfo; 31 class MCRegisterClass; 32 class MCRegisterInfo; 33 class MCSubtargetInfo; 34 class StringRef; 35 class Triple; 36 class raw_ostream; 37 38 namespace AMDGPU { 39 40 struct AMDGPUMCKernelCodeT; 41 struct IsaVersion; 42 43 /// Generic target versions emitted by this version of LLVM. 44 /// 45 /// These numbers are incremented every time a codegen breaking change occurs 46 /// within a generic family. 47 namespace GenericVersion { 48 static constexpr unsigned GFX9 = 1; 49 static constexpr unsigned GFX10_1 = 1; 50 static constexpr unsigned GFX10_3 = 1; 51 static constexpr unsigned GFX11 = 1; 52 static constexpr unsigned GFX12 = 1; 53 } // namespace GenericVersion 54 55 enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 }; 56 57 /// \returns True if \p STI is AMDHSA. 58 bool isHsaAbi(const MCSubtargetInfo &STI); 59 60 /// \returns Code object version from the IR module flag. 61 unsigned getAMDHSACodeObjectVersion(const Module &M); 62 63 /// \returns Code object version from ELF's e_ident[EI_ABIVERSION]. 
64 unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion); 65 66 /// \returns The default HSA code object version. This should only be used when 67 /// we lack a more accurate CodeObjectVersion value (e.g. from the IR module 68 /// flag or a .amdhsa_code_object_version directive) 69 unsigned getDefaultAMDHSACodeObjectVersion(); 70 71 /// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param 72 /// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion(). 73 uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion); 74 75 /// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr 76 unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV); 77 78 /// \returns The offset of the hostcall pointer argument from implicitarg_ptr 79 unsigned getHostcallImplicitArgPosition(unsigned COV); 80 81 unsigned getDefaultQueueImplicitArgPosition(unsigned COV); 82 unsigned getCompletionActionImplicitArgPosition(unsigned COV); 83 84 struct GcnBufferFormatInfo { 85 unsigned Format; 86 unsigned BitsPerComp; 87 unsigned NumComponents; 88 unsigned NumFormat; 89 unsigned DataFormat; 90 }; 91 92 struct MAIInstInfo { 93 uint16_t Opcode; 94 bool is_dgemm; 95 bool is_gfx940_xdl; 96 }; 97 98 #define GET_MIMGBaseOpcode_DECL 99 #define GET_MIMGDim_DECL 100 #define GET_MIMGEncoding_DECL 101 #define GET_MIMGLZMapping_DECL 102 #define GET_MIMGMIPMapping_DECL 103 #define GET_MIMGBiASMapping_DECL 104 #define GET_MAIInstInfoTable_DECL 105 #include "AMDGPUGenSearchableTables.inc" 106 107 namespace IsaInfo { 108 109 enum { 110 // The closed Vulkan driver sets 96, which limits the wave count to 8 but 111 // doesn't spill SGPRs as much as when 80 is set. 
112 FIXED_NUM_SGPRS_FOR_INIT_BUG = 96, 113 TRAP_NUM_SGPRS = 16 114 }; 115 116 enum class TargetIDSetting { 117 Unsupported, 118 Any, 119 Off, 120 On 121 }; 122 123 class AMDGPUTargetID { 124 private: 125 const MCSubtargetInfo &STI; 126 TargetIDSetting XnackSetting; 127 TargetIDSetting SramEccSetting; 128 129 public: 130 explicit AMDGPUTargetID(const MCSubtargetInfo &STI); 131 ~AMDGPUTargetID() = default; 132 133 /// \return True if the current xnack setting is not "Unsupported". 134 bool isXnackSupported() const { 135 return XnackSetting != TargetIDSetting::Unsupported; 136 } 137 138 /// \returns True if the current xnack setting is "On" or "Any". 139 bool isXnackOnOrAny() const { 140 return XnackSetting == TargetIDSetting::On || 141 XnackSetting == TargetIDSetting::Any; 142 } 143 144 /// \returns True if current xnack setting is "On" or "Off", 145 /// false otherwise. 146 bool isXnackOnOrOff() const { 147 return getXnackSetting() == TargetIDSetting::On || 148 getXnackSetting() == TargetIDSetting::Off; 149 } 150 151 /// \returns The current xnack TargetIDSetting, possible options are 152 /// "Unsupported", "Any", "Off", and "On". 153 TargetIDSetting getXnackSetting() const { 154 return XnackSetting; 155 } 156 157 /// Sets xnack setting to \p NewXnackSetting. 158 void setXnackSetting(TargetIDSetting NewXnackSetting) { 159 XnackSetting = NewXnackSetting; 160 } 161 162 /// \return True if the current sramecc setting is not "Unsupported". 163 bool isSramEccSupported() const { 164 return SramEccSetting != TargetIDSetting::Unsupported; 165 } 166 167 /// \returns True if the current sramecc setting is "On" or "Any". 168 bool isSramEccOnOrAny() const { 169 return SramEccSetting == TargetIDSetting::On || 170 SramEccSetting == TargetIDSetting::Any; 171 } 172 173 /// \returns True if current sramecc setting is "On" or "Off", 174 /// false otherwise. 
175 bool isSramEccOnOrOff() const { 176 return getSramEccSetting() == TargetIDSetting::On || 177 getSramEccSetting() == TargetIDSetting::Off; 178 } 179 180 /// \returns The current sramecc TargetIDSetting, possible options are 181 /// "Unsupported", "Any", "Off", and "On". 182 TargetIDSetting getSramEccSetting() const { 183 return SramEccSetting; 184 } 185 186 /// Sets sramecc setting to \p NewSramEccSetting. 187 void setSramEccSetting(TargetIDSetting NewSramEccSetting) { 188 SramEccSetting = NewSramEccSetting; 189 } 190 191 void setTargetIDFromFeaturesString(StringRef FS); 192 void setTargetIDFromTargetIDStream(StringRef TargetID); 193 194 /// \returns String representation of an object. 195 std::string toString() const; 196 }; 197 198 /// \returns Wavefront size for given subtarget \p STI. 199 unsigned getWavefrontSize(const MCSubtargetInfo *STI); 200 201 /// \returns Local memory size in bytes for given subtarget \p STI. 202 unsigned getLocalMemorySize(const MCSubtargetInfo *STI); 203 204 /// \returns Maximum addressable local memory size in bytes for given subtarget 205 /// \p STI. 206 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI); 207 208 /// \returns Number of execution units per compute unit for given subtarget \p 209 /// STI. 210 unsigned getEUsPerCU(const MCSubtargetInfo *STI); 211 212 /// \returns Maximum number of work groups per compute unit for given subtarget 213 /// \p STI and limited by given \p FlatWorkGroupSize. 214 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, 215 unsigned FlatWorkGroupSize); 216 217 /// \returns Minimum number of waves per execution unit for given subtarget \p 218 /// STI. 219 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI); 220 221 /// \returns Maximum number of waves per execution unit for given subtarget \p 222 /// STI without any kind of limitation. 
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given \p
/// FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
///
/// NOTE(review): the effect of \p Addressable is not documented here;
/// presumably it limits the result to the addressable SGPR count -- confirm
/// against the implementation.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned
getVGPRAllocGranule(const MCSubtargetInfo *STI,
                    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(
    const MCSubtargetInfo *STI,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of architectural VGPRs for a given subtarget \p
/// STI.
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
/// subtarget \p STI.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs);

/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs);

/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
/// Gen.
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
                                  AMDGPUSubtarget::Generation Gen);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used. We actually return the number of blocks -1, since
/// that's what we encode.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getEncodedNumVGPRBlocks(
    const MCSubtargetInfo *STI, unsigned NumVGPRs,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns Number of VGPR blocks that need to be allocated for the given
/// subtarget \p STI when \p NumVGPRs are used.
///
/// NOTE(review): \p EnableWavefrontSize32 is not described here; presumably it
/// has the same meaning as for getEncodedNumVGPRBlocks() -- confirm.
unsigned getAllocatedNumVGPRBlocks(
    const MCSubtargetInfo *STI, unsigned NumVGPRs,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

} // end namespace IsaInfo

// Represents a field in an encoded value.
347 template <unsigned HighBit, unsigned LowBit, unsigned D = 0> 348 struct EncodingField { 349 static_assert(HighBit >= LowBit, "Invalid bit range!"); 350 static constexpr unsigned Offset = LowBit; 351 static constexpr unsigned Width = HighBit - LowBit + 1; 352 353 using ValueType = unsigned; 354 static constexpr ValueType Default = D; 355 356 ValueType Value; 357 constexpr EncodingField(ValueType Value) : Value(Value) {} 358 359 constexpr uint64_t encode() const { return Value; } 360 static ValueType decode(uint64_t Encoded) { return Encoded; } 361 }; 362 363 // Represents a single bit in an encoded value. 364 template <unsigned Bit, unsigned D = 0> 365 using EncodingBit = EncodingField<Bit, Bit, D>; 366 367 // A helper for encoding and decoding multiple fields. 368 template <typename... Fields> struct EncodingFields { 369 static constexpr uint64_t encode(Fields... Values) { 370 return ((Values.encode() << Values.Offset) | ...); 371 } 372 373 static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) { 374 return {Fields::decode((Encoded >> Fields::Offset) & 375 maxUIntN(Fields::Width))...}; 376 } 377 }; 378 379 LLVM_READONLY 380 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); 381 382 LLVM_READONLY 383 inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) { 384 return getNamedOperandIdx(Opcode, NamedIdx) != -1; 385 } 386 387 LLVM_READONLY 388 int getSOPPWithRelaxation(uint16_t Opcode); 389 390 struct MIMGBaseOpcodeInfo { 391 MIMGBaseOpcode BaseOpcode; 392 bool Store; 393 bool Atomic; 394 bool AtomicX2; 395 bool Sampler; 396 bool Gather4; 397 398 uint8_t NumExtraArgs; 399 bool Gradients; 400 bool G16; 401 bool Coordinates; 402 bool LodOrClampOrMip; 403 bool HasD16; 404 bool MSAA; 405 bool BVH; 406 bool A16; 407 bool NoReturn; 408 }; 409 410 LLVM_READONLY 411 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc); 412 413 LLVM_READONLY 414 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode); 415 416 
// Plain data record describing one MIMG dimension. It can be looked up by
// dimension enum, by encoding or by assembler suffix (see the three functions
// below).
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

// The following records each pair two MIMG base opcodes. NOTE(review): judging
// by the field names they map an opcode variant to its counterpart (L <-> LZ,
// MIP <-> NONMIP, ...) -- confirm against the generated tables.
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};

struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

// Pairs the 2-address and 3-address opcode of a WMMA instruction (see
// mapWMMA2AddrTo3AddrOpcode() / mapWMMA3AddrTo2AddrOpcode() later in this
// header).
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);

LLVM_READONLY
const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);

// Plain data record for one concrete MIMG opcode, returned by getMIMGInfo().
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

// MTBUF opcode queries.
LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

// MUBUF opcode queries.
LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

LLVM_READONLY
bool getMUBUFTfe(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);

LLVM_READONLY
bool isVOPC64DPP(unsigned Opc);

LLVM_READONLY
bool isVOPCAsmOnly(unsigned Opc);

/// Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY
bool getMAIIsDGEMM(unsigned Opc);

LLVM_READONLY
bool getMAIIsGFX940XDL(unsigned Opc);

// Result of getCanBeVOPD(). NOTE(review): X / Y presumably say whether the
// opcode may be used as the X / Y component of a VOPD instruction -- confirm.
struct CanBeVOPD {
  bool X;
  bool Y;
};

/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
577 LLVM_READONLY 578 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST); 579 580 LLVM_READONLY 581 CanBeVOPD getCanBeVOPD(unsigned Opc); 582 583 LLVM_READONLY 584 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, 585 uint8_t NumComponents, 586 uint8_t NumFormat, 587 const MCSubtargetInfo &STI); 588 LLVM_READONLY 589 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, 590 const MCSubtargetInfo &STI); 591 592 LLVM_READONLY 593 int getMCOpcode(uint16_t Opcode, unsigned Gen); 594 595 LLVM_READONLY 596 unsigned getVOPDOpcode(unsigned Opc); 597 598 LLVM_READONLY 599 int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily); 600 601 LLVM_READONLY 602 bool isVOPD(unsigned Opc); 603 604 LLVM_READNONE 605 bool isMAC(unsigned Opc); 606 607 LLVM_READNONE 608 bool isPermlane16(unsigned Opc); 609 610 LLVM_READNONE 611 bool isGenericAtomic(unsigned Opc); 612 613 LLVM_READNONE 614 bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc); 615 616 namespace VOPD { 617 618 enum Component : unsigned { 619 DST = 0, 620 SRC0, 621 SRC1, 622 SRC2, 623 624 DST_NUM = 1, 625 MAX_SRC_NUM = 3, 626 MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM 627 }; 628 629 // LSB mask for VGPR banks per VOPD component operand. 630 // 4 banks result in a mask 3, setting 2 lower bits. 631 constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1}; 632 633 enum ComponentIndex : unsigned { X = 0, Y = 1 }; 634 constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y}; 635 constexpr unsigned COMPONENTS_NUM = 2; 636 637 // Properties of VOPD components. 638 class ComponentProps { 639 private: 640 unsigned SrcOperandsNum = 0; 641 unsigned MandatoryLiteralIdx = ~0u; 642 bool HasSrc2Acc = false; 643 644 public: 645 ComponentProps() = default; 646 ComponentProps(const MCInstrDesc &OpDesc); 647 648 // Return the total number of src operands this component has. 
649 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; } 650 651 // Return the number of src operands of this component visible to the parser. 652 unsigned getCompParsedSrcOperandsNum() const { 653 return SrcOperandsNum - HasSrc2Acc; 654 } 655 656 // Return true iif this component has a mandatory literal. 657 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; } 658 659 // If this component has a mandatory literal, return component operand 660 // index of this literal (i.e. either Component::SRC1 or Component::SRC2). 661 unsigned getMandatoryLiteralCompOperandIndex() const { 662 assert(hasMandatoryLiteral()); 663 return MandatoryLiteralIdx; 664 } 665 666 // Return true iif this component has operand 667 // with component index CompSrcIdx and this operand may be a register. 668 bool hasRegSrcOperand(unsigned CompSrcIdx) const { 669 assert(CompSrcIdx < Component::MAX_SRC_NUM); 670 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx); 671 } 672 673 // Return true iif this component has tied src2. 674 bool hasSrc2Acc() const { return HasSrc2Acc; } 675 676 private: 677 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const { 678 assert(CompSrcIdx < Component::MAX_SRC_NUM); 679 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx; 680 } 681 }; 682 683 enum ComponentKind : unsigned { 684 SINGLE = 0, // A single VOP1 or VOP2 instruction which may be used in VOPD. 685 COMPONENT_X, // A VOPD instruction, X component. 686 COMPONENT_Y, // A VOPD instruction, Y component. 687 MAX = COMPONENT_Y 688 }; 689 690 // Interface functions of this class map VOPD component operand indices 691 // to indices of operands in MachineInstr/MCInst or parsed operands array. 
692 // 693 // Note that this class operates with 3 kinds of indices: 694 // - VOPD component operand indices (Component::DST, Component::SRC0, etc.); 695 // - MC operand indices (they refer operands in a MachineInstr/MCInst); 696 // - parsed operand indices (they refer operands in parsed operands array). 697 // 698 // For SINGLE components mapping between these indices is trivial. 699 // But things get more complicated for COMPONENT_X and 700 // COMPONENT_Y because these components share the same 701 // MachineInstr/MCInst and the same parsed operands array. 702 // Below is an example of component operand to parsed operand 703 // mapping for the following instruction: 704 // 705 // v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1 706 // 707 // PARSED COMPONENT PARSED 708 // COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX 709 // ------------------------------------------------------------------- 710 // "v_dual_add_f32" 0 711 // v_dual_add_f32 v255 0 (DST) --> 1 712 // v4 1 (SRC0) --> 2 713 // v5 2 (SRC1) --> 3 714 // "::" 4 715 // "v_dual_mov_b32" 5 716 // v_dual_mov_b32 v6 0 (DST) --> 6 717 // v1 1 (SRC0) --> 7 718 // ------------------------------------------------------------------- 719 // 720 class ComponentLayout { 721 private: 722 // Regular MachineInstr/MCInst operands are ordered as follows: 723 // dst, src0 [, other src operands] 724 // VOPD MachineInstr/MCInst operands are ordered as follows: 725 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 726 // Each ComponentKind has operand indices defined below. 727 static constexpr unsigned MC_DST_IDX[] = {0, 0, 1}; 728 static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */}; 729 730 // Parsed operands of regular instructions are ordered as follows: 731 // Mnemo dst src0 [vsrc1 ...] 
732 // Parsed VOPD operands are ordered as follows: 733 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' 734 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] 735 // Each ComponentKind has operand indices defined below. 736 static constexpr unsigned PARSED_DST_IDX[] = {1, 1, 737 4 /* + OpX.ParsedSrcNum */}; 738 static constexpr unsigned FIRST_PARSED_SRC_IDX[] = { 739 2, 2, 5 /* + OpX.ParsedSrcNum */}; 740 741 private: 742 const ComponentKind Kind; 743 const ComponentProps PrevComp; 744 745 public: 746 // Create layout for COMPONENT_X or SINGLE component. 747 ComponentLayout(ComponentKind Kind) : Kind(Kind) { 748 assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X); 749 } 750 751 // Create layout for COMPONENT_Y which depends on COMPONENT_X layout. 752 ComponentLayout(const ComponentProps &OpXProps) 753 : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {} 754 755 public: 756 // Return the index of dst operand in MCInst operands. 757 unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; } 758 759 // Return the index of the specified src operand in MCInst operands. 760 unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const { 761 assert(CompSrcIdx < Component::MAX_SRC_NUM); 762 return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx; 763 } 764 765 // Return the index of dst operand in the parsed operands array. 766 unsigned getIndexOfDstInParsedOperands() const { 767 return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum(); 768 } 769 770 // Return the index of the specified src operand in the parsed operands array. 
771 unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const { 772 assert(CompSrcIdx < Component::MAX_SRC_NUM); 773 return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx; 774 } 775 776 private: 777 unsigned getPrevCompSrcNum() const { 778 return PrevComp.getCompSrcOperandsNum(); 779 } 780 unsigned getPrevCompParsedSrcNum() const { 781 return PrevComp.getCompParsedSrcOperandsNum(); 782 } 783 }; 784 785 // Layout and properties of VOPD components. 786 class ComponentInfo : public ComponentLayout, public ComponentProps { 787 public: 788 // Create ComponentInfo for COMPONENT_X or SINGLE component. 789 ComponentInfo(const MCInstrDesc &OpDesc, 790 ComponentKind Kind = ComponentKind::SINGLE) 791 : ComponentLayout(Kind), ComponentProps(OpDesc) {} 792 793 // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout. 794 ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps) 795 : ComponentLayout(OpXProps), ComponentProps(OpDesc) {} 796 797 // Map component operand index to parsed operand index. 798 // Return 0 if the specified operand does not exist. 799 unsigned getIndexInParsedOperands(unsigned CompOprIdx) const; 800 }; 801 802 // Properties of VOPD instructions. 803 class InstInfo { 804 private: 805 const ComponentInfo CompInfo[COMPONENTS_NUM]; 806 807 public: 808 using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>; 809 810 InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) 811 : CompInfo{OpX, OpY} {} 812 813 InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY) 814 : CompInfo{OprInfoX, OprInfoY} {} 815 816 const ComponentInfo &operator[](size_t ComponentIdx) const { 817 assert(ComponentIdx < COMPONENTS_NUM); 818 return CompInfo[ComponentIdx]; 819 } 820 821 // Check VOPD operands constraints. 822 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index 823 // for the specified component and MC operand. 
The callback must return 0 824 // if the operand is not a register or not a VGPR. 825 // If \p SkipSrc is set to true then constraints for source operands are not 826 // checked. 827 bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx, 828 bool SkipSrc = false) const { 829 return getInvalidCompOperandIndex(GetRegIdx, SkipSrc).has_value(); 830 } 831 832 // Check VOPD operands constraints. 833 // Return the index of an invalid component operand, if any. 834 // If \p SkipSrc is set to true then constraints for source operands are not 835 // checked. 836 std::optional<unsigned> getInvalidCompOperandIndex( 837 std::function<unsigned(unsigned, unsigned)> GetRegIdx, 838 bool SkipSrc = false) const; 839 840 private: 841 RegIndices 842 getRegIndices(unsigned ComponentIdx, 843 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const; 844 }; 845 846 } // namespace VOPD 847 848 LLVM_READONLY 849 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode); 850 851 LLVM_READONLY 852 // Get properties of 2 single VOP1/VOP2 instructions 853 // used as components to create a VOPD instruction. 854 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY); 855 856 LLVM_READONLY 857 // Get properties of VOPD X and Y components. 
858 VOPD::InstInfo 859 getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo); 860 861 LLVM_READONLY 862 bool isTrue16Inst(unsigned Opc); 863 864 LLVM_READONLY 865 bool isInvalidSingleUseConsumerInst(unsigned Opc); 866 867 LLVM_READONLY 868 bool isInvalidSingleUseProducerInst(unsigned Opc); 869 870 LLVM_READONLY 871 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc); 872 873 LLVM_READONLY 874 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc); 875 876 void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header, 877 const MCSubtargetInfo *STI); 878 879 bool isGroupSegment(const GlobalValue *GV); 880 bool isGlobalSegment(const GlobalValue *GV); 881 bool isReadOnlySegment(const GlobalValue *GV); 882 883 /// \returns True if constants should be emitted to .text section for given 884 /// target triple \p TT, false otherwise. 885 bool shouldEmitConstantsToTextSection(const Triple &TT); 886 887 /// \returns Integer value requested using \p F's \p Name attribute. 888 /// 889 /// \returns \p Default if attribute is not present. 890 /// 891 /// \returns \p Default and emits error if requested value cannot be converted 892 /// to integer. 893 int getIntegerAttribute(const Function &F, StringRef Name, int Default); 894 895 /// \returns A pair of integer values requested using \p F's \p Name attribute 896 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired 897 /// is false). 898 /// 899 /// \returns \p Default if attribute is not present. 900 /// 901 /// \returns \p Default and emits error if one of the requested values cannot be 902 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is 903 /// not present. 904 std::pair<unsigned, unsigned> 905 getIntegerPairAttribute(const Function &F, StringRef Name, 906 std::pair<unsigned, unsigned> Default, 907 bool OnlyFirstRequired = false); 908 909 /// \returns Generate a vector of integer values requested using \p F's \p Name 910 /// attribute. 
911 /// 912 /// \returns true if exactly Size (>2) number of integers are found in the 913 /// attribute. 914 /// 915 /// \returns false if any error occurs. 916 SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name, 917 unsigned Size); 918 919 /// Represents the counter values to wait for in an s_waitcnt instruction. 920 /// 921 /// Large values (including the maximum possible integer) can be used to 922 /// represent "don't care" waits. 923 struct Waitcnt { 924 unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12. 925 unsigned ExpCnt = ~0u; 926 unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12. 927 unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11. 928 unsigned SampleCnt = ~0u; // gfx12+ only. 929 unsigned BvhCnt = ~0u; // gfx12+ only. 930 unsigned KmCnt = ~0u; // gfx12+ only. 931 932 Waitcnt() = default; 933 // Pre-gfx12 constructor. 934 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) 935 : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt), 936 SampleCnt(~0u), BvhCnt(~0u), KmCnt(~0u) {} 937 938 // gfx12+ constructor. 939 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, 940 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt) 941 : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt), 942 SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt) {} 943 944 bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); } 945 946 bool hasWaitExceptStoreCnt() const { 947 return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u || 948 SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u; 949 } 950 951 bool hasWaitStoreCnt() const { return StoreCnt != ~0u; } 952 953 Waitcnt combined(const Waitcnt &Other) const { 954 // Does the right thing provided self and Other are either both pre-gfx12 955 // or both gfx12+. 
956 return Waitcnt( 957 std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt), 958 std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt), 959 std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt), 960 std::min(KmCnt, Other.KmCnt)); 961 } 962 }; 963 964 // The following methods are only meaningful on targets that support 965 // S_WAITCNT. 966 967 /// \returns Vmcnt bit mask for given isa \p Version. 968 unsigned getVmcntBitMask(const IsaVersion &Version); 969 970 /// \returns Expcnt bit mask for given isa \p Version. 971 unsigned getExpcntBitMask(const IsaVersion &Version); 972 973 /// \returns Lgkmcnt bit mask for given isa \p Version. 974 unsigned getLgkmcntBitMask(const IsaVersion &Version); 975 976 /// \returns Waitcnt bit mask for given isa \p Version. 977 unsigned getWaitcntBitMask(const IsaVersion &Version); 978 979 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. 980 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); 981 982 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. 983 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt); 984 985 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. 986 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); 987 988 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa 989 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and 990 /// \p Lgkmcnt respectively. 
Should not be used on gfx12+, the instruction 991 /// which needs it is deprecated 992 /// 993 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: 994 /// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9) 995 /// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10) 996 /// \p Vmcnt = \p Waitcnt[15:10] (gfx11) 997 /// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11) 998 /// \p Expcnt = \p Waitcnt[2:0] (gfx11) 999 /// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10) 1000 /// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10) 1001 /// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11) 1002 /// 1003 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, 1004 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); 1005 1006 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded); 1007 1008 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. 1009 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, 1010 unsigned Vmcnt); 1011 1012 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. 1013 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, 1014 unsigned Expcnt); 1015 1016 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. 1017 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, 1018 unsigned Lgkmcnt); 1019 1020 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa 1021 /// \p Version. 
Should not be used on gfx12+, the instruction which needs 1022 /// it is deprecated 1023 /// 1024 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: 1025 /// Waitcnt[2:0] = \p Expcnt (gfx11+) 1026 /// Waitcnt[3:0] = \p Vmcnt (pre-gfx9) 1027 /// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10) 1028 /// Waitcnt[6:4] = \p Expcnt (pre-gfx11) 1029 /// Waitcnt[9:4] = \p Lgkmcnt (gfx11) 1030 /// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10) 1031 /// Waitcnt[13:8] = \p Lgkmcnt (gfx10) 1032 /// Waitcnt[15:10] = \p Vmcnt (gfx11) 1033 /// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10) 1034 /// 1035 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given 1036 /// isa \p Version. 1037 /// 1038 unsigned encodeWaitcnt(const IsaVersion &Version, 1039 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt); 1040 1041 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded); 1042 1043 // The following methods are only meaningful on targets that support 1044 // S_WAIT_*CNT, introduced with gfx12. 1045 1046 /// \returns Loadcnt bit mask for given isa \p Version. 1047 /// Returns 0 for versions that do not support LOADcnt 1048 unsigned getLoadcntBitMask(const IsaVersion &Version); 1049 1050 /// \returns Samplecnt bit mask for given isa \p Version. 1051 /// Returns 0 for versions that do not support SAMPLEcnt 1052 unsigned getSamplecntBitMask(const IsaVersion &Version); 1053 1054 /// \returns Bvhcnt bit mask for given isa \p Version. 1055 /// Returns 0 for versions that do not support BVHcnt 1056 unsigned getBvhcntBitMask(const IsaVersion &Version); 1057 1058 /// \returns Dscnt bit mask for given isa \p Version. 1059 /// Returns 0 for versions that do not support DScnt 1060 unsigned getDscntBitMask(const IsaVersion &Version); 1061 1062 /// \returns Dscnt bit mask for given isa \p Version. 
1063 /// Returns 0 for versions that do not support KMcnt 1064 unsigned getKmcntBitMask(const IsaVersion &Version); 1065 1066 /// \return STOREcnt or VScnt bit mask for given isa \p Version. 1067 /// returns 0 for versions that do not support STOREcnt or VScnt. 1068 /// STOREcnt and VScnt are the same counter, the name used 1069 /// depends on the ISA version. 1070 unsigned getStorecntBitMask(const IsaVersion &Version); 1071 1072 // The following are only meaningful on targets that support 1073 // S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT. 1074 1075 /// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given 1076 /// isa \p Version. 1077 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt); 1078 1079 /// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given 1080 /// isa \p Version. 1081 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt); 1082 1083 /// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an 1084 /// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa 1085 /// \p Version. 1086 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded); 1087 1088 /// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an 1089 /// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa 1090 /// \p Version. 
1091 unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded); 1092 1093 namespace Hwreg { 1094 1095 using HwregId = EncodingField<5, 0>; 1096 using HwregOffset = EncodingField<10, 6>; 1097 1098 struct HwregSize : EncodingField<15, 11, 32> { 1099 using EncodingField::EncodingField; 1100 constexpr uint64_t encode() const { return Value - 1; } 1101 static ValueType decode(uint64_t Encoded) { return Encoded + 1; } 1102 }; 1103 1104 using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>; 1105 1106 } // namespace Hwreg 1107 1108 namespace DepCtr { 1109 1110 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI); 1111 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, 1112 const MCSubtargetInfo &STI); 1113 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, 1114 const MCSubtargetInfo &STI); 1115 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, 1116 bool &IsDefault, const MCSubtargetInfo &STI); 1117 1118 /// \returns Decoded VaVdst from given immediate \p Encoded. 1119 unsigned decodeFieldVaVdst(unsigned Encoded); 1120 1121 /// \returns Decoded VmVsrc from given immediate \p Encoded. 1122 unsigned decodeFieldVmVsrc(unsigned Encoded); 1123 1124 /// \returns Decoded SaSdst from given immediate \p Encoded. 1125 unsigned decodeFieldSaSdst(unsigned Encoded); 1126 1127 /// \returns \p VmVsrc as an encoded Depctr immediate. 1128 unsigned encodeFieldVmVsrc(unsigned VmVsrc); 1129 1130 /// \returns \p Encoded combined with encoded \p VmVsrc. 1131 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc); 1132 1133 /// \returns \p VaVdst as an encoded Depctr immediate. 1134 unsigned encodeFieldVaVdst(unsigned VaVdst); 1135 1136 /// \returns \p Encoded combined with encoded \p VaVdst. 1137 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst); 1138 1139 /// \returns \p SaSdst as an encoded Depctr immediate. 
1140 unsigned encodeFieldSaSdst(unsigned SaSdst); 1141 1142 /// \returns \p Encoded combined with encoded \p SaSdst. 1143 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst); 1144 1145 } // namespace DepCtr 1146 1147 namespace Exp { 1148 1149 bool getTgtName(unsigned Id, StringRef &Name, int &Index); 1150 1151 LLVM_READONLY 1152 unsigned getTgtId(const StringRef Name); 1153 1154 LLVM_READNONE 1155 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI); 1156 1157 } // namespace Exp 1158 1159 namespace MTBUFFormat { 1160 1161 LLVM_READNONE 1162 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt); 1163 1164 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt); 1165 1166 int64_t getDfmt(const StringRef Name); 1167 1168 StringRef getDfmtName(unsigned Id); 1169 1170 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI); 1171 1172 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI); 1173 1174 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI); 1175 1176 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI); 1177 1178 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI); 1179 1180 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI); 1181 1182 bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI); 1183 1184 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, 1185 const MCSubtargetInfo &STI); 1186 1187 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI); 1188 1189 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI); 1190 1191 } // namespace MTBUFFormat 1192 1193 namespace SendMsg { 1194 1195 LLVM_READNONE 1196 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI); 1197 1198 LLVM_READNONE 1199 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, 1200 bool Strict = true); 1201 1202 LLVM_READNONE 1203 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, 1204 const 
MCSubtargetInfo &STI, bool Strict = true); 1205 1206 LLVM_READNONE 1207 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI); 1208 1209 LLVM_READNONE 1210 bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI); 1211 1212 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, 1213 uint16_t &StreamId, const MCSubtargetInfo &STI); 1214 1215 LLVM_READNONE 1216 uint64_t encodeMsg(uint64_t MsgId, 1217 uint64_t OpId, 1218 uint64_t StreamId); 1219 1220 } // namespace SendMsg 1221 1222 1223 unsigned getInitialPSInputAddr(const Function &F); 1224 1225 bool getHasColorExport(const Function &F); 1226 1227 bool getHasDepthExport(const Function &F); 1228 1229 LLVM_READNONE 1230 bool isShader(CallingConv::ID CC); 1231 1232 LLVM_READNONE 1233 bool isGraphics(CallingConv::ID CC); 1234 1235 LLVM_READNONE 1236 bool isCompute(CallingConv::ID CC); 1237 1238 LLVM_READNONE 1239 bool isEntryFunctionCC(CallingConv::ID CC); 1240 1241 // These functions are considered entrypoints into the current module, i.e. they 1242 // are allowed to be called from outside the current module. This is different 1243 // from isEntryFunctionCC, which is only true for functions that are entered by 1244 // the hardware. Module entry points include all entry functions but also 1245 // include functions that can be called from other functions inside or outside 1246 // the current module. Module entry functions are allowed to allocate LDS. 
1247 LLVM_READNONE 1248 bool isModuleEntryFunctionCC(CallingConv::ID CC); 1249 1250 LLVM_READNONE 1251 bool isChainCC(CallingConv::ID CC); 1252 1253 bool isKernelCC(const Function *Func); 1254 1255 // FIXME: Remove this when calling conventions cleaned up 1256 LLVM_READNONE 1257 inline bool isKernel(CallingConv::ID CC) { 1258 switch (CC) { 1259 case CallingConv::AMDGPU_KERNEL: 1260 case CallingConv::SPIR_KERNEL: 1261 return true; 1262 default: 1263 return false; 1264 } 1265 } 1266 1267 bool hasXNACK(const MCSubtargetInfo &STI); 1268 bool hasSRAMECC(const MCSubtargetInfo &STI); 1269 bool hasMIMG_R128(const MCSubtargetInfo &STI); 1270 bool hasA16(const MCSubtargetInfo &STI); 1271 bool hasG16(const MCSubtargetInfo &STI); 1272 bool hasPackedD16(const MCSubtargetInfo &STI); 1273 bool hasGDS(const MCSubtargetInfo &STI); 1274 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false); 1275 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI); 1276 1277 bool isSI(const MCSubtargetInfo &STI); 1278 bool isCI(const MCSubtargetInfo &STI); 1279 bool isVI(const MCSubtargetInfo &STI); 1280 bool isGFX9(const MCSubtargetInfo &STI); 1281 bool isGFX9_GFX10(const MCSubtargetInfo &STI); 1282 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI); 1283 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI); 1284 bool isGFX8Plus(const MCSubtargetInfo &STI); 1285 bool isGFX9Plus(const MCSubtargetInfo &STI); 1286 bool isNotGFX9Plus(const MCSubtargetInfo &STI); 1287 bool isGFX10(const MCSubtargetInfo &STI); 1288 bool isGFX10_GFX11(const MCSubtargetInfo &STI); 1289 bool isGFX10Plus(const MCSubtargetInfo &STI); 1290 bool isNotGFX10Plus(const MCSubtargetInfo &STI); 1291 bool isGFX10Before1030(const MCSubtargetInfo &STI); 1292 bool isGFX11(const MCSubtargetInfo &STI); 1293 bool isGFX11Plus(const MCSubtargetInfo &STI); 1294 bool isGFX12(const MCSubtargetInfo &STI); 1295 bool isGFX12Plus(const MCSubtargetInfo &STI); 1296 bool isNotGFX12Plus(const MCSubtargetInfo &STI); 1297 bool 
isNotGFX11Plus(const MCSubtargetInfo &STI); 1298 bool isGCN3Encoding(const MCSubtargetInfo &STI); 1299 bool isGFX10_AEncoding(const MCSubtargetInfo &STI); 1300 bool isGFX10_BEncoding(const MCSubtargetInfo &STI); 1301 bool hasGFX10_3Insts(const MCSubtargetInfo &STI); 1302 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI); 1303 bool isGFX90A(const MCSubtargetInfo &STI); 1304 bool isGFX940(const MCSubtargetInfo &STI); 1305 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); 1306 bool hasMAIInsts(const MCSubtargetInfo &STI); 1307 bool hasVOPD(const MCSubtargetInfo &STI); 1308 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI); 1309 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR); 1310 unsigned hasKernargPreload(const MCSubtargetInfo &STI); 1311 bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST); 1312 1313 /// Is Reg - scalar register 1314 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); 1315 1316 /// \returns if \p Reg occupies the high 16-bits of a 32-bit register. 1317 /// The bit indicating isHi is the LSB of the encoding. 1318 bool isHi(unsigned Reg, const MCRegisterInfo &MRI); 1319 1320 /// If \p Reg is a pseudo reg, return the correct hardware register given 1321 /// \p STI otherwise return \p Reg. 1322 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); 1323 1324 /// Convert hardware register \p Reg to a pseudo register 1325 LLVM_READNONE 1326 unsigned mc2PseudoReg(unsigned Reg); 1327 1328 LLVM_READNONE 1329 bool isInlineValue(unsigned Reg); 1330 1331 /// Is this an AMDGPU specific source operand? These include registers, 1332 /// inline constants, literals and mandatory literals (KImm). 1333 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo); 1334 1335 /// Is this a KImm operand? 1336 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo); 1337 1338 /// Is this floating-point operand? 
1339 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo); 1340 1341 /// Does this operand support only inlinable literals? 1342 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo); 1343 1344 /// Get the size in bits of a register from the register class \p RC. 1345 unsigned getRegBitWidth(unsigned RCID); 1346 1347 /// Get the size in bits of a register from the register class \p RC. 1348 unsigned getRegBitWidth(const MCRegisterClass &RC); 1349 1350 /// Get size of register operand 1351 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 1352 unsigned OpNo); 1353 1354 LLVM_READNONE 1355 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) { 1356 switch (OpInfo.OperandType) { 1357 case AMDGPU::OPERAND_REG_IMM_INT32: 1358 case AMDGPU::OPERAND_REG_IMM_FP32: 1359 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1360 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1361 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1362 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1363 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1364 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1365 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1366 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1367 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1368 case AMDGPU::OPERAND_KIMM32: 1369 case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4 1370 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 1371 return 4; 1372 1373 case AMDGPU::OPERAND_REG_IMM_INT64: 1374 case AMDGPU::OPERAND_REG_IMM_FP64: 1375 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1376 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1377 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1378 return 8; 1379 1380 case AMDGPU::OPERAND_REG_IMM_INT16: 1381 case AMDGPU::OPERAND_REG_IMM_BF16: 1382 case AMDGPU::OPERAND_REG_IMM_FP16: 1383 case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: 1384 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1385 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1386 case AMDGPU::OPERAND_REG_INLINE_C_BF16: 1387 case 
AMDGPU::OPERAND_REG_INLINE_C_FP16: 1388 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1389 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: 1390 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1391 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1392 case AMDGPU::OPERAND_REG_INLINE_AC_BF16: 1393 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1394 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1395 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: 1396 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1397 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1398 case AMDGPU::OPERAND_REG_IMM_V2BF16: 1399 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1400 return 2; 1401 1402 default: 1403 llvm_unreachable("unhandled operand type"); 1404 } 1405 } 1406 1407 LLVM_READNONE 1408 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) { 1409 return getOperandSize(Desc.operands()[OpNo]); 1410 } 1411 1412 /// Is this literal inlinable, and not one of the values intended for floating 1413 /// point values. 1414 LLVM_READNONE 1415 inline bool isInlinableIntLiteral(int64_t Literal) { 1416 return Literal >= -16 && Literal <= 64; 1417 } 1418 1419 /// Is this literal inlinable 1420 LLVM_READNONE 1421 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi); 1422 1423 LLVM_READNONE 1424 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi); 1425 1426 LLVM_READNONE 1427 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi); 1428 1429 LLVM_READNONE 1430 bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi); 1431 1432 LLVM_READNONE 1433 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi); 1434 1435 LLVM_READNONE 1436 bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi); 1437 1438 LLVM_READNONE 1439 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal); 1440 1441 LLVM_READNONE 1442 std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal); 1443 1444 LLVM_READNONE 1445 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal); 1446 1447 LLVM_READNONE 1448 bool 
isInlinableLiteralV216(uint32_t Literal, uint8_t OpType); 1449 1450 LLVM_READNONE 1451 bool isInlinableLiteralV2I16(uint32_t Literal); 1452 1453 LLVM_READNONE 1454 bool isInlinableLiteralV2BF16(uint32_t Literal); 1455 1456 LLVM_READNONE 1457 bool isInlinableLiteralV2F16(uint32_t Literal); 1458 1459 LLVM_READNONE 1460 bool isValid32BitLiteral(uint64_t Val, bool IsFP64); 1461 1462 bool isArgPassedInSGPR(const Argument *Arg); 1463 1464 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo); 1465 1466 LLVM_READONLY 1467 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, 1468 int64_t EncodedOffset); 1469 1470 LLVM_READONLY 1471 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, 1472 int64_t EncodedOffset, 1473 bool IsBuffer); 1474 1475 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate 1476 /// offsets. 1477 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset); 1478 1479 /// \returns The encoding that will be used for \p ByteOffset in the 1480 /// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10 1481 /// S_LOAD instructions have a signed offset, on other subtargets it is 1482 /// unsigned. S_BUFFER has an unsigned offset for all subtargets. 1483 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, 1484 int64_t ByteOffset, bool IsBuffer, 1485 bool HasSOffset = false); 1486 1487 /// \return The encoding that can be used for a 32-bit literal offset in an SMRD 1488 /// instruction. This is only useful on CI.s 1489 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, 1490 int64_t ByteOffset); 1491 1492 /// For pre-GFX12 FLAT instructions the offset must be positive; 1493 /// MSB is ignored and forced to zero. 1494 /// 1495 /// \return The number of bits available for the signed offset field in flat 1496 /// instructions. Note that some forms of the instruction disallow negative 1497 /// offsets. 
1498 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST); 1499 1500 /// \returns true if this offset is small enough to fit in the SMRD 1501 /// offset field. \p ByteOffset should be the offset in bytes and 1502 /// not the encoded offset. 1503 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset); 1504 1505 LLVM_READNONE 1506 inline bool isLegalDPALU_DPPControl(unsigned DC) { 1507 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST; 1508 } 1509 1510 /// \returns true if an instruction may have a 64-bit VGPR operand. 1511 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc); 1512 1513 /// \returns true if an instruction is a DP ALU DPP. 1514 bool isDPALU_DPP(const MCInstrDesc &OpDesc); 1515 1516 /// \returns true if the intrinsic is divergent 1517 bool isIntrinsicSourceOfDivergence(unsigned IntrID); 1518 1519 /// \returns true if the intrinsic is uniform 1520 bool isIntrinsicAlwaysUniform(unsigned IntrID); 1521 1522 /// \returns lds block size in terms of dwords. \p 1523 /// This is used to calculate the lds size encoded for PAL metadata 3.0+ which 1524 /// must be defined in terms of bytes. 1525 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST); 1526 1527 } // end namespace AMDGPU 1528 1529 raw_ostream &operator<<(raw_ostream &OS, 1530 const AMDGPU::IsaInfo::TargetIDSetting S); 1531 1532 } // end namespace llvm 1533 1534 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H 1535