1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H 10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H 11 12 #include "SIDefines.h" 13 #include "llvm/IR/CallingConv.h" 14 #include "llvm/IR/InstrTypes.h" 15 #include "llvm/IR/Module.h" 16 #include "llvm/Support/Alignment.h" 17 #include <array> 18 #include <functional> 19 #include <utility> 20 21 struct amd_kernel_code_t; 22 23 namespace llvm { 24 25 struct Align; 26 class Argument; 27 class Function; 28 class GlobalValue; 29 class MCInstrInfo; 30 class MCRegisterClass; 31 class MCRegisterInfo; 32 class MCSubtargetInfo; 33 class StringRef; 34 class Triple; 35 class raw_ostream; 36 37 namespace amdhsa { 38 struct kernel_descriptor_t; 39 } 40 41 namespace AMDGPU { 42 43 struct IsaVersion; 44 45 enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5 }; 46 47 /// \returns True if \p STI is AMDHSA. 48 bool isHsaAbi(const MCSubtargetInfo &STI); 49 50 /// \returns Code object version from the IR module flag. 51 unsigned getAMDHSACodeObjectVersion(const Module &M); 52 53 /// \returns The default HSA code object version. This should only be used when 54 /// we lack a more accurate CodeObjectVersion value (e.g. from the IR module 55 /// flag or a .amdhsa_code_object_version directive) 56 unsigned getDefaultAMDHSACodeObjectVersion(); 57 58 /// \returns ABIVersion suitable for use in ELF's e_ident[ABIVERSION]. \param 59 /// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion(). 
60 uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion); 61 62 /// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr 63 unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV); 64 65 /// \returns The offset of the hostcall pointer argument from implicitarg_ptr 66 unsigned getHostcallImplicitArgPosition(unsigned COV); 67 68 unsigned getDefaultQueueImplicitArgPosition(unsigned COV); 69 unsigned getCompletionActionImplicitArgPosition(unsigned COV); 70 71 struct GcnBufferFormatInfo { 72 unsigned Format; 73 unsigned BitsPerComp; 74 unsigned NumComponents; 75 unsigned NumFormat; 76 unsigned DataFormat; 77 }; 78 79 struct MAIInstInfo { 80 uint16_t Opcode; 81 bool is_dgemm; 82 bool is_gfx940_xdl; 83 }; 84 85 #define GET_MIMGBaseOpcode_DECL 86 #define GET_MIMGDim_DECL 87 #define GET_MIMGEncoding_DECL 88 #define GET_MIMGLZMapping_DECL 89 #define GET_MIMGMIPMapping_DECL 90 #define GET_MIMGBiASMapping_DECL 91 #define GET_MAIInstInfoTable_DECL 92 #include "AMDGPUGenSearchableTables.inc" 93 94 namespace IsaInfo { 95 96 enum { 97 // The closed Vulkan driver sets 96, which limits the wave count to 8 but 98 // doesn't spill SGPRs as much as when 80 is set. 99 FIXED_NUM_SGPRS_FOR_INIT_BUG = 96, 100 TRAP_NUM_SGPRS = 16 101 }; 102 103 enum class TargetIDSetting { 104 Unsupported, 105 Any, 106 Off, 107 On 108 }; 109 110 class AMDGPUTargetID { 111 private: 112 const MCSubtargetInfo &STI; 113 TargetIDSetting XnackSetting; 114 TargetIDSetting SramEccSetting; 115 116 public: 117 explicit AMDGPUTargetID(const MCSubtargetInfo &STI); 118 ~AMDGPUTargetID() = default; 119 120 /// \return True if the current xnack setting is not "Unsupported". 121 bool isXnackSupported() const { 122 return XnackSetting != TargetIDSetting::Unsupported; 123 } 124 125 /// \returns True if the current xnack setting is "On" or "Any". 
126 bool isXnackOnOrAny() const { 127 return XnackSetting == TargetIDSetting::On || 128 XnackSetting == TargetIDSetting::Any; 129 } 130 131 /// \returns True if current xnack setting is "On" or "Off", 132 /// false otherwise. 133 bool isXnackOnOrOff() const { 134 return getXnackSetting() == TargetIDSetting::On || 135 getXnackSetting() == TargetIDSetting::Off; 136 } 137 138 /// \returns The current xnack TargetIDSetting, possible options are 139 /// "Unsupported", "Any", "Off", and "On". 140 TargetIDSetting getXnackSetting() const { 141 return XnackSetting; 142 } 143 144 /// Sets xnack setting to \p NewXnackSetting. 145 void setXnackSetting(TargetIDSetting NewXnackSetting) { 146 XnackSetting = NewXnackSetting; 147 } 148 149 /// \return True if the current sramecc setting is not "Unsupported". 150 bool isSramEccSupported() const { 151 return SramEccSetting != TargetIDSetting::Unsupported; 152 } 153 154 /// \returns True if the current sramecc setting is "On" or "Any". 155 bool isSramEccOnOrAny() const { 156 return SramEccSetting == TargetIDSetting::On || 157 SramEccSetting == TargetIDSetting::Any; 158 } 159 160 /// \returns True if current sramecc setting is "On" or "Off", 161 /// false otherwise. 162 bool isSramEccOnOrOff() const { 163 return getSramEccSetting() == TargetIDSetting::On || 164 getSramEccSetting() == TargetIDSetting::Off; 165 } 166 167 /// \returns The current sramecc TargetIDSetting, possible options are 168 /// "Unsupported", "Any", "Off", and "On". 169 TargetIDSetting getSramEccSetting() const { 170 return SramEccSetting; 171 } 172 173 /// Sets sramecc setting to \p NewSramEccSetting. 174 void setSramEccSetting(TargetIDSetting NewSramEccSetting) { 175 SramEccSetting = NewSramEccSetting; 176 } 177 178 void setTargetIDFromFeaturesString(StringRef FS); 179 void setTargetIDFromTargetIDStream(StringRef TargetID); 180 181 /// \returns String representation of an object. 
182 std::string toString() const; 183 }; 184 185 /// \returns Wavefront size for given subtarget \p STI. 186 unsigned getWavefrontSize(const MCSubtargetInfo *STI); 187 188 /// \returns Local memory size in bytes for given subtarget \p STI. 189 unsigned getLocalMemorySize(const MCSubtargetInfo *STI); 190 191 /// \returns Maximum addressable local memory size in bytes for given subtarget 192 /// \p STI. 193 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI); 194 195 /// \returns Number of execution units per compute unit for given subtarget \p 196 /// STI. 197 unsigned getEUsPerCU(const MCSubtargetInfo *STI); 198 199 /// \returns Maximum number of work groups per compute unit for given subtarget 200 /// \p STI and limited by given \p FlatWorkGroupSize. 201 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, 202 unsigned FlatWorkGroupSize); 203 204 /// \returns Minimum number of waves per execution unit for given subtarget \p 205 /// STI. 206 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI); 207 208 /// \returns Maximum number of waves per execution unit for given subtarget \p 209 /// STI without any kind of limitation. 210 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI); 211 212 /// \returns Number of waves per execution unit required to support the given \p 213 /// FlatWorkGroupSize. 214 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, 215 unsigned FlatWorkGroupSize); 216 217 /// \returns Minimum flat work group size for given subtarget \p STI. 218 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI); 219 220 /// \returns Maximum flat work group size for given subtarget \p STI. 221 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI); 222 223 /// \returns Number of waves per work group for given subtarget \p STI and 224 /// \p FlatWorkGroupSize. 
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
// NOTE(review): \p Addressable is undocumented; presumably it caps the result
// at the addressable SGPR count -- confirm against the definition.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
// NOTE(review): \p EnableWavefrontSize32 defaults to std::nullopt; how an
// absent value is resolved is decided by the out-of-view definition.
unsigned
getVGPRAllocGranule(const MCSubtargetInfo *STI,
                    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(
    const MCSubtargetInfo *STI,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
/// subtarget \p STI.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
305 unsigned 306 getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, 307 std::optional<bool> EnableWavefrontSize32 = std::nullopt); 308 309 } // end namespace IsaInfo 310 311 LLVM_READONLY 312 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); 313 314 LLVM_READONLY 315 inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) { 316 return getNamedOperandIdx(Opcode, NamedIdx) != -1; 317 } 318 319 LLVM_READONLY 320 int getSOPPWithRelaxation(uint16_t Opcode); 321 322 struct MIMGBaseOpcodeInfo { 323 MIMGBaseOpcode BaseOpcode; 324 bool Store; 325 bool Atomic; 326 bool AtomicX2; 327 bool Sampler; 328 bool Gather4; 329 330 uint8_t NumExtraArgs; 331 bool Gradients; 332 bool G16; 333 bool Coordinates; 334 bool LodOrClampOrMip; 335 bool HasD16; 336 bool MSAA; 337 bool BVH; 338 bool A16; 339 }; 340 341 LLVM_READONLY 342 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc); 343 344 LLVM_READONLY 345 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode); 346 347 struct MIMGDimInfo { 348 MIMGDim Dim; 349 uint8_t NumCoords; 350 uint8_t NumGradients; 351 bool MSAA; 352 bool DA; 353 uint8_t Encoding; 354 const char *AsmSuffix; 355 }; 356 357 LLVM_READONLY 358 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum); 359 360 LLVM_READONLY 361 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc); 362 363 LLVM_READONLY 364 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix); 365 366 struct MIMGLZMappingInfo { 367 MIMGBaseOpcode L; 368 MIMGBaseOpcode LZ; 369 }; 370 371 struct MIMGMIPMappingInfo { 372 MIMGBaseOpcode MIP; 373 MIMGBaseOpcode NONMIP; 374 }; 375 376 struct MIMGBiasMappingInfo { 377 MIMGBaseOpcode Bias; 378 MIMGBaseOpcode NoBias; 379 }; 380 381 struct MIMGOffsetMappingInfo { 382 MIMGBaseOpcode Offset; 383 MIMGBaseOpcode NoOffset; 384 }; 385 386 struct MIMGG16MappingInfo { 387 MIMGBaseOpcode G; 388 MIMGBaseOpcode G16; 389 }; 390 391 LLVM_READONLY 392 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L); 

// Pairs the 2-address and 3-address opcodes of one operation (see
// mapWMMA2AddrTo3AddrOpcode() / mapWMMA3AddrTo2AddrOpcode()).
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);

LLVM_READONLY
const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

// NOTE(review): returns int rather than unsigned; presumably a negative value
// signals "no such opcode" -- confirm against the definition.
LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);

// Per-opcode MIMG record returned by getMIMGInfo().
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

// MTBUF opcode queries.
LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

// MUBUF opcode queries; same shape as the MTBUF set above.
LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);

LLVM_READONLY
bool isVOPC64DPP(unsigned Opc);

/// Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY
bool getMAIIsDGEMM(unsigned Opc);

LLVM_READONLY
bool getMAIIsGFX940XDL(unsigned Opc);

// Result of getCanBeVOPD(): one flag per VOPD component.
// NOTE(review): presumably X / Y say whether the opcode may be used as the X /
// Y component of a VOPD instruction -- confirm against the definition.
struct CanBeVOPD {
  bool X;
  bool Y;
};

/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
LLVM_READONLY
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);

LLVM_READONLY
CanBeVOPD getCanBeVOPD(unsigned Opc);

// Two lookups for buffer format info: by (bits per component, component
// count, numeric format) or by the combined format value.
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

LLVM_READONLY
unsigned getVOPDOpcode(unsigned Opc);

LLVM_READONLY
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily);

LLVM_READONLY
bool isVOPD(unsigned Opc);

LLVM_READNONE
bool isMAC(unsigned Opc);

LLVM_READNONE
bool isPermlane16(unsigned Opc);

LLVM_READNONE
bool isGenericAtomic(unsigned Opc);

LLVM_READNONE
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);

namespace VOPD {

// Operand indices within one VOPD component (DST, SRC0..SRC2), followed by
// operand counts: 1 dst, at most 3 srcs, at most 4 operands in total.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,
  MAX_SRC_NUM = 3,
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};

// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
556 constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1}; 557 558 enum ComponentIndex : unsigned { X = 0, Y = 1 }; 559 constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y}; 560 constexpr unsigned COMPONENTS_NUM = 2; 561 562 // Properties of VOPD components. 563 class ComponentProps { 564 private: 565 unsigned SrcOperandsNum = 0; 566 unsigned MandatoryLiteralIdx = ~0u; 567 bool HasSrc2Acc = false; 568 569 public: 570 ComponentProps() = default; 571 ComponentProps(const MCInstrDesc &OpDesc); 572 573 // Return the total number of src operands this component has. 574 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; } 575 576 // Return the number of src operands of this component visible to the parser. 577 unsigned getCompParsedSrcOperandsNum() const { 578 return SrcOperandsNum - HasSrc2Acc; 579 } 580 581 // Return true iif this component has a mandatory literal. 582 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; } 583 584 // If this component has a mandatory literal, return component operand 585 // index of this literal (i.e. either Component::SRC1 or Component::SRC2). 586 unsigned getMandatoryLiteralCompOperandIndex() const { 587 assert(hasMandatoryLiteral()); 588 return MandatoryLiteralIdx; 589 } 590 591 // Return true iif this component has operand 592 // with component index CompSrcIdx and this operand may be a register. 593 bool hasRegSrcOperand(unsigned CompSrcIdx) const { 594 assert(CompSrcIdx < Component::MAX_SRC_NUM); 595 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx); 596 } 597 598 // Return true iif this component has tied src2. 
599 bool hasSrc2Acc() const { return HasSrc2Acc; } 600 601 private: 602 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const { 603 assert(CompSrcIdx < Component::MAX_SRC_NUM); 604 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx; 605 } 606 }; 607 608 enum ComponentKind : unsigned { 609 SINGLE = 0, // A single VOP1 or VOP2 instruction which may be used in VOPD. 610 COMPONENT_X, // A VOPD instruction, X component. 611 COMPONENT_Y, // A VOPD instruction, Y component. 612 MAX = COMPONENT_Y 613 }; 614 615 // Interface functions of this class map VOPD component operand indices 616 // to indices of operands in MachineInstr/MCInst or parsed operands array. 617 // 618 // Note that this class operates with 3 kinds of indices: 619 // - VOPD component operand indices (Component::DST, Component::SRC0, etc.); 620 // - MC operand indices (they refer operands in a MachineInstr/MCInst); 621 // - parsed operand indices (they refer operands in parsed operands array). 622 // 623 // For SINGLE components mapping between these indices is trivial. 624 // But things get more complicated for COMPONENT_X and 625 // COMPONENT_Y because these components share the same 626 // MachineInstr/MCInst and the same parsed operands array. 
627 // Below is an example of component operand to parsed operand 628 // mapping for the following instruction: 629 // 630 // v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1 631 // 632 // PARSED COMPONENT PARSED 633 // COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX 634 // ------------------------------------------------------------------- 635 // "v_dual_add_f32" 0 636 // v_dual_add_f32 v255 0 (DST) --> 1 637 // v4 1 (SRC0) --> 2 638 // v5 2 (SRC1) --> 3 639 // "::" 4 640 // "v_dual_mov_b32" 5 641 // v_dual_mov_b32 v6 0 (DST) --> 6 642 // v1 1 (SRC0) --> 7 643 // ------------------------------------------------------------------- 644 // 645 class ComponentLayout { 646 private: 647 // Regular MachineInstr/MCInst operands are ordered as follows: 648 // dst, src0 [, other src operands] 649 // VOPD MachineInstr/MCInst operands are ordered as follows: 650 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 651 // Each ComponentKind has operand indices defined below. 652 static constexpr unsigned MC_DST_IDX[] = {0, 0, 1}; 653 static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */}; 654 655 // Parsed operands of regular instructions are ordered as follows: 656 // Mnemo dst src0 [vsrc1 ...] 657 // Parsed VOPD operands are ordered as follows: 658 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' 659 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] 660 // Each ComponentKind has operand indices defined below. 661 static constexpr unsigned PARSED_DST_IDX[] = {1, 1, 662 4 /* + OpX.ParsedSrcNum */}; 663 static constexpr unsigned FIRST_PARSED_SRC_IDX[] = { 664 2, 2, 5 /* + OpX.ParsedSrcNum */}; 665 666 private: 667 const ComponentKind Kind; 668 const ComponentProps PrevComp; 669 670 public: 671 // Create layout for COMPONENT_X or SINGLE component. 
672 ComponentLayout(ComponentKind Kind) : Kind(Kind) { 673 assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X); 674 } 675 676 // Create layout for COMPONENT_Y which depends on COMPONENT_X layout. 677 ComponentLayout(const ComponentProps &OpXProps) 678 : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {} 679 680 public: 681 // Return the index of dst operand in MCInst operands. 682 unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; } 683 684 // Return the index of the specified src operand in MCInst operands. 685 unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const { 686 assert(CompSrcIdx < Component::MAX_SRC_NUM); 687 return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx; 688 } 689 690 // Return the index of dst operand in the parsed operands array. 691 unsigned getIndexOfDstInParsedOperands() const { 692 return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum(); 693 } 694 695 // Return the index of the specified src operand in the parsed operands array. 696 unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const { 697 assert(CompSrcIdx < Component::MAX_SRC_NUM); 698 return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx; 699 } 700 701 private: 702 unsigned getPrevCompSrcNum() const { 703 return PrevComp.getCompSrcOperandsNum(); 704 } 705 unsigned getPrevCompParsedSrcNum() const { 706 return PrevComp.getCompParsedSrcOperandsNum(); 707 } 708 }; 709 710 // Layout and properties of VOPD components. 711 class ComponentInfo : public ComponentLayout, public ComponentProps { 712 public: 713 // Create ComponentInfo for COMPONENT_X or SINGLE component. 714 ComponentInfo(const MCInstrDesc &OpDesc, 715 ComponentKind Kind = ComponentKind::SINGLE) 716 : ComponentLayout(Kind), ComponentProps(OpDesc) {} 717 718 // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout. 
719 ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps) 720 : ComponentLayout(OpXProps), ComponentProps(OpDesc) {} 721 722 // Map component operand index to parsed operand index. 723 // Return 0 if the specified operand does not exist. 724 unsigned getIndexInParsedOperands(unsigned CompOprIdx) const; 725 }; 726 727 // Properties of VOPD instructions. 728 class InstInfo { 729 private: 730 const ComponentInfo CompInfo[COMPONENTS_NUM]; 731 732 public: 733 using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>; 734 735 InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) 736 : CompInfo{OpX, OpY} {} 737 738 InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY) 739 : CompInfo{OprInfoX, OprInfoY} {} 740 741 const ComponentInfo &operator[](size_t ComponentIdx) const { 742 assert(ComponentIdx < COMPONENTS_NUM); 743 return CompInfo[ComponentIdx]; 744 } 745 746 // Check VOPD operands constraints. 747 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index 748 // for the specified component and MC operand. The callback must return 0 749 // if the operand is not a register or not a VGPR. 750 // If \p SkipSrc is set to true then constraints for source operands are not 751 // checked. 752 bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx, 753 bool SkipSrc = false) const { 754 return getInvalidCompOperandIndex(GetRegIdx, SkipSrc).has_value(); 755 } 756 757 // Check VOPD operands constraints. 758 // Return the index of an invalid component operand, if any. 759 // If \p SkipSrc is set to true then constraints for source operands are not 760 // checked. 
761 std::optional<unsigned> getInvalidCompOperandIndex( 762 std::function<unsigned(unsigned, unsigned)> GetRegIdx, 763 bool SkipSrc = false) const; 764 765 private: 766 RegIndices 767 getRegIndices(unsigned ComponentIdx, 768 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const; 769 }; 770 771 } // namespace VOPD 772 773 LLVM_READONLY 774 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode); 775 776 LLVM_READONLY 777 // Get properties of 2 single VOP1/VOP2 instructions 778 // used as components to create a VOPD instruction. 779 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY); 780 781 LLVM_READONLY 782 // Get properties of VOPD X and Y components. 783 VOPD::InstInfo 784 getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo); 785 786 LLVM_READONLY 787 bool isTrue16Inst(unsigned Opc); 788 789 LLVM_READONLY 790 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc); 791 792 LLVM_READONLY 793 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc); 794 795 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, 796 const MCSubtargetInfo *STI); 797 798 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor( 799 const MCSubtargetInfo *STI); 800 801 bool isGroupSegment(const GlobalValue *GV); 802 bool isGlobalSegment(const GlobalValue *GV); 803 bool isReadOnlySegment(const GlobalValue *GV); 804 805 /// \returns True if constants should be emitted to .text section for given 806 /// target triple \p TT, false otherwise. 807 bool shouldEmitConstantsToTextSection(const Triple &TT); 808 809 /// \returns Integer value requested using \p F's \p Name attribute. 810 /// 811 /// \returns \p Default if attribute is not present. 812 /// 813 /// \returns \p Default and emits error if requested value cannot be converted 814 /// to integer. 
815 int getIntegerAttribute(const Function &F, StringRef Name, int Default); 816 817 /// \returns A pair of integer values requested using \p F's \p Name attribute 818 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired 819 /// is false). 820 /// 821 /// \returns \p Default if attribute is not present. 822 /// 823 /// \returns \p Default and emits error if one of the requested values cannot be 824 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is 825 /// not present. 826 std::pair<unsigned, unsigned> 827 getIntegerPairAttribute(const Function &F, StringRef Name, 828 std::pair<unsigned, unsigned> Default, 829 bool OnlyFirstRequired = false); 830 831 /// Represents the counter values to wait for in an s_waitcnt instruction. 832 /// 833 /// Large values (including the maximum possible integer) can be used to 834 /// represent "don't care" waits. 835 struct Waitcnt { 836 unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12. 837 unsigned ExpCnt = ~0u; 838 unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12. 839 unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11. 840 unsigned SampleCnt = ~0u; // gfx12+ only. 841 unsigned BvhCnt = ~0u; // gfx12+ only. 842 unsigned KmCnt = ~0u; // gfx12+ only. 843 844 Waitcnt() = default; 845 // Pre-gfx12 constructor. 846 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) 847 : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt), 848 SampleCnt(~0u), BvhCnt(~0u), KmCnt(~0u) {} 849 850 // gfx12+ constructor. 851 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, 852 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt) 853 : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt), 854 SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt) {} 855 856 static Waitcnt allZero(bool Extended, bool HasStorecnt) { 857 return Extended ? 
Waitcnt(0, 0, 0, 0, 0, 0, 0) 858 : Waitcnt(0, 0, 0, HasStorecnt ? 0 : ~0u); 859 } 860 861 static Waitcnt allZeroExceptVsCnt(bool Extended) { 862 return Extended ? Waitcnt(0, 0, 0, ~0u, 0, 0, 0) : Waitcnt(0, 0, 0, ~0u); 863 } 864 865 bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); } 866 867 bool hasWaitExceptStoreCnt() const { 868 return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u || 869 SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u; 870 } 871 872 bool hasWaitStoreCnt() const { return StoreCnt != ~0u; } 873 874 Waitcnt combined(const Waitcnt &Other) const { 875 // Does the right thing provided self and Other are either both pre-gfx12 876 // or both gfx12+. 877 return Waitcnt( 878 std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt), 879 std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt), 880 std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt), 881 std::min(KmCnt, Other.KmCnt)); 882 } 883 }; 884 885 // The following methods are only meaningful on targets that support 886 // S_WAITCNT. 887 888 /// \returns Vmcnt bit mask for given isa \p Version. 889 unsigned getVmcntBitMask(const IsaVersion &Version); 890 891 /// \returns Expcnt bit mask for given isa \p Version. 892 unsigned getExpcntBitMask(const IsaVersion &Version); 893 894 /// \returns Lgkmcnt bit mask for given isa \p Version. 895 unsigned getLgkmcntBitMask(const IsaVersion &Version); 896 897 /// \returns Waitcnt bit mask for given isa \p Version. 898 unsigned getWaitcntBitMask(const IsaVersion &Version); 899 900 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. 901 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); 902 903 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. 904 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt); 905 906 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. 
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
/// which needs it is deprecated.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
///
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

/// \returns \p Encoded decoded into a Waitcnt structure for given isa
/// \p Version.
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version. Should not be used on gfx12+, the instruction which needs
/// it is deprecated.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11)
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
///     Waitcnt[15:10] = \p Vmcnt       (gfx11)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
///
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

/// \returns \p Decoded encoded as a Waitcnt immediate for given isa
/// \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

// The following methods are only meaningful on targets that support
// S_WAIT_*CNT, introduced with gfx12.

/// \returns Loadcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support LOADcnt.
unsigned getLoadcntBitMask(const IsaVersion &Version);

/// \returns Samplecnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support SAMPLEcnt.
unsigned getSamplecntBitMask(const IsaVersion &Version);

/// \returns Bvhcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support BVHcnt.
unsigned getBvhcntBitMask(const IsaVersion &Version);

/// \returns Dscnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support DScnt.
unsigned getDscntBitMask(const IsaVersion &Version);

/// \returns Kmcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support KMcnt.
unsigned getKmcntBitMask(const IsaVersion &Version);

/// \return STOREcnt or VScnt bit mask for given isa \p Version.
988 /// returns 0 for versions that do not support STOREcnt or VScnt. 989 /// STOREcnt and VScnt are the same counter, the name used 990 /// depends on the ISA version. 991 unsigned getStorecntBitMask(const IsaVersion &Version); 992 993 // The following are only meaningful on targets that support 994 // S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT. 995 996 /// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given 997 /// isa \p Version. 998 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt); 999 1000 /// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given 1001 /// isa \p Version. 1002 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt); 1003 1004 /// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an 1005 /// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa 1006 /// \p Version. 1007 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded); 1008 1009 /// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an 1010 /// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa 1011 /// \p Version. 
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);

// Helpers for hardware-register (hwreg) operands: name <-> id lookup, field
// validation, and packing/unpacking of the (Id, Offset, Width) triple.
// NOTE(review): field widths and valid ranges are defined by the
// implementation, which is not visible in this header - confirm there.
namespace Hwreg {

/// Looks up a hardware register id from its symbolic \p Name for \p STI.
/// NOTE(review): the value returned for an unknown name is not visible here.
LLVM_READONLY
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

/// Packs \p Id, \p Offset and \p Width into a single encoded value.
LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

/// Looks up the symbolic name of hardware register \p Id for \p STI.
LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

/// Unpacks \p Val into the output parameters \p Id, \p Offset and \p Width.
/// NOTE(review): presumably the inverse of encodeHwreg - confirm.
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg

// Helpers for Depctr immediates: whole-immediate encode/decode by symbolic
// name, plus per-field accessors for the VaVdst, VmVsrc and SaSdst fields.
namespace DepCtr {

// NOTE(review): the four declarations below carry no documentation in this
// header. UsedOprMask, HasNonDefaultVal, Id, Name, Val and IsDefault are
// non-const references, i.e. output (or in/out) parameters; their exact
// contract is defined by the implementation - confirm there.
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI);
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI);
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI);

/// \returns Decoded VaVdst from given immediate \p Encoded.
unsigned decodeFieldVaVdst(unsigned Encoded);

/// \returns Decoded VmVsrc from given immediate \p Encoded.
unsigned decodeFieldVmVsrc(unsigned Encoded);

/// \returns Decoded SaSdst from given immediate \p Encoded.
unsigned decodeFieldSaSdst(unsigned Encoded);

/// \returns \p VmVsrc as an encoded Depctr immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc);

/// \returns \p Encoded combined with encoded \p VmVsrc.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);

/// \returns \p VaVdst as an encoded Depctr immediate.
unsigned encodeFieldVaVdst(unsigned VaVdst);

/// \returns \p Encoded combined with encoded \p VaVdst.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);

/// \returns \p SaSdst as an encoded Depctr immediate.
unsigned encodeFieldSaSdst(unsigned SaSdst);

/// \returns \p Encoded combined with encoded \p SaSdst.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);

} // namespace DepCtr

// Helpers for export-target operands: name <-> id lookup and a per-subtarget
// support query.
namespace Exp {

/// Looks up the target name for \p Id; \p Name and \p Index are output
/// parameters.
/// NOTE(review): the meaning of \p Index and of the bool result is defined by
/// the implementation, which is not visible in this header - confirm there.
bool getTgtName(unsigned Id, StringRef &Name, int &Index);

LLVM_READONLY
unsigned getTgtId(const StringRef Name);

LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp

// Helpers for MTBUF format operands: the split data-format (Dfmt) /
// numeric-format (Nfmt) pair, the unified format, and conversions and
// validity checks between them.
// NOTE(review): the lookup functions return int64_t; the value used for
// "not found" is not visible in this header - confirm in the implementation.
namespace MTBUFFormat {

/// Packs \p Dfmt and \p Nfmt into a single encoded format value.
LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

/// Unpacks \p Format into the output parameters \p Dfmt and \p Nfmt.
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat

// Helpers for sendmsg operands: a message is described by a message id, an
// operation id and a stream id.
namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);

// NOTE(review): \p Strict defaults to true in the two validators below; what
// the non-strict mode relaxes is defined by the implementation - confirm.
LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

/// Unpacks \p Val into the output parameters \p MsgId, \p OpId and
/// \p StreamId for \p STI.
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI);

/// Packs \p MsgId, \p OpId and \p StreamId into a single encoded value.
LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg


// Per-function queries.
// NOTE(review): where these values come from (e.g. function attributes) is
// not visible in this header - confirm in the implementation.
unsigned getInitialPSInputAddr(const Function &F);

bool getHasColorExport(const Function &F);

bool getHasDepthExport(const Function &F);

// Calling-convention classification predicates.
LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// These functions are considered entry points into the current module, i.e.
// they are allowed to be called from outside the current module. This is
// different from isEntryFunctionCC, which is only true for functions that are
// entered by the hardware. Module entry points include all entry functions but
// also include functions that can be called from other functions inside or
// outside the current module. Module entry functions are allowed to allocate
// LDS.
1189 LLVM_READNONE 1190 bool isModuleEntryFunctionCC(CallingConv::ID CC); 1191 1192 LLVM_READNONE 1193 bool isChainCC(CallingConv::ID CC); 1194 1195 bool isKernelCC(const Function *Func); 1196 1197 // FIXME: Remove this when calling conventions cleaned up 1198 LLVM_READNONE 1199 inline bool isKernel(CallingConv::ID CC) { 1200 switch (CC) { 1201 case CallingConv::AMDGPU_KERNEL: 1202 case CallingConv::SPIR_KERNEL: 1203 return true; 1204 default: 1205 return false; 1206 } 1207 } 1208 1209 bool hasXNACK(const MCSubtargetInfo &STI); 1210 bool hasSRAMECC(const MCSubtargetInfo &STI); 1211 bool hasMIMG_R128(const MCSubtargetInfo &STI); 1212 bool hasA16(const MCSubtargetInfo &STI); 1213 bool hasG16(const MCSubtargetInfo &STI); 1214 bool hasPackedD16(const MCSubtargetInfo &STI); 1215 bool hasGDS(const MCSubtargetInfo &STI); 1216 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false); 1217 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI); 1218 1219 bool isSI(const MCSubtargetInfo &STI); 1220 bool isCI(const MCSubtargetInfo &STI); 1221 bool isVI(const MCSubtargetInfo &STI); 1222 bool isGFX9(const MCSubtargetInfo &STI); 1223 bool isGFX9_GFX10(const MCSubtargetInfo &STI); 1224 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI); 1225 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI); 1226 bool isGFX8Plus(const MCSubtargetInfo &STI); 1227 bool isGFX9Plus(const MCSubtargetInfo &STI); 1228 bool isGFX10(const MCSubtargetInfo &STI); 1229 bool isGFX10_GFX11(const MCSubtargetInfo &STI); 1230 bool isGFX10Plus(const MCSubtargetInfo &STI); 1231 bool isNotGFX10Plus(const MCSubtargetInfo &STI); 1232 bool isGFX10Before1030(const MCSubtargetInfo &STI); 1233 bool isGFX11(const MCSubtargetInfo &STI); 1234 bool isGFX11Plus(const MCSubtargetInfo &STI); 1235 bool isGFX12(const MCSubtargetInfo &STI); 1236 bool isGFX12Plus(const MCSubtargetInfo &STI); 1237 bool isNotGFX12Plus(const MCSubtargetInfo &STI); 1238 bool isNotGFX11Plus(const MCSubtargetInfo &STI); 1239 bool 
isGCN3Encoding(const MCSubtargetInfo &STI); 1240 bool isGFX10_AEncoding(const MCSubtargetInfo &STI); 1241 bool isGFX10_BEncoding(const MCSubtargetInfo &STI); 1242 bool hasGFX10_3Insts(const MCSubtargetInfo &STI); 1243 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI); 1244 bool isGFX90A(const MCSubtargetInfo &STI); 1245 bool isGFX940(const MCSubtargetInfo &STI); 1246 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); 1247 bool hasMAIInsts(const MCSubtargetInfo &STI); 1248 bool hasVOPD(const MCSubtargetInfo &STI); 1249 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI); 1250 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR); 1251 unsigned hasKernargPreload(const MCSubtargetInfo &STI); 1252 1253 /// Is Reg - scalar register 1254 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); 1255 1256 /// \returns if \p Reg occupies the high 16-bits of a 32-bit register. 1257 /// The bit indicating isHi is the LSB of the encoding. 1258 bool isHi(unsigned Reg, const MCRegisterInfo &MRI); 1259 1260 /// If \p Reg is a pseudo reg, return the correct hardware register given 1261 /// \p STI otherwise return \p Reg. 1262 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); 1263 1264 /// Convert hardware register \p Reg to a pseudo register 1265 LLVM_READNONE 1266 unsigned mc2PseudoReg(unsigned Reg); 1267 1268 LLVM_READNONE 1269 bool isInlineValue(unsigned Reg); 1270 1271 /// Is this an AMDGPU specific source operand? These include registers, 1272 /// inline constants, literals and mandatory literals (KImm). 1273 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo); 1274 1275 /// Is this a KImm operand? 1276 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo); 1277 1278 /// Is this floating-point operand? 1279 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo); 1280 1281 /// Does this operand support only inlinable literals? 
1282 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo); 1283 1284 /// Get the size in bits of a register from the register class \p RC. 1285 unsigned getRegBitWidth(unsigned RCID); 1286 1287 /// Get the size in bits of a register from the register class \p RC. 1288 unsigned getRegBitWidth(const MCRegisterClass &RC); 1289 1290 /// Get size of register operand 1291 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 1292 unsigned OpNo); 1293 1294 LLVM_READNONE 1295 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) { 1296 switch (OpInfo.OperandType) { 1297 case AMDGPU::OPERAND_REG_IMM_INT32: 1298 case AMDGPU::OPERAND_REG_IMM_FP32: 1299 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1300 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1301 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1302 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1303 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1304 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1305 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1306 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1307 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1308 case AMDGPU::OPERAND_KIMM32: 1309 case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4 1310 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 1311 return 4; 1312 1313 case AMDGPU::OPERAND_REG_IMM_INT64: 1314 case AMDGPU::OPERAND_REG_IMM_FP64: 1315 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1316 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1317 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1318 return 8; 1319 1320 case AMDGPU::OPERAND_REG_IMM_INT16: 1321 case AMDGPU::OPERAND_REG_IMM_FP16: 1322 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1323 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1324 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1325 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1326 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1327 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1328 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1329 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1330 case 
AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1331 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1332 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1333 return 2; 1334 1335 default: 1336 llvm_unreachable("unhandled operand type"); 1337 } 1338 } 1339 1340 LLVM_READNONE 1341 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) { 1342 return getOperandSize(Desc.operands()[OpNo]); 1343 } 1344 1345 /// Is this literal inlinable, and not one of the values intended for floating 1346 /// point values. 1347 LLVM_READNONE 1348 inline bool isInlinableIntLiteral(int64_t Literal) { 1349 return Literal >= -16 && Literal <= 64; 1350 } 1351 1352 /// Is this literal inlinable 1353 LLVM_READNONE 1354 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi); 1355 1356 LLVM_READNONE 1357 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi); 1358 1359 LLVM_READNONE 1360 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi); 1361 1362 LLVM_READNONE 1363 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal); 1364 1365 LLVM_READNONE 1366 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal); 1367 1368 LLVM_READNONE 1369 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType); 1370 1371 LLVM_READNONE 1372 bool isInlinableLiteralV2I16(uint32_t Literal); 1373 1374 LLVM_READNONE 1375 bool isInlinableLiteralV2F16(uint32_t Literal); 1376 1377 LLVM_READNONE 1378 bool isValid32BitLiteral(uint64_t Val, bool IsFP64); 1379 1380 bool isArgPassedInSGPR(const Argument *Arg); 1381 1382 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo); 1383 1384 LLVM_READONLY 1385 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, 1386 int64_t EncodedOffset); 1387 1388 LLVM_READONLY 1389 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, 1390 int64_t EncodedOffset, 1391 bool IsBuffer); 1392 1393 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate 1394 /// offsets. 
1395 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset); 1396 1397 /// \returns The encoding that will be used for \p ByteOffset in the 1398 /// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10 1399 /// S_LOAD instructions have a signed offset, on other subtargets it is 1400 /// unsigned. S_BUFFER has an unsigned offset for all subtargets. 1401 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, 1402 int64_t ByteOffset, bool IsBuffer); 1403 1404 /// \return The encoding that can be used for a 32-bit literal offset in an SMRD 1405 /// instruction. This is only useful on CI.s 1406 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, 1407 int64_t ByteOffset); 1408 1409 /// For pre-GFX12 FLAT instructions the offset must be positive; 1410 /// MSB is ignored and forced to zero. 1411 /// 1412 /// \return The number of bits available for the signed offset field in flat 1413 /// instructions. Note that some forms of the instruction disallow negative 1414 /// offsets. 1415 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST); 1416 1417 /// \returns true if this offset is small enough to fit in the SMRD 1418 /// offset field. \p ByteOffset should be the offset in bytes and 1419 /// not the encoded offset. 1420 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset); 1421 1422 LLVM_READNONE 1423 inline bool isLegalDPALU_DPPControl(unsigned DC) { 1424 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST; 1425 } 1426 1427 /// \returns true if an instruction may have a 64-bit VGPR operand. 1428 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc); 1429 1430 /// \returns true if an instruction is a DP ALU DPP. 
bool isDPALU_DPP(const MCInstrDesc &OpDesc);

/// \returns true if the intrinsic identified by \p IntrID is a source of
/// divergence.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

/// \returns true if the intrinsic identified by \p IntrID is always uniform.
bool isIntrinsicAlwaysUniform(unsigned IntrID);

} // end namespace AMDGPU

/// Stream insertion operator for a TargetIDSetting value \p S.
/// NOTE(review): the text written for each enumerator is defined by the
/// implementation, which is not visible in this header - confirm there.
raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H