//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Alignment.h"
#include <array>
#include <functional>
#include <utility>

// Pull in OpName enum definition and getNamedOperandIdx() declaration.
#define GET_INSTRINFO_OPERAND_ENUM
#include "AMDGPUGenInstrInfo.inc"

struct amd_kernel_code_t;

namespace llvm {

struct Align;
class Argument;
class Function;
class GlobalValue;
class MCInstrInfo;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;
class raw_ostream;

namespace AMDGPU {

struct AMDGPUMCKernelCodeT;
struct IsaVersion;

/// Generic target versions emitted by this version of LLVM.
///
/// These numbers are incremented every time a codegen breaking change occurs
/// within a generic family.
namespace GenericVersion {
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX9_4 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX12 = 1;
} // namespace GenericVersion

// Supported AMDHSA code object (ABI) versions.
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };

// Small floating-point types distinguished by the dst-select queries below.
enum class FPType { None, FP4, FP8 };

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI);

/// \returns Code object version from the IR module flag.
unsigned getAMDHSACodeObjectVersion(const Module &M);

/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);

/// \returns The default HSA code object version. This should only be used when
/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
/// flag or a .amdhsa_code_object_version directive)
unsigned getDefaultAMDHSACodeObjectVersion();

/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);

/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);

/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
unsigned getHostcallImplicitArgPosition(unsigned COV);

/// \returns The implicitarg_ptr offset of the default-queue argument for code
/// object version \p COV (by analogy with the queries above).
unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
/// \returns The implicitarg_ptr offset of the completion-action argument for
/// code object version \p COV (by analogy with the queries above).
unsigned getCompletionActionImplicitArgPosition(unsigned COV);

/// One row of the GCN buffer-format table: the packed hardware format value
/// and its per-component decomposition.
struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

/// Per-opcode properties of MAI (matrix) instructions.
struct MAIInstInfo {
  uint16_t Opcode;
  bool is_dgemm;
  bool is_gfx940_xdl;
};

/// Maps an MFMA F8F6F4 opcode to its F8F8 variant, with the number of
/// registers occupied by each source matrix.
struct MFMA_F8F6F4_Info {
  unsigned Opcode;
  unsigned F8F8Opcode;
  uint8_t NumRegsSrcA;
  uint8_t NumRegsSrcB;
};

struct CvtScaleF32_F32F16ToF8F4_Info {
  unsigned Opcode;
};

/// Maps a True16 opcode to its Hi/Lo 16-bit counterpart opcodes.
struct True16D16Info {
  unsigned T16Op;
  unsigned HiOp;
  unsigned LoOp;
};

// Pull in the searchable-table declarations generated by TableGen.
#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#define GET_MIMGBiASMapping_DECL
#define GET_MAIInstInfoTable_DECL
#define GET_isMFMA_F8F6F4Table_DECL
#define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
#define GET_True16D16Table_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

/// Tri-state setting (plus "Unsupported") for target-ID features such as
/// xnack and sramecc.
enum class TargetIDSetting { Unsupported, Any, Off, On };

/// Tracks the xnack and sramecc components of an AMDGPU target ID string
/// for a given subtarget.
class AMDGPUTargetID {
private:
  const MCSubtargetInfo &STI;
  TargetIDSetting XnackSetting;
  TargetIDSetting SramEccSetting;

public:
  explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
  ~AMDGPUTargetID() = default;

  /// \return True if the current xnack setting is not "Unsupported".
  bool isXnackSupported() const {
    return XnackSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current xnack setting is "On" or "Any".
  bool isXnackOnOrAny() const {
    return XnackSetting == TargetIDSetting::On ||
           XnackSetting == TargetIDSetting::Any;
  }

  /// \returns True if current xnack setting is "On" or "Off",
  /// false otherwise.
  bool isXnackOnOrOff() const {
    return getXnackSetting() == TargetIDSetting::On ||
           getXnackSetting() == TargetIDSetting::Off;
  }

  /// \returns The current xnack TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getXnackSetting() const { return XnackSetting; }

  /// Sets xnack setting to \p NewXnackSetting.
  void setXnackSetting(TargetIDSetting NewXnackSetting) {
    XnackSetting = NewXnackSetting;
  }

  /// \return True if the current sramecc setting is not "Unsupported".
  bool isSramEccSupported() const {
    return SramEccSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current sramecc setting is "On" or "Any".
  bool isSramEccOnOrAny() const {
    return SramEccSetting == TargetIDSetting::On ||
           SramEccSetting == TargetIDSetting::Any;
  }

  /// \returns True if current sramecc setting is "On" or "Off",
  /// false otherwise.
  bool isSramEccOnOrOff() const {
    return getSramEccSetting() == TargetIDSetting::On ||
           getSramEccSetting() == TargetIDSetting::Off;
  }

  /// \returns The current sramecc TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getSramEccSetting() const { return SramEccSetting; }

  /// Sets sramecc setting to \p NewSramEccSetting.
  void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
    SramEccSetting = NewSramEccSetting;
  }

  void setTargetIDFromFeaturesString(StringRef FS);
  void setTargetIDFromTargetIDStream(StringRef TargetID);

  /// \returns String representation of an object.
  std::string toString() const;
};

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Maximum addressable local memory size in bytes for given subtarget
/// \p STI.
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given \p
/// FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned
getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize,
                    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(
    const MCSubtargetInfo *STI,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage,
/// returns the allocation granule for ArchVGPRs.
unsigned getArchVGPRAllocGranule();

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of architectural VGPRs for a given subtarget \p
/// STI.
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
                                unsigned DynamicVGPRBlockSize);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize);

/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
/// subtarget \p STI.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs,
                                      unsigned DynamicVGPRBlockSize);

/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs);

/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
/// Gen.
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
                                  AMDGPUSubtarget::Generation Gen);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used. We actually return the number of blocks -1, since
/// that's what we encode.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getEncodedNumVGPRBlocks(
    const MCSubtargetInfo *STI, unsigned NumVGPRs,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns Number of VGPR blocks that need to be allocated for the given
/// subtarget \p STI when \p NumVGPRs are used.
unsigned getAllocatedNumVGPRBlocks(
    const MCSubtargetInfo *STI, unsigned NumVGPRs,
    unsigned DynamicVGPRBlockSize,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

} // end namespace IsaInfo

// Represents a field in an encoded value.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");
  static constexpr unsigned Offset = LowBit;
  static constexpr unsigned Width = HighBit - LowBit + 1;

  using ValueType = unsigned;
  static constexpr ValueType Default = D;

  ValueType Value;
  constexpr EncodingField(ValueType Value) : Value(Value) {}

  constexpr uint64_t encode() const { return Value; }
  static ValueType decode(uint64_t Encoded) { return Encoded; }
};

// Represents a single bit in an encoded value.
template <unsigned Bit, unsigned D = 0>
using EncodingBit = EncodingField<Bit, Bit, D>;

// A helper for encoding and decoding multiple fields.
template <typename... Fields> struct EncodingFields {
  // OR together each field's value shifted into its bit position.
  static constexpr uint64_t encode(Fields... Values) {
    return ((Values.encode() << Values.Offset) | ...);
  }

  // Extract each field by shifting and masking to its declared width.
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};

/// \returns True if instruction \p Opcode has an operand named \p NamedIdx.
LLVM_READONLY
inline bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx) {
  return getNamedOperandIdx(Opcode, NamedIdx) != -1;
}

LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
  bool A16;
  bool NoReturn;
  bool PointSampleAccel;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};

struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);

LLVM_READONLY
const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

LLVM_READONLY
bool getMUBUFTfe(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);

LLVM_READONLY
bool isVOPC64DPP(unsigned Opc);

LLVM_READONLY
bool isVOPCAsmOnly(unsigned Opc);

/// Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY
bool getMAIIsDGEMM(unsigned Opc);

LLVM_READONLY
bool getMAIIsGFX940XDL(unsigned Opc);

// Get an equivalent BitOp3 for a binary logical \p Opc.
// \returns BitOp3 modifier for the logical operation or zero.
// Used in VOPD3 conversion.
unsigned getBitOp2(unsigned Opc);

// Whether an opcode may appear as the X and/or Y component of a VOPD pair.
struct CanBeVOPD {
  bool X;
  bool Y;
};

/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
LLVM_READONLY
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);

LLVM_READONLY
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3);

LLVM_READNONE
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);

LLVM_READONLY
const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
                                                      unsigned BLGP,
                                                      unsigned F8F8Opcode);

LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

LLVM_READONLY
unsigned getVOPDOpcode(unsigned Opc, bool VOPD3);

LLVM_READONLY
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
                bool VOPD3);

LLVM_READONLY
bool isVOPD(unsigned Opc);

LLVM_READNONE
bool isMAC(unsigned Opc);

LLVM_READNONE
bool isPermlane16(unsigned Opc);

LLVM_READNONE
bool isGenericAtomic(unsigned Opc);

LLVM_READNONE
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);

namespace VOPD {

// Component operand indices within a VOPD component, plus derived counts.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,
  MAX_SRC_NUM = 3,
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};

// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
constexpr unsigned VOPD3_VGPR_BANK_MASKS[] = {1, 3, 3, 3};

enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;

// Properties of VOPD components.
class ComponentProps {
private:
  unsigned SrcOperandsNum = 0;
  // ~0u acts as the "no mandatory literal" sentinel (see hasMandatoryLiteral).
  unsigned MandatoryLiteralIdx = ~0u;
  bool HasSrc2Acc = false;
  unsigned NumVOPD3Mods = 0;
  unsigned Opcode = 0;
  bool IsVOP3 = false;

public:
  ComponentProps() = default;
  ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout = false);

  // Return the total number of src operands this component has.
  unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }

  // Return the number of src operands of this component visible to the parser.
  unsigned getCompParsedSrcOperandsNum() const {
    return SrcOperandsNum - HasSrc2Acc;
  }

  // Return true iif this component has a mandatory literal.
  bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }

  // If this component has a mandatory literal, return component operand
  // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
  unsigned getMandatoryLiteralCompOperandIndex() const {
    assert(hasMandatoryLiteral());
    return MandatoryLiteralIdx;
  }

  // Return true iif this component has operand
  // with component index CompSrcIdx and this operand may be a register.
  bool hasRegSrcOperand(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
  }

  // Return true iif this component has tied src2.
  bool hasSrc2Acc() const { return HasSrc2Acc; }

  // Return a number of source modifiers if instruction is used in VOPD3.
  unsigned getCompVOPD3ModsNum() const { return NumVOPD3Mods; }

  // Return opcode of the component.
  unsigned getOpcode() const { return Opcode; }

  // Returns if component opcode is in VOP3 encoding.
  // NOTE(review): returns the bool member IsVOP3 as unsigned; a bool return
  // type looks intended -- confirm with callers before changing.
  unsigned isVOP3() const { return IsVOP3; }

  // Return index of BitOp3 operand or -1.
  int getBitOp3OperandIdx() const;

private:
  // True iif the mandatory literal occupies component src index CompSrcIdx
  // (component operand index = DST_NUM + CompSrcIdx).
  bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
  }
};

enum ComponentKind : unsigned {
  SINGLE = 0,  // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};

// Interface functions of this class map VOPD component operand indices
// to indices of operands in MachineInstr/MCInst or parsed operands array.
//
// Note that this class operates with 3 kinds of indices:
// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
// - MC operand indices (they refer operands in a MachineInstr/MCInst);
// - parsed operand indices (they refer operands in parsed operands array).
//
// For SINGLE components mapping between these indices is trivial.
// But things get more complicated for COMPONENT_X and
// COMPONENT_Y because these components share the same
// MachineInstr/MCInst and the same parsed operands array.
// Below is an example of component operand to parsed operand
// mapping for the following instruction:
//
//   v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
//
//                          PARSED        COMPONENT         PARSED
// COMPONENT               OPERANDS     OPERAND INDEX    OPERAND INDEX
// -------------------------------------------------------------------
//                     "v_dual_add_f32"                        0
// v_dual_add_f32            v255         0 (DST)    -->       1
//                           v4           1 (SRC0)   -->       2
//                           v5           2 (SRC1)   -->       3
//                          "::"                               4
//                     "v_dual_mov_b32"                        5
// v_dual_mov_b32            v6           0 (DST)    -->       6
//                           v1           1 (SRC0)   -->       7
// -------------------------------------------------------------------
//
class ComponentLayout {
private:
  // Regular MachineInstr/MCInst operands are ordered as follows:
  //   dst, src0 [, other src operands]
  // VOPD MachineInstr/MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};

  // VOPD3 instructions may have 2 or 3 source modifiers, src2 modifier is not
  // used if there is tied accumulator. Indexing of this array:
  // MC_SRC_IDX[VOPD3ModsNum][SrcNo]. This returns an index for a SINGLE
  // instruction layout, add 1 for COMPONENT_X or COMPONENT_Y. For the second
  // component add OpX.MCSrcNum + OpX.VOPD3ModsNum.
  // For VOPD1/VOPD2 use column with zero modifiers.
  static constexpr unsigned SINGLE_MC_SRC_IDX[4][3] = {
      {1, 2, 3}, {2, 3, 4}, {2, 4, 5}, {2, 4, 6}};

  // Parsed operands of regular instructions are ordered as follows:
  //   Mnemo dst src0 [vsrc1 ...]
  // Parsed VOPD operands are ordered as follows:
  //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
  //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
                                                4 /* + OpX.ParsedSrcNum */};
  static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
      2, 2, 5 /* + OpX.ParsedSrcNum */};

private:
  const ComponentKind Kind;
  const ComponentProps PrevComp; // Properties of OpX; default for SINGLE/X.
  const unsigned VOPD3ModsNum;
  const int BitOp3Idx; // Index of bitop3 operand or -1

public:
  // Create layout for COMPONENT_X or SINGLE component.
  ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
      : Kind(Kind), VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {
    assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
  }

  // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum,
                  int BitOp3Idx)
      : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps),
        VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {}

public:
  // Return the index of dst operand in MCInst operands.
  unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }

  // Return the index of the specified src operand in MCInst operands.
  unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);

    // A SINGLE instruction's src2 slot may actually hold the bitop3 operand.
    if (Kind == SINGLE && CompSrcIdx == 2 && BitOp3Idx != -1)
      return BitOp3Idx;

    if (VOPD3) {
      return SINGLE_MC_SRC_IDX[VOPD3ModsNum][CompSrcIdx] + getPrevCompSrcNum() +
             getPrevCompVOPD3ModsNum() + (Kind != SINGLE ? 1 : 0);
    }

    return SINGLE_MC_SRC_IDX[0][CompSrcIdx] + getPrevCompSrcNum() +
           (Kind != SINGLE ? 1 : 0);
  }

  // Return the index of dst operand in the parsed operands array.
  unsigned getIndexOfDstInParsedOperands() const {
    return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
  }

  // Return the index of the specified src operand in the parsed operands array.
  unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
  }

private:
  // The helpers below return 0 for SINGLE/COMPONENT_X (PrevComp is
  // default-constructed in that case).
  unsigned getPrevCompSrcNum() const {
    return PrevComp.getCompSrcOperandsNum();
  }
  unsigned getPrevCompParsedSrcNum() const {
    return PrevComp.getCompParsedSrcOperandsNum();
  }
  unsigned getPrevCompVOPD3ModsNum() const {
    return PrevComp.getCompVOPD3ModsNum();
  }
};

// Layout and properties of VOPD components.
class ComponentInfo : public ComponentProps, public ComponentLayout {
public:
  // Create ComponentInfo for COMPONENT_X or SINGLE component.
  ComponentInfo(const MCInstrDesc &OpDesc,
                ComponentKind Kind = ComponentKind::SINGLE,
                bool VOP3Layout = false)
      : ComponentProps(OpDesc, VOP3Layout),
        ComponentLayout(Kind, getCompVOPD3ModsNum(), getBitOp3OperandIdx()) {}

  // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps,
                bool VOP3Layout = false)
      : ComponentProps(OpDesc, VOP3Layout),
        ComponentLayout(OpXProps, getCompVOPD3ModsNum(),
                        getBitOp3OperandIdx()) {}

  // Map component operand index to parsed operand index.
  // Return 0 if the specified operand does not exist.
  unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
};

// Properties of VOPD instructions.
class InstInfo {
private:
  // Component X at index 0, component Y at index 1.
  const ComponentInfo CompInfo[COMPONENTS_NUM];

public:
  using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;

  InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
      : CompInfo{OpX, OpY} {}

  InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
      : CompInfo{OprInfoX, OprInfoY} {}

  // Access the X (0) or Y (1) component's info.
  const ComponentInfo &operator[](size_t ComponentIdx) const {
    assert(ComponentIdx < COMPONENTS_NUM);
    return CompInfo[ComponentIdx];
  }

  // Check VOPD operands constraints.
  // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
  // for the specified component and MC operand. The callback must return 0
  // if the operand is not a register or not a VGPR.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked.
  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
  // even though it violates requirement to be from different banks.
  // If \p VOPD3 is set to true both dst registers allowed to be either odd
  // or even and instruction may have real src2 as opposed to tied accumulator.
  bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
                         const MCRegisterInfo &MRI, bool SkipSrc = false,
                         bool AllowSameVGPR = false, bool VOPD3 = false) const {
    return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR,
                                      VOPD3)
        .has_value();
  }

  // Check VOPD operands constraints.
  // Return the index of an invalid component operand, if any.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked except for being from the same halves of VGPR file on gfx1250.
  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
  // even though it violates requirement to be from different banks.
  // If \p VOPD3 is set to true both dst registers allowed to be either odd
  // or even and instruction may have real src2 as opposed to tied accumulator.
  std::optional<unsigned> getInvalidCompOperandIndex(
      std::function<unsigned(unsigned, unsigned)> GetRegIdx,
      const MCRegisterInfo &MRI, bool SkipSrc = false,
      bool AllowSameVGPR = false, bool VOPD3 = false) const;

private:
  // Collect the VGPR indices (via \p GetRegIdx) for one component's operands.
  RegIndices
  getRegIndices(unsigned ComponentIdx,
                std::function<unsigned(unsigned, unsigned)> GetRegIdx,
                bool VOPD3) const;
};

} // namespace VOPD

LLVM_READONLY
std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);

LLVM_READONLY
// Get properties of 2 single VOP1/VOP2 instructions
// used as components to create a VOPD instruction.
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);

LLVM_READONLY
// Get properties of VOPD X and Y components.
VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo);

LLVM_READONLY
bool isAsyncStore(unsigned Opc);
LLVM_READONLY
bool isTensorStore(unsigned Opc);
LLVM_READONLY
// NOTE(review): takes MCInstrDesc by value; sibling APIs take const& —
// confirm against the out-of-line definition before changing.
unsigned getTemporalHintType(const MCInstrDesc TID);

LLVM_READONLY
bool isTrue16Inst(unsigned Opc);

LLVM_READONLY
FPType getFPDstSelType(unsigned Opc);

LLVM_READONLY
bool isInvalidSingleUseConsumerInst(unsigned Opc);

LLVM_READONLY
bool isInvalidSingleUseProducerInst(unsigned Opc);

bool isDPMACCInstruction(unsigned Opc);

LLVM_READONLY
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);

LLVM_READONLY
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
                               const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired = false);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p std::nullopt if attribute is not present.
///
/// \returns \p std::nullopt and emits error if one of the requested values
/// cannot be converted to integer, or \p OnlyFirstRequired is false and
/// "second" value is not present.
std::optional<std::pair<unsigned, std::optional<unsigned>>>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        bool OnlyFirstRequired = false);

/// \returns Generate a vector of integer values requested using \p F's \p Name
/// attribute.
1035 /// \returns A vector of size \p Size, with all elements set to \p DefaultVal, 1036 /// if any error occurs. The corresponding error will also be emitted. 1037 SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name, 1038 unsigned Size, 1039 unsigned DefaultVal); 1040 /// Similar to the function above, but returns std::nullopt if any error occurs. 1041 std::optional<SmallVector<unsigned>> 1042 getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size); 1043 1044 /// Represents the counter values to wait for in an s_waitcnt instruction. 1045 /// 1046 /// Large values (including the maximum possible integer) can be used to 1047 /// represent "don't care" waits. 1048 struct Waitcnt { 1049 unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12. 1050 unsigned ExpCnt = ~0u; 1051 unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12. 1052 unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11. 1053 unsigned SampleCnt = ~0u; // gfx12+ only. 1054 unsigned BvhCnt = ~0u; // gfx12+ only. 1055 unsigned KmCnt = ~0u; // gfx12+ only. 1056 unsigned XCnt = ~0u; // gfx1250. 1057 1058 Waitcnt() = default; 1059 // Pre-gfx12 constructor. 1060 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) 1061 : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {} 1062 1063 // gfx12+ constructor. 
1064 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, 1065 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt) 1066 : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt), 1067 SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {} 1068 1069 bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); } 1070 1071 bool hasWaitExceptStoreCnt() const { 1072 return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u || 1073 SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u; 1074 } 1075 1076 bool hasWaitStoreCnt() const { return StoreCnt != ~0u; } 1077 1078 Waitcnt combined(const Waitcnt &Other) const { 1079 // Does the right thing provided self and Other are either both pre-gfx12 1080 // or both gfx12+. 1081 return Waitcnt( 1082 std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt), 1083 std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt), 1084 std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt), 1085 std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt)); 1086 } 1087 }; 1088 1089 // The following methods are only meaningful on targets that support 1090 // S_WAITCNT. 1091 1092 /// \returns Vmcnt bit mask for given isa \p Version. 1093 unsigned getVmcntBitMask(const IsaVersion &Version); 1094 1095 /// \returns Expcnt bit mask for given isa \p Version. 1096 unsigned getExpcntBitMask(const IsaVersion &Version); 1097 1098 /// \returns Lgkmcnt bit mask for given isa \p Version. 1099 unsigned getLgkmcntBitMask(const IsaVersion &Version); 1100 1101 /// \returns Waitcnt bit mask for given isa \p Version. 1102 unsigned getWaitcntBitMask(const IsaVersion &Version); 1103 1104 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. 1105 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); 1106 1107 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. 
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
/// which needs it is deprecated.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
///
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
                   unsigned &Expcnt, unsigned &Lgkmcnt);

/// \returns Decoded Waitcnt structure from the legacy \p Encoded immediate for
/// the given isa \p Version.
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version. Should not be used on gfx12+, the instruction which needs
/// it is deprecated.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11)
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
///     Waitcnt[15:10] = \p Vmcnt       (gfx11)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
///
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
                       unsigned Expcnt, unsigned Lgkmcnt);

/// \returns the legacy Waitcnt immediate encoding of \p Decoded for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

// The following methods are only meaningful on targets that support
// S_WAIT_*CNT, introduced with gfx12.

/// \returns Loadcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support LOADcnt
unsigned getLoadcntBitMask(const IsaVersion &Version);

/// \returns Samplecnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support SAMPLEcnt
unsigned getSamplecntBitMask(const IsaVersion &Version);

/// \returns Bvhcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support BVHcnt
unsigned getBvhcntBitMask(const IsaVersion &Version);

/// \returns Dscnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support DScnt
unsigned getDscntBitMask(const IsaVersion &Version);

/// \returns Kmcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support KMcnt
unsigned getKmcntBitMask(const IsaVersion &Version);

/// \returns Xcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support Xcnt.
unsigned getXcntBitMask(const IsaVersion &Version);

/// \return STOREcnt or VScnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support STOREcnt or VScnt.
/// STOREcnt and VScnt are the same counter, the name used
/// depends on the ISA version.
unsigned getStorecntBitMask(const IsaVersion &Version);

// The following are only meaningful on targets that support
// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.

/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
/// isa \p Version.
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);

/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
/// isa \p Version.
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);

/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
/// \p Version.
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);

/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
/// \p Version.
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);

namespace Hwreg {

// Hardware register immediate fields: id, offset, and size.
using HwregId = EncodingField<5, 0>;
using HwregOffset = EncodingField<10, 6>;

// The size field is stored biased by one: an encoded value of 0 means a size
// of 1 (encode() subtracts 1, decode() adds it back).
struct HwregSize : EncodingField<15, 11, 32> {
  using EncodingField::EncodingField;
  constexpr uint64_t encode() const { return Value - 1; }
  static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
};

using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;

} // namespace Hwreg

namespace DepCtr {

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI);
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI);
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI);

/// \returns Decoded VaVdst from given immediate \p Encoded.
unsigned decodeFieldVaVdst(unsigned Encoded);

/// \returns Decoded VmVsrc from given immediate \p Encoded.
unsigned decodeFieldVmVsrc(unsigned Encoded);

/// \returns Decoded SaSdst from given immediate \p Encoded.
unsigned decodeFieldSaSdst(unsigned Encoded);

/// \returns Decoded VaSdst from given immediate \p Encoded.
unsigned decodeFieldVaSdst(unsigned Encoded);

/// \returns Decoded VaVcc from given immediate \p Encoded.
unsigned decodeFieldVaVcc(unsigned Encoded);

/// \returns Decoded VaSsrc from given immediate \p Encoded.
unsigned decodeFieldVaSsrc(unsigned Encoded);

/// \returns Decoded HoldCnt from given immediate \p Encoded.
unsigned decodeFieldHoldCnt(unsigned Encoded);

/// \returns \p VmVsrc as an encoded Depctr immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc);

/// \returns \p Encoded combined with encoded \p VmVsrc.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);

/// \returns \p VaVdst as an encoded Depctr immediate.
unsigned encodeFieldVaVdst(unsigned VaVdst);

/// \returns \p Encoded combined with encoded \p VaVdst.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);

/// \returns \p SaSdst as an encoded Depctr immediate.
unsigned encodeFieldSaSdst(unsigned SaSdst);

/// \returns \p Encoded combined with encoded \p SaSdst.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);

/// \returns \p VaSdst as an encoded Depctr immediate.
unsigned encodeFieldVaSdst(unsigned VaSdst);

/// \returns \p Encoded combined with encoded \p VaSdst.
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst);

/// \returns \p VaVcc as an encoded Depctr immediate.
unsigned encodeFieldVaVcc(unsigned VaVcc);

/// \returns \p Encoded combined with encoded \p VaVcc.
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);

/// \returns \p HoldCnt as an encoded Depctr immediate.
unsigned encodeFieldHoldCnt(unsigned HoldCnt);

/// \returns \p Encoded combined with encoded \p HoldCnt.
// NOTE(review): parameter order (HoldCnt, Encoded) is reversed relative to
// the other two-argument encodeField* overloads above, which take
// (Encoded, Field). Callers must mind the order; consider aligning it in a
// follow-up that updates the definition and all call sites.
unsigned encodeFieldHoldCnt(unsigned HoldCnt, unsigned Encoded);

/// \returns \p VaSsrc as an encoded Depctr immediate.
unsigned encodeFieldVaSsrc(unsigned VaSsrc);

/// \returns \p Encoded combined with encoded \p VaSsrc.
1308 unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc); 1309 1310 } // namespace DepCtr 1311 1312 namespace Exp { 1313 1314 bool getTgtName(unsigned Id, StringRef &Name, int &Index); 1315 1316 LLVM_READONLY 1317 unsigned getTgtId(const StringRef Name); 1318 1319 LLVM_READNONE 1320 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI); 1321 1322 } // namespace Exp 1323 1324 namespace MTBUFFormat { 1325 1326 LLVM_READNONE 1327 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt); 1328 1329 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt); 1330 1331 int64_t getDfmt(const StringRef Name); 1332 1333 StringRef getDfmtName(unsigned Id); 1334 1335 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI); 1336 1337 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI); 1338 1339 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI); 1340 1341 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI); 1342 1343 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI); 1344 1345 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI); 1346 1347 bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI); 1348 1349 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, 1350 const MCSubtargetInfo &STI); 1351 1352 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI); 1353 1354 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI); 1355 1356 } // namespace MTBUFFormat 1357 1358 namespace SendMsg { 1359 1360 LLVM_READNONE 1361 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI); 1362 1363 LLVM_READNONE 1364 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, 1365 bool Strict = true); 1366 1367 LLVM_READNONE 1368 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, 1369 const MCSubtargetInfo &STI, bool Strict = true); 1370 1371 LLVM_READNONE 1372 bool msgRequiresOp(int64_t MsgId, const 
MCSubtargetInfo &STI); 1373 1374 LLVM_READNONE 1375 bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI); 1376 1377 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, 1378 uint16_t &StreamId, const MCSubtargetInfo &STI); 1379 1380 LLVM_READNONE 1381 uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId); 1382 1383 } // namespace SendMsg 1384 1385 unsigned getInitialPSInputAddr(const Function &F); 1386 1387 bool getHasColorExport(const Function &F); 1388 1389 bool getHasDepthExport(const Function &F); 1390 1391 bool hasDynamicVGPR(const Function &F); 1392 1393 // Returns the value of the "amdgpu-dynamic-vgpr-block-size" attribute, or 0 if 1394 // the attribute is missing or its value is invalid. 1395 unsigned getDynamicVGPRBlockSize(const Function &F); 1396 1397 LLVM_READNONE 1398 constexpr bool isShader(CallingConv::ID CC) { 1399 switch (CC) { 1400 case CallingConv::AMDGPU_VS: 1401 case CallingConv::AMDGPU_LS: 1402 case CallingConv::AMDGPU_HS: 1403 case CallingConv::AMDGPU_ES: 1404 case CallingConv::AMDGPU_GS: 1405 case CallingConv::AMDGPU_PS: 1406 case CallingConv::AMDGPU_CS_Chain: 1407 case CallingConv::AMDGPU_CS_ChainPreserve: 1408 case CallingConv::AMDGPU_CS: 1409 return true; 1410 default: 1411 return false; 1412 } 1413 } 1414 1415 LLVM_READNONE 1416 constexpr bool isGraphics(CallingConv::ID CC) { 1417 return isShader(CC) || CC == CallingConv::AMDGPU_Gfx; 1418 } 1419 1420 LLVM_READNONE 1421 constexpr bool isCompute(CallingConv::ID CC) { 1422 return !isGraphics(CC) || CC == CallingConv::AMDGPU_CS; 1423 } 1424 1425 LLVM_READNONE 1426 constexpr bool isEntryFunctionCC(CallingConv::ID CC) { 1427 switch (CC) { 1428 case CallingConv::AMDGPU_KERNEL: 1429 case CallingConv::SPIR_KERNEL: 1430 case CallingConv::AMDGPU_VS: 1431 case CallingConv::AMDGPU_GS: 1432 case CallingConv::AMDGPU_PS: 1433 case CallingConv::AMDGPU_CS: 1434 case CallingConv::AMDGPU_ES: 1435 case CallingConv::AMDGPU_HS: 1436 case 
CallingConv::AMDGPU_LS: 1437 return true; 1438 default: 1439 return false; 1440 } 1441 } 1442 1443 LLVM_READNONE 1444 constexpr bool isChainCC(CallingConv::ID CC) { 1445 switch (CC) { 1446 case CallingConv::AMDGPU_CS_Chain: 1447 case CallingConv::AMDGPU_CS_ChainPreserve: 1448 return true; 1449 default: 1450 return false; 1451 } 1452 } 1453 1454 // These functions are considered entrypoints into the current module, i.e. they 1455 // are allowed to be called from outside the current module. This is different 1456 // from isEntryFunctionCC, which is only true for functions that are entered by 1457 // the hardware. Module entry points include all entry functions but also 1458 // include functions that can be called from other functions inside or outside 1459 // the current module. Module entry functions are allowed to allocate LDS. 1460 LLVM_READNONE 1461 constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC) { 1462 switch (CC) { 1463 case CallingConv::AMDGPU_Gfx: 1464 return true; 1465 default: 1466 return isEntryFunctionCC(CC) || isChainCC(CC); 1467 } 1468 } 1469 1470 LLVM_READNONE 1471 constexpr inline bool isKernel(CallingConv::ID CC) { 1472 switch (CC) { 1473 case CallingConv::AMDGPU_KERNEL: 1474 case CallingConv::SPIR_KERNEL: 1475 return true; 1476 default: 1477 return false; 1478 } 1479 } 1480 1481 LLVM_READNONE 1482 constexpr bool canGuaranteeTCO(CallingConv::ID CC) { 1483 return CC == CallingConv::Fast; 1484 } 1485 1486 /// Return true if we might ever do TCO for calls with this calling convention. 
1487 LLVM_READNONE 1488 constexpr bool mayTailCallThisCC(CallingConv::ID CC) { 1489 switch (CC) { 1490 case CallingConv::C: 1491 case CallingConv::AMDGPU_Gfx: 1492 return true; 1493 default: 1494 return canGuaranteeTCO(CC); 1495 } 1496 } 1497 1498 bool hasXNACK(const MCSubtargetInfo &STI); 1499 bool hasSRAMECC(const MCSubtargetInfo &STI); 1500 bool hasMIMG_R128(const MCSubtargetInfo &STI); 1501 bool hasA16(const MCSubtargetInfo &STI); 1502 bool hasG16(const MCSubtargetInfo &STI); 1503 bool hasPackedD16(const MCSubtargetInfo &STI); 1504 bool hasGDS(const MCSubtargetInfo &STI); 1505 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false); 1506 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI); 1507 1508 bool isSI(const MCSubtargetInfo &STI); 1509 bool isCI(const MCSubtargetInfo &STI); 1510 bool isVI(const MCSubtargetInfo &STI); 1511 bool isGFX9(const MCSubtargetInfo &STI); 1512 bool isGFX9_GFX10(const MCSubtargetInfo &STI); 1513 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI); 1514 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI); 1515 bool isGFX8Plus(const MCSubtargetInfo &STI); 1516 bool isGFX9Plus(const MCSubtargetInfo &STI); 1517 bool isNotGFX9Plus(const MCSubtargetInfo &STI); 1518 bool isGFX10(const MCSubtargetInfo &STI); 1519 bool isGFX10_GFX11(const MCSubtargetInfo &STI); 1520 bool isGFX10Plus(const MCSubtargetInfo &STI); 1521 bool isNotGFX10Plus(const MCSubtargetInfo &STI); 1522 bool isGFX10Before1030(const MCSubtargetInfo &STI); 1523 bool isGFX11(const MCSubtargetInfo &STI); 1524 bool isGFX11Plus(const MCSubtargetInfo &STI); 1525 bool isGFX12(const MCSubtargetInfo &STI); 1526 bool isGFX12Plus(const MCSubtargetInfo &STI); 1527 bool isGFX1250(const MCSubtargetInfo &STI); 1528 bool isNotGFX12Plus(const MCSubtargetInfo &STI); 1529 bool isNotGFX11Plus(const MCSubtargetInfo &STI); 1530 bool isGCN3Encoding(const MCSubtargetInfo &STI); 1531 bool isGFX10_AEncoding(const MCSubtargetInfo &STI); 1532 bool isGFX10_BEncoding(const 
MCSubtargetInfo &STI); 1533 bool hasGFX10_3Insts(const MCSubtargetInfo &STI); 1534 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI); 1535 bool isGFX90A(const MCSubtargetInfo &STI); 1536 bool isGFX940(const MCSubtargetInfo &STI); 1537 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); 1538 bool hasMAIInsts(const MCSubtargetInfo &STI); 1539 bool hasVOPD(const MCSubtargetInfo &STI); 1540 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI); 1541 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR); 1542 unsigned hasKernargPreload(const MCSubtargetInfo &STI); 1543 bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST); 1544 1545 /// Is Reg - scalar register 1546 bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI); 1547 1548 /// \returns if \p Reg occupies the high 16-bits of a 32-bit register. 1549 bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI); 1550 1551 /// If \p Reg is a pseudo reg, return the correct hardware register given 1552 /// \p STI otherwise return \p Reg. 1553 MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI); 1554 1555 /// Convert hardware register \p Reg to a pseudo register 1556 LLVM_READNONE 1557 MCRegister mc2PseudoReg(MCRegister Reg); 1558 1559 LLVM_READNONE 1560 bool isInlineValue(unsigned Reg); 1561 1562 /// Is this an AMDGPU specific source operand? These include registers, 1563 /// inline constants, literals and mandatory literals (KImm). 1564 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo); 1565 1566 /// Is this a KImm operand? 1567 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo); 1568 1569 /// Is this floating-point operand? 1570 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo); 1571 1572 /// Does this operand support only inlinable literals? 1573 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo); 1574 1575 /// Get the size in bits of a register from the register class \p RC. 
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get size of register operand
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

/// \returns the size, in bytes, of the immediate encoding for an operand of
/// the given operand type (4, 8 or 2).
LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
  case AMDGPU::OPERAND_KIMM64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

/// Convenience overload: size of operand \p OpNo of \p Desc.
LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.operands()[OpNo]);
}

/// Is this literal inlinable, and not one of the values intended for floating
/// point values.
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}

/// Is this literal inlinable
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);

// NOTE(review): duplicate declaration of isInlinableLiteralBF16 (also declared
// above) — harmless but could be removed.
LLVM_READNONE
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);

LLVM_READNONE
bool isInlinableLiteralV2I16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV2BF16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV2F16(uint32_t Literal);

LLVM_READNONE
bool isValid32BitLiteral(uint64_t Val, bool IsFP64);

bool isArgPassedInSGPR(const Argument *Arg);

bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);

LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset, bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset = false);

/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset);

/// For pre-GFX12 FLAT instructions the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the signed offset field in flat
/// instructions. Note that some forms of the instruction disallow negative
/// offsets.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

/// \returns true if \p DC is a DPP control usable by double-precision /
/// full-rate-64 ALU instructions (ROW_NEWBCAST range).
LLVM_READNONE
inline bool isLegalDPALU_DPPControl(unsigned DC) {
  return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
}

/// \returns true if an instruction may have a 64-bit VGPR operand.
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);

/// \returns true if an instruction is a DP ALU DPP.
bool isDPALU_DPP(const MCInstrDesc &OpDesc);

/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

/// \returns true if the intrinsic is uniform
bool isIntrinsicAlwaysUniform(unsigned IntrID);

/// \returns lds block size in terms of dwords.
/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
/// must be defined in terms of bytes.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);

} // end namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H