1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H 10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H 11 12 #include "SIDefines.h" 13 #include "llvm/ADT/FloatingPointMode.h" 14 #include "llvm/IR/CallingConv.h" 15 #include "llvm/Support/Alignment.h" 16 #include <array> 17 #include <functional> 18 #include <utility> 19 20 struct amd_kernel_code_t; 21 22 namespace llvm { 23 24 struct Align; 25 class Argument; 26 class Function; 27 class GCNSubtarget; 28 class GlobalValue; 29 class MCInstrInfo; 30 class MCRegisterClass; 31 class MCRegisterInfo; 32 class MCSubtargetInfo; 33 class StringRef; 34 class Triple; 35 class raw_ostream; 36 37 namespace amdhsa { 38 struct kernel_descriptor_t; 39 } 40 41 namespace AMDGPU { 42 43 struct IsaVersion; 44 45 /// \returns HSA OS ABI Version identification. 46 std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI); 47 /// \returns True if HSA OS ABI Version identification is 2, 48 /// false otherwise. 49 bool isHsaAbiVersion2(const MCSubtargetInfo *STI); 50 /// \returns True if HSA OS ABI Version identification is 3, 51 /// false otherwise. 52 bool isHsaAbiVersion3(const MCSubtargetInfo *STI); 53 /// \returns True if HSA OS ABI Version identification is 4, 54 /// false otherwise. 55 bool isHsaAbiVersion4(const MCSubtargetInfo *STI); 56 /// \returns True if HSA OS ABI Version identification is 5, 57 /// false otherwise. 58 bool isHsaAbiVersion5(const MCSubtargetInfo *STI); 59 /// \returns True if HSA OS ABI Version identification is 3 and above, 60 /// false otherwise. 
61 bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI); 62 63 /// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr 64 unsigned getMultigridSyncArgImplicitArgPosition(); 65 66 /// \returns The offset of the hostcall pointer argument from implicitarg_ptr 67 unsigned getHostcallImplicitArgPosition(); 68 69 unsigned getDefaultQueueImplicitArgPosition(); 70 unsigned getCompletionActionImplicitArgPosition(); 71 72 /// \returns Code object version. 73 unsigned getAmdhsaCodeObjectVersion(); 74 75 struct GcnBufferFormatInfo { 76 unsigned Format; 77 unsigned BitsPerComp; 78 unsigned NumComponents; 79 unsigned NumFormat; 80 unsigned DataFormat; 81 }; 82 83 struct MAIInstInfo { 84 uint16_t Opcode; 85 bool is_dgemm; 86 bool is_gfx940_xdl; 87 }; 88 89 #define GET_MIMGBaseOpcode_DECL 90 #define GET_MIMGDim_DECL 91 #define GET_MIMGEncoding_DECL 92 #define GET_MIMGLZMapping_DECL 93 #define GET_MIMGMIPMapping_DECL 94 #define GET_MIMGBiASMapping_DECL 95 #define GET_MAIInstInfoTable_DECL 96 #include "AMDGPUGenSearchableTables.inc" 97 98 namespace IsaInfo { 99 100 enum { 101 // The closed Vulkan driver sets 96, which limits the wave count to 8 but 102 // doesn't spill SGPRs as much as when 80 is set. 103 FIXED_NUM_SGPRS_FOR_INIT_BUG = 96, 104 TRAP_NUM_SGPRS = 16 105 }; 106 107 enum class TargetIDSetting { 108 Unsupported, 109 Any, 110 Off, 111 On 112 }; 113 114 class AMDGPUTargetID { 115 private: 116 const MCSubtargetInfo &STI; 117 TargetIDSetting XnackSetting; 118 TargetIDSetting SramEccSetting; 119 120 public: 121 explicit AMDGPUTargetID(const MCSubtargetInfo &STI); 122 ~AMDGPUTargetID() = default; 123 124 /// \return True if the current xnack setting is not "Unsupported". 125 bool isXnackSupported() const { 126 return XnackSetting != TargetIDSetting::Unsupported; 127 } 128 129 /// \returns True if the current xnack setting is "On" or "Any". 
130 bool isXnackOnOrAny() const { 131 return XnackSetting == TargetIDSetting::On || 132 XnackSetting == TargetIDSetting::Any; 133 } 134 135 /// \returns True if current xnack setting is "On" or "Off", 136 /// false otherwise. 137 bool isXnackOnOrOff() const { 138 return getXnackSetting() == TargetIDSetting::On || 139 getXnackSetting() == TargetIDSetting::Off; 140 } 141 142 /// \returns The current xnack TargetIDSetting, possible options are 143 /// "Unsupported", "Any", "Off", and "On". 144 TargetIDSetting getXnackSetting() const { 145 return XnackSetting; 146 } 147 148 /// Sets xnack setting to \p NewXnackSetting. 149 void setXnackSetting(TargetIDSetting NewXnackSetting) { 150 XnackSetting = NewXnackSetting; 151 } 152 153 /// \return True if the current sramecc setting is not "Unsupported". 154 bool isSramEccSupported() const { 155 return SramEccSetting != TargetIDSetting::Unsupported; 156 } 157 158 /// \returns True if the current sramecc setting is "On" or "Any". 159 bool isSramEccOnOrAny() const { 160 return SramEccSetting == TargetIDSetting::On || 161 SramEccSetting == TargetIDSetting::Any; 162 } 163 164 /// \returns True if current sramecc setting is "On" or "Off", 165 /// false otherwise. 166 bool isSramEccOnOrOff() const { 167 return getSramEccSetting() == TargetIDSetting::On || 168 getSramEccSetting() == TargetIDSetting::Off; 169 } 170 171 /// \returns The current sramecc TargetIDSetting, possible options are 172 /// "Unsupported", "Any", "Off", and "On". 173 TargetIDSetting getSramEccSetting() const { 174 return SramEccSetting; 175 } 176 177 /// Sets sramecc setting to \p NewSramEccSetting. 178 void setSramEccSetting(TargetIDSetting NewSramEccSetting) { 179 SramEccSetting = NewSramEccSetting; 180 } 181 182 void setTargetIDFromFeaturesString(StringRef FS); 183 void setTargetIDFromTargetIDStream(StringRef TargetID); 184 185 /// \returns String representation of an object. 
186 std::string toString() const; 187 }; 188 189 /// \returns Wavefront size for given subtarget \p STI. 190 unsigned getWavefrontSize(const MCSubtargetInfo *STI); 191 192 /// \returns Local memory size in bytes for given subtarget \p STI. 193 unsigned getLocalMemorySize(const MCSubtargetInfo *STI); 194 195 /// \returns Maximum addressable local memory size in bytes for given subtarget 196 /// \p STI. 197 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI); 198 199 /// \returns Number of execution units per compute unit for given subtarget \p 200 /// STI. 201 unsigned getEUsPerCU(const MCSubtargetInfo *STI); 202 203 /// \returns Maximum number of work groups per compute unit for given subtarget 204 /// \p STI and limited by given \p FlatWorkGroupSize. 205 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, 206 unsigned FlatWorkGroupSize); 207 208 /// \returns Minimum number of waves per execution unit for given subtarget \p 209 /// STI. 210 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI); 211 212 /// \returns Maximum number of waves per execution unit for given subtarget \p 213 /// STI without any kind of limitation. 214 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI); 215 216 /// \returns Number of waves per execution unit required to support the given \p 217 /// FlatWorkGroupSize. 218 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, 219 unsigned FlatWorkGroupSize); 220 221 /// \returns Minimum flat work group size for given subtarget \p STI. 222 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI); 223 224 /// \returns Maximum flat work group size for given subtarget \p STI. 225 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI); 226 227 /// \returns Number of waves per work group for given subtarget \p STI and 228 /// \p FlatWorkGroupSize. 
229 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, 230 unsigned FlatWorkGroupSize); 231 232 /// \returns SGPR allocation granularity for given subtarget \p STI. 233 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI); 234 235 /// \returns SGPR encoding granularity for given subtarget \p STI. 236 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI); 237 238 /// \returns Total number of SGPRs for given subtarget \p STI. 239 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI); 240 241 /// \returns Addressable number of SGPRs for given subtarget \p STI. 242 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI); 243 244 /// \returns Minimum number of SGPRs that meets the given number of waves per 245 /// execution unit requirement for given subtarget \p STI. 246 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); 247 248 /// \returns Maximum number of SGPRs that meets the given number of waves per 249 /// execution unit requirement for given subtarget \p STI. 250 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, 251 bool Addressable); 252 253 /// \returns Number of extra SGPRs implicitly required by given subtarget \p 254 /// STI when the given special registers are used. 255 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, 256 bool FlatScrUsed, bool XNACKUsed); 257 258 /// \returns Number of extra SGPRs implicitly required by given subtarget \p 259 /// STI when the given special registers are used. XNACK is inferred from 260 /// \p STI. 261 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, 262 bool FlatScrUsed); 263 264 /// \returns Number of SGPR blocks needed for given subtarget \p STI when 265 /// \p NumSGPRs are used. \p NumSGPRs should already include any special 266 /// register counts. 267 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs); 268 269 /// \returns VGPR allocation granularity for given subtarget \p STI. 
270 /// 271 /// For subtargets which support it, \p EnableWavefrontSize32 should match 272 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. 273 unsigned 274 getVGPRAllocGranule(const MCSubtargetInfo *STI, 275 std::optional<bool> EnableWavefrontSize32 = std::nullopt); 276 277 /// \returns VGPR encoding granularity for given subtarget \p STI. 278 /// 279 /// For subtargets which support it, \p EnableWavefrontSize32 should match 280 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. 281 unsigned getVGPREncodingGranule( 282 const MCSubtargetInfo *STI, 283 std::optional<bool> EnableWavefrontSize32 = std::nullopt); 284 285 /// \returns Total number of VGPRs for given subtarget \p STI. 286 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI); 287 288 /// \returns Addressable number of VGPRs for given subtarget \p STI. 289 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI); 290 291 /// \returns Minimum number of VGPRs that meets given number of waves per 292 /// execution unit requirement for given subtarget \p STI. 293 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); 294 295 /// \returns Maximum number of VGPRs that meets given number of waves per 296 /// execution unit requirement for given subtarget \p STI. 297 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); 298 299 /// \returns Number of waves reachable for a given \p NumVGPRs usage for given 300 /// subtarget \p STI. 301 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, 302 unsigned NumVGPRs); 303 304 /// \returns Number of VGPR blocks needed for given subtarget \p STI when 305 /// \p NumVGPRs are used. 306 /// 307 /// For subtargets which support it, \p EnableWavefrontSize32 should match the 308 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. 
309 unsigned 310 getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, 311 std::optional<bool> EnableWavefrontSize32 = std::nullopt); 312 313 } // end namespace IsaInfo 314 315 LLVM_READONLY 316 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); 317 318 LLVM_READONLY 319 inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) { 320 return getNamedOperandIdx(Opcode, NamedIdx) != -1; 321 } 322 323 LLVM_READONLY 324 int getSOPPWithRelaxation(uint16_t Opcode); 325 326 struct MIMGBaseOpcodeInfo { 327 MIMGBaseOpcode BaseOpcode; 328 bool Store; 329 bool Atomic; 330 bool AtomicX2; 331 bool Sampler; 332 bool Gather4; 333 334 uint8_t NumExtraArgs; 335 bool Gradients; 336 bool G16; 337 bool Coordinates; 338 bool LodOrClampOrMip; 339 bool HasD16; 340 bool MSAA; 341 bool BVH; 342 }; 343 344 LLVM_READONLY 345 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc); 346 347 LLVM_READONLY 348 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode); 349 350 struct MIMGDimInfo { 351 MIMGDim Dim; 352 uint8_t NumCoords; 353 uint8_t NumGradients; 354 bool MSAA; 355 bool DA; 356 uint8_t Encoding; 357 const char *AsmSuffix; 358 }; 359 360 LLVM_READONLY 361 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum); 362 363 LLVM_READONLY 364 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc); 365 366 LLVM_READONLY 367 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix); 368 369 struct MIMGLZMappingInfo { 370 MIMGBaseOpcode L; 371 MIMGBaseOpcode LZ; 372 }; 373 374 struct MIMGMIPMappingInfo { 375 MIMGBaseOpcode MIP; 376 MIMGBaseOpcode NONMIP; 377 }; 378 379 struct MIMGBiasMappingInfo { 380 MIMGBaseOpcode Bias; 381 MIMGBaseOpcode NoBias; 382 }; 383 384 struct MIMGOffsetMappingInfo { 385 MIMGBaseOpcode Offset; 386 MIMGBaseOpcode NoOffset; 387 }; 388 389 struct MIMGG16MappingInfo { 390 MIMGBaseOpcode G; 391 MIMGBaseOpcode G16; 392 }; 393 394 LLVM_READONLY 395 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L); 396 397 struct 
WMMAOpcodeMappingInfo { 398 unsigned Opcode2Addr; 399 unsigned Opcode3Addr; 400 }; 401 402 LLVM_READONLY 403 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP); 404 405 LLVM_READONLY 406 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias); 407 408 LLVM_READONLY 409 const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset); 410 411 LLVM_READONLY 412 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G); 413 414 LLVM_READONLY 415 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, 416 unsigned VDataDwords, unsigned VAddrDwords); 417 418 LLVM_READONLY 419 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels); 420 421 LLVM_READONLY 422 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, 423 const MIMGDimInfo *Dim, bool IsA16, 424 bool IsG16Supported); 425 426 struct MIMGInfo { 427 uint16_t Opcode; 428 uint16_t BaseOpcode; 429 uint8_t MIMGEncoding; 430 uint8_t VDataDwords; 431 uint8_t VAddrDwords; 432 uint8_t VAddrOperands; 433 }; 434 435 LLVM_READONLY 436 const MIMGInfo *getMIMGInfo(unsigned Opc); 437 438 LLVM_READONLY 439 int getMTBUFBaseOpcode(unsigned Opc); 440 441 LLVM_READONLY 442 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements); 443 444 LLVM_READONLY 445 int getMTBUFElements(unsigned Opc); 446 447 LLVM_READONLY 448 bool getMTBUFHasVAddr(unsigned Opc); 449 450 LLVM_READONLY 451 bool getMTBUFHasSrsrc(unsigned Opc); 452 453 LLVM_READONLY 454 bool getMTBUFHasSoffset(unsigned Opc); 455 456 LLVM_READONLY 457 int getMUBUFBaseOpcode(unsigned Opc); 458 459 LLVM_READONLY 460 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements); 461 462 LLVM_READONLY 463 int getMUBUFElements(unsigned Opc); 464 465 LLVM_READONLY 466 bool getMUBUFHasVAddr(unsigned Opc); 467 468 LLVM_READONLY 469 bool getMUBUFHasSrsrc(unsigned Opc); 470 471 LLVM_READONLY 472 bool getMUBUFHasSoffset(unsigned Opc); 473 474 LLVM_READONLY 475 bool getMUBUFIsBufferInv(unsigned Opc); 476 477 LLVM_READONLY 478 bool getSMEMIsBuffer(unsigned 
Opc); 479 480 LLVM_READONLY 481 bool getVOP1IsSingle(unsigned Opc); 482 483 LLVM_READONLY 484 bool getVOP2IsSingle(unsigned Opc); 485 486 LLVM_READONLY 487 bool getVOP3IsSingle(unsigned Opc); 488 489 LLVM_READONLY 490 bool isVOPC64DPP(unsigned Opc); 491 492 /// Returns true if MAI operation is a double precision GEMM. 493 LLVM_READONLY 494 bool getMAIIsDGEMM(unsigned Opc); 495 496 LLVM_READONLY 497 bool getMAIIsGFX940XDL(unsigned Opc); 498 499 struct CanBeVOPD { 500 bool X; 501 bool Y; 502 }; 503 504 LLVM_READONLY 505 CanBeVOPD getCanBeVOPD(unsigned Opc); 506 507 LLVM_READONLY 508 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, 509 uint8_t NumComponents, 510 uint8_t NumFormat, 511 const MCSubtargetInfo &STI); 512 LLVM_READONLY 513 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, 514 const MCSubtargetInfo &STI); 515 516 LLVM_READONLY 517 int getMCOpcode(uint16_t Opcode, unsigned Gen); 518 519 LLVM_READONLY 520 unsigned getVOPDOpcode(unsigned Opc); 521 522 LLVM_READONLY 523 int getVOPDFull(unsigned OpX, unsigned OpY); 524 525 LLVM_READONLY 526 bool isVOPD(unsigned Opc); 527 528 LLVM_READNONE 529 bool isMAC(unsigned Opc); 530 531 LLVM_READNONE 532 bool isPermlane16(unsigned Opc); 533 534 namespace VOPD { 535 536 enum Component : unsigned { 537 DST = 0, 538 SRC0, 539 SRC1, 540 SRC2, 541 542 DST_NUM = 1, 543 MAX_SRC_NUM = 3, 544 MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM 545 }; 546 547 // Number of VGPR banks per VOPD component operand. 548 constexpr unsigned BANKS_NUM[] = {2, 4, 4, 2}; 549 550 enum ComponentIndex : unsigned { X = 0, Y = 1 }; 551 constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y}; 552 constexpr unsigned COMPONENTS_NUM = 2; 553 554 // Properties of VOPD components. 
555 class ComponentProps { 556 private: 557 unsigned SrcOperandsNum = 0; 558 std::optional<unsigned> MandatoryLiteralIdx; 559 bool HasSrc2Acc = false; 560 561 public: 562 ComponentProps() = default; 563 ComponentProps(const MCInstrDesc &OpDesc); 564 565 // Return the total number of src operands this component has. 566 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; } 567 568 // Return the number of src operands of this component visible to the parser. 569 unsigned getCompParsedSrcOperandsNum() const { 570 return SrcOperandsNum - HasSrc2Acc; 571 } 572 573 // Return true iif this component has a mandatory literal. 574 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx.has_value(); } 575 576 // If this component has a mandatory literal, return component operand 577 // index of this literal (i.e. either Component::SRC1 or Component::SRC2). 578 unsigned getMandatoryLiteralCompOperandIndex() const { 579 assert(hasMandatoryLiteral()); 580 return *MandatoryLiteralIdx; 581 } 582 583 // Return true iif this component has operand 584 // with component index CompSrcIdx and this operand may be a register. 585 bool hasRegSrcOperand(unsigned CompSrcIdx) const { 586 assert(CompSrcIdx < Component::MAX_SRC_NUM); 587 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx); 588 } 589 590 // Return true iif this component has tied src2. 591 bool hasSrc2Acc() const { return HasSrc2Acc; } 592 593 private: 594 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const { 595 assert(CompSrcIdx < Component::MAX_SRC_NUM); 596 return hasMandatoryLiteral() && 597 *MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx; 598 } 599 }; 600 601 enum ComponentKind : unsigned { 602 SINGLE = 0, // A single VOP1 or VOP2 instruction which may be used in VOPD. 603 COMPONENT_X, // A VOPD instruction, X component. 604 COMPONENT_Y, // A VOPD instruction, Y component. 
605 MAX = COMPONENT_Y 606 }; 607 608 // Interface functions of this class map VOPD component operand indices 609 // to indices of operands in MachineInstr/MCInst or parsed operands array. 610 // 611 // Note that this class operates with 3 kinds of indices: 612 // - VOPD component operand indices (Component::DST, Component::SRC0, etc.); 613 // - MC operand indices (they refer operands in a MachineInstr/MCInst); 614 // - parsed operand indices (they refer operands in parsed operands array). 615 // 616 // For SINGLE components mapping between these indices is trivial. 617 // But things get more complicated for COMPONENT_X and 618 // COMPONENT_Y because these components share the same 619 // MachineInstr/MCInst and the same parsed operands array. 620 // Below is an example of component operand to parsed operand 621 // mapping for the following instruction: 622 // 623 // v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1 624 // 625 // PARSED COMPONENT PARSED 626 // COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX 627 // ------------------------------------------------------------------- 628 // "v_dual_add_f32" 0 629 // v_dual_add_f32 v255 0 (DST) --> 1 630 // v4 1 (SRC0) --> 2 631 // v5 2 (SRC1) --> 3 632 // "::" 4 633 // "v_dual_mov_b32" 5 634 // v_dual_mov_b32 v6 0 (DST) --> 6 635 // v1 1 (SRC0) --> 7 636 // ------------------------------------------------------------------- 637 // 638 class ComponentLayout { 639 private: 640 // Regular MachineInstr/MCInst operands are ordered as follows: 641 // dst, src0 [, other src operands] 642 // VOPD MachineInstr/MCInst operands are ordered as follows: 643 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 644 // Each ComponentKind has operand indices defined below. 
645 static constexpr unsigned MC_DST_IDX[] = {0, 0, 1}; 646 static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */}; 647 648 // Parsed operands of regular instructions are ordered as follows: 649 // Mnemo dst src0 [vsrc1 ...] 650 // Parsed VOPD operands are ordered as follows: 651 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' 652 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] 653 // Each ComponentKind has operand indices defined below. 654 static constexpr unsigned PARSED_DST_IDX[] = {1, 1, 655 4 /* + OpX.ParsedSrcNum */}; 656 static constexpr unsigned FIRST_PARSED_SRC_IDX[] = { 657 2, 2, 5 /* + OpX.ParsedSrcNum */}; 658 659 private: 660 const ComponentKind Kind; 661 const ComponentProps PrevComp; 662 663 public: 664 // Create layout for COMPONENT_X or SINGLE component. 665 ComponentLayout(ComponentKind Kind) : Kind(Kind) { 666 assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X); 667 } 668 669 // Create layout for COMPONENT_Y which depends on COMPONENT_X layout. 670 ComponentLayout(const ComponentProps &OpXProps) 671 : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {} 672 673 public: 674 // Return the index of dst operand in MCInst operands. 675 unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; } 676 677 // Return the index of the specified src operand in MCInst operands. 678 unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const { 679 assert(CompSrcIdx < Component::MAX_SRC_NUM); 680 return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx; 681 } 682 683 // Return the index of dst operand in the parsed operands array. 684 unsigned getIndexOfDstInParsedOperands() const { 685 return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum(); 686 } 687 688 // Return the index of the specified src operand in the parsed operands array. 
689 unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const { 690 assert(CompSrcIdx < Component::MAX_SRC_NUM); 691 return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx; 692 } 693 694 private: 695 unsigned getPrevCompSrcNum() const { 696 return PrevComp.getCompSrcOperandsNum(); 697 } 698 unsigned getPrevCompParsedSrcNum() const { 699 return PrevComp.getCompParsedSrcOperandsNum(); 700 } 701 }; 702 703 // Layout and properties of VOPD components. 704 class ComponentInfo : public ComponentLayout, public ComponentProps { 705 public: 706 // Create ComponentInfo for COMPONENT_X or SINGLE component. 707 ComponentInfo(const MCInstrDesc &OpDesc, 708 ComponentKind Kind = ComponentKind::SINGLE) 709 : ComponentLayout(Kind), ComponentProps(OpDesc) {} 710 711 // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout. 712 ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps) 713 : ComponentLayout(OpXProps), ComponentProps(OpDesc) {} 714 715 // Map component operand index to parsed operand index. 716 // Return 0 if the specified operand does not exist. 717 unsigned getIndexInParsedOperands(unsigned CompOprIdx) const; 718 }; 719 720 // Properties of VOPD instructions. 721 class InstInfo { 722 private: 723 const ComponentInfo CompInfo[COMPONENTS_NUM]; 724 725 public: 726 using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>; 727 728 InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) 729 : CompInfo{OpX, OpY} {} 730 731 InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY) 732 : CompInfo{OprInfoX, OprInfoY} {} 733 734 const ComponentInfo &operator[](size_t ComponentIdx) const { 735 assert(ComponentIdx < COMPONENTS_NUM); 736 return CompInfo[ComponentIdx]; 737 } 738 739 // Check VOPD operands constraints. 740 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index 741 // for the specified component and MC operand. 
The callback must return 0 742 // if the operand is not a register or not a VGPR. 743 bool hasInvalidOperand( 744 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const { 745 return getInvalidCompOperandIndex(GetRegIdx).has_value(); 746 } 747 748 // Check VOPD operands constraints. 749 // Return the index of an invalid component operand, if any. 750 std::optional<unsigned> getInvalidCompOperandIndex( 751 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const; 752 753 private: 754 RegIndices 755 getRegIndices(unsigned ComponentIdx, 756 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const; 757 }; 758 759 } // namespace VOPD 760 761 LLVM_READONLY 762 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode); 763 764 LLVM_READONLY 765 // Get properties of 2 single VOP1/VOP2 instructions 766 // used as components to create a VOPD instruction. 767 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY); 768 769 LLVM_READONLY 770 // Get properties of VOPD X and Y components. 771 VOPD::InstInfo 772 getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo); 773 774 LLVM_READONLY 775 bool isTrue16Inst(unsigned Opc); 776 777 LLVM_READONLY 778 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc); 779 780 LLVM_READONLY 781 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc); 782 783 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, 784 const MCSubtargetInfo *STI); 785 786 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor( 787 const MCSubtargetInfo *STI); 788 789 bool isGroupSegment(const GlobalValue *GV); 790 bool isGlobalSegment(const GlobalValue *GV); 791 bool isReadOnlySegment(const GlobalValue *GV); 792 793 /// \returns True if constants should be emitted to .text section for given 794 /// target triple \p TT, false otherwise. 795 bool shouldEmitConstantsToTextSection(const Triple &TT); 796 797 /// \returns Integer value requested using \p F's \p Name attribute. 
798 /// 799 /// \returns \p Default if attribute is not present. 800 /// 801 /// \returns \p Default and emits error if requested value cannot be converted 802 /// to integer. 803 int getIntegerAttribute(const Function &F, StringRef Name, int Default); 804 805 /// \returns A pair of integer values requested using \p F's \p Name attribute 806 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired 807 /// is false). 808 /// 809 /// \returns \p Default if attribute is not present. 810 /// 811 /// \returns \p Default and emits error if one of the requested values cannot be 812 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is 813 /// not present. 814 std::pair<int, int> getIntegerPairAttribute(const Function &F, 815 StringRef Name, 816 std::pair<int, int> Default, 817 bool OnlyFirstRequired = false); 818 819 /// Represents the counter values to wait for in an s_waitcnt instruction. 820 /// 821 /// Large values (including the maximum possible integer) can be used to 822 /// represent "don't care" waits. 823 struct Waitcnt { 824 unsigned VmCnt = ~0u; 825 unsigned ExpCnt = ~0u; 826 unsigned LgkmCnt = ~0u; 827 unsigned VsCnt = ~0u; 828 829 Waitcnt() = default; 830 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) 831 : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {} 832 833 static Waitcnt allZero(bool HasVscnt) { 834 return Waitcnt(0, 0, 0, HasVscnt ? 
0 : ~0u); 835 } 836 static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); } 837 838 bool hasWait() const { 839 return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u; 840 } 841 842 bool hasWaitExceptVsCnt() const { 843 return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u; 844 } 845 846 bool hasWaitVsCnt() const { 847 return VsCnt != ~0u; 848 } 849 850 bool dominates(const Waitcnt &Other) const { 851 return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt && 852 LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt; 853 } 854 855 Waitcnt combined(const Waitcnt &Other) const { 856 return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt), 857 std::min(LgkmCnt, Other.LgkmCnt), 858 std::min(VsCnt, Other.VsCnt)); 859 } 860 }; 861 862 /// \returns Vmcnt bit mask for given isa \p Version. 863 unsigned getVmcntBitMask(const IsaVersion &Version); 864 865 /// \returns Expcnt bit mask for given isa \p Version. 866 unsigned getExpcntBitMask(const IsaVersion &Version); 867 868 /// \returns Lgkmcnt bit mask for given isa \p Version. 869 unsigned getLgkmcntBitMask(const IsaVersion &Version); 870 871 /// \returns Waitcnt bit mask for given isa \p Version. 872 unsigned getWaitcntBitMask(const IsaVersion &Version); 873 874 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. 875 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); 876 877 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. 878 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt); 879 880 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. 881 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); 882 883 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa 884 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and 885 /// \p Lgkmcnt respectively. 
886 /// 887 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: 888 /// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9) 889 /// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10) 890 /// \p Vmcnt = \p Waitcnt[15:10] (gfx11+) 891 /// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11) 892 /// \p Expcnt = \p Waitcnt[2:0] (gfx11+) 893 /// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10) 894 /// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10) 895 /// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11+) 896 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, 897 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); 898 899 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded); 900 901 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. 902 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, 903 unsigned Vmcnt); 904 905 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. 906 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, 907 unsigned Expcnt); 908 909 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. 910 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, 911 unsigned Lgkmcnt); 912 913 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa 914 /// \p Version. 915 /// 916 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: 917 /// Waitcnt[2:0] = \p Expcnt (gfx11+) 918 /// Waitcnt[3:0] = \p Vmcnt (pre-gfx9) 919 /// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10) 920 /// Waitcnt[6:4] = \p Expcnt (pre-gfx11) 921 /// Waitcnt[9:4] = \p Lgkmcnt (gfx11+) 922 /// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10) 923 /// Waitcnt[13:8] = \p Lgkmcnt (gfx10) 924 /// Waitcnt[15:10] = \p Vmcnt (gfx11+) 925 /// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10) 926 /// 927 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given 928 /// isa \p Version. 
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

/// Convenience overload: encodes all counters held in \p Decoded for isa
/// \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

// Helpers for the symbolic hwreg(...) operand of s_setreg/s_getreg.
namespace Hwreg {

/// \returns the id of the hardware register named \p Name on subtarget
/// \p STI.
LLVM_READONLY
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);

/// \returns true if \p Id is a valid hardware register id.
LLVM_READNONE
bool isValidHwreg(int64_t Id);

/// \returns true if \p Offset is a valid bit offset for a hwreg operand.
LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

/// \returns true if \p Width is a valid bit width for a hwreg operand.
LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

/// Packs \p Id, \p Offset and \p Width into a single hwreg immediate.
LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

/// \returns the symbolic name of hardware register \p Id on \p STI.
LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

/// Splits the encoded hwreg immediate \p Val into its \p Id, \p Offset and
/// \p Width fields.
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg

// Helpers for the symbolic depctr operand.
namespace DepCtr {

/// \returns the default depctr encoding for subtarget \p STI.
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);

/// Encodes depctr field \p Name with value \p Val, updating \p UsedOprMask
/// with the bits the field occupies. NOTE(review): the error/return
/// convention is not visible in this header; confirm against the
/// implementation.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI);

/// \returns true if \p Code has a symbolic depctr representation on \p STI;
/// \p HasNonDefaultVal reports whether any field differs from its default.
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI);

/// Decodes one depctr field of \p Code (selected via \p Id) into \p Name,
/// \p Val and \p IsDefault. NOTE(review): the exact iteration contract is
/// defined out of line; confirm against the implementation.
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI);

} // namespace DepCtr

// Helpers for the target operand of export instructions.
namespace Exp {

/// Retrieves the symbolic \p Name and \p Index of export target \p Id;
/// presumably returns false when \p Id is unknown — confirm in the
/// implementation.
bool getTgtName(unsigned Id, StringRef &Name, int &Index);

/// \returns the id of the export target named \p Name.
LLVM_READONLY
unsigned getTgtId(const StringRef Name);

/// \returns true if export target \p Id is supported on subtarget \p STI.
LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp

// Helpers for MTBUF format operands (dfmt/nfmt pairs and unified formats).
namespace MTBUFFormat {

/// Packs \p Dfmt and \p Nfmt into a single format immediate.
LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

/// Splits format immediate \p Format into its \p Dfmt and \p Nfmt fields.
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

/// \returns the data-format id for symbolic name \p Name.
int64_t getDfmt(const StringRef Name);

/// \returns the symbolic name of data format \p Id.
StringRef getDfmtName(unsigned Id);

/// \returns the numeric-format id for symbolic name \p Name on \p STI.
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

/// \returns the symbolic name of numeric format \p Id on \p STI.
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

/// \returns true if \p Val is a valid dfmt/nfmt pair on \p STI.
bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

/// \returns true if \p Val is a valid numeric format on \p STI.
bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

/// \returns the unified-format id for symbolic name \p Name on \p STI.
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);

/// \returns the symbolic name of unified format \p Id on \p STI.
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);

/// \returns true if \p Val is a valid unified format on \p STI.
bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);

/// Converts the \p Dfmt / \p Nfmt pair to the equivalent unified format for
/// subtarget \p STI.
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI);

/// \returns true if \p Val is a valid format encoding for \p STI.
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

/// \returns the default format encoding for \p STI.
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat

// Helpers for the symbolic sendmsg operand.
namespace SendMsg {

/// \returns the message id for symbolic name \p Name on \p STI.
LLVM_READONLY
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI);

/// \returns the id of the operation named \p Name within message \p MsgId.
LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

/// \returns the symbolic name of message \p MsgId on \p STI.
LLVM_READNONE
StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI);

/// \returns the symbolic name of operation \p OpId of message \p MsgId.
LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

/// \returns true if \p MsgId is a valid message id on \p STI.
LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);

/// \returns true if \p OpId is a valid operation for message \p MsgId.
/// \p Strict presumably tightens validation — confirm its exact semantics in
/// the implementation.
LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

/// \returns true if \p StreamId is valid for message \p MsgId with operation
/// \p OpId (see isValidMsgOp for \p Strict).
LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

/// \returns true if message \p MsgId requires an operation field.
LLVM_READNONE
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);

/// \returns true if message \p MsgId with operation \p OpId carries a stream
/// id.
LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

/// Splits the encoded sendmsg immediate \p Val into \p MsgId, \p OpId and
/// \p StreamId.
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI);

/// Packs \p MsgId, \p OpId and \p StreamId into a sendmsg immediate.
LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg


/// \returns the initial PS_INPUT_ADDR value for pixel-shader function \p F.
unsigned getInitialPSInputAddr(const Function &F);

/// \returns true if pixel-shader function \p F exports color.
bool getHasColorExport(const Function &F);

/// \returns true if pixel-shader function \p F exports depth.
bool getHasDepthExport(const Function &F);

/// \returns true if calling convention \p CC is an AMDGPU shader convention.
LLVM_READNONE
bool isShader(CallingConv::ID CC);

/// \returns true if calling convention \p CC is a graphics convention.
LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

/// \returns true if calling convention \p CC is a compute convention.
LLVM_READNONE
bool isCompute(CallingConv::ID CC);

/// \returns true if functions with calling convention \p CC are entered
/// directly by the hardware.
LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// These functions are considered entrypoints into the current module, i.e. they
// are allowed to be called from outside the current module. This is different
// from isEntryFunctionCC, which is only true for functions that are entered by
// the hardware. Module entry points include all entry functions but also
// include functions that can be called from other functions inside or outside
// the current module. Module entry functions are allowed to allocate LDS.
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);

/// \returns true if \p Func uses a kernel calling convention.
bool isKernelCC(const Function *Func);

// FIXME: Remove this when calling conventions cleaned up
/// \returns true if \p CC is one of the two kernel calling conventions.
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

// Subtarget feature queries.
bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasA16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

// Generation and encoding predicates for subtarget \p STI.
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX8Plus(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isNotGFX10Plus(const MCSubtargetInfo &STI);
bool isGFX10Before1030(const MCSubtargetInfo &STI);
bool isGFX11(const MCSubtargetInfo &STI);
bool isGFX11Plus(const MCSubtargetInfo &STI);
bool isNotGFX11Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);

/// \returns the total number of VGPRs given the AGPR and VGPR argument
/// counts; the combination rule depends on \p has90AInsts.
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);

/// Is \p Reg a scalar register?
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// \returns true if \p Reg is an inline-value register.
LLVM_READNONE
bool isInlineValue(unsigned Reg);

/// Is this an AMDGPU specific source operand? These include registers,
/// inline constants, literals and mandatory literals (KImm).
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a KImm operand?
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get size of register operand
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

/// \returns the size in bytes (4, 8 or 2) of the immediate for operand type
/// \p OpInfo.OperandType.
LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  // 32-bit (and packed-32) operand types, plus mandatory literals.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
    return 4;

  // 64-bit operand types.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return 8;

  // 16-bit (and packed-16) operand types.
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

/// Convenience overload: size in bytes of the immediate for operand \p OpNo
/// of \p Desc.
LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.operands()[OpNo]);
}

/// Is this literal inlinable, and not one of the values intended for floating
/// point values.
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // [-16, 64] is the range of integer inline constants.
  return Literal >= -16 && Literal <= 64;
}

/// Is this 64-bit literal inlinable? \p HasInv2Pi presumably indicates
/// hardware support for the 1/(2*pi) inline constant — confirm in the
/// implementation.
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

/// Is this 32-bit literal inlinable?
LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

/// Is this 16-bit literal inlinable?
LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

/// Is this packed (v2i16/v2f16) literal inlinable?
LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

/// Is this packed literal an inlinable integer value?
LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

/// Can this packed literal be folded?
LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);

/// \returns true if argument \p Arg is passed in an SGPR.
bool isArgPassedInSGPR(const Argument *Arg);

/// \returns true if \p EncodedOffset is legal as an unsigned SMRD offset
/// encoding on \p ST.
LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

/// \returns true if \p EncodedOffset is legal as a signed SMRD offset
/// encoding on \p ST (buffer forms selected by \p IsBuffer).
LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer);

/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset);

/// For FLAT segment the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the signed offset field in flat
/// instructions. Note that some forms of the instruction disallow negative
/// offsets.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

/// Splits the immediate offset \p Imm into an \p SOffset / \p ImmOffset pair
/// for MUBUF addressing; presumably returns false when the split is not
/// possible — confirm against the implementation.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));

/// \returns true if \p DC lies in the ROW_NEWBCAST range of DPP controls.
LLVM_READNONE
inline bool isLegal64BitDPPControl(unsigned DC) {
  return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
}

/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering quiet and
  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
  /// quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is IEEE mode, neither input nor output denormals are flushed for
  /// most f32 instructions.
  DenormalMode FP32Denormals;

  /// If this is IEEE mode, neither input nor output denormals are flushed for
  /// both f64 and f16/v2f16 instructions.
  DenormalMode FP64FP16Denormals;

  /// Defaults: IEEE and DX10 clamp enabled, denormals preserved (IEEE
  /// denormal mode) for all types.
  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32Denormals(DenormalMode::getIEEE()),
    FP64FP16Denormals(DenormalMode::getIEEE()) {}

  /// Initializes the mode from function \p F — presumably from function
  /// attributes; defined out of line, confirm there.
  SIModeRegisterDefaults(const Function &F);

  /// \returns the default mode for calling convention \p CC: IEEE is disabled
  /// for shader conventions, all other fields keep their defaults.
  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    SIModeRegisterDefaults Mode;
    Mode.IEEE = !AMDGPU::isShader(CC);
    return Mode;
  }

  /// Field-wise equality of all four mode fields.
  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32Denormals == Other.FP32Denormals &&
           FP64FP16Denormals == Other.FP64FP16Denormals;
  }

  /// \returns true if all f32 denormals are enabled (IEEE denormal mode).
  bool allFP32Denormals() const {
    return FP32Denormals == DenormalMode::getIEEE();
  }

  /// \returns true if all f64/f16 denormals are enabled (IEEE denormal mode).
  bool allFP64FP16Denormals() const {
    return FP64FP16Denormals == DenormalMode::getIEEE();
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP32 denormal mode.
  uint32_t fpDenormModeSPValue() const {
    if (FP32Denormals == DenormalMode::getPreserveSign())
      return FP_DENORM_FLUSH_IN_FLUSH_OUT;
    if (FP32Denormals.Output == DenormalMode::PreserveSign)
      return FP_DENORM_FLUSH_OUT;
    if (FP32Denormals.Input == DenormalMode::PreserveSign)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_NONE;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP64/FP16 denormal mode.
  uint32_t fpDenormModeDPValue() const {
    if (FP64FP16Denormals == DenormalMode::getPreserveSign())
      return FP_DENORM_FLUSH_IN_FLUSH_OUT;
    if (FP64FP16Denormals.Output == DenormalMode::PreserveSign)
      return FP_DENORM_FLUSH_OUT;
    if (FP64FP16Denormals.Input == DenormalMode::PreserveSign)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_NONE;
  }

  /// Returns true if a flag is compatible: either caller and callee agree, or
  /// the flag is enabled in the callee but disabled in the caller.
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode
  // should be able to override.
  /// \returns true if a function with mode \p CalleeMode may be inlined into
  /// a function with this mode: IEEE and DX10Clamp must match exactly, and
  /// each denormal flag may only go from flushed (caller) to enabled (callee)
  /// per oneWayCompatible.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining denormals enabled into denormals flushed functions.
    return oneWayCompatible(FP64FP16Denormals.Input !=
                            DenormalMode::PreserveSign,
                            CalleeMode.FP64FP16Denormals.Input !=
                            DenormalMode::PreserveSign) &&
           oneWayCompatible(FP64FP16Denormals.Output !=
                            DenormalMode::PreserveSign,
                            CalleeMode.FP64FP16Denormals.Output !=
                            DenormalMode::PreserveSign) &&
           oneWayCompatible(FP32Denormals.Input != DenormalMode::PreserveSign,
                            CalleeMode.FP32Denormals.Input !=
                            DenormalMode::PreserveSign) &&
           oneWayCompatible(FP32Denormals.Output != DenormalMode::PreserveSign,
                            CalleeMode.FP32Denormals.Output !=
                            DenormalMode::PreserveSign);
  }
};

} // end namespace AMDGPU

/// Pretty-prints a TargetIDSetting value.
raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H