//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"

struct amd_kernel_code_t;

namespace llvm {

struct Align;
class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;

namespace amdhsa {
struct kernel_descriptor_t;
}

namespace AMDGPU {

struct IsaVersion;

/// \returns HSA OS ABI Version identification.
Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
/// false otherwise.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 4,
/// false otherwise.
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 5,
/// false otherwise.
bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 or above,
/// false otherwise.
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);

/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr.
unsigned getMultigridSyncArgImplicitArgPosition();

/// \returns The offset of the hostcall pointer argument from implicitarg_ptr.
unsigned getHostcallImplicitArgPosition();

/// \returns Code object version.
unsigned getAmdhsaCodeObjectVersion();

struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

struct MAIInstInfo {
  uint16_t Opcode;
  bool is_dgemm;
  bool is_gfx940_xdl;
};

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#define GET_MIMGBiASMapping_DECL
#define GET_MAIInstInfoTable_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

enum class TargetIDSetting {
  Unsupported,
  Any,
  Off,
  On
};
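
// Example (illustrative, not part of the API): AMDGPUTargetID below models
// the feature suffixes of a target-id string. For a hypothetical gfx906
// target with xnack enabled and sramecc disabled, toString() would produce
// something like:
//
//   amdgcn-amd-amdhsa--gfx906:sramecc-:xnack+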

class AMDGPUTargetID {
private:
  const MCSubtargetInfo &STI;
  TargetIDSetting XnackSetting;
  TargetIDSetting SramEccSetting;

public:
  explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
  ~AMDGPUTargetID() = default;

  /// \returns True if the current xnack setting is not "Unsupported".
  bool isXnackSupported() const {
    return XnackSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current xnack setting is "On" or "Any".
  bool isXnackOnOrAny() const {
    return XnackSetting == TargetIDSetting::On ||
           XnackSetting == TargetIDSetting::Any;
  }

  /// \returns True if the current xnack setting is "On" or "Off",
  /// false otherwise.
  bool isXnackOnOrOff() const {
    return getXnackSetting() == TargetIDSetting::On ||
           getXnackSetting() == TargetIDSetting::Off;
  }

  /// \returns The current xnack TargetIDSetting; possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getXnackSetting() const {
    return XnackSetting;
  }

  /// Sets xnack setting to \p NewXnackSetting.
  void setXnackSetting(TargetIDSetting NewXnackSetting) {
    XnackSetting = NewXnackSetting;
  }

  /// \returns True if the current sramecc setting is not "Unsupported".
  bool isSramEccSupported() const {
    return SramEccSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current sramecc setting is "On" or "Any".
  bool isSramEccOnOrAny() const {
    return SramEccSetting == TargetIDSetting::On ||
           SramEccSetting == TargetIDSetting::Any;
  }

  /// \returns True if the current sramecc setting is "On" or "Off",
  /// false otherwise.
  bool isSramEccOnOrOff() const {
    return getSramEccSetting() == TargetIDSetting::On ||
           getSramEccSetting() == TargetIDSetting::Off;
  }

  /// \returns The current sramecc TargetIDSetting; possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getSramEccSetting() const {
    return SramEccSetting;
  }

  /// Sets sramecc setting to \p NewSramEccSetting.
  void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
    SramEccSetting = NewSramEccSetting;
  }

  void setTargetIDFromFeaturesString(StringRef FS);
  void setTargetIDFromTargetIDStream(StringRef TargetID);

  /// \returns String representation of an object.
  std::string toString() const;
};

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given \p
/// FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);
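
// Example (illustrative): given an in-scope MCSubtargetInfo *STI, the
// occupancy impact of a 256-lane flat work group size can be queried as:
//
//   unsigned WavesPerEU =
//       getWavesPerEUForWorkGroup(STI, /*FlatWorkGroupSize=*/256);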

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
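
// Example (illustrative): the SGPR block count recorded in a kernel
// descriptor accounts for implicitly used special registers, so a raw usage
// count (NumSGPRs, assumed in scope here) is first padded with
// getNumExtraSGPRs():
//
//   unsigned Extra = getNumExtraSGPRs(STI, /*VCCUsed=*/true,
//                                     /*FlatScrUsed=*/false);
//   unsigned Blocks = getNumSGPRBlocks(STI, NumSGPRs + Extra);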

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32 = None);

} // end namespace IsaInfo

LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};

struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);

LLVM_READONLY
const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);
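
// Example (illustrative): rewriting an image instruction to a variant with
// different vdata/vaddr dword counts. getMIMGOpcode() returns -1 if no such
// encoding exists, so the result must be checked; Info is assumed to come
// from getMIMGInfo(Opc):
//
//   int NewOpc = getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding,
//                              /*VDataDwords=*/4, /*VAddrDwords=*/3);
//   if (NewOpc != -1) { /* use the rewritten opcode */ }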

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);

LLVM_READONLY
bool isVOPC64DPP(unsigned Opc);

/// Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY
bool getMAIIsDGEMM(unsigned Opc);

LLVM_READONLY
bool getMAIIsGFX940XDL(unsigned Opc);

struct CanBeVOPD {
  bool X;
  bool Y;
};

LLVM_READONLY
CanBeVOPD getCanBeVOPD(unsigned Opc);

LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

LLVM_READONLY
unsigned getVOPDOpcode(unsigned Opc);

LLVM_READONLY
int getVOPDFull(unsigned OpX, unsigned OpY);

LLVM_READONLY
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);

LLVM_READONLY
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);
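
// Example (illustrative): the "amdgpu-flat-work-group-size" function
// attribute uses the "first,second" format parsed by
// getIntegerPairAttribute(); the default range shown here is a placeholder:
//
//   std::pair<int, int> Range =
//       getIntegerPairAttribute(F, "amdgpu-flat-work-group-size",
//                               /*Default=*/{1, 1024});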

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() = default;
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  static Waitcnt allZero(bool HasVscnt) {
    return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u);
  }
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  bool hasWaitExceptVsCnt() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u;
  }

  bool hasWaitVsCnt() const {
    return VsCnt != ~0u;
  }

  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11+)
///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
///     \p Expcnt = \p Waitcnt[2:0]       (gfx11+)
///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11+)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
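
// Example (illustrative): unpacking an s_waitcnt immediate (Encoded, assumed
// in scope) into its fields, with an IsaVersion obtained via
// getIsaVersion(STI.getCPU()):
//
//   unsigned Vmcnt, Expcnt, Lgkmcnt;
//   decodeWaitcnt(Version, Encoded, Vmcnt, Expcnt, Lgkmcnt);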

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11+)
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
///     Waitcnt[15:10] = \p Vmcnt       (gfx11+)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
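
// Example (illustrative): an encode/decode round trip for a target's isa
// version, assuming an in-scope MCSubtargetInfo STI:
//
//   IsaVersion Version = getIsaVersion(STI.getCPU());
//   unsigned Enc = encodeWaitcnt(Version, /*Vmcnt=*/0, /*Expcnt=*/7,
//                                /*Lgkmcnt=*/0);
//   Waitcnt Decoded = decodeWaitcnt(Version, Enc); // Decoded.VmCnt == 0, etc.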

namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg

namespace DepCtr {

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI);
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI);
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI);

} // namespace DepCtr

namespace Exp {

bool getTgtName(unsigned Id, StringRef &Name, int &Index);

LLVM_READONLY
unsigned getTgtId(const StringRef Name);

LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp

namespace MTBUFFormat {

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat

namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg

unsigned getInitialPSInputAddr(const Function &F);

bool getHasColorExport(const Function &F);

bool getHasDepthExport(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);
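
// Example (illustrative): an amdgpu_kernel is entered by the hardware, while
// an amdgpu_gfx function is callable but not an entry point:
//
//   isEntryFunctionCC(CallingConv::AMDGPU_KERNEL); // true
//   isEntryFunctionCC(CallingConv::AMDGPU_Gfx);    // false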

// These functions are considered entrypoints into the current module, i.e. they
// are allowed to be called from outside the current module. This is different
// from isEntryFunctionCC, which is only true for functions that are entered by
// the hardware. Module entry points include all entry functions but also
// include functions that can be called from other functions inside or outside
// the current module. Module entry functions are allowed to allocate LDS.
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);

bool isKernelCC(const Function *Func);

// FIXME: Remove this when calling conventions are cleaned up.
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasGFX10A16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX8Plus(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isNotGFX10Plus(const MCSubtargetInfo &STI);
bool isGFX10Before1030(const MCSubtargetInfo &STI);
bool isGFX11(const MCSubtargetInfo &STI);
bool isGFX11Plus(const MCSubtargetInfo &STI);
bool isNotGFX11Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);

/// \returns True if \p Reg is a scalar register.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

/// If \p Reg is a pseudo register, return the correct hardware register given
/// \p STI, otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class with ID \p RCID.
unsigned getRegBitWidth(unsigned RCID);
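
// Example (illustrative): for the tablegen-generated 32-bit VGPR register
// class (VGPR_32RegClassID, assumed here for illustration), one would expect:
//
//   getRegBitWidth(AMDGPU::VGPR_32RegClassID); // == 32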

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get size of register operand.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}

/// Is this literal inlinable, and not one of the values intended for floating
/// point values?
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}
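
// Example (illustrative): integer inline constants span [-16, 64], so the
// boundary values behave as follows:
//
//   isInlinableIntLiteral(64); // true  - encodable as an inline constant
//   isInlinableIntLiteral(65); // false - needs a literal constant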

/// Is this literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);

bool isArgPassedInSGPR(const Argument *Arg);

LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer);

/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset);

/// For the FLAT segment the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the offset field in flat
/// instructions.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);

/// \returns True if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));

LLVM_READNONE
inline bool isLegal64BitDPPControl(unsigned DC) {
  return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
}

/// \returns True if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering quiet and
  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
  /// quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input nor output denormals are flushed for most
  /// f32 instructions.
  bool FP32InputDenormals : 1;
  bool FP32OutputDenormals : 1;

  /// If this is set, neither input nor output denormals are flushed for both
  /// f64 and f16/v2f16 instructions.
  bool FP64FP16InputDenormals : 1;
  bool FP64FP16OutputDenormals : 1;

  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32InputDenormals(true),
    FP32OutputDenormals(true),
    FP64FP16InputDenormals(true),
    FP64FP16OutputDenormals(true) {}

  SIModeRegisterDefaults(const Function &F);

  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    SIModeRegisterDefaults Mode;
    Mode.IEEE = !AMDGPU::isShader(CC);
    return Mode;
  }

  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }

  bool allFP32Denormals() const {
    return FP32InputDenormals && FP32OutputDenormals;
  }

  bool allFP64FP16Denormals() const {
    return FP64FP16InputDenormals && FP64FP16OutputDenormals;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP32 denormal mode.
  uint32_t fpDenormModeSPValue() const {
    if (FP32InputDenormals && FP32OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP32InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP32OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP64/FP16 denormal mode.
  uint32_t fpDenormModeDPValue() const {
    if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP64FP16InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Returns true if a flag is compatible: either the modes match, or the
  /// flag is enabled in the callee but disabled in the caller.
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
  // be able to override.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining denormals enabled into denormals flushed functions.
    return oneWayCompatible(FP64FP16InputDenormals,
                            CalleeMode.FP64FP16InputDenormals) &&
           oneWayCompatible(FP64FP16OutputDenormals,
                            CalleeMode.FP64FP16OutputDenormals) &&
           oneWayCompatible(FP32InputDenormals,
                            CalleeMode.FP32InputDenormals) &&
           oneWayCompatible(FP32OutputDenormals,
                            CalleeMode.FP32OutputDenormals);
  }
};

} // end namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H