//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"

struct amd_kernel_code_t;

namespace llvm {

struct Align;
class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;

namespace amdhsa {
struct kernel_descriptor_t;
}

namespace AMDGPU {

struct IsaVersion;

/// \returns HSA OS ABI Version identification.
Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
/// false otherwise.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 4,
/// false otherwise.
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 5,
/// false otherwise.
bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 and above,
/// false otherwise.
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);

struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#define GET_MIMGBiasMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

enum class TargetIDSetting {
  Unsupported,
  Any,
  Off,
  On
};
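
// Illustrative note (not normative): in a target-ID stream such as
// "gfx90a:sramecc+:xnack-", each listed feature maps to TargetIDSetting::On
// or TargetIDSetting::Off; a supported feature that is absent from the stream
// remains TargetIDSetting::Any, and a feature the subtarget lacks entirely is
// TargetIDSetting::Unsupported. See AMDGPUTargetID below.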
class AMDGPUTargetID {
private:
  const MCSubtargetInfo &STI;
  TargetIDSetting XnackSetting;
  TargetIDSetting SramEccSetting;

public:
  explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
  ~AMDGPUTargetID() = default;

  /// \returns True if the current xnack setting is not "Unsupported".
  bool isXnackSupported() const {
    return XnackSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current xnack setting is "On" or "Any".
  bool isXnackOnOrAny() const {
    return XnackSetting == TargetIDSetting::On ||
           XnackSetting == TargetIDSetting::Any;
  }

  /// \returns True if the current xnack setting is "On" or "Off",
  /// false otherwise.
  bool isXnackOnOrOff() const {
    return getXnackSetting() == TargetIDSetting::On ||
           getXnackSetting() == TargetIDSetting::Off;
  }

  /// \returns The current xnack TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getXnackSetting() const {
    return XnackSetting;
  }

  /// Sets xnack setting to \p NewXnackSetting.
  void setXnackSetting(TargetIDSetting NewXnackSetting) {
    XnackSetting = NewXnackSetting;
  }

  /// \returns True if the current sramecc setting is not "Unsupported".
  bool isSramEccSupported() const {
    return SramEccSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current sramecc setting is "On" or "Any".
  bool isSramEccOnOrAny() const {
    return SramEccSetting == TargetIDSetting::On ||
           SramEccSetting == TargetIDSetting::Any;
  }

  /// \returns True if the current sramecc setting is "On" or "Off",
  /// false otherwise.
  bool isSramEccOnOrOff() const {
    return getSramEccSetting() == TargetIDSetting::On ||
           getSramEccSetting() == TargetIDSetting::Off;
  }

  /// \returns The current sramecc TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getSramEccSetting() const {
    return SramEccSetting;
  }

  /// Sets sramecc setting to \p NewSramEccSetting.
  void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
    SramEccSetting = NewSramEccSetting;
  }

  void setTargetIDFromFeaturesString(StringRef FS);
  void setTargetIDFromTargetIDStream(StringRef TargetID);

  /// \returns String representation of this object.
  std::string toString() const;
};

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given
/// \p FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
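
// Example (illustrative sketch; 'STI' stands for a hypothetical, fully
// initialized MCSubtargetInfo, and 256 is an arbitrary work-group size):
//
//   unsigned MinWaves = getMinWavesPerEU(&STI);
//   unsigned MaxWaves = getMaxWavesPerEU(&STI);
//   // Waves per EU needed so a 256-lane flat work group can be resident:
//   unsigned Waves = getWavesPerEUForWorkGroup(&STI, /*FlatWorkGroupSize=*/256);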
/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
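
// Example (illustrative sketch; 'STI' is a hypothetical subtarget and the
// special-register usage flags and SGPR count are made up for the example):
//
//   unsigned Extra = getNumExtraSGPRs(&STI, /*VCCUsed=*/true,
//                                     /*FlatScrUsed=*/false);
//   unsigned Blocks = getNumSGPRBlocks(&STI, /*NumSGPRs=*/32 + Extra);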
/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32 = None);

} // end namespace IsaInfo

LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};

struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);

LLVM_READONLY
const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);
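
// Example (illustrative sketch; 'Opc' is a hypothetical MUBUF machine opcode,
// assuming the table getters return -1 when no entry exists):
//
//   int BaseOpc = getMUBUFBaseOpcode(Opc);
//   if (BaseOpc != -1) {
//     // Re-derive the single-element variant of the same base operation.
//     int NewOpc = getMUBUFOpcode(BaseOpc, /*Elements=*/1);
//   }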
LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);

LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional if \p OnlyFirstRequired
/// is true).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or if \p OnlyFirstRequired is false and the "second"
/// value is not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);
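
// Example (illustrative sketch; the attribute value shown is hypothetical).
// For a function 'F' carrying the IR attribute
//   "amdgpu-flat-work-group-size"="128,256"
// the call below yields {128, 256}, and the given default if it is absent:
//
//   std::pair<int, int> Range = getIntegerPairAttribute(
//       F, "amdgpu-flat-work-group-size", /*Default=*/{1, 1024});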
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  static Waitcnt allZero(bool HasVscnt) {
    return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u);
  }
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  bool hasWaitExceptVsCnt() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u;
  }

  bool hasWaitVsCnt() const {
    return VsCnt != ~0u;
  }

  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]          (pre-gfx9)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]    (gfx9+)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]       (pre-gfx10)
///     \p Lgkmcnt = \p Waitcnt[13:8]       (gfx10+)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);
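
// Example (illustrative sketch; 'Version' is a hypothetical IsaVersion and the
// counter values are arbitrary). encodeWaitcnt below and the Waitcnt-returning
// decodeWaitcnt above round-trip within each field's width:
//
//   unsigned Enc = encodeWaitcnt(Version, /*Vmcnt=*/0, /*Expcnt=*/7,
//                                /*Lgkmcnt=*/0);
//   Waitcnt W = decodeWaitcnt(Version, Enc); // W.VmCnt == 0, W.ExpCnt == 7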
/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name);

LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg

namespace Exp {

bool getTgtName(unsigned Id, StringRef &Name, int &Index);

LLVM_READONLY
unsigned getTgtId(const StringRef Name);

LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp

namespace MTBUFFormat {

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name);

StringRef getUnifiedFormatName(unsigned Id);

bool isValidUnifiedFormat(unsigned Val);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat

namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);
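
// Example (illustrative sketch; assumes symbolic message names resolve through
// the searchable tables and that an unknown name yields -1; the op and stream
// values are arbitrary):
//
//   int64_t MsgId = getMsgId("MSG_GS");
//   if (MsgId != -1 && msgRequiresOp(MsgId)) {
//     uint64_t Imm = encodeMsg(MsgId, /*OpId=*/2, /*StreamId=*/0);
//   }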
} // namespace SendMsg

unsigned getInitialPSInputAddr(const Function &F);

bool getHasColorExport(const Function &F);

bool getHasDepthExport(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// These functions are considered entrypoints into the current module, i.e.
// they are allowed to be called from outside the current module. This is
// different from isEntryFunctionCC, which is only true for functions that are
// entered by the hardware. Module entry points include all entry functions but
// also include functions that can be called from other functions inside or
// outside the current module. Module entry functions are allowed to allocate
// LDS.
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);

// FIXME: Remove this when calling conventions are cleaned up.
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasGFX10A16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);

/// \returns true if \p Reg is a scalar register.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

/// \returns true if there is any intersection between registers \p Reg0 and
/// \p Reg1.
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);

/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI, otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);
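
// Example (illustrative sketch; 'MRI' and 'STI' stand for a hypothetical
// MCRegisterInfo and subtarget, and AMDGPU::SGPR0 is just a convenient
// register to query):
//
//   unsigned HwReg = getMCReg(AMDGPU::SGPR0, STI); // pseudo -> hardware reg
//   unsigned Pseudo = mc2PseudoReg(HwReg);         // and back
//   bool Scalar = isSGPR(Pseudo, &MRI);            // true for SGPR0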
/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get the size of a register operand.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}

/// Is this literal inlinable as an integer, i.e. not one of the inline
/// immediate encodings reserved for floating-point values?
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}

/// Is this literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);

bool isArgPassedInSGPR(const Argument *Arg);

LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
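
// Example (illustrative; 'ST' is a hypothetical subtarget): on a subtarget
// with dword SMRD offset units, a byte offset of 16 converts to 4 dwords:
//
//   uint64_t Units = convertSMRDOffsetUnits(ST, /*ByteOffset=*/16);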
/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer);

/// \return The encoding that can be used for a 32-bit literal offset in an
/// SMRD instruction. This is only useful on CI.
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset);

/// For FLAT segment the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the offset field in flat
/// instructions.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));

LLVM_READNONE
inline bool isLegal64BitDPPControl(unsigned DC) {
  return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
}

/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering quiet and
  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
  /// quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input nor output denormals are flushed for most
  /// f32 instructions.
  bool FP32InputDenormals : 1;
  bool FP32OutputDenormals : 1;

  /// If this is set, neither input nor output denormals are flushed for both
  /// f64 and f16/v2f16 instructions.
  bool FP64FP16InputDenormals : 1;
  bool FP64FP16OutputDenormals : 1;

  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32InputDenormals(true),
    FP32OutputDenormals(true),
    FP64FP16InputDenormals(true),
    FP64FP16OutputDenormals(true) {}

  SIModeRegisterDefaults(const Function &F);

  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    SIModeRegisterDefaults Mode;
    Mode.IEEE = !AMDGPU::isShader(CC);
    return Mode;
  }

  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }

  bool allFP32Denormals() const {
    return FP32InputDenormals && FP32OutputDenormals;
  }

  bool allFP64FP16Denormals() const {
    return FP64FP16InputDenormals && FP64FP16OutputDenormals;
  }
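
  // Illustrative mapping for the helpers below (FP_DENORM_* values come from
  // SIDefines.h), shown here for the FP32 case:
  //   both FP32 flags set      -> FP_DENORM_FLUSH_NONE
  //   only inputs preserved    -> FP_DENORM_FLUSH_OUT
  //   only outputs preserved   -> FP_DENORM_FLUSH_IN
  //   neither preserved        -> FP_DENORM_FLUSH_IN_FLUSH_OUT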
  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP32 denormal mode.
  uint32_t fpDenormModeSPValue() const {
    if (FP32InputDenormals && FP32OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP32InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP32OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP64/FP16 denormal mode.
  uint32_t fpDenormModeDPValue() const {
    if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP64FP16InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Returns true if the callee's mode flag is compatible with the caller's:
  /// either both match, or the flag is enabled in the callee but disabled in
  /// the caller.
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode
  // should be able to override.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining denormals enabled into denormals flushed functions.
    return oneWayCompatible(FP64FP16InputDenormals,
                            CalleeMode.FP64FP16InputDenormals) &&
           oneWayCompatible(FP64FP16OutputDenormals,
                            CalleeMode.FP64FP16OutputDenormals) &&
           oneWayCompatible(FP32InputDenormals,
                            CalleeMode.FP32InputDenormals) &&
           oneWayCompatible(FP32OutputDenormals,
                            CalleeMode.FP32OutputDenormals);
  }
};

} // end namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H