//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"

struct amd_kernel_code_t;

namespace llvm {

struct Align;
class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;

namespace amdhsa {
struct kernel_descriptor_t;
}

namespace AMDGPU {

struct IsaVersion;

/// \returns HSA OS ABI Version identification.
Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
/// false otherwise.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 4,
/// false otherwise.
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 or 4,
/// false otherwise.
bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI);
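
// Illustrative usage sketch (not part of the original header): the returned
// value, when present, is the ELF ABI-version identification byte that the
// isHsaAbiVersionN helpers above compare against.
//
// \code
//   if (Optional<uint8_t> AbiVer = getHsaAbiVersion(&STI)) {
//     // *AbiVer holds the e_ident[EI_ABIVERSION] value for this subtarget.
//   }
// \endcode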

struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

enum class TargetIDSetting {
  Unsupported,
  Any,
  Off,
  On
};

class AMDGPUTargetID {
private:
  const MCSubtargetInfo &STI;
  TargetIDSetting XnackSetting;
  TargetIDSetting SramEccSetting;

public:
  explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
  ~AMDGPUTargetID() = default;

  /// \returns True if the current xnack setting is not "Unsupported".
  bool isXnackSupported() const {
    return XnackSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current xnack setting is "On" or "Any".
  bool isXnackOnOrAny() const {
    return XnackSetting == TargetIDSetting::On ||
           XnackSetting == TargetIDSetting::Any;
  }

  /// \returns True if the current xnack setting is "On" or "Off",
  /// false otherwise.
  bool isXnackOnOrOff() const {
    return getXnackSetting() == TargetIDSetting::On ||
           getXnackSetting() == TargetIDSetting::Off;
  }

  /// \returns The current xnack TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getXnackSetting() const {
    return XnackSetting;
  }

  /// Sets xnack setting to \p NewXnackSetting.
  void setXnackSetting(TargetIDSetting NewXnackSetting) {
    XnackSetting = NewXnackSetting;
  }

  /// \returns True if the current sramecc setting is not "Unsupported".
  bool isSramEccSupported() const {
    return SramEccSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current sramecc setting is "On" or "Any".
  bool isSramEccOnOrAny() const {
    return SramEccSetting == TargetIDSetting::On ||
           SramEccSetting == TargetIDSetting::Any;
  }

  /// \returns True if the current sramecc setting is "On" or "Off",
  /// false otherwise.
  bool isSramEccOnOrOff() const {
    return getSramEccSetting() == TargetIDSetting::On ||
           getSramEccSetting() == TargetIDSetting::Off;
  }

  /// \returns The current sramecc TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getSramEccSetting() const {
    return SramEccSetting;
  }

  /// Sets sramecc setting to \p NewSramEccSetting.
  void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
    SramEccSetting = NewSramEccSetting;
  }

  void setTargetIDFromFeaturesString(StringRef FS);
  void setTargetIDFromTargetIDStream(StringRef TargetID);

  /// \returns String representation of an object.
  std::string toString() const;
};
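
// Illustrative sketch (not from the original header); the target-id string
// below is an assumed example following the
// processor[:sramecc+|-][:xnack+|-] convention:
//
// \code
//   AMDGPUTargetID ID(STI);
//   ID.setTargetIDFromTargetIDStream("gfx90a:sramecc+:xnack-");
//   if (ID.isXnackOnOrAny()) {
//     // ...
//   }
// \endcode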

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given
/// \p FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
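
// Illustrative sketch (not from the original header): computing the register
// budget that still sustains a requested occupancy.
//
// \code
//   unsigned SGPRBudget = getMaxNumSGPRs(STI, WavesPerEU, /*Addressable=*/true);
//   unsigned VGPRBudget = getMaxNumVGPRs(STI, WavesPerEU);
// \endcode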

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32 = None);

} // end namespace IsaInfo

LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);
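
// Illustrative sketch (not from the original header): re-deriving a MIMG
// opcode after changing the data operand width, a common pattern around these
// tables. MI and NewVDataDwords are assumed to come from the caller.
//
// \code
//   if (const MIMGInfo *Info = getMIMGInfo(MI.getOpcode())) {
//     int NewOpc = getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding,
//                                NewVDataDwords, Info->VAddrDwords);
//     // NewOpc is -1 if no such encoding exists.
//   }
// \endcode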

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);

LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);
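
// Illustrative sketch (not from the original header): querying the
// "amdgpu-flat-work-group-size" attribute, which uses the "first[,second]"
// format described above. The default range {1, 1024} is an assumed example.
//
// \code
//   std::pair<int, int> Range = getIntegerPairAttribute(
//       F, "amdgpu-flat-work-group-size", /*Default=*/{1, 1024});
//   // Range.first/Range.second hold the requested minimum and maximum.
// \endcode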

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  static Waitcnt allZero(bool HasVscnt) {
    return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u);
  }
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  bool hasWaitExceptVsCnt() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u;
  }

  bool hasWaitVsCnt() const {
    return VsCnt != ~0u;
  }

  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
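
// Worked example (illustrative; bit layout per the comments above): with a
// pre-gfx9 IsaVersion, the encoding 0x0321 holds Vmcnt = 1 (bits [3:0]),
// Expcnt = 2 (bits [6:4]) and Lgkmcnt = 3 (bits [11:8]).
//
// \code
//   unsigned Vm, Exp, Lgkm;
//   decodeWaitcnt(Version, 0x0321, Vm, Exp, Lgkm);
//   // Vm == 1, Exp == 2, Lgkm == 3
// \endcode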

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name);

LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg
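
// Illustrative sketch (not from the original header); the field layout is
// assumed from SIDefines.h (id in bits [5:0], offset in bits [10:6],
// width-1 in bits [15:11]):
//
// \code
//   // Encode a read of the low 32 bits of the hardware register with id 1.
//   unsigned Encoded = Hwreg::encodeHwreg(/*Id=*/1, /*Offset=*/0, /*Width=*/32);
//   unsigned Id, Offset, Width;
//   Hwreg::decodeHwreg(Encoded, Id, Offset, Width);
// \endcode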

namespace Exp {

bool getTgtName(unsigned Id, StringRef &Name, int &Index);

LLVM_READONLY
unsigned getTgtId(const StringRef Name);

LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp

namespace MTBUFFormat {

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name);

StringRef getUnifiedFormatName(unsigned Id);

bool isValidUnifiedFormat(unsigned Val);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat

namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg
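
// Illustrative sketch (not from the original header); the s_sendmsg immediate
// layout is assumed from SIDefines.h (msg id in bits [3:0], operation in bits
// [6:4], stream id in bits [9:8]):
//
// \code
//   unsigned Imm = SendMsg::encodeMsg(MsgId, OpId, StreamId);
//   uint16_t DecodedMsgId, DecodedOpId, DecodedStreamId;
//   SendMsg::decodeMsg(Imm, DecodedMsgId, DecodedOpId, DecodedStreamId);
// \endcode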

unsigned getInitialPSInputAddr(const Function &F);

bool getHasColorExport(const Function &F);

bool getHasDepthExport(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// These functions are considered entrypoints into the current module, i.e.
// they are allowed to be called from outside the current module. This is
// different from isEntryFunctionCC, which is only true for functions that are
// entered by the hardware. Module entry points include all entry functions but
// also include functions that can be called from other functions inside or
// outside the current module. Module entry functions are allowed to allocate
// LDS.
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);

// FIXME: Remove this when calling conventions are cleaned up.
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasGFX10A16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);

/// Is \p Reg a scalar register?
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI);

/// Is there any intersection between registers \p Reg0 and \p Reg1?
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo *TRI);

/// If \p Reg is a pseudo register, return the correct hardware register given
/// \p STI, otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class ID \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get the size of a register operand.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}

/// Is this literal inlinable, and not one of the values intended for floating
/// point values?
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}

/// Is this literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
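
// Illustrative note (not from the original header): besides the [-16, 64]
// integer range, the inline-constant set admits a small list of FP values
// (0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0, and 1/(2*pi) when HasInv2Pi is set),
// which the isInlinableLiteral* helpers above test.
//
// \code
//   assert(isInlinableIntLiteral(64) && !isInlinableIntLiteral(65));
// \endcode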

bool isArgPassedInSGPR(const Argument *Arg);

LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer);

/// \returns The encoding that can be used for a 32-bit literal offset in an
/// SMRD instruction. This is only useful on CI.
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset);

/// For FLAT segment the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \returns The number of bits available for the offset field in flat
/// instructions.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);

/// \returns True if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
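
// Illustrative sketch (not from the original header): deciding whether a byte
// offset can be folded into an S_LOAD immediate.
//
// \code
//   if (Optional<int64_t> Enc =
//           getSMRDEncodedOffset(ST, ByteOffset, /*IsBuffer=*/false)) {
//     // *Enc already accounts for dword scaling on subtargets that need it.
//   }
// \endcode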

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));

LLVM_READNONE
inline bool isLegal64BitDPPControl(unsigned DC) {
  return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
}

/// \returns true if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering, and which
  /// quiet and propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and
  /// max_dx10 become IEEE 754-2008 compliant due to signaling NaN propagation
  /// and quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input nor output denormals are flushed for most
  /// f32 instructions.
  bool FP32InputDenormals : 1;
  bool FP32OutputDenormals : 1;

  /// If this is set, neither input nor output denormals are flushed for both
  /// f64 and f16/v2f16 instructions.
  bool FP64FP16InputDenormals : 1;
  bool FP64FP16OutputDenormals : 1;

  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32InputDenormals(true),
    FP32OutputDenormals(true),
    FP64FP16InputDenormals(true),
    FP64FP16OutputDenormals(true) {}

  SIModeRegisterDefaults(const Function &F);

  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    SIModeRegisterDefaults Mode;
    Mode.IEEE = !AMDGPU::isShader(CC);
    return Mode;
  }

  bool operator==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }

  bool allFP32Denormals() const {
    return FP32InputDenormals && FP32OutputDenormals;
  }

  bool allFP64FP16Denormals() const {
    return FP64FP16InputDenormals && FP64FP16OutputDenormals;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP32 denormal mode.
  uint32_t fpDenormModeSPValue() const {
    if (FP32InputDenormals && FP32OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP32InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP32OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP64/FP16 denormal mode.
  uint32_t fpDenormModeDPValue() const {
    if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP64FP16InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// \returns True if a flag is compatible, i.e. it has the same value in the
  /// caller and the callee, or it is enabled in the callee but disabled in the
  /// caller.
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
  // be able to override.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining denormals enabled into denormals flushed functions.
    return oneWayCompatible(FP64FP16InputDenormals,
                            CalleeMode.FP64FP16InputDenormals) &&
           oneWayCompatible(FP64FP16OutputDenormals,
                            CalleeMode.FP64FP16OutputDenormals) &&
           oneWayCompatible(FP32InputDenormals,
                            CalleeMode.FP32InputDenormals) &&
           oneWayCompatible(FP32OutputDenormals,
                            CalleeMode.FP32OutputDenormals);
  }
};

} // end namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H