//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"

struct amd_kernel_code_t;

namespace llvm {

struct Align;
class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;

namespace amdhsa {
struct kernel_descriptor_t;
}

namespace AMDGPU {

struct IsaVersion;

/// \returns HSA OS ABI Version identification.
Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
/// false otherwise.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);

/// One entry of the buffer-format lookup table queried via
/// getGcnBufferFormatInfo() below.
struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

/// State of a single target-ID component (xnack or sramecc).
enum class TargetIDSetting {
  Unsupported,
  Any,
  Off,
  On
};

/// Holds the xnack and sramecc components of a code-object target ID, each of
/// which is one of Unsupported / Any / Off / On.
class AMDGPUTargetID {
private:
  TargetIDSetting XnackSetting;
  TargetIDSetting SramEccSetting;

public:
  explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
  ~AMDGPUTargetID() = default;

  /// \return True if the current xnack setting is not "Unsupported".
  bool isXnackSupported() const {
    return XnackSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current xnack setting is "On" or "Any".
  bool isXnackOnOrAny() const {
    return XnackSetting == TargetIDSetting::On ||
           XnackSetting == TargetIDSetting::Any;
  }

  /// \returns True if current xnack setting is "On" or "Off",
  /// false otherwise.
  bool isXnackOnOrOff() const {
    return getXnackSetting() == TargetIDSetting::On ||
           getXnackSetting() == TargetIDSetting::Off;
  }

  /// \returns The current xnack TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getXnackSetting() const {
    return XnackSetting;
  }

  /// Sets xnack setting to \p NewXnackSetting.
  void setXnackSetting(TargetIDSetting NewXnackSetting) {
    XnackSetting = NewXnackSetting;
  }

  /// \return True if the current sramecc setting is not "Unsupported".
  bool isSramEccSupported() const {
    return SramEccSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current sramecc setting is "On" or "Any".
  bool isSramEccOnOrAny() const {
    return SramEccSetting == TargetIDSetting::On ||
           SramEccSetting == TargetIDSetting::Any;
  }

  /// \returns True if current sramecc setting is "On" or "Off",
  /// false otherwise.
  bool isSramEccOnOrOff() const {
    return getSramEccSetting() == TargetIDSetting::On ||
           getSramEccSetting() == TargetIDSetting::Off;
  }

  /// \returns The current sramecc TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getSramEccSetting() const {
    return SramEccSetting;
  }

  /// Sets sramecc setting to \p NewSramEccSetting.
  void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
    SramEccSetting = NewSramEccSetting;
  }

  /// Initialize both settings from the subtarget features string \p FS
  /// (defined out-of-line).
  void setTargetIDFromFeaturesString(StringRef FS);
  /// Initialize both settings from the target-ID string \p TargetID
  /// (defined out-of-line).
  void setTargetIDFromTargetIDStream(StringRef TargetID);
};

/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given \p
/// FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
255 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); 256 257 /// \returns Number of VGPR blocks needed for given subtarget \p STI when 258 /// \p NumVGPRs are used. 259 /// 260 /// For subtargets which support it, \p EnableWavefrontSize32 should match the 261 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. 262 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, 263 Optional<bool> EnableWavefrontSize32 = None); 264 265 } // end namespace IsaInfo 266 267 LLVM_READONLY 268 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); 269 270 LLVM_READONLY 271 int getSOPPWithRelaxation(uint16_t Opcode); 272 273 struct MIMGBaseOpcodeInfo { 274 MIMGBaseOpcode BaseOpcode; 275 bool Store; 276 bool Atomic; 277 bool AtomicX2; 278 bool Sampler; 279 bool Gather4; 280 281 uint8_t NumExtraArgs; 282 bool Gradients; 283 bool G16; 284 bool Coordinates; 285 bool LodOrClampOrMip; 286 bool HasD16; 287 }; 288 289 LLVM_READONLY 290 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode); 291 292 struct MIMGDimInfo { 293 MIMGDim Dim; 294 uint8_t NumCoords; 295 uint8_t NumGradients; 296 bool DA; 297 uint8_t Encoding; 298 const char *AsmSuffix; 299 }; 300 301 LLVM_READONLY 302 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum); 303 304 LLVM_READONLY 305 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc); 306 307 LLVM_READONLY 308 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix); 309 310 struct MIMGLZMappingInfo { 311 MIMGBaseOpcode L; 312 MIMGBaseOpcode LZ; 313 }; 314 315 struct MIMGMIPMappingInfo { 316 MIMGBaseOpcode MIP; 317 MIMGBaseOpcode NONMIP; 318 }; 319 320 struct MIMGG16MappingInfo { 321 MIMGBaseOpcode G; 322 MIMGBaseOpcode G16; 323 }; 324 325 LLVM_READONLY 326 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L); 327 328 LLVM_READONLY 329 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP); 330 331 LLVM_READONLY 332 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned 
G); 333 334 LLVM_READONLY 335 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, 336 unsigned VDataDwords, unsigned VAddrDwords); 337 338 LLVM_READONLY 339 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels); 340 341 struct MIMGInfo { 342 uint16_t Opcode; 343 uint16_t BaseOpcode; 344 uint8_t MIMGEncoding; 345 uint8_t VDataDwords; 346 uint8_t VAddrDwords; 347 }; 348 349 LLVM_READONLY 350 const MIMGInfo *getMIMGInfo(unsigned Opc); 351 352 LLVM_READONLY 353 int getMTBUFBaseOpcode(unsigned Opc); 354 355 LLVM_READONLY 356 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements); 357 358 LLVM_READONLY 359 int getMTBUFElements(unsigned Opc); 360 361 LLVM_READONLY 362 bool getMTBUFHasVAddr(unsigned Opc); 363 364 LLVM_READONLY 365 bool getMTBUFHasSrsrc(unsigned Opc); 366 367 LLVM_READONLY 368 bool getMTBUFHasSoffset(unsigned Opc); 369 370 LLVM_READONLY 371 int getMUBUFBaseOpcode(unsigned Opc); 372 373 LLVM_READONLY 374 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements); 375 376 LLVM_READONLY 377 int getMUBUFElements(unsigned Opc); 378 379 LLVM_READONLY 380 bool getMUBUFHasVAddr(unsigned Opc); 381 382 LLVM_READONLY 383 bool getMUBUFHasSrsrc(unsigned Opc); 384 385 LLVM_READONLY 386 bool getMUBUFHasSoffset(unsigned Opc); 387 388 LLVM_READONLY 389 bool getSMEMIsBuffer(unsigned Opc); 390 391 LLVM_READONLY 392 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, 393 uint8_t NumComponents, 394 uint8_t NumFormat, 395 const MCSubtargetInfo &STI); 396 LLVM_READONLY 397 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, 398 const MCSubtargetInfo &STI); 399 400 LLVM_READONLY 401 int getMCOpcode(uint16_t Opcode, unsigned Gen); 402 403 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, 404 const MCSubtargetInfo *STI); 405 406 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor( 407 const MCSubtargetInfo *STI); 408 409 bool isGroupSegment(const GlobalValue *GV); 410 bool isGlobalSegment(const GlobalValue *GV); 411 bool 
isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  // ~0u is the "don't care" sentinel for each counter.
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  /// \returns a Waitcnt waiting on all counters; VsCnt is waited on only when
  /// \p HasVscnt is true (otherwise it stays "don't care").
  static Waitcnt allZero(bool HasVscnt) {
    return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u);
  }
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  /// \returns true if any counter carries a real (non-sentinel) wait.
  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  /// \returns true if this wait is at least as strong as \p Other on every
  /// counter (smaller counts are stronger waits).
  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  /// \returns the component-wise strongest wait of this and \p Other.
  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

// Helpers for validating, encoding and decoding the s_getreg/s_setreg hwreg
// operand.
namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name);

LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg

// Helpers for export target operands.
namespace Exp {

bool getTgtName(unsigned Id, StringRef &Name, int &Index);

LLVM_READONLY
unsigned getTgtId(const StringRef Name);

LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp

// Helpers for the MTBUF data/numeric/unified format operands.
namespace MTBUFFormat {

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name);

StringRef getUnifiedFormatName(unsigned Id);

bool isValidUnifiedFormat(unsigned Val);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat

// Helpers for validating, encoding and decoding the s_sendmsg operand.
namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg


unsigned getInitialPSInputAddr(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// These functions are considered entrypoints into the current module, i.e. they
// are allowed to be called from outside the current module. This is different
// from isEntryFunctionCC, which is only true for functions that are entered by
// the hardware. Module entry points include all entry functions but also
// include functions that can be called from other functions inside or outside
// the current module. Module entry functions are allowed to allocate LDS.
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);

// FIXME: Remove this when calling conventions cleaned up
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasGFX10A16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

// Subtarget generation predicates.
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);

/// Is Reg - scalar register
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

/// Is there any intersection between registers
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);

/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get size of register operand
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

/// \returns the size in bytes of the literal encoding for the operand type
/// \p OpInfo (4 for 32-bit, 8 for 64-bit, 2 for 16-bit/packed-16 types).
LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

/// \returns the literal-encoding size in bytes of operand \p OpNo of \p Desc.
LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}

/// Is this literal inlinable, and not one of the values intended for floating
/// point values.
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}

/// Is this literal inlinable
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);

bool isArgPassedInSGPR(const Argument *Arg);

LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer);

/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset);

/// For FLAT segment the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the offset field in flat
/// instructions.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));

/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering quiet and
  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
  /// quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input or output denormals are flushed for most f32
  /// instructions.
  bool FP32InputDenormals : 1;
  bool FP32OutputDenormals : 1;

  /// If this is set, neither input or output denormals are flushed for both f64
  /// and f16/v2f16 instructions.
  bool FP64FP16InputDenormals : 1;
  bool FP64FP16OutputDenormals : 1;

  // Default: IEEE mode on, DX10 clamp on, all denormals enabled.
  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32InputDenormals(true),
    FP32OutputDenormals(true),
    FP64FP16InputDenormals(true),
    FP64FP16OutputDenormals(true) {}

  SIModeRegisterDefaults(const Function &F);

  /// \returns the default mode for calling convention \p CC: IEEE is disabled
  /// for shader calling conventions, everything else keeps the defaults.
  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    SIModeRegisterDefaults Mode;
    Mode.IEEE = !AMDGPU::isShader(CC);
    return Mode;
  }

  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }

  bool allFP32Denormals() const {
    return FP32InputDenormals && FP32OutputDenormals;
  }

  bool allFP64FP16Denormals() const {
    return FP64FP16InputDenormals && FP64FP16OutputDenormals;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for the
  /// FP32 denormal mode.
  uint32_t fpDenormModeSPValue() const {
    if (FP32InputDenormals && FP32OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP32InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP32OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for the
  /// FP64/FP16 denormal mode.
  uint32_t fpDenormModeDPValue() const {
    if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP64FP16InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Returns true if a flag is compatible: equal in caller and callee, or
  /// enabled in the callee but disabled in the caller.
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
  // be able to override.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining denormals enabled into denormals flushed functions.
    return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
           oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
           oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
           oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
  }
};

} // end namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H