//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <string>
#include <utility>

namespace llvm {

class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;

namespace AMDGPU {

struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);

/// \returns True if given subtarget \p STI supports code object version 3,
/// false otherwise.
bool hasCodeObjectV3(const MCSubtargetInfo *STI);

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget
/// \p STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget
/// \p STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget
/// \p STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given
/// \p FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
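// Illustrative sketch (not part of the upstream header): deriving the range
// of waves per EU a kernel can occupy. `STI` and `MaxFlatWorkGroupSize` are
// assumed to come from the caller; the combination below mirrors how these
// queries are typically used together.
//
//   // The largest flat work group implies a minimum number of waves per EU:
//   unsigned MinImplied =
//       IsaInfo::getWavesPerEUForWorkGroup(STI, MaxFlatWorkGroupSize);
//   unsigned MinWaves = std::max(IsaInfo::getMinWavesPerEU(STI), MinImplied);
//   unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(STI);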
/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget
/// \p STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget
/// \p STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
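// Illustrative sketch (not part of the upstream header): computing the
// SGPR/VGPR block counts a kernel descriptor would report. `STI` and the raw
// register counts are assumed inputs; per the getNumSGPRBlocks contract, the
// implicitly used special registers are added in first. getNumVGPRBlocks is
// declared just below.
//
//   unsigned Extra = IsaInfo::getNumExtraSGPRs(STI, /*VCCUsed=*/true,
//                                              /*FlatScrUsed=*/false);
//   unsigned SGPRBlocks = IsaInfo::getNumSGPRBlocks(STI, NumSGPRs + Extra);
//   unsigned VGPRBlocks = IsaInfo::getNumVGPRBlocks(STI, NumVGPRs);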
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32 = None);

} // end namespace IsaInfo

LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
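// Illustrative sketch (not part of the upstream header): rewriting a MIMG
// instruction to a different data width while keeping its addressing. The
// opcode `Opc` and `NewVDataDwords` are assumed inputs; getMIMGOpcode
// returns -1 when no instruction with that combination exists.
//
//   const MIMGInfo *Info = getMIMGInfo(Opc);
//   int NewOpc = getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding,
//                              NewVDataDwords, Info->VAddrDwords);
//   if (NewOpc != -1) {
//     ... // use NewOpc
//   }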
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is required unless
/// \p OnlyFirstRequired is true).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot
/// be converted to integer, or if \p OnlyFirstRequired is false and the
/// "second" value is not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  static Waitcnt allZero(const IsaVersion &Version) {
    return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
  }
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);
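// Illustrative sketch (not part of the upstream header): merging two pending
// waits. Because smaller counter values mean stricter waits, combined() takes
// the component-wise minimum, and dominates() checks that one wait is at
// least as strict as another in every component.
//
//   Waitcnt VmOnly(0, ~0u, ~0u, ~0u);         // wait until vmcnt reaches 0
//   Waitcnt LgkmOnly(~0u, ~0u, 0, ~0u);       // wait until lgkmcnt reaches 0
//   Waitcnt Both = VmOnly.combined(LgkmOnly); // waits on vmcnt and lgkmcnt
//   assert(Both.dominates(VmOnly) && Both.dominates(LgkmOnly));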
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);
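// Illustrative sketch (not part of the upstream header): a full encode/decode
// round trip using encodeWaitcnt (declared below). `STI` is an assumed
// subtarget; getIsaVersion comes from llvm/Support/TargetParser.h, which this
// header includes.
//
//   IsaVersion Version = getIsaVersion(STI.getCPU());
//   unsigned Enc = encodeWaitcnt(Version, /*Vmcnt=*/0, /*Expcnt=*/7,
//                                /*Lgkmcnt=*/0);
//   unsigned Vmcnt, Expcnt, Lgkmcnt;
//   decodeWaitcnt(Version, Enc, Vmcnt, Expcnt, Lgkmcnt);
//   // Vmcnt == 0, Expcnt == 7, Lgkmcnt == 0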
/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name);

LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg

namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg
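// Illustrative sketch (not part of the upstream header): validating and
// packing an s_sendmsg immediate from its symbolic parts. The message and
// operation names ("MSG_GS", "GS_OP_EMIT") are assumptions for illustration;
// the lookups return a negative sentinel on failure, so the validity checks
// below are the expected guard.
//
//   int64_t MsgId = SendMsg::getMsgId("MSG_GS");
//   int64_t OpId = SendMsg::getMsgOpId(MsgId, "GS_OP_EMIT");
//   if (SendMsg::isValidMsgId(MsgId, STI) &&
//       SendMsg::isValidMsgOp(MsgId, OpId)) {
//     uint64_t Imm = SendMsg::encodeMsg(MsgId, OpId, /*StreamId=*/0);
//     ...
//   }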
unsigned getInitialPSInputAddr(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// FIXME: Remove this when calling conventions are cleaned up.
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasGFX10A16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);

/// Is \p Reg a scalar register?
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

/// Is there any intersection between registers \p Reg0 and \p Reg1?
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);

/// If \p Reg is a pseudo register, return the correct hardware register for
/// \p STI; otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get the size of the register operand \p OpNo of \p Desc.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}

/// Is this literal inlinable as an integer, i.e. not one of the inline
/// encodings reserved for floating-point values (0.5, 1.0, etc.)?
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}

/// Is this literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);

bool isArgPassedInSGPR(const Argument *Arg);

LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);
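// Illustrative sketch (not part of the upstream header): choosing the
// immediate for an s_load offset. `ST` and `ByteOffset` are assumed inputs;
// getSMRDEncodedOffset (declared below) performs the byte-to-unit conversion
// and the legality check in one step, returning None when the offset does not
// fit.
//
//   if (Optional<int64_t> Enc =
//           getSMRDEncodedOffset(ST, ByteOffset, /*IsBuffer=*/false)) {
//     ... // *Enc is the value to place in the offset field
//   }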
/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer);

/// \return The encoding that can be used for a 32-bit literal offset in an
/// SMRD instruction. This is only useful on CI.
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));

/// \returns true if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
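// Illustrative sketch (not part of the upstream header): splitting a MUBUF
// address immediate into the part carried by SOffset and the part that fits
// the instruction's immediate offset field. `Imm` and `Subtarget` are
// assumed inputs; on success the two outputs reconstruct `Imm`.
//
//   uint32_t SOffset, ImmOffset;
//   if (splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget)) {
//     ... // materialize SOffset in a register, use ImmOffset as the
//         // immediate
//   }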
// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating-point opcodes that support exception flag gathering, and that
  /// quiet and propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and
  /// max_dx10 become IEEE 754-2008 compliant due to signaling NaN propagation
  /// and quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input nor output denormals are flushed for most
  /// f32 instructions.
  bool FP32InputDenormals : 1;
  bool FP32OutputDenormals : 1;

  /// If this is set, neither input nor output denormals are flushed for f64
  /// and f16/v2f16 instructions.
  bool FP64FP16InputDenormals : 1;
  bool FP64FP16OutputDenormals : 1;

  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32InputDenormals(true),
    FP32OutputDenormals(true),
    FP64FP16InputDenormals(true),
    FP64FP16OutputDenormals(true) {}

  SIModeRegisterDefaults(const Function &F);

  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    const bool IsCompute = AMDGPU::isCompute(CC);

    SIModeRegisterDefaults Mode;
    Mode.IEEE = IsCompute;
    return Mode;
  }

  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }

  bool allFP32Denormals() const {
    return FP32InputDenormals && FP32OutputDenormals;
  }

  bool allFP64FP16Denormals() const {
    return FP64FP16InputDenormals && FP64FP16OutputDenormals;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP32 denormal mode.
  uint32_t fpDenormModeSPValue() const {
    if (FP32InputDenormals && FP32OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP32InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP32OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP64/FP16 denormal mode.
  uint32_t fpDenormModeDPValue() const {
    if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP64FP16InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// \returns true if a mode flag is compatible: either the caller and callee
  /// match, or the flag is enabled in the callee but disabled in the caller.
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode
  // should be able to override.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining denormals-enabled code into denormals-flushed functions.
    return oneWayCompatible(FP64FP16InputDenormals,
                            CalleeMode.FP64FP16InputDenormals) &&
           oneWayCompatible(FP64FP16OutputDenormals,
                            CalleeMode.FP64FP16OutputDenormals) &&
           oneWayCompatible(FP32InputDenormals,
                            CalleeMode.FP32InputDenormals) &&
           oneWayCompatible(FP32OutputDenormals,
                            CalleeMode.FP32OutputDenormals);
  }
};

} // end namespace AMDGPU
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H