//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <string>
#include <utility>

namespace llvm {

class Argument;
class AMDGPUSubtarget;
class FeatureBitset;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCContext;
class MCRegisterClass;
class MCRegisterInfo;
class MCSection;
class MCSubtargetInfo;
class MachineMemOperand;
class Triple;

namespace AMDGPU {

struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);

/// \returns True if given subtarget \p STI supports code object version 3,
/// false otherwise.
bool hasCodeObjectV3(const MCSubtargetInfo *STI);

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Maximum number of waves per compute unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per compute unit for given subtarget \p
/// STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
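// A typical consumer clamps a requested occupancy (for example, a value taken
// from the "amdgpu-waves-per-eu" function attribute) into the subtarget's
// legal range. A minimal sketch, assuming STI is a valid subtarget and
// Requested is a hypothetical user-supplied value:
//
//   unsigned WavesPerEU =
//       std::min(std::max(Requested, getMinWavesPerEU(STI)),
//                getMaxWavesPerEU(STI));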
/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
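// Sketch of the intended call sequence when computing a kernel's SGPR
// resource usage (UsedSGPRs is a hypothetical count of statically used
// SGPRs, not an API in this header):
//
//   unsigned NumSGPRs =
//       UsedSGPRs + getNumExtraSGPRs(STI, /*VCCUsed=*/true,
//                                    /*FlatScrUsed=*/false);
//   unsigned SGPRBlocks = getNumSGPRBlocks(STI, NumSGPRs);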
/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32 = None);

} // end namespace IsaInfo

LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);
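// Sketch of how the MUBUF helpers compose when retargeting a buffer access to
// a different element count (Opc stands for any MUBUF load/store opcode; the
// element count here is an arbitrary example):
//
//   int Base = getMUBUFBaseOpcode(Opc);              // strip element variant
//   int NewOpc = getMUBUFOpcode(Base, /*Elements=*/3);
//   // A negative NewOpc would indicate no 3-element variant exists.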
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional if \p OnlyFirstRequired
/// is true).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  static Waitcnt allZero(const IsaVersion &Version) {
    return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
  }
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};
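// Since a smaller count is a stricter wait, combining two waits takes the
// minimum of each field, and "dominates" means "waits at least as hard".
// A small sketch:
//
//   Waitcnt A(/*VmCnt=*/0, ~0u, ~0u, ~0u);  // wait for all VM ops
//   Waitcnt B(~0u, /*ExpCnt=*/0, ~0u, ~0u); // wait for all exports
//   Waitcnt C = A.combined(B);              // VmCnt == 0 and ExpCnt == 0
//   bool Stricter = C.dominates(A);         // true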
/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
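// Worked example of the layout above, assuming a gfx9 IsaVersion: Vmcnt=3,
// Expcnt=2, Lgkmcnt=1 encodes to bits [3:0]=3, [6:4]=2, [11:8]=1, [15:14]=0,
// i.e. 0x123, and decoding 0x123 recovers the three counts.
//
//   unsigned Enc = encodeWaitcnt(Version, /*Vmcnt=*/3, /*Expcnt=*/2,
//                                /*Lgkmcnt=*/1);   // Enc == 0x123
//   unsigned Vm, Exp, Lgkm;
//   decodeWaitcnt(Version, Enc, Vm, Exp, Lgkm);    // 3, 2, 1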
namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name);

LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg

namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg
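// Round-trip sketch for an s_sendmsg immediate; the message and op names are
// illustrative, and validity should be checked with the predicates above:
//
//   int64_t MsgId = SendMsg::getMsgId("MSG_GS");
//   int64_t OpId = SendMsg::getMsgOpId(MsgId, "GS_OP_EMIT");
//   uint64_t Imm = SendMsg::encodeMsg(MsgId, OpId, /*StreamId=*/1);
//   uint16_t M, O, S;
//   SendMsg::decodeMsg(Imm, M, O, S); // recovers MsgId, OpId, 1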
unsigned getInitialPSInputAddr(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// FIXME: Remove this when calling conventions are cleaned up.
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);

/// Is \p Reg an SGPR (scalar register)?
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI);

/// Do registers \p Reg0 and \p Reg1 intersect?
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo *TRI);

/// If \p Reg is a pseudo register, return the correct hardware register given
/// \p STI; otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get the size of a register operand.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}

/// Is this literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

bool isArgPassedInSGPR(const Argument *Arg);

/// \returns The encoding that will be used for \p ByteOffset in the SMRD
/// offset field.
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, uint32_t Align = 4);

/// \returns true if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering quiet and
  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
  /// quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input nor output denormals are flushed for most
  /// f32 instructions.
  ///
  /// TODO: Split into separate input and output fields if necessary, matching
  /// the separate control bits the hardware actually provides.
  bool FP32Denormals : 1;

  /// If this is set, neither input nor output denormals are flushed for both
  /// f64 and f16/v2f16 instructions.
  bool FP64FP16Denormals : 1;

  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32Denormals(true),
    FP64FP16Denormals(true) {}

  // FIXME: Should not depend on the subtarget
  SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);

  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    const bool IsCompute = AMDGPU::isCompute(CC);

    SIModeRegisterDefaults Mode;
    Mode.DX10Clamp = true;
    Mode.IEEE = IsCompute;
    Mode.FP32Denormals = false; // FIXME: Should be on by default.
    Mode.FP64FP16Denormals = true;
    return Mode;
  }

  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32Denormals == Other.FP32Denormals &&
           FP64FP16Denormals == Other.FP64FP16Denormals;
  }

  /// Returns true if a mode flag is compatible for inlining: either the
  /// caller and callee agree, or the flag is enabled in the caller but
  /// disabled in the callee.
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (CallerMode && !CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode
  // should be able to override.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining a denormals-flushed callee into a caller with denormals
    // enabled.
    return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) &&
           oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals);
  }
};
} // end namespace AMDGPU
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H