1e8d8bef9SDimitry Andric //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //==-----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric /// \file 10e8d8bef9SDimitry Andric /// Base class for AMDGPU specific classes of TargetSubtarget. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 150b57cec5SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 160b57cec5SDimitry Andric 17e8d8bef9SDimitry Andric #include "llvm/IR/CallingConv.h" 18e8d8bef9SDimitry Andric #include "llvm/Support/Alignment.h" 1906c3fb27SDimitry Andric #include "llvm/TargetParser/Triple.h" 200b57cec5SDimitry Andric 210b57cec5SDimitry Andric namespace llvm { 220b57cec5SDimitry Andric 23e8d8bef9SDimitry Andric enum AMDGPUDwarfFlavour : unsigned; 24e8d8bef9SDimitry Andric class Function; 25e8d8bef9SDimitry Andric class Instruction; 26e8d8bef9SDimitry Andric class MachineFunction; 27e8d8bef9SDimitry Andric class TargetMachine; 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric class AMDGPUSubtarget { 300b57cec5SDimitry Andric public: 310b57cec5SDimitry Andric enum Generation { 32e8d8bef9SDimitry Andric INVALID = 0, 33e8d8bef9SDimitry Andric R600 = 1, 34e8d8bef9SDimitry Andric R700 = 2, 35e8d8bef9SDimitry Andric EVERGREEN = 3, 36e8d8bef9SDimitry Andric NORTHERN_ISLANDS = 4, 37e8d8bef9SDimitry Andric SOUTHERN_ISLANDS = 5, 38e8d8bef9SDimitry Andric SEA_ISLANDS = 6, 39e8d8bef9SDimitry Andric VOLCANIC_ISLANDS = 7, 40e8d8bef9SDimitry Andric GFX9 = 8, 4181ad6265SDimitry Andric GFX10 = 9, 425f757f3fSDimitry Andric GFX11 = 10, 435f757f3fSDimitry Andric GFX12 = 11, 440b57cec5SDimitry Andric }; 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric private: 470b57cec5SDimitry Andric Triple TargetTriple; 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric protected: 5081ad6265SDimitry Andric bool GCN3Encoding = false; 5181ad6265SDimitry Andric bool Has16BitInsts = false; 5281ad6265SDimitry Andric bool HasTrue16BitInsts = false; 535f757f3fSDimitry Andric bool EnableRealTrue16Insts = false; 5481ad6265SDimitry Andric bool HasMadMixInsts = false; 5581ad6265SDimitry Andric bool HasMadMacF32Insts = false; 5681ad6265SDimitry Andric bool HasDsSrc2Insts = false; 5781ad6265SDimitry Andric bool HasSDWA = false; 5881ad6265SDimitry Andric bool HasVOP3PInsts = false; 5981ad6265SDimitry Andric bool HasMulI24 = true; 6081ad6265SDimitry Andric bool HasMulU24 = true; 6181ad6265SDimitry Andric bool HasSMulHi = false; 6281ad6265SDimitry Andric bool HasInv2PiInlineImm = false; 6381ad6265SDimitry Andric bool HasFminFmaxLegacy = true; 6481ad6265SDimitry Andric bool EnablePromoteAlloca = false; 6581ad6265SDimitry Andric bool HasTrigReducedRange = false; 6606c3fb27SDimitry Andric bool FastFMAF32 = false; 67bdd1243dSDimitry Andric unsigned EUsPerCU = 4; 6881ad6265SDimitry Andric unsigned MaxWavesPerEU = 10; 6981ad6265SDimitry Andric unsigned LocalMemorySize = 0; 70bdd1243dSDimitry Andric unsigned AddressableLocalMemorySize = 0; 7181ad6265SDimitry Andric char WavefrontSizeLog2 = 0; 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric public: 74*0fca6ea1SDimitry Andric AMDGPUSubtarget(Triple TT); 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric static const AMDGPUSubtarget &get(const MachineFunction &MF); 770b57cec5SDimitry Andric static const AMDGPUSubtarget &get(const TargetMachine &TM, 780b57cec5SDimitry Andric const Function &F); 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric /// \returns Default range flat work group size for a calling convention. 810b57cec5SDimitry Andric std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric /// \returns Subtarget's default pair of minimum/maximum flat work group sizes 840b57cec5SDimitry Andric /// for function \p F, or minimum/maximum flat work group sizes explicitly 850b57cec5SDimitry Andric /// requested using "amdgpu-flat-work-group-size" attribute attached to 860b57cec5SDimitry Andric /// function \p F. 870b57cec5SDimitry Andric /// 880b57cec5SDimitry Andric /// \returns Subtarget's default values if explicitly requested values cannot 890b57cec5SDimitry Andric /// be converted to integer, or violate subtarget's specifications. 900b57cec5SDimitry Andric std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const; 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric /// \returns Subtarget's default pair of minimum/maximum number of waves per 930b57cec5SDimitry Andric /// execution unit for function \p F, or minimum/maximum number of waves per 940b57cec5SDimitry Andric /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute 950b57cec5SDimitry Andric /// attached to function \p F. 960b57cec5SDimitry Andric /// 970b57cec5SDimitry Andric /// \returns Subtarget's default values if explicitly requested values cannot 980b57cec5SDimitry Andric /// be converted to integer, violate subtarget's specifications, or are not 990b57cec5SDimitry Andric /// compatible with minimum/maximum number of waves limited by flat work group 1000b57cec5SDimitry Andric /// size, register usage, and/or lds usage. getWavesPerEU(const Function & F)101349cc55cSDimitry Andric std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const { 102349cc55cSDimitry Andric // Default/requested minimum/maximum flat work group sizes. 103349cc55cSDimitry Andric std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F); 104349cc55cSDimitry Andric return getWavesPerEU(F, FlatWorkGroupSizes); 105349cc55cSDimitry Andric } 106349cc55cSDimitry Andric 107349cc55cSDimitry Andric /// Overload which uses the specified values for the flat work group sizes, 108349cc55cSDimitry Andric /// rather than querying the function itself. \p FlatWorkGroupSizes Should 109349cc55cSDimitry Andric /// correspond to the function's value for getFlatWorkGroupSizes. 110349cc55cSDimitry Andric std::pair<unsigned, unsigned> 111349cc55cSDimitry Andric getWavesPerEU(const Function &F, 112349cc55cSDimitry Andric std::pair<unsigned, unsigned> FlatWorkGroupSizes) const; 11306c3fb27SDimitry Andric std::pair<unsigned, unsigned> getEffectiveWavesPerEU( 11406c3fb27SDimitry Andric std::pair<unsigned, unsigned> WavesPerEU, 11506c3fb27SDimitry Andric std::pair<unsigned, unsigned> FlatWorkGroupSizes) const; 1160b57cec5SDimitry Andric 1170b57cec5SDimitry Andric /// Return the amount of LDS that can be used that will not restrict the 1180b57cec5SDimitry Andric /// occupancy lower than WaveCount. 1190b57cec5SDimitry Andric unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 1200b57cec5SDimitry Andric const Function &) const; 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if 1230b57cec5SDimitry Andric /// the given LDS memory size is the only constraint. 1240b57cec5SDimitry Andric unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; 1270b57cec5SDimitry Andric isAmdHsaOS()1280b57cec5SDimitry Andric bool isAmdHsaOS() const { 1290b57cec5SDimitry Andric return TargetTriple.getOS() == Triple::AMDHSA; 1300b57cec5SDimitry Andric } 1310b57cec5SDimitry Andric isAmdPalOS()1320b57cec5SDimitry Andric bool isAmdPalOS() const { 1330b57cec5SDimitry Andric return TargetTriple.getOS() == Triple::AMDPAL; 1340b57cec5SDimitry Andric } 1350b57cec5SDimitry Andric isMesa3DOS()1360b57cec5SDimitry Andric bool isMesa3DOS() const { 1370b57cec5SDimitry Andric return TargetTriple.getOS() == Triple::Mesa3D; 1380b57cec5SDimitry Andric } 1390b57cec5SDimitry Andric 140e8d8bef9SDimitry Andric bool isMesaKernel(const Function &F) const; 1410b57cec5SDimitry Andric isAmdHsaOrMesa(const Function & F)1420b57cec5SDimitry Andric bool isAmdHsaOrMesa(const Function &F) const { 1430b57cec5SDimitry Andric return isAmdHsaOS() || isMesaKernel(F); 1440b57cec5SDimitry Andric } 1450b57cec5SDimitry Andric isGCN()1465ffd83dbSDimitry Andric bool isGCN() const { 1475ffd83dbSDimitry Andric return TargetTriple.getArch() == Triple::amdgcn; 1485ffd83dbSDimitry Andric } 1495ffd83dbSDimitry Andric isGCN3Encoding()150fe6060f1SDimitry Andric bool isGCN3Encoding() const { 151fe6060f1SDimitry Andric return GCN3Encoding; 152fe6060f1SDimitry Andric } 153fe6060f1SDimitry Andric has16BitInsts()1540b57cec5SDimitry Andric bool has16BitInsts() const { 1550b57cec5SDimitry Andric return Has16BitInsts; 1560b57cec5SDimitry Andric } 1570b57cec5SDimitry Andric 1585f757f3fSDimitry Andric /// Return true if the subtarget supports True16 instructions. hasTrue16BitInsts()15981ad6265SDimitry Andric bool hasTrue16BitInsts() const { return HasTrue16BitInsts; } 16081ad6265SDimitry Andric 1615f757f3fSDimitry Andric /// Return true if real (non-fake) variants of True16 instructions using 1625f757f3fSDimitry Andric /// 16-bit registers should be code-generated. Fake True16 instructions are 1635f757f3fSDimitry Andric /// identical to non-fake ones except that they take 32-bit registers as 1645f757f3fSDimitry Andric /// operands and always use their low halves. 1655f757f3fSDimitry Andric // TODO: Remove and use hasTrue16BitInsts() instead once True16 is fully 1665f757f3fSDimitry Andric // supported and the support for fake True16 instructions is removed. 1675f757f3fSDimitry Andric bool useRealTrue16Insts() const; 1685f757f3fSDimitry Andric hasMadMixInsts()1690b57cec5SDimitry Andric bool hasMadMixInsts() const { 1700b57cec5SDimitry Andric return HasMadMixInsts; 1710b57cec5SDimitry Andric } 1720b57cec5SDimitry Andric hasMadMacF32Insts()1735ffd83dbSDimitry Andric bool hasMadMacF32Insts() const { 1745ffd83dbSDimitry Andric return HasMadMacF32Insts || !isGCN(); 1750b57cec5SDimitry Andric } 1760b57cec5SDimitry Andric hasDsSrc2Insts()1775ffd83dbSDimitry Andric bool hasDsSrc2Insts() const { 1785ffd83dbSDimitry Andric return HasDsSrc2Insts; 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric hasSDWA()1810b57cec5SDimitry Andric bool hasSDWA() const { 1820b57cec5SDimitry Andric return HasSDWA; 1830b57cec5SDimitry Andric } 1840b57cec5SDimitry Andric hasVOP3PInsts()1850b57cec5SDimitry Andric bool hasVOP3PInsts() const { 1860b57cec5SDimitry Andric return HasVOP3PInsts; 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric hasMulI24()1890b57cec5SDimitry Andric bool hasMulI24() const { 1900b57cec5SDimitry Andric return HasMulI24; 1910b57cec5SDimitry Andric } 1920b57cec5SDimitry Andric hasMulU24()1930b57cec5SDimitry Andric bool hasMulU24() const { 1940b57cec5SDimitry Andric return HasMulU24; 1950b57cec5SDimitry Andric } 1960b57cec5SDimitry Andric hasSMulHi()197fe6060f1SDimitry Andric bool hasSMulHi() const { 198fe6060f1SDimitry Andric return HasSMulHi; 199fe6060f1SDimitry Andric } 200fe6060f1SDimitry Andric hasInv2PiInlineImm()2010b57cec5SDimitry Andric bool hasInv2PiInlineImm() const { 2020b57cec5SDimitry Andric return HasInv2PiInlineImm; 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric hasFminFmaxLegacy()2050b57cec5SDimitry Andric bool hasFminFmaxLegacy() const { 2060b57cec5SDimitry Andric return HasFminFmaxLegacy; 2070b57cec5SDimitry Andric } 2080b57cec5SDimitry Andric hasTrigReducedRange()2090b57cec5SDimitry Andric bool hasTrigReducedRange() const { 2100b57cec5SDimitry Andric return HasTrigReducedRange; 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric hasFastFMAF32()21306c3fb27SDimitry Andric bool hasFastFMAF32() const { 21406c3fb27SDimitry Andric return FastFMAF32; 21506c3fb27SDimitry Andric } 21606c3fb27SDimitry Andric isPromoteAllocaEnabled()2170b57cec5SDimitry Andric bool isPromoteAllocaEnabled() const { 2180b57cec5SDimitry Andric return EnablePromoteAlloca; 2190b57cec5SDimitry Andric } 2200b57cec5SDimitry Andric getWavefrontSize()2210b57cec5SDimitry Andric unsigned getWavefrontSize() const { 2225ffd83dbSDimitry Andric return 1 << WavefrontSizeLog2; 2235ffd83dbSDimitry Andric } 2245ffd83dbSDimitry Andric getWavefrontSizeLog2()2255ffd83dbSDimitry Andric unsigned getWavefrontSizeLog2() const { 2265ffd83dbSDimitry Andric return WavefrontSizeLog2; 2270b57cec5SDimitry Andric } 2280b57cec5SDimitry Andric getLocalMemorySize()229e8d8bef9SDimitry Andric unsigned getLocalMemorySize() const { 2300b57cec5SDimitry Andric return LocalMemorySize; 2310b57cec5SDimitry Andric } 2320b57cec5SDimitry Andric getAddressableLocalMemorySize()233bdd1243dSDimitry Andric unsigned getAddressableLocalMemorySize() const { 234bdd1243dSDimitry Andric return AddressableLocalMemorySize; 235bdd1243dSDimitry Andric } 236bdd1243dSDimitry Andric 237bdd1243dSDimitry Andric /// Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the 238bdd1243dSDimitry Andric /// "CU" is the unit onto which workgroups are mapped. This takes WGP mode vs. 239bdd1243dSDimitry Andric /// CU mode into account. getEUsPerCU()240bdd1243dSDimitry Andric unsigned getEUsPerCU() const { return EUsPerCU; } 241bdd1243dSDimitry Andric getAlignmentForImplicitArgPtr()2428bcb0991SDimitry Andric Align getAlignmentForImplicitArgPtr() const { 2438bcb0991SDimitry Andric return isAmdHsaOS() ? Align(8) : Align(4); 2440b57cec5SDimitry Andric } 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric /// Returns the offset in bytes from the start of the input buffer 2470b57cec5SDimitry Andric /// of the first explicit kernel argument. getExplicitKernelArgOffset()24806c3fb27SDimitry Andric unsigned getExplicitKernelArgOffset() const { 24904eeddc0SDimitry Andric switch (TargetTriple.getOS()) { 25004eeddc0SDimitry Andric case Triple::AMDHSA: 25104eeddc0SDimitry Andric case Triple::AMDPAL: 25204eeddc0SDimitry Andric case Triple::Mesa3D: 25304eeddc0SDimitry Andric return 0; 25404eeddc0SDimitry Andric case Triple::UnknownOS: 25504eeddc0SDimitry Andric default: 25604eeddc0SDimitry Andric // For legacy reasons unknown/other is treated as a different version of 25704eeddc0SDimitry Andric // mesa. 25804eeddc0SDimitry Andric return 36; 25904eeddc0SDimitry Andric } 26004eeddc0SDimitry Andric 26104eeddc0SDimitry Andric llvm_unreachable("invalid triple OS"); 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric /// \returns Maximum number of work groups per compute unit supported by the 2650b57cec5SDimitry Andric /// subtarget and limited by given \p FlatWorkGroupSize. 2660b57cec5SDimitry Andric virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0; 2670b57cec5SDimitry Andric 2680b57cec5SDimitry Andric /// \returns Minimum flat work group size supported by the subtarget. 2690b57cec5SDimitry Andric virtual unsigned getMinFlatWorkGroupSize() const = 0; 2700b57cec5SDimitry Andric 2710b57cec5SDimitry Andric /// \returns Maximum flat work group size supported by the subtarget. 2720b57cec5SDimitry Andric virtual unsigned getMaxFlatWorkGroupSize() const = 0; 2730b57cec5SDimitry Andric 2745ffd83dbSDimitry Andric /// \returns Number of waves per execution unit required to support the given 2755ffd83dbSDimitry Andric /// \p FlatWorkGroupSize. 2765ffd83dbSDimitry Andric virtual unsigned 2775ffd83dbSDimitry Andric getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0; 2780b57cec5SDimitry Andric 2790b57cec5SDimitry Andric /// \returns Minimum number of waves per execution unit supported by the 2800b57cec5SDimitry Andric /// subtarget. 2810b57cec5SDimitry Andric virtual unsigned getMinWavesPerEU() const = 0; 2820b57cec5SDimitry Andric 2838bcb0991SDimitry Andric /// \returns Maximum number of waves per execution unit supported by the 2848bcb0991SDimitry Andric /// subtarget without any kind of limitation. getMaxWavesPerEU()2858bcb0991SDimitry Andric unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; } 2860b57cec5SDimitry Andric 287e8d8bef9SDimitry Andric /// Return the maximum workitem ID value in the function, for the given (0, 1, 288e8d8bef9SDimitry Andric /// 2) dimension. 289e8d8bef9SDimitry Andric unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const; 290e8d8bef9SDimitry Andric 291*0fca6ea1SDimitry Andric /// Return the number of work groups for the function. 292*0fca6ea1SDimitry Andric SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) const; 293*0fca6ea1SDimitry Andric 29406c3fb27SDimitry Andric /// Return true if only a single workitem can be active in a wave. 29506c3fb27SDimitry Andric bool isSingleLaneExecution(const Function &Kernel) const; 29606c3fb27SDimitry Andric 297e8d8bef9SDimitry Andric /// Creates value range metadata on an workitemid.* intrinsic call or load. 2980b57cec5SDimitry Andric bool makeLIDRangeMetadata(Instruction *I) const; 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric /// \returns Number of bytes of arguments that are passed to a shader or 3010b57cec5SDimitry Andric /// kernel in addition to the explicit ones declared for the function. 302e8d8bef9SDimitry Andric unsigned getImplicitArgNumBytes(const Function &F) const; 3038bcb0991SDimitry Andric uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const; 3048bcb0991SDimitry Andric unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const; 3050b57cec5SDimitry Andric 306349cc55cSDimitry Andric /// \returns Corresponding DWARF register number mapping flavour for the 3075ffd83dbSDimitry Andric /// \p WavefrontSize. 308e8d8bef9SDimitry Andric AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const; 3095ffd83dbSDimitry Andric 31081ad6265SDimitry Andric virtual ~AMDGPUSubtarget() = default; 3110b57cec5SDimitry Andric }; 3120b57cec5SDimitry Andric 3130b57cec5SDimitry Andric } // end namespace llvm 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 316