xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Base class for AMDGPU specific classes of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
150b57cec5SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
160b57cec5SDimitry Andric 
17e8d8bef9SDimitry Andric #include "llvm/IR/CallingConv.h"
18e8d8bef9SDimitry Andric #include "llvm/Support/Alignment.h"
1906c3fb27SDimitry Andric #include "llvm/TargetParser/Triple.h"
200b57cec5SDimitry Andric 
210b57cec5SDimitry Andric namespace llvm {
220b57cec5SDimitry Andric 
// Forward declarations: this header only names these types in declarations
// (references, pointers and a by-value enum return), so their full
// definitions are not required here.
enum AMDGPUDwarfFlavour : unsigned;
class Function;
class Instruction;
class MachineFunction;
class TargetMachine;
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric class AMDGPUSubtarget {
300b57cec5SDimitry Andric public:
310b57cec5SDimitry Andric   enum Generation {
32e8d8bef9SDimitry Andric     INVALID = 0,
33e8d8bef9SDimitry Andric     R600 = 1,
34e8d8bef9SDimitry Andric     R700 = 2,
35e8d8bef9SDimitry Andric     EVERGREEN = 3,
36e8d8bef9SDimitry Andric     NORTHERN_ISLANDS = 4,
37e8d8bef9SDimitry Andric     SOUTHERN_ISLANDS = 5,
38e8d8bef9SDimitry Andric     SEA_ISLANDS = 6,
39e8d8bef9SDimitry Andric     VOLCANIC_ISLANDS = 7,
40e8d8bef9SDimitry Andric     GFX9 = 8,
4181ad6265SDimitry Andric     GFX10 = 9,
425f757f3fSDimitry Andric     GFX11 = 10,
435f757f3fSDimitry Andric     GFX12 = 11,
440b57cec5SDimitry Andric   };
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric private:
470b57cec5SDimitry Andric   Triple TargetTriple;
480b57cec5SDimitry Andric 
490b57cec5SDimitry Andric protected:
5081ad6265SDimitry Andric   bool GCN3Encoding = false;
5181ad6265SDimitry Andric   bool Has16BitInsts = false;
5281ad6265SDimitry Andric   bool HasTrue16BitInsts = false;
535f757f3fSDimitry Andric   bool EnableRealTrue16Insts = false;
5481ad6265SDimitry Andric   bool HasMadMixInsts = false;
5581ad6265SDimitry Andric   bool HasMadMacF32Insts = false;
5681ad6265SDimitry Andric   bool HasDsSrc2Insts = false;
5781ad6265SDimitry Andric   bool HasSDWA = false;
5881ad6265SDimitry Andric   bool HasVOP3PInsts = false;
5981ad6265SDimitry Andric   bool HasMulI24 = true;
6081ad6265SDimitry Andric   bool HasMulU24 = true;
6181ad6265SDimitry Andric   bool HasSMulHi = false;
6281ad6265SDimitry Andric   bool HasInv2PiInlineImm = false;
6381ad6265SDimitry Andric   bool HasFminFmaxLegacy = true;
6481ad6265SDimitry Andric   bool EnablePromoteAlloca = false;
6581ad6265SDimitry Andric   bool HasTrigReducedRange = false;
6606c3fb27SDimitry Andric   bool FastFMAF32 = false;
67bdd1243dSDimitry Andric   unsigned EUsPerCU = 4;
6881ad6265SDimitry Andric   unsigned MaxWavesPerEU = 10;
6981ad6265SDimitry Andric   unsigned LocalMemorySize = 0;
70bdd1243dSDimitry Andric   unsigned AddressableLocalMemorySize = 0;
7181ad6265SDimitry Andric   char WavefrontSizeLog2 = 0;
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric public:
74*0fca6ea1SDimitry Andric   AMDGPUSubtarget(Triple TT);
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric   static const AMDGPUSubtarget &get(const MachineFunction &MF);
770b57cec5SDimitry Andric   static const AMDGPUSubtarget &get(const TargetMachine &TM,
780b57cec5SDimitry Andric                                     const Function &F);
790b57cec5SDimitry Andric 
800b57cec5SDimitry Andric   /// \returns Default range flat work group size for a calling convention.
810b57cec5SDimitry Andric   std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
840b57cec5SDimitry Andric   /// for function \p F, or minimum/maximum flat work group sizes explicitly
850b57cec5SDimitry Andric   /// requested using "amdgpu-flat-work-group-size" attribute attached to
860b57cec5SDimitry Andric   /// function \p F.
870b57cec5SDimitry Andric   ///
880b57cec5SDimitry Andric   /// \returns Subtarget's default values if explicitly requested values cannot
890b57cec5SDimitry Andric   /// be converted to integer, or violate subtarget's specifications.
900b57cec5SDimitry Andric   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric   /// \returns Subtarget's default pair of minimum/maximum number of waves per
930b57cec5SDimitry Andric   /// execution unit for function \p F, or minimum/maximum number of waves per
940b57cec5SDimitry Andric   /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
950b57cec5SDimitry Andric   /// attached to function \p F.
960b57cec5SDimitry Andric   ///
970b57cec5SDimitry Andric   /// \returns Subtarget's default values if explicitly requested values cannot
980b57cec5SDimitry Andric   /// be converted to integer, violate subtarget's specifications, or are not
990b57cec5SDimitry Andric   /// compatible with minimum/maximum number of waves limited by flat work group
1000b57cec5SDimitry Andric   /// size, register usage, and/or lds usage.
getWavesPerEU(const Function & F)101349cc55cSDimitry Andric   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const {
102349cc55cSDimitry Andric     // Default/requested minimum/maximum flat work group sizes.
103349cc55cSDimitry Andric     std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
104349cc55cSDimitry Andric     return getWavesPerEU(F, FlatWorkGroupSizes);
105349cc55cSDimitry Andric   }
106349cc55cSDimitry Andric 
107349cc55cSDimitry Andric   /// Overload which uses the specified values for the flat work group sizes,
108349cc55cSDimitry Andric   /// rather than querying the function itself. \p FlatWorkGroupSizes Should
109349cc55cSDimitry Andric   /// correspond to the function's value for getFlatWorkGroupSizes.
110349cc55cSDimitry Andric   std::pair<unsigned, unsigned>
111349cc55cSDimitry Andric   getWavesPerEU(const Function &F,
112349cc55cSDimitry Andric                 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
11306c3fb27SDimitry Andric   std::pair<unsigned, unsigned> getEffectiveWavesPerEU(
11406c3fb27SDimitry Andric       std::pair<unsigned, unsigned> WavesPerEU,
11506c3fb27SDimitry Andric       std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
1160b57cec5SDimitry Andric 
1170b57cec5SDimitry Andric   /// Return the amount of LDS that can be used that will not restrict the
1180b57cec5SDimitry Andric   /// occupancy lower than WaveCount.
1190b57cec5SDimitry Andric   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
1200b57cec5SDimitry Andric                                            const Function &) const;
1210b57cec5SDimitry Andric 
1220b57cec5SDimitry Andric   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
1230b57cec5SDimitry Andric   /// the given LDS memory size is the only constraint.
1240b57cec5SDimitry Andric   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
1250b57cec5SDimitry Andric 
1260b57cec5SDimitry Andric   unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
1270b57cec5SDimitry Andric 
isAmdHsaOS()1280b57cec5SDimitry Andric   bool isAmdHsaOS() const {
1290b57cec5SDimitry Andric     return TargetTriple.getOS() == Triple::AMDHSA;
1300b57cec5SDimitry Andric   }
1310b57cec5SDimitry Andric 
isAmdPalOS()1320b57cec5SDimitry Andric   bool isAmdPalOS() const {
1330b57cec5SDimitry Andric     return TargetTriple.getOS() == Triple::AMDPAL;
1340b57cec5SDimitry Andric   }
1350b57cec5SDimitry Andric 
isMesa3DOS()1360b57cec5SDimitry Andric   bool isMesa3DOS() const {
1370b57cec5SDimitry Andric     return TargetTriple.getOS() == Triple::Mesa3D;
1380b57cec5SDimitry Andric   }
1390b57cec5SDimitry Andric 
140e8d8bef9SDimitry Andric   bool isMesaKernel(const Function &F) const;
1410b57cec5SDimitry Andric 
isAmdHsaOrMesa(const Function & F)1420b57cec5SDimitry Andric   bool isAmdHsaOrMesa(const Function &F) const {
1430b57cec5SDimitry Andric     return isAmdHsaOS() || isMesaKernel(F);
1440b57cec5SDimitry Andric   }
1450b57cec5SDimitry Andric 
isGCN()1465ffd83dbSDimitry Andric   bool isGCN() const {
1475ffd83dbSDimitry Andric     return TargetTriple.getArch() == Triple::amdgcn;
1485ffd83dbSDimitry Andric   }
1495ffd83dbSDimitry Andric 
isGCN3Encoding()150fe6060f1SDimitry Andric   bool isGCN3Encoding() const {
151fe6060f1SDimitry Andric     return GCN3Encoding;
152fe6060f1SDimitry Andric   }
153fe6060f1SDimitry Andric 
has16BitInsts()1540b57cec5SDimitry Andric   bool has16BitInsts() const {
1550b57cec5SDimitry Andric     return Has16BitInsts;
1560b57cec5SDimitry Andric   }
1570b57cec5SDimitry Andric 
1585f757f3fSDimitry Andric   /// Return true if the subtarget supports True16 instructions.
hasTrue16BitInsts()15981ad6265SDimitry Andric   bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
16081ad6265SDimitry Andric 
1615f757f3fSDimitry Andric   /// Return true if real (non-fake) variants of True16 instructions using
1625f757f3fSDimitry Andric   /// 16-bit registers should be code-generated. Fake True16 instructions are
1635f757f3fSDimitry Andric   /// identical to non-fake ones except that they take 32-bit registers as
1645f757f3fSDimitry Andric   /// operands and always use their low halves.
1655f757f3fSDimitry Andric   // TODO: Remove and use hasTrue16BitInsts() instead once True16 is fully
1665f757f3fSDimitry Andric   // supported and the support for fake True16 instructions is removed.
1675f757f3fSDimitry Andric   bool useRealTrue16Insts() const;
1685f757f3fSDimitry Andric 
hasMadMixInsts()1690b57cec5SDimitry Andric   bool hasMadMixInsts() const {
1700b57cec5SDimitry Andric     return HasMadMixInsts;
1710b57cec5SDimitry Andric   }
1720b57cec5SDimitry Andric 
hasMadMacF32Insts()1735ffd83dbSDimitry Andric   bool hasMadMacF32Insts() const {
1745ffd83dbSDimitry Andric     return HasMadMacF32Insts || !isGCN();
1750b57cec5SDimitry Andric   }
1760b57cec5SDimitry Andric 
hasDsSrc2Insts()1775ffd83dbSDimitry Andric   bool hasDsSrc2Insts() const {
1785ffd83dbSDimitry Andric     return HasDsSrc2Insts;
1790b57cec5SDimitry Andric   }
1800b57cec5SDimitry Andric 
hasSDWA()1810b57cec5SDimitry Andric   bool hasSDWA() const {
1820b57cec5SDimitry Andric     return HasSDWA;
1830b57cec5SDimitry Andric   }
1840b57cec5SDimitry Andric 
hasVOP3PInsts()1850b57cec5SDimitry Andric   bool hasVOP3PInsts() const {
1860b57cec5SDimitry Andric     return HasVOP3PInsts;
1870b57cec5SDimitry Andric   }
1880b57cec5SDimitry Andric 
hasMulI24()1890b57cec5SDimitry Andric   bool hasMulI24() const {
1900b57cec5SDimitry Andric     return HasMulI24;
1910b57cec5SDimitry Andric   }
1920b57cec5SDimitry Andric 
hasMulU24()1930b57cec5SDimitry Andric   bool hasMulU24() const {
1940b57cec5SDimitry Andric     return HasMulU24;
1950b57cec5SDimitry Andric   }
1960b57cec5SDimitry Andric 
hasSMulHi()197fe6060f1SDimitry Andric   bool hasSMulHi() const {
198fe6060f1SDimitry Andric     return HasSMulHi;
199fe6060f1SDimitry Andric   }
200fe6060f1SDimitry Andric 
hasInv2PiInlineImm()2010b57cec5SDimitry Andric   bool hasInv2PiInlineImm() const {
2020b57cec5SDimitry Andric     return HasInv2PiInlineImm;
2030b57cec5SDimitry Andric   }
2040b57cec5SDimitry Andric 
hasFminFmaxLegacy()2050b57cec5SDimitry Andric   bool hasFminFmaxLegacy() const {
2060b57cec5SDimitry Andric     return HasFminFmaxLegacy;
2070b57cec5SDimitry Andric   }
2080b57cec5SDimitry Andric 
hasTrigReducedRange()2090b57cec5SDimitry Andric   bool hasTrigReducedRange() const {
2100b57cec5SDimitry Andric     return HasTrigReducedRange;
2110b57cec5SDimitry Andric   }
2120b57cec5SDimitry Andric 
hasFastFMAF32()21306c3fb27SDimitry Andric   bool hasFastFMAF32() const {
21406c3fb27SDimitry Andric     return FastFMAF32;
21506c3fb27SDimitry Andric   }
21606c3fb27SDimitry Andric 
isPromoteAllocaEnabled()2170b57cec5SDimitry Andric   bool isPromoteAllocaEnabled() const {
2180b57cec5SDimitry Andric     return EnablePromoteAlloca;
2190b57cec5SDimitry Andric   }
2200b57cec5SDimitry Andric 
getWavefrontSize()2210b57cec5SDimitry Andric   unsigned getWavefrontSize() const {
2225ffd83dbSDimitry Andric     return 1 << WavefrontSizeLog2;
2235ffd83dbSDimitry Andric   }
2245ffd83dbSDimitry Andric 
getWavefrontSizeLog2()2255ffd83dbSDimitry Andric   unsigned getWavefrontSizeLog2() const {
2265ffd83dbSDimitry Andric     return WavefrontSizeLog2;
2270b57cec5SDimitry Andric   }
2280b57cec5SDimitry Andric 
getLocalMemorySize()229e8d8bef9SDimitry Andric   unsigned getLocalMemorySize() const {
2300b57cec5SDimitry Andric     return LocalMemorySize;
2310b57cec5SDimitry Andric   }
2320b57cec5SDimitry Andric 
getAddressableLocalMemorySize()233bdd1243dSDimitry Andric   unsigned getAddressableLocalMemorySize() const {
234bdd1243dSDimitry Andric     return AddressableLocalMemorySize;
235bdd1243dSDimitry Andric   }
236bdd1243dSDimitry Andric 
237bdd1243dSDimitry Andric   /// Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the
238bdd1243dSDimitry Andric   /// "CU" is the unit onto which workgroups are mapped. This takes WGP mode vs.
239bdd1243dSDimitry Andric   /// CU mode into account.
getEUsPerCU()240bdd1243dSDimitry Andric   unsigned getEUsPerCU() const { return EUsPerCU; }
241bdd1243dSDimitry Andric 
getAlignmentForImplicitArgPtr()2428bcb0991SDimitry Andric   Align getAlignmentForImplicitArgPtr() const {
2438bcb0991SDimitry Andric     return isAmdHsaOS() ? Align(8) : Align(4);
2440b57cec5SDimitry Andric   }
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric   /// Returns the offset in bytes from the start of the input buffer
2470b57cec5SDimitry Andric   ///        of the first explicit kernel argument.
getExplicitKernelArgOffset()24806c3fb27SDimitry Andric   unsigned getExplicitKernelArgOffset() const {
24904eeddc0SDimitry Andric     switch (TargetTriple.getOS()) {
25004eeddc0SDimitry Andric     case Triple::AMDHSA:
25104eeddc0SDimitry Andric     case Triple::AMDPAL:
25204eeddc0SDimitry Andric     case Triple::Mesa3D:
25304eeddc0SDimitry Andric       return 0;
25404eeddc0SDimitry Andric     case Triple::UnknownOS:
25504eeddc0SDimitry Andric     default:
25604eeddc0SDimitry Andric       // For legacy reasons unknown/other is treated as a different version of
25704eeddc0SDimitry Andric       // mesa.
25804eeddc0SDimitry Andric       return 36;
25904eeddc0SDimitry Andric     }
26004eeddc0SDimitry Andric 
26104eeddc0SDimitry Andric     llvm_unreachable("invalid triple OS");
2620b57cec5SDimitry Andric   }
2630b57cec5SDimitry Andric 
2640b57cec5SDimitry Andric   /// \returns Maximum number of work groups per compute unit supported by the
2650b57cec5SDimitry Andric   /// subtarget and limited by given \p FlatWorkGroupSize.
2660b57cec5SDimitry Andric   virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
2670b57cec5SDimitry Andric 
2680b57cec5SDimitry Andric   /// \returns Minimum flat work group size supported by the subtarget.
2690b57cec5SDimitry Andric   virtual unsigned getMinFlatWorkGroupSize() const = 0;
2700b57cec5SDimitry Andric 
2710b57cec5SDimitry Andric   /// \returns Maximum flat work group size supported by the subtarget.
2720b57cec5SDimitry Andric   virtual unsigned getMaxFlatWorkGroupSize() const = 0;
2730b57cec5SDimitry Andric 
2745ffd83dbSDimitry Andric   /// \returns Number of waves per execution unit required to support the given
2755ffd83dbSDimitry Andric   /// \p FlatWorkGroupSize.
2765ffd83dbSDimitry Andric   virtual unsigned
2775ffd83dbSDimitry Andric   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
2780b57cec5SDimitry Andric 
2790b57cec5SDimitry Andric   /// \returns Minimum number of waves per execution unit supported by the
2800b57cec5SDimitry Andric   /// subtarget.
2810b57cec5SDimitry Andric   virtual unsigned getMinWavesPerEU() const = 0;
2820b57cec5SDimitry Andric 
2838bcb0991SDimitry Andric   /// \returns Maximum number of waves per execution unit supported by the
2848bcb0991SDimitry Andric   /// subtarget without any kind of limitation.
getMaxWavesPerEU()2858bcb0991SDimitry Andric   unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
2860b57cec5SDimitry Andric 
287e8d8bef9SDimitry Andric   /// Return the maximum workitem ID value in the function, for the given (0, 1,
288e8d8bef9SDimitry Andric   /// 2) dimension.
289e8d8bef9SDimitry Andric   unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
290e8d8bef9SDimitry Andric 
291*0fca6ea1SDimitry Andric   /// Return the number of work groups for the function.
292*0fca6ea1SDimitry Andric   SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) const;
293*0fca6ea1SDimitry Andric 
29406c3fb27SDimitry Andric   /// Return true if only a single workitem can be active in a wave.
29506c3fb27SDimitry Andric   bool isSingleLaneExecution(const Function &Kernel) const;
29606c3fb27SDimitry Andric 
297e8d8bef9SDimitry Andric   /// Creates value range metadata on an workitemid.* intrinsic call or load.
2980b57cec5SDimitry Andric   bool makeLIDRangeMetadata(Instruction *I) const;
2990b57cec5SDimitry Andric 
3000b57cec5SDimitry Andric   /// \returns Number of bytes of arguments that are passed to a shader or
3010b57cec5SDimitry Andric   /// kernel in addition to the explicit ones declared for the function.
302e8d8bef9SDimitry Andric   unsigned getImplicitArgNumBytes(const Function &F) const;
3038bcb0991SDimitry Andric   uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
3048bcb0991SDimitry Andric   unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
3050b57cec5SDimitry Andric 
306349cc55cSDimitry Andric   /// \returns Corresponding DWARF register number mapping flavour for the
3075ffd83dbSDimitry Andric   /// \p WavefrontSize.
308e8d8bef9SDimitry Andric   AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
3095ffd83dbSDimitry Andric 
31081ad6265SDimitry Andric   virtual ~AMDGPUSubtarget() = default;
3110b57cec5SDimitry Andric };
3120b57cec5SDimitry Andric 
3130b57cec5SDimitry Andric } // end namespace llvm
3140b57cec5SDimitry Andric 
3150b57cec5SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
316