xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h (revision 38a52bd3b5cac3da6f7f6eef3dd050e6aa08ebb3)
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Base class for AMDGPU specific classes of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "llvm/ADT/Triple.h"
18 #include "llvm/IR/CallingConv.h"
19 #include "llvm/Support/Alignment.h"
20 
21 namespace llvm {
22 
23 enum AMDGPUDwarfFlavour : unsigned;
24 class Function;
25 class Instruction;
26 class MachineFunction;
27 class TargetMachine;
28 
29 class AMDGPUSubtarget {
30 public:
31   enum Generation {
32     INVALID = 0,
33     R600 = 1,
34     R700 = 2,
35     EVERGREEN = 3,
36     NORTHERN_ISLANDS = 4,
37     SOUTHERN_ISLANDS = 5,
38     SEA_ISLANDS = 6,
39     VOLCANIC_ISLANDS = 7,
40     GFX9 = 8,
41     GFX10 = 9
42   };
43 
44 private:
45   Triple TargetTriple;
46 
47 protected:
48   bool GCN3Encoding;
49   bool Has16BitInsts;
50   bool HasMadMixInsts;
51   bool HasMadMacF32Insts;
52   bool HasDsSrc2Insts;
53   bool HasSDWA;
54   bool HasVOP3PInsts;
55   bool HasMulI24;
56   bool HasMulU24;
57   bool HasSMulHi;
58   bool HasInv2PiInlineImm;
59   bool HasFminFmaxLegacy;
60   bool EnablePromoteAlloca;
61   bool HasTrigReducedRange;
62   unsigned MaxWavesPerEU;
63   unsigned LocalMemorySize;
64   char WavefrontSizeLog2;
65 
66 public:
67   AMDGPUSubtarget(const Triple &TT);
68 
69   static const AMDGPUSubtarget &get(const MachineFunction &MF);
70   static const AMDGPUSubtarget &get(const TargetMachine &TM,
71                                     const Function &F);
72 
73   /// \returns Default range flat work group size for a calling convention.
74   std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
75 
76   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
77   /// for function \p F, or minimum/maximum flat work group sizes explicitly
78   /// requested using "amdgpu-flat-work-group-size" attribute attached to
79   /// function \p F.
80   ///
81   /// \returns Subtarget's default values if explicitly requested values cannot
82   /// be converted to integer, or violate subtarget's specifications.
83   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
84 
85   /// \returns Subtarget's default pair of minimum/maximum number of waves per
86   /// execution unit for function \p F, or minimum/maximum number of waves per
87   /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
88   /// attached to function \p F.
89   ///
90   /// \returns Subtarget's default values if explicitly requested values cannot
91   /// be converted to integer, violate subtarget's specifications, or are not
92   /// compatible with minimum/maximum number of waves limited by flat work group
93   /// size, register usage, and/or lds usage.
94   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const {
95     // Default/requested minimum/maximum flat work group sizes.
96     std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
97     return getWavesPerEU(F, FlatWorkGroupSizes);
98   }
99 
100   /// Overload which uses the specified values for the flat work group sizes,
101   /// rather than querying the function itself. \p FlatWorkGroupSizes Should
102   /// correspond to the function's value for getFlatWorkGroupSizes.
103   std::pair<unsigned, unsigned>
104   getWavesPerEU(const Function &F,
105                 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
106 
107   /// Return the amount of LDS that can be used that will not restrict the
108   /// occupancy lower than WaveCount.
109   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
110                                            const Function &) const;
111 
112   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
113   /// the given LDS memory size is the only constraint.
114   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
115 
116   unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
117 
118   bool isAmdHsaOS() const {
119     return TargetTriple.getOS() == Triple::AMDHSA;
120   }
121 
122   bool isAmdPalOS() const {
123     return TargetTriple.getOS() == Triple::AMDPAL;
124   }
125 
126   bool isMesa3DOS() const {
127     return TargetTriple.getOS() == Triple::Mesa3D;
128   }
129 
130   bool isMesaKernel(const Function &F) const;
131 
132   bool isAmdHsaOrMesa(const Function &F) const {
133     return isAmdHsaOS() || isMesaKernel(F);
134   }
135 
136   bool isGCN() const {
137     return TargetTriple.getArch() == Triple::amdgcn;
138   }
139 
140   bool isGCN3Encoding() const {
141     return GCN3Encoding;
142   }
143 
144   bool has16BitInsts() const {
145     return Has16BitInsts;
146   }
147 
148   bool hasMadMixInsts() const {
149     return HasMadMixInsts;
150   }
151 
152   bool hasMadMacF32Insts() const {
153     return HasMadMacF32Insts || !isGCN();
154   }
155 
156   bool hasDsSrc2Insts() const {
157     return HasDsSrc2Insts;
158   }
159 
160   bool hasSDWA() const {
161     return HasSDWA;
162   }
163 
164   bool hasVOP3PInsts() const {
165     return HasVOP3PInsts;
166   }
167 
168   bool hasMulI24() const {
169     return HasMulI24;
170   }
171 
172   bool hasMulU24() const {
173     return HasMulU24;
174   }
175 
176   bool hasSMulHi() const {
177     return HasSMulHi;
178   }
179 
180   bool hasInv2PiInlineImm() const {
181     return HasInv2PiInlineImm;
182   }
183 
184   bool hasFminFmaxLegacy() const {
185     return HasFminFmaxLegacy;
186   }
187 
188   bool hasTrigReducedRange() const {
189     return HasTrigReducedRange;
190   }
191 
192   bool isPromoteAllocaEnabled() const {
193     return EnablePromoteAlloca;
194   }
195 
196   unsigned getWavefrontSize() const {
197     return 1 << WavefrontSizeLog2;
198   }
199 
200   unsigned getWavefrontSizeLog2() const {
201     return WavefrontSizeLog2;
202   }
203 
204   unsigned getLocalMemorySize() const {
205     return LocalMemorySize;
206   }
207 
208   Align getAlignmentForImplicitArgPtr() const {
209     return isAmdHsaOS() ? Align(8) : Align(4);
210   }
211 
212   /// Returns the offset in bytes from the start of the input buffer
213   ///        of the first explicit kernel argument.
214   unsigned getExplicitKernelArgOffset(const Function &F) const {
215     switch (TargetTriple.getOS()) {
216     case Triple::AMDHSA:
217     case Triple::AMDPAL:
218     case Triple::Mesa3D:
219       return 0;
220     case Triple::UnknownOS:
221     default:
222       // For legacy reasons unknown/other is treated as a different version of
223       // mesa.
224       return 36;
225     }
226 
227     llvm_unreachable("invalid triple OS");
228   }
229 
230   /// \returns Maximum number of work groups per compute unit supported by the
231   /// subtarget and limited by given \p FlatWorkGroupSize.
232   virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
233 
234   /// \returns Minimum flat work group size supported by the subtarget.
235   virtual unsigned getMinFlatWorkGroupSize() const = 0;
236 
237   /// \returns Maximum flat work group size supported by the subtarget.
238   virtual unsigned getMaxFlatWorkGroupSize() const = 0;
239 
240   /// \returns Number of waves per execution unit required to support the given
241   /// \p FlatWorkGroupSize.
242   virtual unsigned
243   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
244 
245   /// \returns Minimum number of waves per execution unit supported by the
246   /// subtarget.
247   virtual unsigned getMinWavesPerEU() const = 0;
248 
249   /// \returns Maximum number of waves per execution unit supported by the
250   /// subtarget without any kind of limitation.
251   unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
252 
253   /// Return the maximum workitem ID value in the function, for the given (0, 1,
254   /// 2) dimension.
255   unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
256 
257   /// Creates value range metadata on an workitemid.* intrinsic call or load.
258   bool makeLIDRangeMetadata(Instruction *I) const;
259 
260   /// \returns Number of bytes of arguments that are passed to a shader or
261   /// kernel in addition to the explicit ones declared for the function.
262   unsigned getImplicitArgNumBytes(const Function &F) const;
263   uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
264   unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
265 
266   /// \returns Corresponding DWARF register number mapping flavour for the
267   /// \p WavefrontSize.
268   AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
269 
270   virtual ~AMDGPUSubtarget() {}
271 };
272 
273 } // end namespace llvm
274 
275 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
276