1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //==-----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Base class for AMDGPU specific classes of TargetSubtarget. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 16 17 #include "llvm/IR/CallingConv.h" 18 #include "llvm/Support/Alignment.h" 19 #include "llvm/TargetParser/Triple.h" 20 21 namespace llvm { 22 23 enum AMDGPUDwarfFlavour : unsigned; 24 class Function; 25 class Instruction; 26 class MachineFunction; 27 class TargetMachine; 28 29 class AMDGPUSubtarget { 30 public: 31 enum Generation { 32 INVALID = 0, 33 R600 = 1, 34 R700 = 2, 35 EVERGREEN = 3, 36 NORTHERN_ISLANDS = 4, 37 SOUTHERN_ISLANDS = 5, 38 SEA_ISLANDS = 6, 39 VOLCANIC_ISLANDS = 7, 40 GFX9 = 8, 41 GFX10 = 9, 42 GFX11 = 10 43 }; 44 45 private: 46 Triple TargetTriple; 47 48 protected: 49 bool GCN3Encoding = false; 50 bool Has16BitInsts = false; 51 bool HasTrue16BitInsts = false; 52 bool HasMadMixInsts = false; 53 bool HasMadMacF32Insts = false; 54 bool HasDsSrc2Insts = false; 55 bool HasSDWA = false; 56 bool HasVOP3PInsts = false; 57 bool HasMulI24 = true; 58 bool HasMulU24 = true; 59 bool HasSMulHi = false; 60 bool HasInv2PiInlineImm = false; 61 bool HasFminFmaxLegacy = true; 62 bool EnablePromoteAlloca = false; 63 bool HasTrigReducedRange = false; 64 bool FastFMAF32 = false; 65 unsigned EUsPerCU = 4; 66 unsigned MaxWavesPerEU = 10; 67 unsigned LocalMemorySize = 0; 68 unsigned AddressableLocalMemorySize = 0; 69 char WavefrontSizeLog2 = 0; 70 71 public: 72 AMDGPUSubtarget(const Triple &TT); 73 74 static const AMDGPUSubtarget &get(const MachineFunction &MF); 75 static const AMDGPUSubtarget &get(const TargetMachine &TM, 76 const Function &F); 77 78 /// \returns Default range flat work group size for a calling convention. 79 std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; 80 81 /// \returns Subtarget's default pair of minimum/maximum flat work group sizes 82 /// for function \p F, or minimum/maximum flat work group sizes explicitly 83 /// requested using "amdgpu-flat-work-group-size" attribute attached to 84 /// function \p F. 85 /// 86 /// \returns Subtarget's default values if explicitly requested values cannot 87 /// be converted to integer, or violate subtarget's specifications. 88 std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const; 89 90 /// \returns Subtarget's default pair of minimum/maximum number of waves per 91 /// execution unit for function \p F, or minimum/maximum number of waves per 92 /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute 93 /// attached to function \p F. 94 /// 95 /// \returns Subtarget's default values if explicitly requested values cannot 96 /// be converted to integer, violate subtarget's specifications, or are not 97 /// compatible with minimum/maximum number of waves limited by flat work group 98 /// size, register usage, and/or lds usage. 99 std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const { 100 // Default/requested minimum/maximum flat work group sizes. 101 std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F); 102 return getWavesPerEU(F, FlatWorkGroupSizes); 103 } 104 105 /// Overload which uses the specified values for the flat work group sizes, 106 /// rather than querying the function itself. \p FlatWorkGroupSizes Should 107 /// correspond to the function's value for getFlatWorkGroupSizes. 108 std::pair<unsigned, unsigned> 109 getWavesPerEU(const Function &F, 110 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const; 111 std::pair<unsigned, unsigned> getEffectiveWavesPerEU( 112 std::pair<unsigned, unsigned> WavesPerEU, 113 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const; 114 115 /// Return the amount of LDS that can be used that will not restrict the 116 /// occupancy lower than WaveCount. 117 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 118 const Function &) const; 119 120 /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if 121 /// the given LDS memory size is the only constraint. 122 unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; 123 124 unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; 125 126 bool isAmdHsaOS() const { 127 return TargetTriple.getOS() == Triple::AMDHSA; 128 } 129 130 bool isAmdPalOS() const { 131 return TargetTriple.getOS() == Triple::AMDPAL; 132 } 133 134 bool isMesa3DOS() const { 135 return TargetTriple.getOS() == Triple::Mesa3D; 136 } 137 138 bool isMesaKernel(const Function &F) const; 139 140 bool isAmdHsaOrMesa(const Function &F) const { 141 return isAmdHsaOS() || isMesaKernel(F); 142 } 143 144 bool isGCN() const { 145 return TargetTriple.getArch() == Triple::amdgcn; 146 } 147 148 bool isGCN3Encoding() const { 149 return GCN3Encoding; 150 } 151 152 bool has16BitInsts() const { 153 return Has16BitInsts; 154 } 155 156 bool hasTrue16BitInsts() const { return HasTrue16BitInsts; } 157 158 bool hasMadMixInsts() const { 159 return HasMadMixInsts; 160 } 161 162 bool hasMadMacF32Insts() const { 163 return HasMadMacF32Insts || !isGCN(); 164 } 165 166 bool hasDsSrc2Insts() const { 167 return HasDsSrc2Insts; 168 } 169 170 bool hasSDWA() const { 171 return HasSDWA; 172 } 173 174 bool hasVOP3PInsts() const { 175 return HasVOP3PInsts; 176 } 177 178 bool hasMulI24() const { 179 return HasMulI24; 180 } 181 182 bool hasMulU24() const { 183 return HasMulU24; 184 } 185 186 bool hasSMulHi() const { 187 return HasSMulHi; 188 } 189 190 bool hasInv2PiInlineImm() const { 191 return HasInv2PiInlineImm; 192 } 193 194 bool hasFminFmaxLegacy() const { 195 return HasFminFmaxLegacy; 196 } 197 198 bool hasTrigReducedRange() const { 199 return HasTrigReducedRange; 200 } 201 202 bool hasFastFMAF32() const { 203 return FastFMAF32; 204 } 205 206 bool isPromoteAllocaEnabled() const { 207 return EnablePromoteAlloca; 208 } 209 210 unsigned getWavefrontSize() const { 211 return 1 << WavefrontSizeLog2; 212 } 213 214 unsigned getWavefrontSizeLog2() const { 215 return WavefrontSizeLog2; 216 } 217 218 unsigned getLocalMemorySize() const { 219 return LocalMemorySize; 220 } 221 222 unsigned getAddressableLocalMemorySize() const { 223 return AddressableLocalMemorySize; 224 } 225 226 /// Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the 227 /// "CU" is the unit onto which workgroups are mapped. This takes WGP mode vs. 228 /// CU mode into account. 229 unsigned getEUsPerCU() const { return EUsPerCU; } 230 231 Align getAlignmentForImplicitArgPtr() const { 232 return isAmdHsaOS() ? Align(8) : Align(4); 233 } 234 235 /// Returns the offset in bytes from the start of the input buffer 236 /// of the first explicit kernel argument. 237 unsigned getExplicitKernelArgOffset() const { 238 switch (TargetTriple.getOS()) { 239 case Triple::AMDHSA: 240 case Triple::AMDPAL: 241 case Triple::Mesa3D: 242 return 0; 243 case Triple::UnknownOS: 244 default: 245 // For legacy reasons unknown/other is treated as a different version of 246 // mesa. 247 return 36; 248 } 249 250 llvm_unreachable("invalid triple OS"); 251 } 252 253 /// \returns Maximum number of work groups per compute unit supported by the 254 /// subtarget and limited by given \p FlatWorkGroupSize. 255 virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0; 256 257 /// \returns Minimum flat work group size supported by the subtarget. 258 virtual unsigned getMinFlatWorkGroupSize() const = 0; 259 260 /// \returns Maximum flat work group size supported by the subtarget. 261 virtual unsigned getMaxFlatWorkGroupSize() const = 0; 262 263 /// \returns Number of waves per execution unit required to support the given 264 /// \p FlatWorkGroupSize. 265 virtual unsigned 266 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0; 267 268 /// \returns Minimum number of waves per execution unit supported by the 269 /// subtarget. 270 virtual unsigned getMinWavesPerEU() const = 0; 271 272 /// \returns Maximum number of waves per execution unit supported by the 273 /// subtarget without any kind of limitation. 274 unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; } 275 276 /// Return the maximum workitem ID value in the function, for the given (0, 1, 277 /// 2) dimension. 278 unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const; 279 280 /// Return true if only a single workitem can be active in a wave. 281 bool isSingleLaneExecution(const Function &Kernel) const; 282 283 /// Creates value range metadata on an workitemid.* intrinsic call or load. 284 bool makeLIDRangeMetadata(Instruction *I) const; 285 286 /// \returns Number of bytes of arguments that are passed to a shader or 287 /// kernel in addition to the explicit ones declared for the function. 288 unsigned getImplicitArgNumBytes(const Function &F) const; 289 uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const; 290 unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const; 291 292 /// \returns Corresponding DWARF register number mapping flavour for the 293 /// \p WavefrontSize. 294 AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const; 295 296 virtual ~AMDGPUSubtarget() = default; 297 }; 298 299 } // end namespace llvm 300 301 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 302