1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //==-----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Base class for AMDGPU specific classes of TargetSubtarget. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 16 17 #include "llvm/ADT/Triple.h" 18 #include "llvm/IR/CallingConv.h" 19 #include "llvm/Support/Alignment.h" 20 21 namespace llvm { 22 23 enum AMDGPUDwarfFlavour : unsigned; 24 class Function; 25 class Instruction; 26 class MachineFunction; 27 class TargetMachine; 28 29 class AMDGPUSubtarget { 30 public: 31 enum Generation { 32 INVALID = 0, 33 R600 = 1, 34 R700 = 2, 35 EVERGREEN = 3, 36 NORTHERN_ISLANDS = 4, 37 SOUTHERN_ISLANDS = 5, 38 SEA_ISLANDS = 6, 39 VOLCANIC_ISLANDS = 7, 40 GFX9 = 8, 41 GFX10 = 9, 42 GFX11 = 10 43 }; 44 45 private: 46 Triple TargetTriple; 47 48 protected: 49 bool GCN3Encoding = false; 50 bool Has16BitInsts = false; 51 bool HasTrue16BitInsts = false; 52 bool HasMadMixInsts = false; 53 bool HasMadMacF32Insts = false; 54 bool HasDsSrc2Insts = false; 55 bool HasSDWA = false; 56 bool HasVOP3PInsts = false; 57 bool HasMulI24 = true; 58 bool HasMulU24 = true; 59 bool HasSMulHi = false; 60 bool HasInv2PiInlineImm = false; 61 bool HasFminFmaxLegacy = true; 62 bool EnablePromoteAlloca = false; 63 bool HasTrigReducedRange = false; 64 unsigned EUsPerCU = 4; 65 unsigned MaxWavesPerEU = 10; 66 unsigned LocalMemorySize = 0; 67 unsigned AddressableLocalMemorySize = 0; 68 char WavefrontSizeLog2 = 0; 69 70 public: 71 AMDGPUSubtarget(const Triple &TT); 72 73 static const AMDGPUSubtarget &get(const MachineFunction &MF); 74 static const AMDGPUSubtarget &get(const TargetMachine &TM, 75 const Function &F); 76 77 /// \returns Default range flat work group size for a calling convention. 78 std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; 79 80 /// \returns Subtarget's default pair of minimum/maximum flat work group sizes 81 /// for function \p F, or minimum/maximum flat work group sizes explicitly 82 /// requested using "amdgpu-flat-work-group-size" attribute attached to 83 /// function \p F. 84 /// 85 /// \returns Subtarget's default values if explicitly requested values cannot 86 /// be converted to integer, or violate subtarget's specifications. 87 std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const; 88 89 /// \returns Subtarget's default pair of minimum/maximum number of waves per 90 /// execution unit for function \p F, or minimum/maximum number of waves per 91 /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute 92 /// attached to function \p F. 93 /// 94 /// \returns Subtarget's default values if explicitly requested values cannot 95 /// be converted to integer, violate subtarget's specifications, or are not 96 /// compatible with minimum/maximum number of waves limited by flat work group 97 /// size, register usage, and/or lds usage. 98 std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const { 99 // Default/requested minimum/maximum flat work group sizes. 100 std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F); 101 return getWavesPerEU(F, FlatWorkGroupSizes); 102 } 103 104 /// Overload which uses the specified values for the flat work group sizes, 105 /// rather than querying the function itself. \p FlatWorkGroupSizes Should 106 /// correspond to the function's value for getFlatWorkGroupSizes. 107 std::pair<unsigned, unsigned> 108 getWavesPerEU(const Function &F, 109 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const; 110 111 /// Return the amount of LDS that can be used that will not restrict the 112 /// occupancy lower than WaveCount. 113 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 114 const Function &) const; 115 116 /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if 117 /// the given LDS memory size is the only constraint. 118 unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; 119 120 unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; 121 122 bool isAmdHsaOS() const { 123 return TargetTriple.getOS() == Triple::AMDHSA; 124 } 125 126 bool isAmdPalOS() const { 127 return TargetTriple.getOS() == Triple::AMDPAL; 128 } 129 130 bool isMesa3DOS() const { 131 return TargetTriple.getOS() == Triple::Mesa3D; 132 } 133 134 bool isMesaKernel(const Function &F) const; 135 136 bool isAmdHsaOrMesa(const Function &F) const { 137 return isAmdHsaOS() || isMesaKernel(F); 138 } 139 140 bool isGCN() const { 141 return TargetTriple.getArch() == Triple::amdgcn; 142 } 143 144 bool isGCN3Encoding() const { 145 return GCN3Encoding; 146 } 147 148 bool has16BitInsts() const { 149 return Has16BitInsts; 150 } 151 152 bool hasTrue16BitInsts() const { return HasTrue16BitInsts; } 153 154 bool hasMadMixInsts() const { 155 return HasMadMixInsts; 156 } 157 158 bool hasMadMacF32Insts() const { 159 return HasMadMacF32Insts || !isGCN(); 160 } 161 162 bool hasDsSrc2Insts() const { 163 return HasDsSrc2Insts; 164 } 165 166 bool hasSDWA() const { 167 return HasSDWA; 168 } 169 170 bool hasVOP3PInsts() const { 171 return HasVOP3PInsts; 172 } 173 174 bool hasMulI24() const { 175 return HasMulI24; 176 } 177 178 bool hasMulU24() const { 179 return HasMulU24; 180 } 181 182 bool hasSMulHi() const { 183 return HasSMulHi; 184 } 185 186 bool hasInv2PiInlineImm() const { 187 return HasInv2PiInlineImm; 188 } 189 190 bool hasFminFmaxLegacy() const { 191 return HasFminFmaxLegacy; 192 } 193 194 bool hasTrigReducedRange() const { 195 return HasTrigReducedRange; 196 } 197 198 bool isPromoteAllocaEnabled() const { 199 return EnablePromoteAlloca; 200 } 201 202 unsigned getWavefrontSize() const { 203 return 1 << WavefrontSizeLog2; 204 } 205 206 unsigned getWavefrontSizeLog2() const { 207 return WavefrontSizeLog2; 208 } 209 210 unsigned getLocalMemorySize() const { 211 return LocalMemorySize; 212 } 213 214 unsigned getAddressableLocalMemorySize() const { 215 return AddressableLocalMemorySize; 216 } 217 218 /// Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the 219 /// "CU" is the unit onto which workgroups are mapped. This takes WGP mode vs. 220 /// CU mode into account. 221 unsigned getEUsPerCU() const { return EUsPerCU; } 222 223 Align getAlignmentForImplicitArgPtr() const { 224 return isAmdHsaOS() ? Align(8) : Align(4); 225 } 226 227 /// Returns the offset in bytes from the start of the input buffer 228 /// of the first explicit kernel argument. 229 unsigned getExplicitKernelArgOffset(const Function &F) const { 230 switch (TargetTriple.getOS()) { 231 case Triple::AMDHSA: 232 case Triple::AMDPAL: 233 case Triple::Mesa3D: 234 return 0; 235 case Triple::UnknownOS: 236 default: 237 // For legacy reasons unknown/other is treated as a different version of 238 // mesa. 239 return 36; 240 } 241 242 llvm_unreachable("invalid triple OS"); 243 } 244 245 /// \returns Maximum number of work groups per compute unit supported by the 246 /// subtarget and limited by given \p FlatWorkGroupSize. 247 virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0; 248 249 /// \returns Minimum flat work group size supported by the subtarget. 250 virtual unsigned getMinFlatWorkGroupSize() const = 0; 251 252 /// \returns Maximum flat work group size supported by the subtarget. 253 virtual unsigned getMaxFlatWorkGroupSize() const = 0; 254 255 /// \returns Number of waves per execution unit required to support the given 256 /// \p FlatWorkGroupSize. 257 virtual unsigned 258 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0; 259 260 /// \returns Minimum number of waves per execution unit supported by the 261 /// subtarget. 262 virtual unsigned getMinWavesPerEU() const = 0; 263 264 /// \returns Maximum number of waves per execution unit supported by the 265 /// subtarget without any kind of limitation. 266 unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; } 267 268 /// Return the maximum workitem ID value in the function, for the given (0, 1, 269 /// 2) dimension. 270 unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const; 271 272 /// Creates value range metadata on an workitemid.* intrinsic call or load. 273 bool makeLIDRangeMetadata(Instruction *I) const; 274 275 /// \returns Number of bytes of arguments that are passed to a shader or 276 /// kernel in addition to the explicit ones declared for the function. 277 unsigned getImplicitArgNumBytes(const Function &F) const; 278 uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const; 279 unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const; 280 281 /// \returns Corresponding DWARF register number mapping flavour for the 282 /// \p WavefrontSize. 283 AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const; 284 285 virtual ~AMDGPUSubtarget() = default; 286 }; 287 288 } // end namespace llvm 289 290 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 291