1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //==-----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Base class for AMDGPU specific classes of TargetSubtarget. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 16 17 #include "llvm/ADT/Triple.h" 18 #include "llvm/IR/CallingConv.h" 19 #include "llvm/Support/Alignment.h" 20 21 namespace llvm { 22 23 enum AMDGPUDwarfFlavour : unsigned; 24 class Function; 25 class Instruction; 26 class MachineFunction; 27 class TargetMachine; 28 29 class AMDGPUSubtarget { 30 public: 31 enum Generation { 32 INVALID = 0, 33 R600 = 1, 34 R700 = 2, 35 EVERGREEN = 3, 36 NORTHERN_ISLANDS = 4, 37 SOUTHERN_ISLANDS = 5, 38 SEA_ISLANDS = 6, 39 VOLCANIC_ISLANDS = 7, 40 GFX9 = 8, 41 GFX10 = 9, 42 GFX11 = 10 43 }; 44 45 private: 46 Triple TargetTriple; 47 48 protected: 49 bool GCN3Encoding = false; 50 bool Has16BitInsts = false; 51 bool HasTrue16BitInsts = false; 52 bool HasMadMixInsts = false; 53 bool HasMadMacF32Insts = false; 54 bool HasDsSrc2Insts = false; 55 bool HasSDWA = false; 56 bool HasVOP3PInsts = false; 57 bool HasMulI24 = true; 58 bool HasMulU24 = true; 59 bool HasSMulHi = false; 60 bool HasInv2PiInlineImm = false; 61 bool HasFminFmaxLegacy = true; 62 bool EnablePromoteAlloca = false; 63 bool HasTrigReducedRange = false; 64 unsigned MaxWavesPerEU = 10; 65 unsigned LocalMemorySize = 0; 66 char WavefrontSizeLog2 = 0; 67 68 public: 69 AMDGPUSubtarget(const Triple &TT); 70 71 static const AMDGPUSubtarget &get(const MachineFunction &MF); 72 static const AMDGPUSubtarget &get(const TargetMachine &TM, 73 const Function &F); 74 75 /// \returns Default range flat work group size for a calling convention. 76 std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; 77 78 /// \returns Subtarget's default pair of minimum/maximum flat work group sizes 79 /// for function \p F, or minimum/maximum flat work group sizes explicitly 80 /// requested using "amdgpu-flat-work-group-size" attribute attached to 81 /// function \p F. 82 /// 83 /// \returns Subtarget's default values if explicitly requested values cannot 84 /// be converted to integer, or violate subtarget's specifications. 85 std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const; 86 87 /// \returns Subtarget's default pair of minimum/maximum number of waves per 88 /// execution unit for function \p F, or minimum/maximum number of waves per 89 /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute 90 /// attached to function \p F. 91 /// 92 /// \returns Subtarget's default values if explicitly requested values cannot 93 /// be converted to integer, violate subtarget's specifications, or are not 94 /// compatible with minimum/maximum number of waves limited by flat work group 95 /// size, register usage, and/or lds usage. 96 std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const { 97 // Default/requested minimum/maximum flat work group sizes. 98 std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F); 99 return getWavesPerEU(F, FlatWorkGroupSizes); 100 } 101 102 /// Overload which uses the specified values for the flat work group sizes, 103 /// rather than querying the function itself. \p FlatWorkGroupSizes Should 104 /// correspond to the function's value for getFlatWorkGroupSizes. 105 std::pair<unsigned, unsigned> 106 getWavesPerEU(const Function &F, 107 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const; 108 109 /// Return the amount of LDS that can be used that will not restrict the 110 /// occupancy lower than WaveCount. 111 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 112 const Function &) const; 113 114 /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if 115 /// the given LDS memory size is the only constraint. 116 unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; 117 118 unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; 119 120 bool isAmdHsaOS() const { 121 return TargetTriple.getOS() == Triple::AMDHSA; 122 } 123 124 bool isAmdPalOS() const { 125 return TargetTriple.getOS() == Triple::AMDPAL; 126 } 127 128 bool isMesa3DOS() const { 129 return TargetTriple.getOS() == Triple::Mesa3D; 130 } 131 132 bool isMesaKernel(const Function &F) const; 133 134 bool isAmdHsaOrMesa(const Function &F) const { 135 return isAmdHsaOS() || isMesaKernel(F); 136 } 137 138 bool isGCN() const { 139 return TargetTriple.getArch() == Triple::amdgcn; 140 } 141 142 bool isGCN3Encoding() const { 143 return GCN3Encoding; 144 } 145 146 bool has16BitInsts() const { 147 return Has16BitInsts; 148 } 149 150 bool hasTrue16BitInsts() const { return HasTrue16BitInsts; } 151 152 bool hasMadMixInsts() const { 153 return HasMadMixInsts; 154 } 155 156 bool hasMadMacF32Insts() const { 157 return HasMadMacF32Insts || !isGCN(); 158 } 159 160 bool hasDsSrc2Insts() const { 161 return HasDsSrc2Insts; 162 } 163 164 bool hasSDWA() const { 165 return HasSDWA; 166 } 167 168 bool hasVOP3PInsts() const { 169 return HasVOP3PInsts; 170 } 171 172 bool hasMulI24() const { 173 return HasMulI24; 174 } 175 176 bool hasMulU24() const { 177 return HasMulU24; 178 } 179 180 bool hasSMulHi() const { 181 return HasSMulHi; 182 } 183 184 bool hasInv2PiInlineImm() const { 185 return HasInv2PiInlineImm; 186 } 187 188 bool hasFminFmaxLegacy() const { 189 return HasFminFmaxLegacy; 190 } 191 192 bool hasTrigReducedRange() const { 193 return HasTrigReducedRange; 194 } 195 196 bool isPromoteAllocaEnabled() const { 197 return EnablePromoteAlloca; 198 } 199 200 unsigned getWavefrontSize() const { 201 return 1 << WavefrontSizeLog2; 202 } 203 204 unsigned getWavefrontSizeLog2() const { 205 return WavefrontSizeLog2; 206 } 207 208 unsigned getLocalMemorySize() const { 209 return LocalMemorySize; 210 } 211 212 Align getAlignmentForImplicitArgPtr() const { 213 return isAmdHsaOS() ? Align(8) : Align(4); 214 } 215 216 /// Returns the offset in bytes from the start of the input buffer 217 /// of the first explicit kernel argument. 218 unsigned getExplicitKernelArgOffset(const Function &F) const { 219 switch (TargetTriple.getOS()) { 220 case Triple::AMDHSA: 221 case Triple::AMDPAL: 222 case Triple::Mesa3D: 223 return 0; 224 case Triple::UnknownOS: 225 default: 226 // For legacy reasons unknown/other is treated as a different version of 227 // mesa. 228 return 36; 229 } 230 231 llvm_unreachable("invalid triple OS"); 232 } 233 234 /// \returns Maximum number of work groups per compute unit supported by the 235 /// subtarget and limited by given \p FlatWorkGroupSize. 236 virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0; 237 238 /// \returns Minimum flat work group size supported by the subtarget. 239 virtual unsigned getMinFlatWorkGroupSize() const = 0; 240 241 /// \returns Maximum flat work group size supported by the subtarget. 242 virtual unsigned getMaxFlatWorkGroupSize() const = 0; 243 244 /// \returns Number of waves per execution unit required to support the given 245 /// \p FlatWorkGroupSize. 246 virtual unsigned 247 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0; 248 249 /// \returns Minimum number of waves per execution unit supported by the 250 /// subtarget. 251 virtual unsigned getMinWavesPerEU() const = 0; 252 253 /// \returns Maximum number of waves per execution unit supported by the 254 /// subtarget without any kind of limitation. 255 unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; } 256 257 /// Return the maximum workitem ID value in the function, for the given (0, 1, 258 /// 2) dimension. 259 unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const; 260 261 /// Creates value range metadata on an workitemid.* intrinsic call or load. 262 bool makeLIDRangeMetadata(Instruction *I) const; 263 264 /// \returns Number of bytes of arguments that are passed to a shader or 265 /// kernel in addition to the explicit ones declared for the function. 266 unsigned getImplicitArgNumBytes(const Function &F) const; 267 uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const; 268 unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const; 269 270 /// \returns Corresponding DWARF register number mapping flavour for the 271 /// \p WavefrontSize. 272 AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const; 273 274 virtual ~AMDGPUSubtarget() = default; 275 }; 276 277 } // end namespace llvm 278 279 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 280