1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //==-----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Base class for AMDGPU specific classes of TargetSubtarget. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 16 17 #include "llvm/ADT/Triple.h" 18 #include "llvm/IR/CallingConv.h" 19 #include "llvm/Support/Alignment.h" 20 21 namespace llvm { 22 23 enum AMDGPUDwarfFlavour : unsigned; 24 class Function; 25 class Instruction; 26 class MachineFunction; 27 class TargetMachine; 28 29 class AMDGPUSubtarget { 30 public: 31 enum Generation { 32 INVALID = 0, 33 R600 = 1, 34 R700 = 2, 35 EVERGREEN = 3, 36 NORTHERN_ISLANDS = 4, 37 SOUTHERN_ISLANDS = 5, 38 SEA_ISLANDS = 6, 39 VOLCANIC_ISLANDS = 7, 40 GFX9 = 8, 41 GFX10 = 9 42 }; 43 44 private: 45 Triple TargetTriple; 46 47 protected: 48 bool GCN3Encoding; 49 bool Has16BitInsts; 50 bool HasMadMixInsts; 51 bool HasMadMacF32Insts; 52 bool HasDsSrc2Insts; 53 bool HasSDWA; 54 bool HasVOP3PInsts; 55 bool HasMulI24; 56 bool HasMulU24; 57 bool HasSMulHi; 58 bool HasInv2PiInlineImm; 59 bool HasFminFmaxLegacy; 60 bool EnablePromoteAlloca; 61 bool HasTrigReducedRange; 62 unsigned MaxWavesPerEU; 63 unsigned LocalMemorySize; 64 char WavefrontSizeLog2; 65 66 public: 67 AMDGPUSubtarget(const Triple &TT); 68 69 static const AMDGPUSubtarget &get(const MachineFunction &MF); 70 static const AMDGPUSubtarget &get(const TargetMachine &TM, 71 const Function &F); 72 73 /// \returns Default range flat work group size for a calling convention. 74 std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; 75 76 /// \returns Subtarget's default pair of minimum/maximum flat work group sizes 77 /// for function \p F, or minimum/maximum flat work group sizes explicitly 78 /// requested using "amdgpu-flat-work-group-size" attribute attached to 79 /// function \p F. 80 /// 81 /// \returns Subtarget's default values if explicitly requested values cannot 82 /// be converted to integer, or violate subtarget's specifications. 83 std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const; 84 85 /// \returns Subtarget's default pair of minimum/maximum number of waves per 86 /// execution unit for function \p F, or minimum/maximum number of waves per 87 /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute 88 /// attached to function \p F. 89 /// 90 /// \returns Subtarget's default values if explicitly requested values cannot 91 /// be converted to integer, violate subtarget's specifications, or are not 92 /// compatible with minimum/maximum number of waves limited by flat work group 93 /// size, register usage, and/or lds usage. 94 std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const { 95 // Default/requested minimum/maximum flat work group sizes. 96 std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F); 97 return getWavesPerEU(F, FlatWorkGroupSizes); 98 } 99 100 /// Overload which uses the specified values for the flat work group sizes, 101 /// rather than querying the function itself. \p FlatWorkGroupSizes Should 102 /// correspond to the function's value for getFlatWorkGroupSizes. 103 std::pair<unsigned, unsigned> 104 getWavesPerEU(const Function &F, 105 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const; 106 107 /// Return the amount of LDS that can be used that will not restrict the 108 /// occupancy lower than WaveCount. 109 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 110 const Function &) const; 111 112 /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if 113 /// the given LDS memory size is the only constraint. 114 unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; 115 116 unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; 117 118 bool isAmdHsaOS() const { 119 return TargetTriple.getOS() == Triple::AMDHSA; 120 } 121 122 bool isAmdPalOS() const { 123 return TargetTriple.getOS() == Triple::AMDPAL; 124 } 125 126 bool isMesa3DOS() const { 127 return TargetTriple.getOS() == Triple::Mesa3D; 128 } 129 130 bool isMesaKernel(const Function &F) const; 131 132 bool isAmdHsaOrMesa(const Function &F) const { 133 return isAmdHsaOS() || isMesaKernel(F); 134 } 135 136 bool isGCN() const { 137 return TargetTriple.getArch() == Triple::amdgcn; 138 } 139 140 bool isGCN3Encoding() const { 141 return GCN3Encoding; 142 } 143 144 bool has16BitInsts() const { 145 return Has16BitInsts; 146 } 147 148 bool hasMadMixInsts() const { 149 return HasMadMixInsts; 150 } 151 152 bool hasMadMacF32Insts() const { 153 return HasMadMacF32Insts || !isGCN(); 154 } 155 156 bool hasDsSrc2Insts() const { 157 return HasDsSrc2Insts; 158 } 159 160 bool hasSDWA() const { 161 return HasSDWA; 162 } 163 164 bool hasVOP3PInsts() const { 165 return HasVOP3PInsts; 166 } 167 168 bool hasMulI24() const { 169 return HasMulI24; 170 } 171 172 bool hasMulU24() const { 173 return HasMulU24; 174 } 175 176 bool hasSMulHi() const { 177 return HasSMulHi; 178 } 179 180 bool hasInv2PiInlineImm() const { 181 return HasInv2PiInlineImm; 182 } 183 184 bool hasFminFmaxLegacy() const { 185 return HasFminFmaxLegacy; 186 } 187 188 bool hasTrigReducedRange() const { 189 return HasTrigReducedRange; 190 } 191 192 bool isPromoteAllocaEnabled() const { 193 return EnablePromoteAlloca; 194 } 195 196 unsigned getWavefrontSize() const { 197 return 1 << WavefrontSizeLog2; 198 } 199 200 unsigned getWavefrontSizeLog2() const { 201 return WavefrontSizeLog2; 202 } 203 204 unsigned getLocalMemorySize() const { 205 return LocalMemorySize; 206 } 207 208 Align getAlignmentForImplicitArgPtr() const { 209 return isAmdHsaOS() ? Align(8) : Align(4); 210 } 211 212 /// Returns the offset in bytes from the start of the input buffer 213 /// of the first explicit kernel argument. 214 unsigned getExplicitKernelArgOffset(const Function &F) const { 215 switch (TargetTriple.getOS()) { 216 case Triple::AMDHSA: 217 case Triple::AMDPAL: 218 case Triple::Mesa3D: 219 return 0; 220 case Triple::UnknownOS: 221 default: 222 // For legacy reasons unknown/other is treated as a different version of 223 // mesa. 224 return 36; 225 } 226 227 llvm_unreachable("invalid triple OS"); 228 } 229 230 /// \returns Maximum number of work groups per compute unit supported by the 231 /// subtarget and limited by given \p FlatWorkGroupSize. 232 virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0; 233 234 /// \returns Minimum flat work group size supported by the subtarget. 235 virtual unsigned getMinFlatWorkGroupSize() const = 0; 236 237 /// \returns Maximum flat work group size supported by the subtarget. 238 virtual unsigned getMaxFlatWorkGroupSize() const = 0; 239 240 /// \returns Number of waves per execution unit required to support the given 241 /// \p FlatWorkGroupSize. 242 virtual unsigned 243 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0; 244 245 /// \returns Minimum number of waves per execution unit supported by the 246 /// subtarget. 247 virtual unsigned getMinWavesPerEU() const = 0; 248 249 /// \returns Maximum number of waves per execution unit supported by the 250 /// subtarget without any kind of limitation. 251 unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; } 252 253 /// Return the maximum workitem ID value in the function, for the given (0, 1, 254 /// 2) dimension. 255 unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const; 256 257 /// Creates value range metadata on an workitemid.* intrinsic call or load. 258 bool makeLIDRangeMetadata(Instruction *I) const; 259 260 /// \returns Number of bytes of arguments that are passed to a shader or 261 /// kernel in addition to the explicit ones declared for the function. 262 unsigned getImplicitArgNumBytes(const Function &F) const; 263 uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const; 264 unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const; 265 266 /// \returns Corresponding DWARF register number mapping flavour for the 267 /// \p WavefrontSize. 268 AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const; 269 270 virtual ~AMDGPUSubtarget() {} 271 }; 272 273 } // end namespace llvm 274 275 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 276