1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //==-----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Base class for AMDGPU specific classes of TargetSubtarget. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 16 17 #include "llvm/ADT/Triple.h" 18 #include "llvm/IR/CallingConv.h" 19 #include "llvm/Support/Alignment.h" 20 21 namespace llvm { 22 23 enum AMDGPUDwarfFlavour : unsigned; 24 class Function; 25 class Instruction; 26 class MachineFunction; 27 class TargetMachine; 28 29 class AMDGPUSubtarget { 30 public: 31 enum Generation { 32 INVALID = 0, 33 R600 = 1, 34 R700 = 2, 35 EVERGREEN = 3, 36 NORTHERN_ISLANDS = 4, 37 SOUTHERN_ISLANDS = 5, 38 SEA_ISLANDS = 6, 39 VOLCANIC_ISLANDS = 7, 40 GFX9 = 8, 41 GFX10 = 9 42 }; 43 44 private: 45 Triple TargetTriple; 46 47 protected: 48 bool GCN3Encoding; 49 bool Has16BitInsts; 50 bool HasMadMixInsts; 51 bool HasMadMacF32Insts; 52 bool HasDsSrc2Insts; 53 bool HasSDWA; 54 bool HasVOP3PInsts; 55 bool HasMulI24; 56 bool HasMulU24; 57 bool HasSMulHi; 58 bool HasInv2PiInlineImm; 59 bool HasFminFmaxLegacy; 60 bool EnablePromoteAlloca; 61 bool HasTrigReducedRange; 62 unsigned MaxWavesPerEU; 63 unsigned LocalMemorySize; 64 char WavefrontSizeLog2; 65 66 public: 67 AMDGPUSubtarget(const Triple &TT); 68 69 static const AMDGPUSubtarget &get(const MachineFunction &MF); 70 static const AMDGPUSubtarget &get(const TargetMachine &TM, 71 const Function &F); 72 73 /// \returns Default range flat work group size for a calling convention. 74 std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; 75 76 /// \returns Subtarget's default pair of minimum/maximum flat work group sizes 77 /// for function \p F, or minimum/maximum flat work group sizes explicitly 78 /// requested using "amdgpu-flat-work-group-size" attribute attached to 79 /// function \p F. 80 /// 81 /// \returns Subtarget's default values if explicitly requested values cannot 82 /// be converted to integer, or violate subtarget's specifications. 83 std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const; 84 85 /// \returns Subtarget's default pair of minimum/maximum number of waves per 86 /// execution unit for function \p F, or minimum/maximum number of waves per 87 /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute 88 /// attached to function \p F. 89 /// 90 /// \returns Subtarget's default values if explicitly requested values cannot 91 /// be converted to integer, violate subtarget's specifications, or are not 92 /// compatible with minimum/maximum number of waves limited by flat work group 93 /// size, register usage, and/or lds usage. 94 std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const; 95 96 /// Return the amount of LDS that can be used that will not restrict the 97 /// occupancy lower than WaveCount. 98 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 99 const Function &) const; 100 101 /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if 102 /// the given LDS memory size is the only constraint. 103 unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; 104 105 unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; 106 107 bool isAmdHsaOS() const { 108 return TargetTriple.getOS() == Triple::AMDHSA; 109 } 110 111 bool isAmdPalOS() const { 112 return TargetTriple.getOS() == Triple::AMDPAL; 113 } 114 115 bool isMesa3DOS() const { 116 return TargetTriple.getOS() == Triple::Mesa3D; 117 } 118 119 bool isMesaKernel(const Function &F) const; 120 121 bool isAmdHsaOrMesa(const Function &F) const { 122 return isAmdHsaOS() || isMesaKernel(F); 123 } 124 125 bool isGCN() const { 126 return TargetTriple.getArch() == Triple::amdgcn; 127 } 128 129 bool isGCN3Encoding() const { 130 return GCN3Encoding; 131 } 132 133 bool has16BitInsts() const { 134 return Has16BitInsts; 135 } 136 137 bool hasMadMixInsts() const { 138 return HasMadMixInsts; 139 } 140 141 bool hasMadMacF32Insts() const { 142 return HasMadMacF32Insts || !isGCN(); 143 } 144 145 bool hasDsSrc2Insts() const { 146 return HasDsSrc2Insts; 147 } 148 149 bool hasSDWA() const { 150 return HasSDWA; 151 } 152 153 bool hasVOP3PInsts() const { 154 return HasVOP3PInsts; 155 } 156 157 bool hasMulI24() const { 158 return HasMulI24; 159 } 160 161 bool hasMulU24() const { 162 return HasMulU24; 163 } 164 165 bool hasSMulHi() const { 166 return HasSMulHi; 167 } 168 169 bool hasInv2PiInlineImm() const { 170 return HasInv2PiInlineImm; 171 } 172 173 bool hasFminFmaxLegacy() const { 174 return HasFminFmaxLegacy; 175 } 176 177 bool hasTrigReducedRange() const { 178 return HasTrigReducedRange; 179 } 180 181 bool isPromoteAllocaEnabled() const { 182 return EnablePromoteAlloca; 183 } 184 185 unsigned getWavefrontSize() const { 186 return 1 << WavefrontSizeLog2; 187 } 188 189 unsigned getWavefrontSizeLog2() const { 190 return WavefrontSizeLog2; 191 } 192 193 unsigned getLocalMemorySize() const { 194 return LocalMemorySize; 195 } 196 197 Align getAlignmentForImplicitArgPtr() const { 198 return isAmdHsaOS() ? Align(8) : Align(4); 199 } 200 201 /// Returns the offset in bytes from the start of the input buffer 202 /// of the first explicit kernel argument. 203 unsigned getExplicitKernelArgOffset(const Function &F) const { 204 return isAmdHsaOrMesa(F) ? 0 : 36; 205 } 206 207 /// \returns Maximum number of work groups per compute unit supported by the 208 /// subtarget and limited by given \p FlatWorkGroupSize. 209 virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0; 210 211 /// \returns Minimum flat work group size supported by the subtarget. 212 virtual unsigned getMinFlatWorkGroupSize() const = 0; 213 214 /// \returns Maximum flat work group size supported by the subtarget. 215 virtual unsigned getMaxFlatWorkGroupSize() const = 0; 216 217 /// \returns Number of waves per execution unit required to support the given 218 /// \p FlatWorkGroupSize. 219 virtual unsigned 220 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0; 221 222 /// \returns Minimum number of waves per execution unit supported by the 223 /// subtarget. 224 virtual unsigned getMinWavesPerEU() const = 0; 225 226 /// \returns Maximum number of waves per execution unit supported by the 227 /// subtarget without any kind of limitation. 228 unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; } 229 230 /// Return the maximum workitem ID value in the function, for the given (0, 1, 231 /// 2) dimension. 232 unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const; 233 234 /// Creates value range metadata on an workitemid.* intrinsic call or load. 235 bool makeLIDRangeMetadata(Instruction *I) const; 236 237 /// \returns Number of bytes of arguments that are passed to a shader or 238 /// kernel in addition to the explicit ones declared for the function. 239 unsigned getImplicitArgNumBytes(const Function &F) const; 240 uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const; 241 unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const; 242 243 /// \returns Corresponsing DWARF register number mapping flavour for the 244 /// \p WavefrontSize. 245 AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const; 246 247 virtual ~AMDGPUSubtarget() {} 248 }; 249 250 } // end namespace llvm 251 252 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 253