1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //==-----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Base class for AMDGPU specific classes of TargetSubtarget. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 16 17 #include "llvm/ADT/Triple.h" 18 #include "llvm/IR/CallingConv.h" 19 #include "llvm/Support/Alignment.h" 20 21 namespace llvm { 22 23 enum AMDGPUDwarfFlavour : unsigned; 24 class Function; 25 class Instruction; 26 class MachineFunction; 27 class TargetMachine; 28 29 class AMDGPUSubtarget { 30 public: 31 enum Generation { 32 INVALID = 0, 33 R600 = 1, 34 R700 = 2, 35 EVERGREEN = 3, 36 NORTHERN_ISLANDS = 4, 37 SOUTHERN_ISLANDS = 5, 38 SEA_ISLANDS = 6, 39 VOLCANIC_ISLANDS = 7, 40 GFX9 = 8, 41 GFX10 = 9 42 }; 43 44 private: 45 Triple TargetTriple; 46 47 protected: 48 bool Has16BitInsts; 49 bool HasMadMixInsts; 50 bool HasMadMacF32Insts; 51 bool HasDsSrc2Insts; 52 bool HasSDWA; 53 bool HasVOP3PInsts; 54 bool HasMulI24; 55 bool HasMulU24; 56 bool HasInv2PiInlineImm; 57 bool HasFminFmaxLegacy; 58 bool EnablePromoteAlloca; 59 bool HasTrigReducedRange; 60 unsigned MaxWavesPerEU; 61 unsigned LocalMemorySize; 62 char WavefrontSizeLog2; 63 64 public: 65 AMDGPUSubtarget(const Triple &TT); 66 67 static const AMDGPUSubtarget &get(const MachineFunction &MF); 68 static const AMDGPUSubtarget &get(const TargetMachine &TM, 69 const Function &F); 70 71 /// \returns Default range flat work group size for a calling convention. 72 std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; 73 74 /// \returns Subtarget's default pair of minimum/maximum flat work group sizes 75 /// for function \p F, or minimum/maximum flat work group sizes explicitly 76 /// requested using "amdgpu-flat-work-group-size" attribute attached to 77 /// function \p F. 78 /// 79 /// \returns Subtarget's default values if explicitly requested values cannot 80 /// be converted to integer, or violate subtarget's specifications. 81 std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const; 82 83 /// \returns Subtarget's default pair of minimum/maximum number of waves per 84 /// execution unit for function \p F, or minimum/maximum number of waves per 85 /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute 86 /// attached to function \p F. 87 /// 88 /// \returns Subtarget's default values if explicitly requested values cannot 89 /// be converted to integer, violate subtarget's specifications, or are not 90 /// compatible with minimum/maximum number of waves limited by flat work group 91 /// size, register usage, and/or lds usage. 92 std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const; 93 94 /// Return the amount of LDS that can be used that will not restrict the 95 /// occupancy lower than WaveCount. 96 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 97 const Function &) const; 98 99 /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if 100 /// the given LDS memory size is the only constraint. 101 unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; 102 103 unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; 104 105 bool isAmdHsaOS() const { 106 return TargetTriple.getOS() == Triple::AMDHSA; 107 } 108 109 bool isAmdPalOS() const { 110 return TargetTriple.getOS() == Triple::AMDPAL; 111 } 112 113 bool isMesa3DOS() const { 114 return TargetTriple.getOS() == Triple::Mesa3D; 115 } 116 117 bool isMesaKernel(const Function &F) const; 118 119 bool isAmdHsaOrMesa(const Function &F) const { 120 return isAmdHsaOS() || isMesaKernel(F); 121 } 122 123 bool isGCN() const { 124 return TargetTriple.getArch() == Triple::amdgcn; 125 } 126 127 bool has16BitInsts() const { 128 return Has16BitInsts; 129 } 130 131 bool hasMadMixInsts() const { 132 return HasMadMixInsts; 133 } 134 135 bool hasMadMacF32Insts() const { 136 return HasMadMacF32Insts || !isGCN(); 137 } 138 139 bool hasDsSrc2Insts() const { 140 return HasDsSrc2Insts; 141 } 142 143 bool hasSDWA() const { 144 return HasSDWA; 145 } 146 147 bool hasVOP3PInsts() const { 148 return HasVOP3PInsts; 149 } 150 151 bool hasMulI24() const { 152 return HasMulI24; 153 } 154 155 bool hasMulU24() const { 156 return HasMulU24; 157 } 158 159 bool hasInv2PiInlineImm() const { 160 return HasInv2PiInlineImm; 161 } 162 163 bool hasFminFmaxLegacy() const { 164 return HasFminFmaxLegacy; 165 } 166 167 bool hasTrigReducedRange() const { 168 return HasTrigReducedRange; 169 } 170 171 bool isPromoteAllocaEnabled() const { 172 return EnablePromoteAlloca; 173 } 174 175 unsigned getWavefrontSize() const { 176 return 1 << WavefrontSizeLog2; 177 } 178 179 unsigned getWavefrontSizeLog2() const { 180 return WavefrontSizeLog2; 181 } 182 183 unsigned getLocalMemorySize() const { 184 return LocalMemorySize; 185 } 186 187 Align getAlignmentForImplicitArgPtr() const { 188 return isAmdHsaOS() ? Align(8) : Align(4); 189 } 190 191 /// Returns the offset in bytes from the start of the input buffer 192 /// of the first explicit kernel argument. 193 unsigned getExplicitKernelArgOffset(const Function &F) const { 194 return isAmdHsaOrMesa(F) ? 0 : 36; 195 } 196 197 /// \returns Maximum number of work groups per compute unit supported by the 198 /// subtarget and limited by given \p FlatWorkGroupSize. 199 virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0; 200 201 /// \returns Minimum flat work group size supported by the subtarget. 202 virtual unsigned getMinFlatWorkGroupSize() const = 0; 203 204 /// \returns Maximum flat work group size supported by the subtarget. 205 virtual unsigned getMaxFlatWorkGroupSize() const = 0; 206 207 /// \returns Number of waves per execution unit required to support the given 208 /// \p FlatWorkGroupSize. 209 virtual unsigned 210 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0; 211 212 /// \returns Minimum number of waves per execution unit supported by the 213 /// subtarget. 214 virtual unsigned getMinWavesPerEU() const = 0; 215 216 /// \returns Maximum number of waves per execution unit supported by the 217 /// subtarget without any kind of limitation. 218 unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; } 219 220 /// Return the maximum workitem ID value in the function, for the given (0, 1, 221 /// 2) dimension. 222 unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const; 223 224 /// Creates value range metadata on an workitemid.* intrinsic call or load. 225 bool makeLIDRangeMetadata(Instruction *I) const; 226 227 /// \returns Number of bytes of arguments that are passed to a shader or 228 /// kernel in addition to the explicit ones declared for the function. 229 unsigned getImplicitArgNumBytes(const Function &F) const; 230 uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const; 231 unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const; 232 233 /// \returns Corresponsing DWARF register number mapping flavour for the 234 /// \p WavefrontSize. 235 AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const; 236 237 virtual ~AMDGPUSubtarget() {} 238 }; 239 240 } // end namespace llvm 241 242 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 243