1 //===-- TargetParser - Parser for target features ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a target parser to recognise hardware features such as 10 // FPU/CPU/ARCH names as well as specific support such as HDIV, etc. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/TargetParser/TargetParser.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/TargetParser/Triple.h" 17 18 using namespace llvm; 19 using namespace AMDGPU; 20 21 /// Find KV in array using binary search. 22 static const BasicSubtargetSubTypeKV * 23 find(StringRef S, ArrayRef<BasicSubtargetSubTypeKV> A) { 24 // Binary search the array 25 auto F = llvm::lower_bound(A, S); 26 // If not found then return NULL 27 if (F == A.end() || StringRef(F->Key) != S) 28 return nullptr; 29 // Return the found array item 30 return F; 31 } 32 33 /// For each feature that is (transitively) implied by this feature, set it. 34 static void setImpliedBits(FeatureBitset &Bits, const FeatureBitset &Implies, 35 ArrayRef<BasicSubtargetFeatureKV> FeatureTable) { 36 // OR the Implies bits in outside the loop. This allows the Implies for CPUs 37 // which might imply features not in FeatureTable to use this. 38 Bits |= Implies; 39 for (const auto &FE : FeatureTable) 40 if (Implies.test(FE.Value)) 41 setImpliedBits(Bits, FE.Implies.getAsBitset(), FeatureTable); 42 } 43 44 std::optional<llvm::StringMap<bool>> llvm::getCPUDefaultTargetFeatures( 45 StringRef CPU, ArrayRef<BasicSubtargetSubTypeKV> ProcDesc, 46 ArrayRef<BasicSubtargetFeatureKV> ProcFeatures) { 47 if (CPU.empty()) 48 return std::nullopt; 49 50 const BasicSubtargetSubTypeKV *CPUEntry = ::find(CPU, ProcDesc); 51 if (!CPUEntry) 52 return std::nullopt; 53 54 // Set the features implied by this CPU feature if there is a match. 55 FeatureBitset Bits; 56 llvm::StringMap<bool> DefaultFeatures; 57 setImpliedBits(Bits, CPUEntry->Implies.getAsBitset(), ProcFeatures); 58 59 [[maybe_unused]] unsigned BitSize = Bits.size(); 60 for (const BasicSubtargetFeatureKV &FE : ProcFeatures) { 61 assert(FE.Value < BitSize && "Target Feature is out of range"); 62 if (Bits[FE.Value]) 63 DefaultFeatures[FE.Key] = true; 64 } 65 return DefaultFeatures; 66 } 67 68 namespace { 69 70 struct GPUInfo { 71 StringLiteral Name; 72 StringLiteral CanonicalName; 73 AMDGPU::GPUKind Kind; 74 unsigned Features; 75 }; 76 77 constexpr GPUInfo R600GPUs[] = { 78 // Name Canonical Kind Features 79 // Name 80 {{"r600"}, {"r600"}, GK_R600, FEATURE_NONE }, 81 {{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE }, 82 {{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE }, 83 {{"r630"}, {"r630"}, GK_R630, FEATURE_NONE }, 84 {{"rs780"}, {"rs880"}, GK_RS880, FEATURE_NONE }, 85 {{"rs880"}, {"rs880"}, GK_RS880, FEATURE_NONE }, 86 {{"rv610"}, {"rs880"}, GK_RS880, FEATURE_NONE }, 87 {{"rv620"}, {"rs880"}, GK_RS880, FEATURE_NONE }, 88 {{"rv670"}, {"rv670"}, GK_RV670, FEATURE_NONE }, 89 {{"rv710"}, {"rv710"}, GK_RV710, FEATURE_NONE }, 90 {{"rv730"}, {"rv730"}, GK_RV730, FEATURE_NONE }, 91 {{"rv740"}, {"rv770"}, GK_RV770, FEATURE_NONE }, 92 {{"rv770"}, {"rv770"}, GK_RV770, FEATURE_NONE }, 93 {{"cedar"}, {"cedar"}, GK_CEDAR, FEATURE_NONE }, 94 {{"palm"}, {"cedar"}, GK_CEDAR, FEATURE_NONE }, 95 {{"cypress"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA }, 96 {{"hemlock"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA }, 97 {{"juniper"}, {"juniper"}, GK_JUNIPER, FEATURE_NONE }, 98 {{"redwood"}, {"redwood"}, GK_REDWOOD, FEATURE_NONE }, 99 {{"sumo"}, {"sumo"}, GK_SUMO, FEATURE_NONE }, 100 {{"sumo2"}, {"sumo"}, GK_SUMO, FEATURE_NONE }, 101 {{"barts"}, {"barts"}, GK_BARTS, FEATURE_NONE }, 102 {{"caicos"}, {"caicos"}, GK_CAICOS, FEATURE_NONE }, 103 {{"aruba"}, {"cayman"}, GK_CAYMAN, FEATURE_FMA }, 104 {{"cayman"}, {"cayman"}, GK_CAYMAN, FEATURE_FMA }, 105 {{"turks"}, {"turks"}, GK_TURKS, FEATURE_NONE } 106 }; 107 108 // This table should be sorted by the value of GPUKind 109 // Don't bother listing the implicitly true features 110 constexpr GPUInfo AMDGCNGPUs[] = { 111 // clang-format off 112 // Name Canonical Kind Features 113 // Name 114 {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, 115 {{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, 116 {{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, 117 {{"pitcairn"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, 118 {{"verde"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, 119 {{"gfx602"}, {"gfx602"}, GK_GFX602, FEATURE_NONE}, 120 {{"hainan"}, {"gfx602"}, GK_GFX602, FEATURE_NONE}, 121 {{"oland"}, {"gfx602"}, GK_GFX602, FEATURE_NONE}, 122 {{"gfx700"}, {"gfx700"}, GK_GFX700, FEATURE_NONE}, 123 {{"kaveri"}, {"gfx700"}, GK_GFX700, FEATURE_NONE}, 124 {{"gfx701"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32}, 125 {{"hawaii"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32}, 126 {{"gfx702"}, {"gfx702"}, GK_GFX702, FEATURE_FAST_FMA_F32}, 127 {{"gfx703"}, {"gfx703"}, GK_GFX703, FEATURE_NONE}, 128 {{"kabini"}, {"gfx703"}, GK_GFX703, FEATURE_NONE}, 129 {{"mullins"}, {"gfx703"}, GK_GFX703, FEATURE_NONE}, 130 {{"gfx704"}, {"gfx704"}, GK_GFX704, FEATURE_NONE}, 131 {{"bonaire"}, {"gfx704"}, GK_GFX704, FEATURE_NONE}, 132 {{"gfx705"}, {"gfx705"}, GK_GFX705, FEATURE_NONE}, 133 {{"gfx801"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 134 {{"carrizo"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 135 {{"gfx802"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32}, 136 {{"iceland"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32}, 137 {{"tonga"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32}, 138 {{"gfx803"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32}, 139 {{"fiji"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32}, 140 {{"polaris10"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32}, 141 {{"polaris11"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32}, 142 {{"gfx805"}, {"gfx805"}, GK_GFX805, FEATURE_FAST_DENORMAL_F32}, 143 {{"tongapro"}, {"gfx805"}, GK_GFX805, FEATURE_FAST_DENORMAL_F32}, 144 {{"gfx810"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 145 {{"stoney"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 146 {{"gfx900"}, {"gfx900"}, GK_GFX900, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 147 {{"gfx902"}, {"gfx902"}, GK_GFX902, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 148 {{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 149 {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 150 {{"gfx908"}, {"gfx908"}, GK_GFX908, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 151 {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 152 {{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 153 {{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 154 {{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 155 {{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 156 {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, 157 {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, 158 {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, 159 {{"gfx1013"}, {"gfx1013"}, GK_GFX1013, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, 160 {{"gfx1030"}, {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 161 {{"gfx1031"}, {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 162 {{"gfx1032"}, {"gfx1032"}, GK_GFX1032, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 163 {{"gfx1033"}, {"gfx1033"}, GK_GFX1033, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 164 {{"gfx1034"}, {"gfx1034"}, GK_GFX1034, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 165 {{"gfx1035"}, {"gfx1035"}, GK_GFX1035, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 166 {{"gfx1036"}, {"gfx1036"}, GK_GFX1036, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 167 {{"gfx1100"}, {"gfx1100"}, GK_GFX1100, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 168 {{"gfx1101"}, {"gfx1101"}, GK_GFX1101, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 169 {{"gfx1102"}, {"gfx1102"}, GK_GFX1102, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 170 {{"gfx1103"}, {"gfx1103"}, GK_GFX1103, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 171 {{"gfx1150"}, {"gfx1150"}, GK_GFX1150, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 172 {{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 173 {{"gfx1152"}, {"gfx1152"}, GK_GFX1152, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 174 {{"gfx1153"}, {"gfx1153"}, GK_GFX1153, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 175 {{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 176 {{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 177 {{"gfx1250"}, {"gfx1250"}, GK_GFX1250, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, 178 179 {{"gfx9-generic"}, {"gfx9-generic"}, GK_GFX9_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 180 {{"gfx10-1-generic"}, {"gfx10-1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, 181 {{"gfx10-3-generic"}, {"gfx10-3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 182 {{"gfx11-generic"}, {"gfx11-generic"}, GK_GFX11_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 183 {{"gfx12-generic"}, {"gfx12-generic"}, GK_GFX12_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 184 {{"gfx9-4-generic"}, {"gfx9-4-generic"}, GK_GFX9_4_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 185 // clang-format on 186 }; 187 188 const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) { 189 GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE }; 190 191 auto I = 192 llvm::lower_bound(Table, Search, [](const GPUInfo &A, const GPUInfo &B) { 193 return A.Kind < B.Kind; 194 }); 195 196 if (I == Table.end() || I->Kind != Search.Kind) 197 return nullptr; 198 return I; 199 } 200 201 } // namespace 202 203 StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) { 204 switch (AK) { 205 case AMDGPU::GK_GFX9_GENERIC: 206 case AMDGPU::GK_GFX9_4_GENERIC: 207 return "gfx9"; 208 case AMDGPU::GK_GFX10_1_GENERIC: 209 case AMDGPU::GK_GFX10_3_GENERIC: 210 return "gfx10"; 211 case AMDGPU::GK_GFX11_GENERIC: 212 return "gfx11"; 213 case AMDGPU::GK_GFX12_GENERIC: 214 return "gfx12"; 215 default: { 216 StringRef ArchName = getArchNameAMDGCN(AK); 217 return ArchName.empty() ? "" : ArchName.drop_back(2); 218 } 219 } 220 } 221 222 StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) { 223 if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs)) 224 return Entry->CanonicalName; 225 return ""; 226 } 227 228 StringRef llvm::AMDGPU::getArchNameR600(GPUKind AK) { 229 if (const auto *Entry = getArchEntry(AK, R600GPUs)) 230 return Entry->CanonicalName; 231 return ""; 232 } 233 234 AMDGPU::GPUKind llvm::AMDGPU::parseArchAMDGCN(StringRef CPU) { 235 for (const auto &C : AMDGCNGPUs) { 236 if (CPU == C.Name) 237 return C.Kind; 238 } 239 240 return AMDGPU::GPUKind::GK_NONE; 241 } 242 243 AMDGPU::GPUKind llvm::AMDGPU::parseArchR600(StringRef CPU) { 244 for (const auto &C : R600GPUs) { 245 if (CPU == C.Name) 246 return C.Kind; 247 } 248 249 return AMDGPU::GPUKind::GK_NONE; 250 } 251 252 unsigned AMDGPU::getArchAttrAMDGCN(GPUKind AK) { 253 if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs)) 254 return Entry->Features; 255 return FEATURE_NONE; 256 } 257 258 unsigned AMDGPU::getArchAttrR600(GPUKind AK) { 259 if (const auto *Entry = getArchEntry(AK, R600GPUs)) 260 return Entry->Features; 261 return FEATURE_NONE; 262 } 263 264 void AMDGPU::fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values) { 265 // XXX: Should this only report unique canonical names? 266 for (const auto &C : AMDGCNGPUs) 267 Values.push_back(C.Name); 268 } 269 270 void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) { 271 for (const auto &C : R600GPUs) 272 Values.push_back(C.Name); 273 } 274 275 AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { 276 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); 277 if (AK == AMDGPU::GPUKind::GK_NONE) { 278 if (GPU == "generic-hsa") 279 return {7, 0, 0}; 280 if (GPU == "generic") 281 return {6, 0, 0}; 282 return {0, 0, 0}; 283 } 284 285 // clang-format off 286 switch (AK) { 287 case GK_GFX600: return {6, 0, 0}; 288 case GK_GFX601: return {6, 0, 1}; 289 case GK_GFX602: return {6, 0, 2}; 290 case GK_GFX700: return {7, 0, 0}; 291 case GK_GFX701: return {7, 0, 1}; 292 case GK_GFX702: return {7, 0, 2}; 293 case GK_GFX703: return {7, 0, 3}; 294 case GK_GFX704: return {7, 0, 4}; 295 case GK_GFX705: return {7, 0, 5}; 296 case GK_GFX801: return {8, 0, 1}; 297 case GK_GFX802: return {8, 0, 2}; 298 case GK_GFX803: return {8, 0, 3}; 299 case GK_GFX805: return {8, 0, 5}; 300 case GK_GFX810: return {8, 1, 0}; 301 case GK_GFX900: return {9, 0, 0}; 302 case GK_GFX902: return {9, 0, 2}; 303 case GK_GFX904: return {9, 0, 4}; 304 case GK_GFX906: return {9, 0, 6}; 305 case GK_GFX908: return {9, 0, 8}; 306 case GK_GFX909: return {9, 0, 9}; 307 case GK_GFX90A: return {9, 0, 10}; 308 case GK_GFX90C: return {9, 0, 12}; 309 case GK_GFX942: return {9, 4, 2}; 310 case GK_GFX950: return {9, 5, 0}; 311 case GK_GFX1010: return {10, 1, 0}; 312 case GK_GFX1011: return {10, 1, 1}; 313 case GK_GFX1012: return {10, 1, 2}; 314 case GK_GFX1013: return {10, 1, 3}; 315 case GK_GFX1030: return {10, 3, 0}; 316 case GK_GFX1031: return {10, 3, 1}; 317 case GK_GFX1032: return {10, 3, 2}; 318 case GK_GFX1033: return {10, 3, 3}; 319 case GK_GFX1034: return {10, 3, 4}; 320 case GK_GFX1035: return {10, 3, 5}; 321 case GK_GFX1036: return {10, 3, 6}; 322 case GK_GFX1100: return {11, 0, 0}; 323 case GK_GFX1101: return {11, 0, 1}; 324 case GK_GFX1102: return {11, 0, 2}; 325 case GK_GFX1103: return {11, 0, 3}; 326 case GK_GFX1150: return {11, 5, 0}; 327 case GK_GFX1151: return {11, 5, 1}; 328 case GK_GFX1152: return {11, 5, 2}; 329 case GK_GFX1153: return {11, 5, 3}; 330 case GK_GFX1200: return {12, 0, 0}; 331 case GK_GFX1201: return {12, 0, 1}; 332 case GK_GFX1250: return {12, 5, 0}; 333 334 // Generic targets return the lowest common denominator 335 // within their family. That is, the ISA that is the most 336 // restricted in terms of features. 337 // 338 // gfx9-generic is tricky because there is no lowest 339 // common denominator, so we return gfx900 which has mad-mix 340 // but this family doesn't have it. 341 // 342 // This API should never be used to check for a particular 343 // feature anyway. 344 // 345 // TODO: Split up this API depending on its caller so 346 // generic target handling is more obvious and less risky. 347 case GK_GFX9_GENERIC: return {9, 0, 0}; 348 case GK_GFX9_4_GENERIC: return {9, 4, 0}; 349 case GK_GFX10_1_GENERIC: return {10, 1, 0}; 350 case GK_GFX10_3_GENERIC: return {10, 3, 0}; 351 case GK_GFX11_GENERIC: return {11, 0, 3}; 352 case GK_GFX12_GENERIC: return {12, 0, 0}; 353 default: return {0, 0, 0}; 354 } 355 // clang-format on 356 } 357 358 StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) { 359 assert(T.isAMDGPU()); 360 auto ProcKind = T.isAMDGCN() ? parseArchAMDGCN(Arch) : parseArchR600(Arch); 361 if (ProcKind == GK_NONE) 362 return StringRef(); 363 364 return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind); 365 } 366 367 void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, 368 StringMap<bool> &Features) { 369 // XXX - What does the member GPU mean if device name string passed here? 370 if (T.isSPIRV() && T.getOS() == Triple::OSType::AMDHSA) { 371 // AMDGCN SPIRV must support the union of all AMDGCN features. This list 372 // should be kept in sorted order and updated whenever new features are 373 // added. 374 Features["16-bit-insts"] = true; 375 Features["ashr-pk-insts"] = true; 376 Features["atomic-buffer-pk-add-bf16-inst"] = true; 377 Features["atomic-buffer-global-pk-add-f16-insts"] = true; 378 Features["atomic-ds-pk-add-16-insts"] = true; 379 Features["atomic-fadd-rtn-insts"] = true; 380 Features["atomic-flat-pk-add-16-insts"] = true; 381 Features["atomic-global-pk-add-bf16-inst"] = true; 382 Features["bf8-cvt-scale-insts"] = true; 383 Features["bitop3-insts"] = true; 384 Features["ci-insts"] = true; 385 Features["dl-insts"] = true; 386 Features["dot1-insts"] = true; 387 Features["dot2-insts"] = true; 388 Features["dot3-insts"] = true; 389 Features["dot4-insts"] = true; 390 Features["dot5-insts"] = true; 391 Features["dot6-insts"] = true; 392 Features["dot7-insts"] = true; 393 Features["dot8-insts"] = true; 394 Features["dot9-insts"] = true; 395 Features["dot10-insts"] = true; 396 Features["dot11-insts"] = true; 397 Features["dot12-insts"] = true; 398 Features["dot13-insts"] = true; 399 Features["dpp"] = true; 400 Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true; 401 Features["f32-to-f16bf16-cvt-sr-insts"] = true; 402 Features["fp4-cvt-scale-insts"] = true; 403 Features["fp6bf6-cvt-scale-insts"] = true; 404 Features["fp8-insts"] = true; 405 Features["fp8-conversion-insts"] = true; 406 Features["fp8-cvt-scale-insts"] = true; 407 Features["gfx8-insts"] = true; 408 Features["gfx9-insts"] = true; 409 Features["gfx90a-insts"] = true; 410 Features["gfx940-insts"] = true; 411 Features["gfx950-insts"] = true; 412 Features["gfx10-insts"] = true; 413 Features["gfx10-3-insts"] = true; 414 Features["gfx11-insts"] = true; 415 Features["gfx12-insts"] = true; 416 Features["gws"] = true; 417 Features["image-insts"] = true; 418 Features["s-memrealtime"] = true; 419 Features["s-memtime-inst"] = true; 420 Features["mai-insts"] = true; 421 Features["permlane16-swap"] = true; 422 Features["permlane32-swap"] = true; 423 Features["prng-inst"] = true; 424 Features["wavefrontsize32"] = true; 425 Features["wavefrontsize64"] = true; 426 Features["vmem-to-lds-load-insts"] = true; 427 } else if (T.isAMDGCN()) { 428 AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU); 429 switch (Kind) { 430 case GK_GFX1250: 431 Features["ci-insts"] = true; 432 Features["dot7-insts"] = true; 433 Features["dot8-insts"] = true; 434 Features["dl-insts"] = true; 435 Features["16-bit-insts"] = true; 436 Features["dpp"] = true; 437 Features["gfx8-insts"] = true; 438 Features["gfx9-insts"] = true; 439 Features["gfx10-insts"] = true; 440 Features["gfx10-3-insts"] = true; 441 Features["gfx11-insts"] = true; 442 Features["gfx12-insts"] = true; 443 Features["gfx1250-insts"] = true; 444 Features["bitop3-insts"] = true; 445 Features["prng-inst"] = true; 446 Features["transpose-load-f4f6-insts"] = true; 447 Features["bf16-trans-insts"] = true; 448 Features["fp8-conversion-insts"] = true; 449 Features["fp8e5m3-insts"] = true; 450 Features["permlane16-swap"] = true; 451 Features["ashr-pk-insts"] = true; 452 Features["atomic-buffer-pk-add-bf16-inst"] = true; 453 Features["atomic-fadd-rtn-insts"] = true; 454 Features["atomic-buffer-global-pk-add-f16-insts"] = true; 455 Features["atomic-flat-pk-add-16-insts"] = true; 456 Features["atomic-global-pk-add-bf16-inst"] = true; 457 Features["atomic-ds-pk-add-16-insts"] = true; 458 Features["setprio-inc-wg-inst"] = true; 459 break; 460 case GK_GFX1201: 461 case GK_GFX1200: 462 case GK_GFX12_GENERIC: 463 Features["ci-insts"] = true; 464 Features["dot7-insts"] = true; 465 Features["dot8-insts"] = true; 466 Features["dot9-insts"] = true; 467 Features["dot10-insts"] = true; 468 Features["dot11-insts"] = true; 469 Features["dot12-insts"] = true; 470 Features["dl-insts"] = true; 471 Features["atomic-ds-pk-add-16-insts"] = true; 472 Features["atomic-flat-pk-add-16-insts"] = true; 473 Features["atomic-buffer-global-pk-add-f16-insts"] = true; 474 Features["atomic-buffer-pk-add-bf16-inst"] = true; 475 Features["atomic-global-pk-add-bf16-inst"] = true; 476 Features["16-bit-insts"] = true; 477 Features["dpp"] = true; 478 Features["gfx8-insts"] = true; 479 Features["gfx9-insts"] = true; 480 Features["gfx10-insts"] = true; 481 Features["gfx10-3-insts"] = true; 482 Features["gfx11-insts"] = true; 483 Features["gfx12-insts"] = true; 484 Features["atomic-fadd-rtn-insts"] = true; 485 Features["image-insts"] = true; 486 Features["fp8-conversion-insts"] = true; 487 break; 488 case GK_GFX1153: 489 case GK_GFX1152: 490 case GK_GFX1151: 491 case GK_GFX1150: 492 case GK_GFX1103: 493 case GK_GFX1102: 494 case GK_GFX1101: 495 case GK_GFX1100: 496 case GK_GFX11_GENERIC: 497 Features["ci-insts"] = true; 498 Features["dot5-insts"] = true; 499 Features["dot7-insts"] = true; 500 Features["dot8-insts"] = true; 501 Features["dot9-insts"] = true; 502 Features["dot10-insts"] = true; 503 Features["dot12-insts"] = true; 504 Features["dl-insts"] = true; 505 Features["16-bit-insts"] = true; 506 Features["dpp"] = true; 507 Features["gfx8-insts"] = true; 508 Features["gfx9-insts"] = true; 509 Features["gfx10-insts"] = true; 510 Features["gfx10-3-insts"] = true; 511 Features["gfx11-insts"] = true; 512 Features["atomic-fadd-rtn-insts"] = true; 513 Features["image-insts"] = true; 514 Features["gws"] = true; 515 break; 516 case GK_GFX1036: 517 case GK_GFX1035: 518 case GK_GFX1034: 519 case GK_GFX1033: 520 case GK_GFX1032: 521 case GK_GFX1031: 522 case GK_GFX1030: 523 case GK_GFX10_3_GENERIC: 524 Features["ci-insts"] = true; 525 Features["dot1-insts"] = true; 526 Features["dot2-insts"] = true; 527 Features["dot5-insts"] = true; 528 Features["dot6-insts"] = true; 529 Features["dot7-insts"] = true; 530 Features["dot10-insts"] = true; 531 Features["dl-insts"] = true; 532 Features["16-bit-insts"] = true; 533 Features["dpp"] = true; 534 Features["gfx8-insts"] = true; 535 Features["gfx9-insts"] = true; 536 Features["gfx10-insts"] = true; 537 Features["gfx10-3-insts"] = true; 538 Features["image-insts"] = true; 539 Features["s-memrealtime"] = true; 540 Features["s-memtime-inst"] = true; 541 Features["gws"] = true; 542 Features["vmem-to-lds-load-insts"] = true; 543 break; 544 case GK_GFX1012: 545 case GK_GFX1011: 546 Features["dot1-insts"] = true; 547 Features["dot2-insts"] = true; 548 Features["dot5-insts"] = true; 549 Features["dot6-insts"] = true; 550 Features["dot7-insts"] = true; 551 Features["dot10-insts"] = true; 552 [[fallthrough]]; 553 case GK_GFX1013: 554 case GK_GFX1010: 555 case GK_GFX10_1_GENERIC: 556 Features["dl-insts"] = true; 557 Features["ci-insts"] = true; 558 Features["16-bit-insts"] = true; 559 Features["dpp"] = true; 560 Features["gfx8-insts"] = true; 561 Features["gfx9-insts"] = true; 562 Features["gfx10-insts"] = true; 563 Features["image-insts"] = true; 564 Features["s-memrealtime"] = true; 565 Features["s-memtime-inst"] = true; 566 Features["gws"] = true; 567 Features["vmem-to-lds-load-insts"] = true; 568 break; 569 case GK_GFX950: 570 Features["bitop3-insts"] = true; 571 Features["fp6bf6-cvt-scale-insts"] = true; 572 Features["fp4-cvt-scale-insts"] = true; 573 Features["bf8-cvt-scale-insts"] = true; 574 Features["fp8-cvt-scale-insts"] = true; 575 Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true; 576 Features["f32-to-f16bf16-cvt-sr-insts"] = true; 577 Features["prng-inst"] = true; 578 Features["permlane16-swap"] = true; 579 Features["permlane32-swap"] = true; 580 Features["ashr-pk-insts"] = true; 581 Features["dot12-insts"] = true; 582 Features["dot13-insts"] = true; 583 Features["atomic-buffer-pk-add-bf16-inst"] = true; 584 Features["gfx950-insts"] = true; 585 [[fallthrough]]; 586 case GK_GFX942: 587 Features["fp8-insts"] = true; 588 Features["fp8-conversion-insts"] = true; 589 if (Kind != GK_GFX950) 590 Features["xf32-insts"] = true; 591 [[fallthrough]]; 592 case GK_GFX9_4_GENERIC: 593 Features["gfx940-insts"] = true; 594 Features["atomic-ds-pk-add-16-insts"] = true; 595 Features["atomic-flat-pk-add-16-insts"] = true; 596 Features["atomic-global-pk-add-bf16-inst"] = true; 597 Features["gfx90a-insts"] = true; 598 Features["atomic-buffer-global-pk-add-f16-insts"] = true; 599 Features["atomic-fadd-rtn-insts"] = true; 600 Features["dot3-insts"] = true; 601 Features["dot4-insts"] = true; 602 Features["dot5-insts"] = true; 603 Features["dot6-insts"] = true; 604 Features["mai-insts"] = true; 605 Features["dl-insts"] = true; 606 Features["dot1-insts"] = true; 607 Features["dot2-insts"] = true; 608 Features["dot7-insts"] = true; 609 Features["dot10-insts"] = true; 610 Features["gfx9-insts"] = true; 611 Features["gfx8-insts"] = true; 612 Features["16-bit-insts"] = true; 613 Features["dpp"] = true; 614 Features["s-memrealtime"] = true; 615 Features["ci-insts"] = true; 616 Features["s-memtime-inst"] = true; 617 Features["gws"] = true; 618 Features["vmem-to-lds-load-insts"] = true; 619 break; 620 case GK_GFX90A: 621 Features["gfx90a-insts"] = true; 622 Features["atomic-buffer-global-pk-add-f16-insts"] = true; 623 Features["atomic-fadd-rtn-insts"] = true; 624 [[fallthrough]]; 625 case GK_GFX908: 626 Features["dot3-insts"] = true; 627 Features["dot4-insts"] = true; 628 Features["dot5-insts"] = true; 629 Features["dot6-insts"] = true; 630 Features["mai-insts"] = true; 631 [[fallthrough]]; 632 case GK_GFX906: 633 Features["dl-insts"] = true; 634 Features["dot1-insts"] = true; 635 Features["dot2-insts"] = true; 636 Features["dot7-insts"] = true; 637 Features["dot10-insts"] = true; 638 [[fallthrough]]; 639 case GK_GFX90C: 640 case GK_GFX909: 641 case GK_GFX904: 642 case GK_GFX902: 643 case GK_GFX900: 644 case GK_GFX9_GENERIC: 645 Features["gfx9-insts"] = true; 646 Features["vmem-to-lds-load-insts"] = true; 647 [[fallthrough]]; 648 case GK_GFX810: 649 case GK_GFX805: 650 case GK_GFX803: 651 case GK_GFX802: 652 case GK_GFX801: 653 Features["gfx8-insts"] = true; 654 Features["16-bit-insts"] = true; 655 Features["dpp"] = true; 656 Features["s-memrealtime"] = true; 657 [[fallthrough]]; 658 case GK_GFX705: 659 case GK_GFX704: 660 case GK_GFX703: 661 case GK_GFX702: 662 case GK_GFX701: 663 case GK_GFX700: 664 Features["ci-insts"] = true; 665 [[fallthrough]]; 666 case GK_GFX602: 667 case GK_GFX601: 668 case GK_GFX600: 669 Features["image-insts"] = true; 670 Features["s-memtime-inst"] = true; 671 Features["gws"] = true; 672 break; 673 case GK_NONE: 674 break; 675 default: 676 llvm_unreachable("Unhandled GPU!"); 677 } 678 } else { 679 if (GPU.empty()) 680 GPU = "r600"; 681 682 switch (llvm::AMDGPU::parseArchR600(GPU)) { 683 case GK_CAYMAN: 684 case GK_CYPRESS: 685 case GK_RV770: 686 case GK_RV670: 687 // TODO: Add fp64 when implemented. 688 break; 689 case GK_TURKS: 690 case GK_CAICOS: 691 case GK_BARTS: 692 case GK_SUMO: 693 case GK_REDWOOD: 694 case GK_JUNIPER: 695 case GK_CEDAR: 696 case GK_RV730: 697 case GK_RV710: 698 case GK_RS880: 699 case GK_R630: 700 case GK_R600: 701 break; 702 default: 703 llvm_unreachable("Unhandled GPU!"); 704 } 705 } 706 } 707 708 static bool isWave32Capable(StringRef GPU, const Triple &T) { 709 bool IsWave32Capable = false; 710 // XXX - What does the member GPU mean if device name string passed here? 711 if (T.isAMDGCN()) { 712 switch (parseArchAMDGCN(GPU)) { 713 case GK_GFX1250: 714 case GK_GFX1201: 715 case GK_GFX1200: 716 case GK_GFX1153: 717 case GK_GFX1152: 718 case GK_GFX1151: 719 case GK_GFX1150: 720 case GK_GFX1103: 721 case GK_GFX1102: 722 case GK_GFX1101: 723 case GK_GFX1100: 724 case GK_GFX1036: 725 case GK_GFX1035: 726 case GK_GFX1034: 727 case GK_GFX1033: 728 case GK_GFX1032: 729 case GK_GFX1031: 730 case GK_GFX1030: 731 case GK_GFX1012: 732 case GK_GFX1011: 733 case GK_GFX1013: 734 case GK_GFX1010: 735 case GK_GFX12_GENERIC: 736 case GK_GFX11_GENERIC: 737 case GK_GFX10_3_GENERIC: 738 case GK_GFX10_1_GENERIC: 739 IsWave32Capable = true; 740 break; 741 default: 742 break; 743 } 744 } 745 return IsWave32Capable; 746 } 747 748 std::pair<FeatureError, StringRef> 749 AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T, 750 StringMap<bool> &Features) { 751 bool IsWave32Capable = isWave32Capable(GPU, T); 752 const bool IsNullGPU = GPU.empty(); 753 const bool HaveWave32 = Features.count("wavefrontsize32"); 754 const bool HaveWave64 = Features.count("wavefrontsize64"); 755 if (HaveWave32 && HaveWave64) { 756 return {AMDGPU::INVALID_FEATURE_COMBINATION, 757 "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"}; 758 } 759 if (HaveWave32 && !IsNullGPU && !IsWave32Capable) { 760 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"}; 761 } 762 // Don't assume any wavesize with an unknown subtarget. 763 if (!IsNullGPU) { 764 // Default to wave32 if available, or wave64 if not 765 if (!HaveWave32 && !HaveWave64) { 766 StringRef DefaultWaveSizeFeature = 767 IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64"; 768 Features.insert(std::make_pair(DefaultWaveSizeFeature, true)); 769 } 770 } 771 return {NO_ERROR, StringRef()}; 772 } 773