xref: /freebsd/contrib/llvm-project/llvm/lib/TargetParser/TargetParser.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===-- TargetParser - Parser for target features ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a target parser to recognise hardware features such as
10 // FPU/CPU/ARCH names as well as specific support such as HDIV, etc.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/TargetParser/TargetParser.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/TargetParser/Triple.h"
17 
18 using namespace llvm;
19 using namespace AMDGPU;
20 
21 /// Find KV in array using binary search.
22 static const BasicSubtargetSubTypeKV *
23 find(StringRef S, ArrayRef<BasicSubtargetSubTypeKV> A) {
24   // Binary search the array
25   auto F = llvm::lower_bound(A, S);
26   // If not found then return NULL
27   if (F == A.end() || StringRef(F->Key) != S)
28     return nullptr;
29   // Return the found array item
30   return F;
31 }
32 
33 /// For each feature that is (transitively) implied by this feature, set it.
34 static void setImpliedBits(FeatureBitset &Bits, const FeatureBitset &Implies,
35                            ArrayRef<BasicSubtargetFeatureKV> FeatureTable) {
36   // OR the Implies bits in outside the loop. This allows the Implies for CPUs
37   // which might imply features not in FeatureTable to use this.
38   Bits |= Implies;
39   for (const auto &FE : FeatureTable)
40     if (Implies.test(FE.Value))
41       setImpliedBits(Bits, FE.Implies.getAsBitset(), FeatureTable);
42 }
43 
44 std::optional<llvm::StringMap<bool>> llvm::getCPUDefaultTargetFeatures(
45     StringRef CPU, ArrayRef<BasicSubtargetSubTypeKV> ProcDesc,
46     ArrayRef<BasicSubtargetFeatureKV> ProcFeatures) {
47   if (CPU.empty())
48     return std::nullopt;
49 
50   const BasicSubtargetSubTypeKV *CPUEntry = ::find(CPU, ProcDesc);
51   if (!CPUEntry)
52     return std::nullopt;
53 
54   // Set the features implied by this CPU feature if there is a match.
55   FeatureBitset Bits;
56   llvm::StringMap<bool> DefaultFeatures;
57   setImpliedBits(Bits, CPUEntry->Implies.getAsBitset(), ProcFeatures);
58 
59   [[maybe_unused]] unsigned BitSize = Bits.size();
60   for (const BasicSubtargetFeatureKV &FE : ProcFeatures) {
61     assert(FE.Value < BitSize && "Target Feature is out of range");
62     if (Bits[FE.Value])
63       DefaultFeatures[FE.Key] = true;
64   }
65   return DefaultFeatures;
66 }
67 
68 namespace {
69 
70 struct GPUInfo {
71   StringLiteral Name;
72   StringLiteral CanonicalName;
73   AMDGPU::GPUKind Kind;
74   unsigned Features;
75 };
76 
77 constexpr GPUInfo R600GPUs[] = {
78   // Name       Canonical    Kind        Features
79   //            Name
80   {{"r600"},    {"r600"},    GK_R600,    FEATURE_NONE },
81   {{"rv630"},   {"r600"},    GK_R600,    FEATURE_NONE },
82   {{"rv635"},   {"r600"},    GK_R600,    FEATURE_NONE },
83   {{"r630"},    {"r630"},    GK_R630,    FEATURE_NONE },
84   {{"rs780"},   {"rs880"},   GK_RS880,   FEATURE_NONE },
85   {{"rs880"},   {"rs880"},   GK_RS880,   FEATURE_NONE },
86   {{"rv610"},   {"rs880"},   GK_RS880,   FEATURE_NONE },
87   {{"rv620"},   {"rs880"},   GK_RS880,   FEATURE_NONE },
88   {{"rv670"},   {"rv670"},   GK_RV670,   FEATURE_NONE },
89   {{"rv710"},   {"rv710"},   GK_RV710,   FEATURE_NONE },
90   {{"rv730"},   {"rv730"},   GK_RV730,   FEATURE_NONE },
91   {{"rv740"},   {"rv770"},   GK_RV770,   FEATURE_NONE },
92   {{"rv770"},   {"rv770"},   GK_RV770,   FEATURE_NONE },
93   {{"cedar"},   {"cedar"},   GK_CEDAR,   FEATURE_NONE },
94   {{"palm"},    {"cedar"},   GK_CEDAR,   FEATURE_NONE },
95   {{"cypress"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA  },
96   {{"hemlock"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA  },
97   {{"juniper"}, {"juniper"}, GK_JUNIPER, FEATURE_NONE },
98   {{"redwood"}, {"redwood"}, GK_REDWOOD, FEATURE_NONE },
99   {{"sumo"},    {"sumo"},    GK_SUMO,    FEATURE_NONE },
100   {{"sumo2"},   {"sumo"},    GK_SUMO,    FEATURE_NONE },
101   {{"barts"},   {"barts"},   GK_BARTS,   FEATURE_NONE },
102   {{"caicos"},  {"caicos"},  GK_CAICOS,  FEATURE_NONE },
103   {{"aruba"},   {"cayman"},  GK_CAYMAN,  FEATURE_FMA  },
104   {{"cayman"},  {"cayman"},  GK_CAYMAN,  FEATURE_FMA  },
105   {{"turks"},   {"turks"},   GK_TURKS,   FEATURE_NONE }
106 };
107 
108 // This table should be sorted by the value of GPUKind
109 // Don't bother listing the implicitly true features
110 constexpr GPUInfo AMDGCNGPUs[] = {
111     // clang-format off
112     // Name         Canonical    Kind        Features
113     //              Name
114     {{"gfx600"},    {"gfx600"},  GK_GFX600,  FEATURE_FAST_FMA_F32},
115     {{"tahiti"},    {"gfx600"},  GK_GFX600,  FEATURE_FAST_FMA_F32},
116     {{"gfx601"},    {"gfx601"},  GK_GFX601,  FEATURE_NONE},
117     {{"pitcairn"},  {"gfx601"},  GK_GFX601,  FEATURE_NONE},
118     {{"verde"},     {"gfx601"},  GK_GFX601,  FEATURE_NONE},
119     {{"gfx602"},    {"gfx602"},  GK_GFX602,  FEATURE_NONE},
120     {{"hainan"},    {"gfx602"},  GK_GFX602,  FEATURE_NONE},
121     {{"oland"},     {"gfx602"},  GK_GFX602,  FEATURE_NONE},
122     {{"gfx700"},    {"gfx700"},  GK_GFX700,  FEATURE_NONE},
123     {{"kaveri"},    {"gfx700"},  GK_GFX700,  FEATURE_NONE},
124     {{"gfx701"},    {"gfx701"},  GK_GFX701,  FEATURE_FAST_FMA_F32},
125     {{"hawaii"},    {"gfx701"},  GK_GFX701,  FEATURE_FAST_FMA_F32},
126     {{"gfx702"},    {"gfx702"},  GK_GFX702,  FEATURE_FAST_FMA_F32},
127     {{"gfx703"},    {"gfx703"},  GK_GFX703,  FEATURE_NONE},
128     {{"kabini"},    {"gfx703"},  GK_GFX703,  FEATURE_NONE},
129     {{"mullins"},   {"gfx703"},  GK_GFX703,  FEATURE_NONE},
130     {{"gfx704"},    {"gfx704"},  GK_GFX704,  FEATURE_NONE},
131     {{"bonaire"},   {"gfx704"},  GK_GFX704,  FEATURE_NONE},
132     {{"gfx705"},    {"gfx705"},  GK_GFX705,  FEATURE_NONE},
133     {{"gfx801"},    {"gfx801"},  GK_GFX801,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
134     {{"carrizo"},   {"gfx801"},  GK_GFX801,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
135     {{"gfx802"},    {"gfx802"},  GK_GFX802,  FEATURE_FAST_DENORMAL_F32},
136     {{"iceland"},   {"gfx802"},  GK_GFX802,  FEATURE_FAST_DENORMAL_F32},
137     {{"tonga"},     {"gfx802"},  GK_GFX802,  FEATURE_FAST_DENORMAL_F32},
138     {{"gfx803"},    {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
139     {{"fiji"},      {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
140     {{"polaris10"}, {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
141     {{"polaris11"}, {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
142     {{"gfx805"},    {"gfx805"},  GK_GFX805,  FEATURE_FAST_DENORMAL_F32},
143     {{"tongapro"},  {"gfx805"},  GK_GFX805,  FEATURE_FAST_DENORMAL_F32},
144     {{"gfx810"},    {"gfx810"},  GK_GFX810,  FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
145     {{"stoney"},    {"gfx810"},  GK_GFX810,  FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
146     {{"gfx900"},    {"gfx900"},  GK_GFX900,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
147     {{"gfx902"},    {"gfx902"},  GK_GFX902,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
148     {{"gfx904"},    {"gfx904"},  GK_GFX904,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
149     {{"gfx906"},    {"gfx906"},  GK_GFX906,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
150     {{"gfx908"},    {"gfx908"},  GK_GFX908,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
151     {{"gfx909"},    {"gfx909"},  GK_GFX909,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
152     {{"gfx90a"},    {"gfx90a"},  GK_GFX90A,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
153     {{"gfx90c"},    {"gfx90c"},  GK_GFX90C,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
154     {{"gfx942"},    {"gfx942"},  GK_GFX942,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
155     {{"gfx950"},    {"gfx950"},  GK_GFX950,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
156     {{"gfx1010"},   {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
157     {{"gfx1011"},   {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
158     {{"gfx1012"},   {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
159     {{"gfx1013"},   {"gfx1013"}, GK_GFX1013, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
160     {{"gfx1030"},   {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
161     {{"gfx1031"},   {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
162     {{"gfx1032"},   {"gfx1032"}, GK_GFX1032, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
163     {{"gfx1033"},   {"gfx1033"}, GK_GFX1033, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
164     {{"gfx1034"},   {"gfx1034"}, GK_GFX1034, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
165     {{"gfx1035"},   {"gfx1035"}, GK_GFX1035, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
166     {{"gfx1036"},   {"gfx1036"}, GK_GFX1036, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
167     {{"gfx1100"},   {"gfx1100"}, GK_GFX1100, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
168     {{"gfx1101"},   {"gfx1101"}, GK_GFX1101, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
169     {{"gfx1102"},   {"gfx1102"}, GK_GFX1102, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
170     {{"gfx1103"},   {"gfx1103"}, GK_GFX1103, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
171     {{"gfx1150"},   {"gfx1150"}, GK_GFX1150, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
172     {{"gfx1151"},   {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
173     {{"gfx1152"},   {"gfx1152"}, GK_GFX1152, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
174     {{"gfx1153"},   {"gfx1153"}, GK_GFX1153, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
175     {{"gfx1200"},   {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
176     {{"gfx1201"},   {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
177     {{"gfx1250"},   {"gfx1250"}, GK_GFX1250, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
178 
179     {{"gfx9-generic"},      {"gfx9-generic"},    GK_GFX9_GENERIC,    FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
180     {{"gfx10-1-generic"},   {"gfx10-1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
181     {{"gfx10-3-generic"},   {"gfx10-3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
182     {{"gfx11-generic"},     {"gfx11-generic"},   GK_GFX11_GENERIC,   FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
183     {{"gfx12-generic"},     {"gfx12-generic"},   GK_GFX12_GENERIC,   FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
184     {{"gfx9-4-generic"},    {"gfx9-4-generic"},  GK_GFX9_4_GENERIC,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
185     // clang-format on
186 };
187 
188 const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
189   GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
190 
191   auto I =
192       llvm::lower_bound(Table, Search, [](const GPUInfo &A, const GPUInfo &B) {
193         return A.Kind < B.Kind;
194       });
195 
196   if (I == Table.end() || I->Kind != Search.Kind)
197     return nullptr;
198   return I;
199 }
200 
201 } // namespace
202 
203 StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) {
204   switch (AK) {
205   case AMDGPU::GK_GFX9_GENERIC:
206   case AMDGPU::GK_GFX9_4_GENERIC:
207     return "gfx9";
208   case AMDGPU::GK_GFX10_1_GENERIC:
209   case AMDGPU::GK_GFX10_3_GENERIC:
210     return "gfx10";
211   case AMDGPU::GK_GFX11_GENERIC:
212     return "gfx11";
213   case AMDGPU::GK_GFX12_GENERIC:
214     return "gfx12";
215   default: {
216     StringRef ArchName = getArchNameAMDGCN(AK);
217     return ArchName.empty() ? "" : ArchName.drop_back(2);
218   }
219   }
220 }
221 
222 StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
223   if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
224     return Entry->CanonicalName;
225   return "";
226 }
227 
228 StringRef llvm::AMDGPU::getArchNameR600(GPUKind AK) {
229   if (const auto *Entry = getArchEntry(AK, R600GPUs))
230     return Entry->CanonicalName;
231   return "";
232 }
233 
234 AMDGPU::GPUKind llvm::AMDGPU::parseArchAMDGCN(StringRef CPU) {
235   for (const auto &C : AMDGCNGPUs) {
236     if (CPU == C.Name)
237       return C.Kind;
238   }
239 
240   return AMDGPU::GPUKind::GK_NONE;
241 }
242 
243 AMDGPU::GPUKind llvm::AMDGPU::parseArchR600(StringRef CPU) {
244   for (const auto &C : R600GPUs) {
245     if (CPU == C.Name)
246       return C.Kind;
247   }
248 
249   return AMDGPU::GPUKind::GK_NONE;
250 }
251 
252 unsigned AMDGPU::getArchAttrAMDGCN(GPUKind AK) {
253   if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
254     return Entry->Features;
255   return FEATURE_NONE;
256 }
257 
258 unsigned AMDGPU::getArchAttrR600(GPUKind AK) {
259   if (const auto *Entry = getArchEntry(AK, R600GPUs))
260     return Entry->Features;
261   return FEATURE_NONE;
262 }
263 
264 void AMDGPU::fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values) {
265   // XXX: Should this only report unique canonical names?
266   for (const auto &C : AMDGCNGPUs)
267     Values.push_back(C.Name);
268 }
269 
270 void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
271   for (const auto &C : R600GPUs)
272     Values.push_back(C.Name);
273 }
274 
275 AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
276   AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
277   if (AK == AMDGPU::GPUKind::GK_NONE) {
278     if (GPU == "generic-hsa")
279       return {7, 0, 0};
280     if (GPU == "generic")
281       return {6, 0, 0};
282     return {0, 0, 0};
283   }
284 
285   // clang-format off
286   switch (AK) {
287   case GK_GFX600:  return {6, 0, 0};
288   case GK_GFX601:  return {6, 0, 1};
289   case GK_GFX602:  return {6, 0, 2};
290   case GK_GFX700:  return {7, 0, 0};
291   case GK_GFX701:  return {7, 0, 1};
292   case GK_GFX702:  return {7, 0, 2};
293   case GK_GFX703:  return {7, 0, 3};
294   case GK_GFX704:  return {7, 0, 4};
295   case GK_GFX705:  return {7, 0, 5};
296   case GK_GFX801:  return {8, 0, 1};
297   case GK_GFX802:  return {8, 0, 2};
298   case GK_GFX803:  return {8, 0, 3};
299   case GK_GFX805:  return {8, 0, 5};
300   case GK_GFX810:  return {8, 1, 0};
301   case GK_GFX900:  return {9, 0, 0};
302   case GK_GFX902:  return {9, 0, 2};
303   case GK_GFX904:  return {9, 0, 4};
304   case GK_GFX906:  return {9, 0, 6};
305   case GK_GFX908:  return {9, 0, 8};
306   case GK_GFX909:  return {9, 0, 9};
307   case GK_GFX90A:  return {9, 0, 10};
308   case GK_GFX90C:  return {9, 0, 12};
309   case GK_GFX942:  return {9, 4, 2};
310   case GK_GFX950:  return {9, 5, 0};
311   case GK_GFX1010: return {10, 1, 0};
312   case GK_GFX1011: return {10, 1, 1};
313   case GK_GFX1012: return {10, 1, 2};
314   case GK_GFX1013: return {10, 1, 3};
315   case GK_GFX1030: return {10, 3, 0};
316   case GK_GFX1031: return {10, 3, 1};
317   case GK_GFX1032: return {10, 3, 2};
318   case GK_GFX1033: return {10, 3, 3};
319   case GK_GFX1034: return {10, 3, 4};
320   case GK_GFX1035: return {10, 3, 5};
321   case GK_GFX1036: return {10, 3, 6};
322   case GK_GFX1100: return {11, 0, 0};
323   case GK_GFX1101: return {11, 0, 1};
324   case GK_GFX1102: return {11, 0, 2};
325   case GK_GFX1103: return {11, 0, 3};
326   case GK_GFX1150: return {11, 5, 0};
327   case GK_GFX1151: return {11, 5, 1};
328   case GK_GFX1152: return {11, 5, 2};
329   case GK_GFX1153: return {11, 5, 3};
330   case GK_GFX1200: return {12, 0, 0};
331   case GK_GFX1201: return {12, 0, 1};
332   case GK_GFX1250: return {12, 5, 0};
333 
334   // Generic targets return the lowest common denominator
335   // within their family. That is, the ISA that is the most
336   // restricted in terms of features.
337   //
338   // gfx9-generic is tricky because there is no lowest
339   // common denominator, so we return gfx900 which has mad-mix
340   // but this family doesn't have it.
341   //
342   // This API should never be used to check for a particular
343   // feature anyway.
344   //
345   // TODO: Split up this API depending on its caller so
346   // generic target handling is more obvious and less risky.
347   case GK_GFX9_GENERIC:    return {9, 0, 0};
348   case GK_GFX9_4_GENERIC:  return {9, 4, 0};
349   case GK_GFX10_1_GENERIC: return {10, 1, 0};
350   case GK_GFX10_3_GENERIC: return {10, 3, 0};
351   case GK_GFX11_GENERIC:   return {11, 0, 3};
352   case GK_GFX12_GENERIC:   return {12, 0, 0};
353   default:         return {0, 0, 0};
354   }
355   // clang-format on
356 }
357 
358 StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) {
359   assert(T.isAMDGPU());
360   auto ProcKind = T.isAMDGCN() ? parseArchAMDGCN(Arch) : parseArchR600(Arch);
361   if (ProcKind == GK_NONE)
362     return StringRef();
363 
364   return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind);
365 }
366 
367 void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
368                                   StringMap<bool> &Features) {
369   // XXX - What does the member GPU mean if device name string passed here?
370   if (T.isSPIRV() && T.getOS() == Triple::OSType::AMDHSA) {
371     // AMDGCN SPIRV must support the union of all AMDGCN features. This list
372     // should be kept in sorted order and updated whenever new features are
373     // added.
374     Features["16-bit-insts"] = true;
375     Features["ashr-pk-insts"] = true;
376     Features["atomic-buffer-pk-add-bf16-inst"] = true;
377     Features["atomic-buffer-global-pk-add-f16-insts"] = true;
378     Features["atomic-ds-pk-add-16-insts"] = true;
379     Features["atomic-fadd-rtn-insts"] = true;
380     Features["atomic-flat-pk-add-16-insts"] = true;
381     Features["atomic-global-pk-add-bf16-inst"] = true;
382     Features["bf8-cvt-scale-insts"] = true;
383     Features["bitop3-insts"] = true;
384     Features["ci-insts"] = true;
385     Features["dl-insts"] = true;
386     Features["dot1-insts"] = true;
387     Features["dot2-insts"] = true;
388     Features["dot3-insts"] = true;
389     Features["dot4-insts"] = true;
390     Features["dot5-insts"] = true;
391     Features["dot6-insts"] = true;
392     Features["dot7-insts"] = true;
393     Features["dot8-insts"] = true;
394     Features["dot9-insts"] = true;
395     Features["dot10-insts"] = true;
396     Features["dot11-insts"] = true;
397     Features["dot12-insts"] = true;
398     Features["dot13-insts"] = true;
399     Features["dpp"] = true;
400     Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
401     Features["f32-to-f16bf16-cvt-sr-insts"] = true;
402     Features["fp4-cvt-scale-insts"] = true;
403     Features["fp6bf6-cvt-scale-insts"] = true;
404     Features["fp8-insts"] = true;
405     Features["fp8-conversion-insts"] = true;
406     Features["fp8-cvt-scale-insts"] = true;
407     Features["gfx8-insts"] = true;
408     Features["gfx9-insts"] = true;
409     Features["gfx90a-insts"] = true;
410     Features["gfx940-insts"] = true;
411     Features["gfx950-insts"] = true;
412     Features["gfx10-insts"] = true;
413     Features["gfx10-3-insts"] = true;
414     Features["gfx11-insts"] = true;
415     Features["gfx12-insts"] = true;
416     Features["gws"] = true;
417     Features["image-insts"] = true;
418     Features["s-memrealtime"] = true;
419     Features["s-memtime-inst"] = true;
420     Features["mai-insts"] = true;
421     Features["permlane16-swap"] = true;
422     Features["permlane32-swap"] = true;
423     Features["prng-inst"] = true;
424     Features["wavefrontsize32"] = true;
425     Features["wavefrontsize64"] = true;
426     Features["vmem-to-lds-load-insts"] = true;
427   } else if (T.isAMDGCN()) {
428     AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
429     switch (Kind) {
430     case GK_GFX1250:
431       Features["ci-insts"] = true;
432       Features["dot7-insts"] = true;
433       Features["dot8-insts"] = true;
434       Features["dl-insts"] = true;
435       Features["16-bit-insts"] = true;
436       Features["dpp"] = true;
437       Features["gfx8-insts"] = true;
438       Features["gfx9-insts"] = true;
439       Features["gfx10-insts"] = true;
440       Features["gfx10-3-insts"] = true;
441       Features["gfx11-insts"] = true;
442       Features["gfx12-insts"] = true;
443       Features["gfx1250-insts"] = true;
444       Features["bitop3-insts"] = true;
445       Features["prng-inst"] = true;
446       Features["transpose-load-f4f6-insts"] = true;
447       Features["bf16-trans-insts"] = true;
448       Features["fp8-conversion-insts"] = true;
449       Features["fp8e5m3-insts"] = true;
450       Features["permlane16-swap"] = true;
451       Features["ashr-pk-insts"] = true;
452       Features["atomic-buffer-pk-add-bf16-inst"] = true;
453       Features["atomic-fadd-rtn-insts"] = true;
454       Features["atomic-buffer-global-pk-add-f16-insts"] = true;
455       Features["atomic-flat-pk-add-16-insts"] = true;
456       Features["atomic-global-pk-add-bf16-inst"] = true;
457       Features["atomic-ds-pk-add-16-insts"] = true;
458       Features["setprio-inc-wg-inst"] = true;
459       break;
460     case GK_GFX1201:
461     case GK_GFX1200:
462     case GK_GFX12_GENERIC:
463       Features["ci-insts"] = true;
464       Features["dot7-insts"] = true;
465       Features["dot8-insts"] = true;
466       Features["dot9-insts"] = true;
467       Features["dot10-insts"] = true;
468       Features["dot11-insts"] = true;
469       Features["dot12-insts"] = true;
470       Features["dl-insts"] = true;
471       Features["atomic-ds-pk-add-16-insts"] = true;
472       Features["atomic-flat-pk-add-16-insts"] = true;
473       Features["atomic-buffer-global-pk-add-f16-insts"] = true;
474       Features["atomic-buffer-pk-add-bf16-inst"] = true;
475       Features["atomic-global-pk-add-bf16-inst"] = true;
476       Features["16-bit-insts"] = true;
477       Features["dpp"] = true;
478       Features["gfx8-insts"] = true;
479       Features["gfx9-insts"] = true;
480       Features["gfx10-insts"] = true;
481       Features["gfx10-3-insts"] = true;
482       Features["gfx11-insts"] = true;
483       Features["gfx12-insts"] = true;
484       Features["atomic-fadd-rtn-insts"] = true;
485       Features["image-insts"] = true;
486       Features["fp8-conversion-insts"] = true;
487       break;
488     case GK_GFX1153:
489     case GK_GFX1152:
490     case GK_GFX1151:
491     case GK_GFX1150:
492     case GK_GFX1103:
493     case GK_GFX1102:
494     case GK_GFX1101:
495     case GK_GFX1100:
496     case GK_GFX11_GENERIC:
497       Features["ci-insts"] = true;
498       Features["dot5-insts"] = true;
499       Features["dot7-insts"] = true;
500       Features["dot8-insts"] = true;
501       Features["dot9-insts"] = true;
502       Features["dot10-insts"] = true;
503       Features["dot12-insts"] = true;
504       Features["dl-insts"] = true;
505       Features["16-bit-insts"] = true;
506       Features["dpp"] = true;
507       Features["gfx8-insts"] = true;
508       Features["gfx9-insts"] = true;
509       Features["gfx10-insts"] = true;
510       Features["gfx10-3-insts"] = true;
511       Features["gfx11-insts"] = true;
512       Features["atomic-fadd-rtn-insts"] = true;
513       Features["image-insts"] = true;
514       Features["gws"] = true;
515       break;
516     case GK_GFX1036:
517     case GK_GFX1035:
518     case GK_GFX1034:
519     case GK_GFX1033:
520     case GK_GFX1032:
521     case GK_GFX1031:
522     case GK_GFX1030:
523     case GK_GFX10_3_GENERIC:
524       Features["ci-insts"] = true;
525       Features["dot1-insts"] = true;
526       Features["dot2-insts"] = true;
527       Features["dot5-insts"] = true;
528       Features["dot6-insts"] = true;
529       Features["dot7-insts"] = true;
530       Features["dot10-insts"] = true;
531       Features["dl-insts"] = true;
532       Features["16-bit-insts"] = true;
533       Features["dpp"] = true;
534       Features["gfx8-insts"] = true;
535       Features["gfx9-insts"] = true;
536       Features["gfx10-insts"] = true;
537       Features["gfx10-3-insts"] = true;
538       Features["image-insts"] = true;
539       Features["s-memrealtime"] = true;
540       Features["s-memtime-inst"] = true;
541       Features["gws"] = true;
542       Features["vmem-to-lds-load-insts"] = true;
543       break;
544     case GK_GFX1012:
545     case GK_GFX1011:
546       Features["dot1-insts"] = true;
547       Features["dot2-insts"] = true;
548       Features["dot5-insts"] = true;
549       Features["dot6-insts"] = true;
550       Features["dot7-insts"] = true;
551       Features["dot10-insts"] = true;
552       [[fallthrough]];
553     case GK_GFX1013:
554     case GK_GFX1010:
555     case GK_GFX10_1_GENERIC:
556       Features["dl-insts"] = true;
557       Features["ci-insts"] = true;
558       Features["16-bit-insts"] = true;
559       Features["dpp"] = true;
560       Features["gfx8-insts"] = true;
561       Features["gfx9-insts"] = true;
562       Features["gfx10-insts"] = true;
563       Features["image-insts"] = true;
564       Features["s-memrealtime"] = true;
565       Features["s-memtime-inst"] = true;
566       Features["gws"] = true;
567       Features["vmem-to-lds-load-insts"] = true;
568       break;
569     case GK_GFX950:
570       Features["bitop3-insts"] = true;
571       Features["fp6bf6-cvt-scale-insts"] = true;
572       Features["fp4-cvt-scale-insts"] = true;
573       Features["bf8-cvt-scale-insts"] = true;
574       Features["fp8-cvt-scale-insts"] = true;
575       Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
576       Features["f32-to-f16bf16-cvt-sr-insts"] = true;
577       Features["prng-inst"] = true;
578       Features["permlane16-swap"] = true;
579       Features["permlane32-swap"] = true;
580       Features["ashr-pk-insts"] = true;
581       Features["dot12-insts"] = true;
582       Features["dot13-insts"] = true;
583       Features["atomic-buffer-pk-add-bf16-inst"] = true;
584       Features["gfx950-insts"] = true;
585       [[fallthrough]];
586     case GK_GFX942:
587       Features["fp8-insts"] = true;
588       Features["fp8-conversion-insts"] = true;
589       if (Kind != GK_GFX950)
590         Features["xf32-insts"] = true;
591       [[fallthrough]];
592     case GK_GFX9_4_GENERIC:
593       Features["gfx940-insts"] = true;
594       Features["atomic-ds-pk-add-16-insts"] = true;
595       Features["atomic-flat-pk-add-16-insts"] = true;
596       Features["atomic-global-pk-add-bf16-inst"] = true;
597       Features["gfx90a-insts"] = true;
598       Features["atomic-buffer-global-pk-add-f16-insts"] = true;
599       Features["atomic-fadd-rtn-insts"] = true;
600       Features["dot3-insts"] = true;
601       Features["dot4-insts"] = true;
602       Features["dot5-insts"] = true;
603       Features["dot6-insts"] = true;
604       Features["mai-insts"] = true;
605       Features["dl-insts"] = true;
606       Features["dot1-insts"] = true;
607       Features["dot2-insts"] = true;
608       Features["dot7-insts"] = true;
609       Features["dot10-insts"] = true;
610       Features["gfx9-insts"] = true;
611       Features["gfx8-insts"] = true;
612       Features["16-bit-insts"] = true;
613       Features["dpp"] = true;
614       Features["s-memrealtime"] = true;
615       Features["ci-insts"] = true;
616       Features["s-memtime-inst"] = true;
617       Features["gws"] = true;
618       Features["vmem-to-lds-load-insts"] = true;
619       break;
620     case GK_GFX90A:
621       Features["gfx90a-insts"] = true;
622       Features["atomic-buffer-global-pk-add-f16-insts"] = true;
623       Features["atomic-fadd-rtn-insts"] = true;
624       [[fallthrough]];
625     case GK_GFX908:
626       Features["dot3-insts"] = true;
627       Features["dot4-insts"] = true;
628       Features["dot5-insts"] = true;
629       Features["dot6-insts"] = true;
630       Features["mai-insts"] = true;
631       [[fallthrough]];
632     case GK_GFX906:
633       Features["dl-insts"] = true;
634       Features["dot1-insts"] = true;
635       Features["dot2-insts"] = true;
636       Features["dot7-insts"] = true;
637       Features["dot10-insts"] = true;
638       [[fallthrough]];
639     case GK_GFX90C:
640     case GK_GFX909:
641     case GK_GFX904:
642     case GK_GFX902:
643     case GK_GFX900:
644     case GK_GFX9_GENERIC:
645       Features["gfx9-insts"] = true;
646       Features["vmem-to-lds-load-insts"] = true;
647       [[fallthrough]];
648     case GK_GFX810:
649     case GK_GFX805:
650     case GK_GFX803:
651     case GK_GFX802:
652     case GK_GFX801:
653       Features["gfx8-insts"] = true;
654       Features["16-bit-insts"] = true;
655       Features["dpp"] = true;
656       Features["s-memrealtime"] = true;
657       [[fallthrough]];
658     case GK_GFX705:
659     case GK_GFX704:
660     case GK_GFX703:
661     case GK_GFX702:
662     case GK_GFX701:
663     case GK_GFX700:
664       Features["ci-insts"] = true;
665       [[fallthrough]];
666     case GK_GFX602:
667     case GK_GFX601:
668     case GK_GFX600:
669       Features["image-insts"] = true;
670       Features["s-memtime-inst"] = true;
671       Features["gws"] = true;
672       break;
673     case GK_NONE:
674       break;
675     default:
676       llvm_unreachable("Unhandled GPU!");
677     }
678   } else {
679     if (GPU.empty())
680       GPU = "r600";
681 
682     switch (llvm::AMDGPU::parseArchR600(GPU)) {
683     case GK_CAYMAN:
684     case GK_CYPRESS:
685     case GK_RV770:
686     case GK_RV670:
687       // TODO: Add fp64 when implemented.
688       break;
689     case GK_TURKS:
690     case GK_CAICOS:
691     case GK_BARTS:
692     case GK_SUMO:
693     case GK_REDWOOD:
694     case GK_JUNIPER:
695     case GK_CEDAR:
696     case GK_RV730:
697     case GK_RV710:
698     case GK_RS880:
699     case GK_R630:
700     case GK_R600:
701       break;
702     default:
703       llvm_unreachable("Unhandled GPU!");
704     }
705   }
706 }
707 
708 static bool isWave32Capable(StringRef GPU, const Triple &T) {
709   bool IsWave32Capable = false;
710   // XXX - What does the member GPU mean if device name string passed here?
711   if (T.isAMDGCN()) {
712     switch (parseArchAMDGCN(GPU)) {
713     case GK_GFX1250:
714     case GK_GFX1201:
715     case GK_GFX1200:
716     case GK_GFX1153:
717     case GK_GFX1152:
718     case GK_GFX1151:
719     case GK_GFX1150:
720     case GK_GFX1103:
721     case GK_GFX1102:
722     case GK_GFX1101:
723     case GK_GFX1100:
724     case GK_GFX1036:
725     case GK_GFX1035:
726     case GK_GFX1034:
727     case GK_GFX1033:
728     case GK_GFX1032:
729     case GK_GFX1031:
730     case GK_GFX1030:
731     case GK_GFX1012:
732     case GK_GFX1011:
733     case GK_GFX1013:
734     case GK_GFX1010:
735     case GK_GFX12_GENERIC:
736     case GK_GFX11_GENERIC:
737     case GK_GFX10_3_GENERIC:
738     case GK_GFX10_1_GENERIC:
739       IsWave32Capable = true;
740       break;
741     default:
742       break;
743     }
744   }
745   return IsWave32Capable;
746 }
747 
748 std::pair<FeatureError, StringRef>
749 AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T,
750                               StringMap<bool> &Features) {
751   bool IsWave32Capable = isWave32Capable(GPU, T);
752   const bool IsNullGPU = GPU.empty();
753   const bool HaveWave32 = Features.count("wavefrontsize32");
754   const bool HaveWave64 = Features.count("wavefrontsize64");
755   if (HaveWave32 && HaveWave64) {
756     return {AMDGPU::INVALID_FEATURE_COMBINATION,
757             "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"};
758   }
759   if (HaveWave32 && !IsNullGPU && !IsWave32Capable) {
760     return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"};
761   }
762   // Don't assume any wavesize with an unknown subtarget.
763   if (!IsNullGPU) {
764     // Default to wave32 if available, or wave64 if not
765     if (!HaveWave32 && !HaveWave64) {
766       StringRef DefaultWaveSizeFeature =
767           IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64";
768       Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
769     }
770   }
771   return {NO_ERROR, StringRef()};
772 }
773