1 //===-- TargetParser - Parser for target features ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a target parser to recognise hardware features such as
10 // FPU/CPU/ARCH names as well as specific support such as HDIV, etc.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/TargetParser/TargetParser.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/TargetParser/Triple.h"
17
18 using namespace llvm;
19 using namespace AMDGPU;
20
21 /// Find KV in array using binary search.
22 static const BasicSubtargetSubTypeKV *
find(StringRef S,ArrayRef<BasicSubtargetSubTypeKV> A)23 find(StringRef S, ArrayRef<BasicSubtargetSubTypeKV> A) {
24 // Binary search the array
25 auto F = llvm::lower_bound(A, S);
26 // If not found then return NULL
27 if (F == A.end() || StringRef(F->Key) != S)
28 return nullptr;
29 // Return the found array item
30 return F;
31 }
32
33 /// For each feature that is (transitively) implied by this feature, set it.
setImpliedBits(FeatureBitset & Bits,const FeatureBitset & Implies,ArrayRef<BasicSubtargetFeatureKV> FeatureTable)34 static void setImpliedBits(FeatureBitset &Bits, const FeatureBitset &Implies,
35 ArrayRef<BasicSubtargetFeatureKV> FeatureTable) {
36 // OR the Implies bits in outside the loop. This allows the Implies for CPUs
37 // which might imply features not in FeatureTable to use this.
38 Bits |= Implies;
39 for (const auto &FE : FeatureTable)
40 if (Implies.test(FE.Value))
41 setImpliedBits(Bits, FE.Implies.getAsBitset(), FeatureTable);
42 }
43
getCPUDefaultTargetFeatures(StringRef CPU,ArrayRef<BasicSubtargetSubTypeKV> ProcDesc,ArrayRef<BasicSubtargetFeatureKV> ProcFeatures)44 std::optional<llvm::StringMap<bool>> llvm::getCPUDefaultTargetFeatures(
45 StringRef CPU, ArrayRef<BasicSubtargetSubTypeKV> ProcDesc,
46 ArrayRef<BasicSubtargetFeatureKV> ProcFeatures) {
47 if (CPU.empty())
48 return std::nullopt;
49
50 const BasicSubtargetSubTypeKV *CPUEntry = ::find(CPU, ProcDesc);
51 if (!CPUEntry)
52 return std::nullopt;
53
54 // Set the features implied by this CPU feature if there is a match.
55 FeatureBitset Bits;
56 llvm::StringMap<bool> DefaultFeatures;
57 setImpliedBits(Bits, CPUEntry->Implies.getAsBitset(), ProcFeatures);
58
59 [[maybe_unused]] unsigned BitSize = Bits.size();
60 for (const BasicSubtargetFeatureKV &FE : ProcFeatures) {
61 assert(FE.Value < BitSize && "Target Feature is out of range");
62 if (Bits[FE.Value])
63 DefaultFeatures[FE.Key] = true;
64 }
65 return DefaultFeatures;
66 }
67
68 namespace {
69
/// One row of a GPU name table: ties a processor name accepted by the parser
/// to its canonical name, its GPUKind and its feature mask.
struct GPUInfo {
  // Name matched against the user-supplied string by parseArch*().
  StringLiteral Name;
  // Name reported by getArchName*() for Kind; rows that are aliases of one
  // another carry the same canonical name.
  StringLiteral CanonicalName;
  // Kind this name maps to; also the key getArchEntry() searches by.
  AMDGPU::GPUKind Kind;
  // Bitwise OR of FEATURE_* flags (FEATURE_NONE when there are none).
  unsigned Features;
};
76
// Processor names recognised for R600 targets.
//
// Several names may map to the same Kind; those rows are aliases and share
// the canonical name and feature mask. getArchNameR600() and getArchAttrR600()
// look rows up through getArchEntry(), which binary-searches on Kind, so the
// rows must stay ordered by Kind.
// NOTE(review): the numeric order of the GPUKind enumerators is declared
// outside this file - confirm against the enum when adding or moving rows.
constexpr GPUInfo R600GPUs[] = {
  // Name       Canonical    Kind        Features
  //            Name
  {{"r600"},    {"r600"},    GK_R600,    FEATURE_NONE },
  {{"rv630"},   {"r600"},    GK_R600,    FEATURE_NONE },
  {{"rv635"},   {"r600"},    GK_R600,    FEATURE_NONE },
  {{"r630"},    {"r630"},    GK_R630,    FEATURE_NONE },
  {{"rs780"},   {"rs880"},   GK_RS880,   FEATURE_NONE },
  {{"rs880"},   {"rs880"},   GK_RS880,   FEATURE_NONE },
  {{"rv610"},   {"rs880"},   GK_RS880,   FEATURE_NONE },
  {{"rv620"},   {"rs880"},   GK_RS880,   FEATURE_NONE },
  {{"rv670"},   {"rv670"},   GK_RV670,   FEATURE_NONE },
  {{"rv710"},   {"rv710"},   GK_RV710,   FEATURE_NONE },
  {{"rv730"},   {"rv730"},   GK_RV730,   FEATURE_NONE },
  {{"rv740"},   {"rv770"},   GK_RV770,   FEATURE_NONE },
  {{"rv770"},   {"rv770"},   GK_RV770,   FEATURE_NONE },
  {{"cedar"},   {"cedar"},   GK_CEDAR,   FEATURE_NONE },
  {{"palm"},    {"cedar"},   GK_CEDAR,   FEATURE_NONE },
  {{"cypress"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA  },
  {{"hemlock"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA  },
  {{"juniper"}, {"juniper"}, GK_JUNIPER, FEATURE_NONE },
  {{"redwood"}, {"redwood"}, GK_REDWOOD, FEATURE_NONE },
  {{"sumo"},    {"sumo"},    GK_SUMO,    FEATURE_NONE },
  {{"sumo2"},   {"sumo"},    GK_SUMO,    FEATURE_NONE },
  {{"barts"},   {"barts"},   GK_BARTS,   FEATURE_NONE },
  {{"caicos"},  {"caicos"},  GK_CAICOS,  FEATURE_NONE },
  {{"aruba"},   {"cayman"},  GK_CAYMAN,  FEATURE_FMA  },
  {{"cayman"},  {"cayman"},  GK_CAYMAN,  FEATURE_FMA  },
  {{"turks"},   {"turks"},   GK_TURKS,   FEATURE_NONE }
};
107
// Processor names recognised for AMDGCN targets.
//
// This table should be sorted by the value of GPUKind: getArchNameAMDGCN()
// and getArchAttrAMDGCN() find rows through getArchEntry(), which performs a
// binary search on Kind. Rows sharing a Kind are aliases (e.g. "tahiti" for
// "gfx600") and carry the same canonical name and feature mask. The
// "*-generic" rows form their own group after the specific processors.
//
// Don't bother listing the implicitly true features.
constexpr GPUInfo AMDGCNGPUs[] = {
    // clang-format off
  // Name         Canonical    Kind        Features
  //              Name
  {{"gfx600"},    {"gfx600"},  GK_GFX600,  FEATURE_FAST_FMA_F32},
  {{"tahiti"},    {"gfx600"},  GK_GFX600,  FEATURE_FAST_FMA_F32},
  {{"gfx601"},    {"gfx601"},  GK_GFX601,  FEATURE_NONE},
  {{"pitcairn"},  {"gfx601"},  GK_GFX601,  FEATURE_NONE},
  {{"verde"},     {"gfx601"},  GK_GFX601,  FEATURE_NONE},
  {{"gfx602"},    {"gfx602"},  GK_GFX602,  FEATURE_NONE},
  {{"hainan"},    {"gfx602"},  GK_GFX602,  FEATURE_NONE},
  {{"oland"},     {"gfx602"},  GK_GFX602,  FEATURE_NONE},
  {{"gfx700"},    {"gfx700"},  GK_GFX700,  FEATURE_NONE},
  {{"kaveri"},    {"gfx700"},  GK_GFX700,  FEATURE_NONE},
  {{"gfx701"},    {"gfx701"},  GK_GFX701,  FEATURE_FAST_FMA_F32},
  {{"hawaii"},    {"gfx701"},  GK_GFX701,  FEATURE_FAST_FMA_F32},
  {{"gfx702"},    {"gfx702"},  GK_GFX702,  FEATURE_FAST_FMA_F32},
  {{"gfx703"},    {"gfx703"},  GK_GFX703,  FEATURE_NONE},
  {{"kabini"},    {"gfx703"},  GK_GFX703,  FEATURE_NONE},
  {{"mullins"},   {"gfx703"},  GK_GFX703,  FEATURE_NONE},
  {{"gfx704"},    {"gfx704"},  GK_GFX704,  FEATURE_NONE},
  {{"bonaire"},   {"gfx704"},  GK_GFX704,  FEATURE_NONE},
  {{"gfx705"},    {"gfx705"},  GK_GFX705,  FEATURE_NONE},
  {{"gfx801"},    {"gfx801"},  GK_GFX801,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
  {{"carrizo"},   {"gfx801"},  GK_GFX801,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
  {{"gfx802"},    {"gfx802"},  GK_GFX802,  FEATURE_FAST_DENORMAL_F32},
  {{"iceland"},   {"gfx802"},  GK_GFX802,  FEATURE_FAST_DENORMAL_F32},
  {{"tonga"},     {"gfx802"},  GK_GFX802,  FEATURE_FAST_DENORMAL_F32},
  {{"gfx803"},    {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
  {{"fiji"},      {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
  {{"polaris10"}, {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
  {{"polaris11"}, {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
  {{"gfx805"},    {"gfx805"},  GK_GFX805,  FEATURE_FAST_DENORMAL_F32},
  {{"tongapro"},  {"gfx805"},  GK_GFX805,  FEATURE_FAST_DENORMAL_F32},
  {{"gfx810"},    {"gfx810"},  GK_GFX810,  FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
  {{"stoney"},    {"gfx810"},  GK_GFX810,  FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
  {{"gfx900"},    {"gfx900"},  GK_GFX900,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
  {{"gfx902"},    {"gfx902"},  GK_GFX902,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
  {{"gfx904"},    {"gfx904"},  GK_GFX904,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
  {{"gfx906"},    {"gfx906"},  GK_GFX906,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
  {{"gfx908"},    {"gfx908"},  GK_GFX908,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
  {{"gfx909"},    {"gfx909"},  GK_GFX909,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
  {{"gfx90a"},    {"gfx90a"},  GK_GFX90A,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
  {{"gfx90c"},    {"gfx90c"},  GK_GFX90C,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
  {{"gfx942"},    {"gfx942"},  GK_GFX942,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
  {{"gfx950"},    {"gfx950"},  GK_GFX950,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
  {{"gfx1010"},   {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
  {{"gfx1011"},   {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
  {{"gfx1012"},   {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
  {{"gfx1013"},   {"gfx1013"}, GK_GFX1013, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
  {{"gfx1030"},   {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1031"},   {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1032"},   {"gfx1032"}, GK_GFX1032, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1033"},   {"gfx1033"}, GK_GFX1033, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1034"},   {"gfx1034"}, GK_GFX1034, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1035"},   {"gfx1035"}, GK_GFX1035, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1036"},   {"gfx1036"}, GK_GFX1036, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1100"},   {"gfx1100"}, GK_GFX1100, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1101"},   {"gfx1101"}, GK_GFX1101, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1102"},   {"gfx1102"}, GK_GFX1102, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1103"},   {"gfx1103"}, GK_GFX1103, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1150"},   {"gfx1150"}, GK_GFX1150, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1151"},   {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1152"},   {"gfx1152"}, GK_GFX1152, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1153"},   {"gfx1153"}, GK_GFX1153, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1200"},   {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1201"},   {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx1250"},   {"gfx1250"}, GK_GFX1250, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},

  // Generic targets: one name per family rather than per processor.
  {{"gfx9-generic"},      {"gfx9-generic"},    GK_GFX9_GENERIC,    FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
  {{"gfx10-1-generic"},   {"gfx10-1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
  {{"gfx10-3-generic"},   {"gfx10-3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx11-generic"},     {"gfx11-generic"},   GK_GFX11_GENERIC,   FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx12-generic"},     {"gfx12-generic"},   GK_GFX12_GENERIC,   FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
  {{"gfx9-4-generic"},    {"gfx9-4-generic"},  GK_GFX9_4_GENERIC,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
    // clang-format on
};
187
getArchEntry(AMDGPU::GPUKind AK,ArrayRef<GPUInfo> Table)188 const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
189 GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
190
191 auto I =
192 llvm::lower_bound(Table, Search, [](const GPUInfo &A, const GPUInfo &B) {
193 return A.Kind < B.Kind;
194 });
195
196 if (I == Table.end() || I->Kind != Search.Kind)
197 return nullptr;
198 return I;
199 }
200
201 } // namespace
202
getArchFamilyNameAMDGCN(GPUKind AK)203 StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) {
204 switch (AK) {
205 case AMDGPU::GK_GFX9_GENERIC:
206 case AMDGPU::GK_GFX9_4_GENERIC:
207 return "gfx9";
208 case AMDGPU::GK_GFX10_1_GENERIC:
209 case AMDGPU::GK_GFX10_3_GENERIC:
210 return "gfx10";
211 case AMDGPU::GK_GFX11_GENERIC:
212 return "gfx11";
213 case AMDGPU::GK_GFX12_GENERIC:
214 return "gfx12";
215 default: {
216 StringRef ArchName = getArchNameAMDGCN(AK);
217 return ArchName.empty() ? "" : ArchName.drop_back(2);
218 }
219 }
220 }
221
getArchNameAMDGCN(GPUKind AK)222 StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
223 if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
224 return Entry->CanonicalName;
225 return "";
226 }
227
getArchNameR600(GPUKind AK)228 StringRef llvm::AMDGPU::getArchNameR600(GPUKind AK) {
229 if (const auto *Entry = getArchEntry(AK, R600GPUs))
230 return Entry->CanonicalName;
231 return "";
232 }
233
parseArchAMDGCN(StringRef CPU)234 AMDGPU::GPUKind llvm::AMDGPU::parseArchAMDGCN(StringRef CPU) {
235 for (const auto &C : AMDGCNGPUs) {
236 if (CPU == C.Name)
237 return C.Kind;
238 }
239
240 return AMDGPU::GPUKind::GK_NONE;
241 }
242
parseArchR600(StringRef CPU)243 AMDGPU::GPUKind llvm::AMDGPU::parseArchR600(StringRef CPU) {
244 for (const auto &C : R600GPUs) {
245 if (CPU == C.Name)
246 return C.Kind;
247 }
248
249 return AMDGPU::GPUKind::GK_NONE;
250 }
251
getArchAttrAMDGCN(GPUKind AK)252 unsigned AMDGPU::getArchAttrAMDGCN(GPUKind AK) {
253 if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
254 return Entry->Features;
255 return FEATURE_NONE;
256 }
257
getArchAttrR600(GPUKind AK)258 unsigned AMDGPU::getArchAttrR600(GPUKind AK) {
259 if (const auto *Entry = getArchEntry(AK, R600GPUs))
260 return Entry->Features;
261 return FEATURE_NONE;
262 }
263
fillValidArchListAMDGCN(SmallVectorImpl<StringRef> & Values)264 void AMDGPU::fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values) {
265 // XXX: Should this only report unique canonical names?
266 for (const auto &C : AMDGCNGPUs)
267 Values.push_back(C.Name);
268 }
269
fillValidArchListR600(SmallVectorImpl<StringRef> & Values)270 void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
271 for (const auto &C : R600GPUs)
272 Values.push_back(C.Name);
273 }
274
getIsaVersion(StringRef GPU)275 AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
276 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
277 if (AK == AMDGPU::GPUKind::GK_NONE) {
278 if (GPU == "generic-hsa")
279 return {7, 0, 0};
280 if (GPU == "generic")
281 return {6, 0, 0};
282 return {0, 0, 0};
283 }
284
285 // clang-format off
286 switch (AK) {
287 case GK_GFX600: return {6, 0, 0};
288 case GK_GFX601: return {6, 0, 1};
289 case GK_GFX602: return {6, 0, 2};
290 case GK_GFX700: return {7, 0, 0};
291 case GK_GFX701: return {7, 0, 1};
292 case GK_GFX702: return {7, 0, 2};
293 case GK_GFX703: return {7, 0, 3};
294 case GK_GFX704: return {7, 0, 4};
295 case GK_GFX705: return {7, 0, 5};
296 case GK_GFX801: return {8, 0, 1};
297 case GK_GFX802: return {8, 0, 2};
298 case GK_GFX803: return {8, 0, 3};
299 case GK_GFX805: return {8, 0, 5};
300 case GK_GFX810: return {8, 1, 0};
301 case GK_GFX900: return {9, 0, 0};
302 case GK_GFX902: return {9, 0, 2};
303 case GK_GFX904: return {9, 0, 4};
304 case GK_GFX906: return {9, 0, 6};
305 case GK_GFX908: return {9, 0, 8};
306 case GK_GFX909: return {9, 0, 9};
307 case GK_GFX90A: return {9, 0, 10};
308 case GK_GFX90C: return {9, 0, 12};
309 case GK_GFX942: return {9, 4, 2};
310 case GK_GFX950: return {9, 5, 0};
311 case GK_GFX1010: return {10, 1, 0};
312 case GK_GFX1011: return {10, 1, 1};
313 case GK_GFX1012: return {10, 1, 2};
314 case GK_GFX1013: return {10, 1, 3};
315 case GK_GFX1030: return {10, 3, 0};
316 case GK_GFX1031: return {10, 3, 1};
317 case GK_GFX1032: return {10, 3, 2};
318 case GK_GFX1033: return {10, 3, 3};
319 case GK_GFX1034: return {10, 3, 4};
320 case GK_GFX1035: return {10, 3, 5};
321 case GK_GFX1036: return {10, 3, 6};
322 case GK_GFX1100: return {11, 0, 0};
323 case GK_GFX1101: return {11, 0, 1};
324 case GK_GFX1102: return {11, 0, 2};
325 case GK_GFX1103: return {11, 0, 3};
326 case GK_GFX1150: return {11, 5, 0};
327 case GK_GFX1151: return {11, 5, 1};
328 case GK_GFX1152: return {11, 5, 2};
329 case GK_GFX1153: return {11, 5, 3};
330 case GK_GFX1200: return {12, 0, 0};
331 case GK_GFX1201: return {12, 0, 1};
332 case GK_GFX1250: return {12, 5, 0};
333
334 // Generic targets return the lowest common denominator
335 // within their family. That is, the ISA that is the most
336 // restricted in terms of features.
337 //
338 // gfx9-generic is tricky because there is no lowest
339 // common denominator, so we return gfx900 which has mad-mix
340 // but this family doesn't have it.
341 //
342 // This API should never be used to check for a particular
343 // feature anyway.
344 //
345 // TODO: Split up this API depending on its caller so
346 // generic target handling is more obvious and less risky.
347 case GK_GFX9_GENERIC: return {9, 0, 0};
348 case GK_GFX9_4_GENERIC: return {9, 4, 0};
349 case GK_GFX10_1_GENERIC: return {10, 1, 0};
350 case GK_GFX10_3_GENERIC: return {10, 3, 0};
351 case GK_GFX11_GENERIC: return {11, 0, 3};
352 case GK_GFX12_GENERIC: return {12, 0, 0};
353 default: return {0, 0, 0};
354 }
355 // clang-format on
356 }
357
getCanonicalArchName(const Triple & T,StringRef Arch)358 StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) {
359 assert(T.isAMDGPU());
360 auto ProcKind = T.isAMDGCN() ? parseArchAMDGCN(Arch) : parseArchR600(Arch);
361 if (ProcKind == GK_NONE)
362 return StringRef();
363
364 return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind);
365 }
366
/// Populate \p Features with the target features implied by \p GPU for the
/// target described by \p T.
///
/// Entries are only ever added or set to true; nothing already present in
/// \p Features is removed or cleared. Three cases are distinguished:
///  - SPIR-V triple with the AMDHSA OS: a fixed feature list is enabled and
///    \p GPU is not consulted.
///  - AMDGCN triple: \p GPU is parsed with parseArchAMDGCN() and the features
///    of that processor are enabled. An unrecognised or empty name
///    (GK_NONE) enables nothing.
///  - Anything else is treated as R600: an empty \p GPU defaults to "r600",
///    and no features are enabled for any R600 processor.
void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
                                  StringMap<bool> &Features) {
  // XXX - What does the member GPU mean if device name string passed here?
  if (T.isSPIRV() && T.getOS() == Triple::OSType::AMDHSA) {
    // AMDGCN SPIRV must support the union of all AMDGCN features. This list
    // should be kept in sorted order and updated whenever new features are
    // added.
    // NOTE(review): the list below is not in strict lexicographic order, and
    // some features enabled in the per-processor branch (e.g.
    // "gfx1250-insts", "xf32-insts") do not appear here - confirm whether
    // that is intended.
    Features["16-bit-insts"] = true;
    Features["ashr-pk-insts"] = true;
    Features["atomic-buffer-pk-add-bf16-inst"] = true;
    Features["atomic-buffer-global-pk-add-f16-insts"] = true;
    Features["atomic-ds-pk-add-16-insts"] = true;
    Features["atomic-fadd-rtn-insts"] = true;
    Features["atomic-flat-pk-add-16-insts"] = true;
    Features["atomic-global-pk-add-bf16-inst"] = true;
    Features["bf8-cvt-scale-insts"] = true;
    Features["bitop3-insts"] = true;
    Features["ci-insts"] = true;
    Features["dl-insts"] = true;
    Features["dot1-insts"] = true;
    Features["dot2-insts"] = true;
    Features["dot3-insts"] = true;
    Features["dot4-insts"] = true;
    Features["dot5-insts"] = true;
    Features["dot6-insts"] = true;
    Features["dot7-insts"] = true;
    Features["dot8-insts"] = true;
    Features["dot9-insts"] = true;
    Features["dot10-insts"] = true;
    Features["dot11-insts"] = true;
    Features["dot12-insts"] = true;
    Features["dot13-insts"] = true;
    Features["dpp"] = true;
    Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
    Features["f32-to-f16bf16-cvt-sr-insts"] = true;
    Features["fp4-cvt-scale-insts"] = true;
    Features["fp6bf6-cvt-scale-insts"] = true;
    Features["fp8-insts"] = true;
    Features["fp8-conversion-insts"] = true;
    Features["fp8-cvt-scale-insts"] = true;
    Features["gfx8-insts"] = true;
    Features["gfx9-insts"] = true;
    Features["gfx90a-insts"] = true;
    Features["gfx940-insts"] = true;
    Features["gfx950-insts"] = true;
    Features["gfx10-insts"] = true;
    Features["gfx10-3-insts"] = true;
    Features["gfx11-insts"] = true;
    Features["gfx12-insts"] = true;
    Features["gws"] = true;
    Features["image-insts"] = true;
    Features["s-memrealtime"] = true;
    Features["s-memtime-inst"] = true;
    Features["mai-insts"] = true;
    Features["permlane16-swap"] = true;
    Features["permlane32-swap"] = true;
    Features["prng-inst"] = true;
    Features["wavefrontsize32"] = true;
    Features["wavefrontsize64"] = true;
    Features["vmem-to-lds-load-insts"] = true;
  } else if (T.isAMDGCN()) {
    AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
    // gfx10 and newer spell out their complete feature list per group and
    // end with a break. gfx9.4 and older are written as a chain of
    // [[fallthrough]] cases, newest first, so each processor also picks up
    // everything listed for the older ones below it.
    switch (Kind) {
    case GK_GFX1250:
      Features["ci-insts"] = true;
      Features["dot7-insts"] = true;
      Features["dot8-insts"] = true;
      Features["dl-insts"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["gfx10-3-insts"] = true;
      Features["gfx11-insts"] = true;
      Features["gfx12-insts"] = true;
      Features["gfx1250-insts"] = true;
      Features["bitop3-insts"] = true;
      Features["prng-inst"] = true;
      Features["transpose-load-f4f6-insts"] = true;
      Features["bf16-trans-insts"] = true;
      Features["fp8-conversion-insts"] = true;
      Features["fp8e5m3-insts"] = true;
      Features["permlane16-swap"] = true;
      Features["ashr-pk-insts"] = true;
      Features["atomic-buffer-pk-add-bf16-inst"] = true;
      Features["atomic-fadd-rtn-insts"] = true;
      Features["atomic-buffer-global-pk-add-f16-insts"] = true;
      Features["atomic-flat-pk-add-16-insts"] = true;
      Features["atomic-global-pk-add-bf16-inst"] = true;
      Features["atomic-ds-pk-add-16-insts"] = true;
      Features["setprio-inc-wg-inst"] = true;
      break;
    case GK_GFX1201:
    case GK_GFX1200:
    case GK_GFX12_GENERIC:
      Features["ci-insts"] = true;
      Features["dot7-insts"] = true;
      Features["dot8-insts"] = true;
      Features["dot9-insts"] = true;
      Features["dot10-insts"] = true;
      Features["dot11-insts"] = true;
      Features["dot12-insts"] = true;
      Features["dl-insts"] = true;
      Features["atomic-ds-pk-add-16-insts"] = true;
      Features["atomic-flat-pk-add-16-insts"] = true;
      Features["atomic-buffer-global-pk-add-f16-insts"] = true;
      Features["atomic-buffer-pk-add-bf16-inst"] = true;
      Features["atomic-global-pk-add-bf16-inst"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["gfx10-3-insts"] = true;
      Features["gfx11-insts"] = true;
      Features["gfx12-insts"] = true;
      Features["atomic-fadd-rtn-insts"] = true;
      Features["image-insts"] = true;
      Features["fp8-conversion-insts"] = true;
      break;
    case GK_GFX1153:
    case GK_GFX1152:
    case GK_GFX1151:
    case GK_GFX1150:
    case GK_GFX1103:
    case GK_GFX1102:
    case GK_GFX1101:
    case GK_GFX1100:
    case GK_GFX11_GENERIC:
      Features["ci-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot7-insts"] = true;
      Features["dot8-insts"] = true;
      Features["dot9-insts"] = true;
      Features["dot10-insts"] = true;
      Features["dot12-insts"] = true;
      Features["dl-insts"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["gfx10-3-insts"] = true;
      Features["gfx11-insts"] = true;
      Features["atomic-fadd-rtn-insts"] = true;
      Features["image-insts"] = true;
      Features["gws"] = true;
      break;
    case GK_GFX1036:
    case GK_GFX1035:
    case GK_GFX1034:
    case GK_GFX1033:
    case GK_GFX1032:
    case GK_GFX1031:
    case GK_GFX1030:
    case GK_GFX10_3_GENERIC:
      Features["ci-insts"] = true;
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["dot7-insts"] = true;
      Features["dot10-insts"] = true;
      Features["dl-insts"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["gfx10-3-insts"] = true;
      Features["image-insts"] = true;
      Features["s-memrealtime"] = true;
      Features["s-memtime-inst"] = true;
      Features["gws"] = true;
      Features["vmem-to-lds-load-insts"] = true;
      break;
    // gfx1011/gfx1012 add the dot instructions on top of the gfx10.1 base
    // set shared with gfx1010/gfx1013 and gfx10-1-generic.
    case GK_GFX1012:
    case GK_GFX1011:
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["dot7-insts"] = true;
      Features["dot10-insts"] = true;
      [[fallthrough]];
    case GK_GFX1013:
    case GK_GFX1010:
    case GK_GFX10_1_GENERIC:
      Features["dl-insts"] = true;
      Features["ci-insts"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["image-insts"] = true;
      Features["s-memrealtime"] = true;
      Features["s-memtime-inst"] = true;
      Features["gws"] = true;
      Features["vmem-to-lds-load-insts"] = true;
      break;
    // gfx950 -> gfx942 -> gfx9-4-generic: each step falls through into the
    // next, then the chain ends with a break (it does not continue into the
    // gfx90a chain below).
    case GK_GFX950:
      Features["bitop3-insts"] = true;
      Features["fp6bf6-cvt-scale-insts"] = true;
      Features["fp4-cvt-scale-insts"] = true;
      Features["bf8-cvt-scale-insts"] = true;
      Features["fp8-cvt-scale-insts"] = true;
      Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
      Features["f32-to-f16bf16-cvt-sr-insts"] = true;
      Features["prng-inst"] = true;
      Features["permlane16-swap"] = true;
      Features["permlane32-swap"] = true;
      Features["ashr-pk-insts"] = true;
      Features["dot12-insts"] = true;
      Features["dot13-insts"] = true;
      Features["atomic-buffer-pk-add-bf16-inst"] = true;
      Features["gfx950-insts"] = true;
      [[fallthrough]];
    case GK_GFX942:
      Features["fp8-insts"] = true;
      Features["fp8-conversion-insts"] = true;
      // gfx950 reaches this case by fallthrough but must not get xf32-insts.
      if (Kind != GK_GFX950)
        Features["xf32-insts"] = true;
      [[fallthrough]];
    case GK_GFX9_4_GENERIC:
      Features["gfx940-insts"] = true;
      Features["atomic-ds-pk-add-16-insts"] = true;
      Features["atomic-flat-pk-add-16-insts"] = true;
      Features["atomic-global-pk-add-bf16-inst"] = true;
      Features["gfx90a-insts"] = true;
      Features["atomic-buffer-global-pk-add-f16-insts"] = true;
      Features["atomic-fadd-rtn-insts"] = true;
      Features["dot3-insts"] = true;
      Features["dot4-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["mai-insts"] = true;
      Features["dl-insts"] = true;
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot7-insts"] = true;
      Features["dot10-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx8-insts"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["s-memrealtime"] = true;
      Features["ci-insts"] = true;
      Features["s-memtime-inst"] = true;
      Features["gws"] = true;
      Features["vmem-to-lds-load-insts"] = true;
      break;
    // From here down every case falls through to the end of the gfx6 group:
    // gfx90a -> gfx908 -> gfx906 -> remaining gfx9 -> gfx8 -> gfx7 -> gfx6.
    case GK_GFX90A:
      Features["gfx90a-insts"] = true;
      Features["atomic-buffer-global-pk-add-f16-insts"] = true;
      Features["atomic-fadd-rtn-insts"] = true;
      [[fallthrough]];
    case GK_GFX908:
      Features["dot3-insts"] = true;
      Features["dot4-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["mai-insts"] = true;
      [[fallthrough]];
    case GK_GFX906:
      Features["dl-insts"] = true;
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot7-insts"] = true;
      Features["dot10-insts"] = true;
      [[fallthrough]];
    case GK_GFX90C:
    case GK_GFX909:
    case GK_GFX904:
    case GK_GFX902:
    case GK_GFX900:
    case GK_GFX9_GENERIC:
      Features["gfx9-insts"] = true;
      Features["vmem-to-lds-load-insts"] = true;
      [[fallthrough]];
    case GK_GFX810:
    case GK_GFX805:
    case GK_GFX803:
    case GK_GFX802:
    case GK_GFX801:
      Features["gfx8-insts"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["s-memrealtime"] = true;
      [[fallthrough]];
    case GK_GFX705:
    case GK_GFX704:
    case GK_GFX703:
    case GK_GFX702:
    case GK_GFX701:
    case GK_GFX700:
      Features["ci-insts"] = true;
      [[fallthrough]];
    case GK_GFX602:
    case GK_GFX601:
    case GK_GFX600:
      Features["image-insts"] = true;
      Features["s-memtime-inst"] = true;
      Features["gws"] = true;
      break;
    // Empty or unrecognised processor name: leave Features untouched.
    case GK_NONE:
      break;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  } else {
    // R600: unlike AMDGCN, an empty name selects a default processor.
    if (GPU.empty())
      GPU = "r600";

    // No R600 processor enables any feature at the moment; the switch only
    // enumerates the known kinds.
    // NOTE(review): a non-empty name that parseArchR600() does not recognise
    // yields GK_NONE and reaches llvm_unreachable below, whereas the AMDGCN
    // branch tolerates GK_NONE - presumably callers validate the name first;
    // confirm.
    switch (llvm::AMDGPU::parseArchR600(GPU)) {
    case GK_CAYMAN:
    case GK_CYPRESS:
    case GK_RV770:
    case GK_RV670:
      // TODO: Add fp64 when implemented.
      break;
    case GK_TURKS:
    case GK_CAICOS:
    case GK_BARTS:
    case GK_SUMO:
    case GK_REDWOOD:
    case GK_JUNIPER:
    case GK_CEDAR:
    case GK_RV730:
    case GK_RV710:
    case GK_RS880:
    case GK_R630:
    case GK_R600:
      break;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  }
}
707
isWave32Capable(StringRef GPU,const Triple & T)708 static bool isWave32Capable(StringRef GPU, const Triple &T) {
709 bool IsWave32Capable = false;
710 // XXX - What does the member GPU mean if device name string passed here?
711 if (T.isAMDGCN()) {
712 switch (parseArchAMDGCN(GPU)) {
713 case GK_GFX1250:
714 case GK_GFX1201:
715 case GK_GFX1200:
716 case GK_GFX1153:
717 case GK_GFX1152:
718 case GK_GFX1151:
719 case GK_GFX1150:
720 case GK_GFX1103:
721 case GK_GFX1102:
722 case GK_GFX1101:
723 case GK_GFX1100:
724 case GK_GFX1036:
725 case GK_GFX1035:
726 case GK_GFX1034:
727 case GK_GFX1033:
728 case GK_GFX1032:
729 case GK_GFX1031:
730 case GK_GFX1030:
731 case GK_GFX1012:
732 case GK_GFX1011:
733 case GK_GFX1013:
734 case GK_GFX1010:
735 case GK_GFX12_GENERIC:
736 case GK_GFX11_GENERIC:
737 case GK_GFX10_3_GENERIC:
738 case GK_GFX10_1_GENERIC:
739 IsWave32Capable = true;
740 break;
741 default:
742 break;
743 }
744 }
745 return IsWave32Capable;
746 }
747
748 std::pair<FeatureError, StringRef>
insertWaveSizeFeature(StringRef GPU,const Triple & T,StringMap<bool> & Features)749 AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T,
750 StringMap<bool> &Features) {
751 bool IsWave32Capable = isWave32Capable(GPU, T);
752 const bool IsNullGPU = GPU.empty();
753 const bool HaveWave32 = Features.count("wavefrontsize32");
754 const bool HaveWave64 = Features.count("wavefrontsize64");
755 if (HaveWave32 && HaveWave64) {
756 return {AMDGPU::INVALID_FEATURE_COMBINATION,
757 "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"};
758 }
759 if (HaveWave32 && !IsNullGPU && !IsWave32Capable) {
760 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"};
761 }
762 // Don't assume any wavesize with an unknown subtarget.
763 if (!IsNullGPU) {
764 // Default to wave32 if available, or wave64 if not
765 if (!HaveWave32 && !HaveWave64) {
766 StringRef DefaultWaveSizeFeature =
767 IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64";
768 Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
769 }
770 }
771 return {NO_ERROR, StringRef()};
772 }
773