xref: /freebsd/contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp (revision fcaf7f8644a9988098ac6be2165bce3ea4786e91)
1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29 
30 static const char *const DataLayoutStringR600 =
31     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
32     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
33 
// Data layout description handed to resetDataLayout() by the
// AMDGPUTargetInfo constructor when the triple is amdgcn.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1"
    "-ni:7";
39 
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41     Generic,  // Default
42     Global,   // opencl_global
43     Local,    // opencl_local
44     Constant, // opencl_constant
45     Private,  // opencl_private
46     Generic,  // opencl_generic
47     Global,   // opencl_global_device
48     Global,   // opencl_global_host
49     Global,   // cuda_device
50     Constant, // cuda_constant
51     Local,    // cuda_shared
52     Global,   // sycl_global
53     Global,   // sycl_global_device
54     Global,   // sycl_global_host
55     Local,    // sycl_local
56     Private,  // sycl_private
57     Generic,  // ptr32_sptr
58     Generic,  // ptr32_uptr
59     Generic   // ptr64
60 };
61 
62 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
63     Private,  // Default
64     Global,   // opencl_global
65     Local,    // opencl_local
66     Constant, // opencl_constant
67     Private,  // opencl_private
68     Generic,  // opencl_generic
69     Global,   // opencl_global_device
70     Global,   // opencl_global_host
71     Global,   // cuda_device
72     Constant, // cuda_constant
73     Local,    // cuda_shared
74     // SYCL address space values for this map are dummy
75     Generic,  // sycl_global
76     Generic,  // sycl_global_device
77     Generic,  // sycl_global_host
78     Generic,  // sycl_local
79     Generic,  // sycl_private
80     Generic,  // ptr32_sptr
81     Generic,  // ptr32_uptr
82     Generic   // ptr64
83 
84 };
85 } // namespace targets
86 } // namespace clang
87 
88 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
89 #define BUILTIN(ID, TYPE, ATTRS)                                               \
90   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
91 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
92   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
93 #include "clang/Basic/BuiltinsAMDGPU.def"
94 };
95 
96 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
97   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
98   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
99   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
100   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
101   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
102   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
103   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
104   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
105   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
106   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
107   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
108   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
109   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
110   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
111   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
112   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
113   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
114   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
115   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
116   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
117   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
118   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
119   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
120   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
121   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
122   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
123   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
124   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
125   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
126   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
127   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
128   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
129   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
130   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
131   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
132   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
133   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
134   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
135   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
136   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
137   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
138   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
139   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
140   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
141   "flat_scratch_lo", "flat_scratch_hi",
142   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
143   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
144   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
145   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
146   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
147   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
148   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
149   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
150   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
151   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
152   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
153   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
154   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
155   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
156   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
157   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
158   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
159   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
160   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
161   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
162   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
163   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
164   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
165   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
166   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
167   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
168   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
169   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
170   "a252", "a253", "a254", "a255"
171 };
172 
173 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
174   return llvm::makeArrayRef(GCCRegNames);
175 }
176 
177 bool AMDGPUTargetInfo::initFeatureMap(
178     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
179     const std::vector<std::string> &FeatureVec) const {
180 
181   using namespace llvm::AMDGPU;
182 
183   // XXX - What does the member GPU mean if device name string passed here?
184   if (isAMDGCN(getTriple())) {
185     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
186     case GK_GFX1103:
187     case GK_GFX1102:
188     case GK_GFX1101:
189     case GK_GFX1100:
190       Features["ci-insts"] = true;
191       Features["dot1-insts"] = true;
192       Features["dot5-insts"] = true;
193       Features["dot6-insts"] = true;
194       Features["dot7-insts"] = true;
195       Features["dot8-insts"] = true;
196       Features["dl-insts"] = true;
197       Features["flat-address-space"] = true;
198       Features["16-bit-insts"] = true;
199       Features["dpp"] = true;
200       Features["gfx8-insts"] = true;
201       Features["gfx9-insts"] = true;
202       Features["gfx10-insts"] = true;
203       Features["gfx10-3-insts"] = true;
204       Features["gfx11-insts"] = true;
205       break;
206     case GK_GFX1036:
207     case GK_GFX1035:
208     case GK_GFX1034:
209     case GK_GFX1033:
210     case GK_GFX1032:
211     case GK_GFX1031:
212     case GK_GFX1030:
213       Features["ci-insts"] = true;
214       Features["dot1-insts"] = true;
215       Features["dot2-insts"] = true;
216       Features["dot5-insts"] = true;
217       Features["dot6-insts"] = true;
218       Features["dot7-insts"] = true;
219       Features["dl-insts"] = true;
220       Features["flat-address-space"] = true;
221       Features["16-bit-insts"] = true;
222       Features["dpp"] = true;
223       Features["gfx8-insts"] = true;
224       Features["gfx9-insts"] = true;
225       Features["gfx10-insts"] = true;
226       Features["gfx10-3-insts"] = true;
227       Features["s-memrealtime"] = true;
228       Features["s-memtime-inst"] = true;
229       break;
230     case GK_GFX1012:
231     case GK_GFX1011:
232       Features["dot1-insts"] = true;
233       Features["dot2-insts"] = true;
234       Features["dot5-insts"] = true;
235       Features["dot6-insts"] = true;
236       Features["dot7-insts"] = true;
237       LLVM_FALLTHROUGH;
238     case GK_GFX1013:
239     case GK_GFX1010:
240       Features["dl-insts"] = true;
241       Features["ci-insts"] = true;
242       Features["flat-address-space"] = true;
243       Features["16-bit-insts"] = true;
244       Features["dpp"] = true;
245       Features["gfx8-insts"] = true;
246       Features["gfx9-insts"] = true;
247       Features["gfx10-insts"] = true;
248       Features["s-memrealtime"] = true;
249       Features["s-memtime-inst"] = true;
250       break;
251     case GK_GFX940:
252       Features["gfx940-insts"] = true;
253       Features["fp8-insts"] = true;
254       LLVM_FALLTHROUGH;
255     case GK_GFX90A:
256       Features["gfx90a-insts"] = true;
257       LLVM_FALLTHROUGH;
258     case GK_GFX908:
259       Features["dot3-insts"] = true;
260       Features["dot4-insts"] = true;
261       Features["dot5-insts"] = true;
262       Features["dot6-insts"] = true;
263       Features["mai-insts"] = true;
264       LLVM_FALLTHROUGH;
265     case GK_GFX906:
266       Features["dl-insts"] = true;
267       Features["dot1-insts"] = true;
268       Features["dot2-insts"] = true;
269       Features["dot7-insts"] = true;
270       LLVM_FALLTHROUGH;
271     case GK_GFX90C:
272     case GK_GFX909:
273     case GK_GFX904:
274     case GK_GFX902:
275     case GK_GFX900:
276       Features["gfx9-insts"] = true;
277       LLVM_FALLTHROUGH;
278     case GK_GFX810:
279     case GK_GFX805:
280     case GK_GFX803:
281     case GK_GFX802:
282     case GK_GFX801:
283       Features["gfx8-insts"] = true;
284       Features["16-bit-insts"] = true;
285       Features["dpp"] = true;
286       Features["s-memrealtime"] = true;
287       LLVM_FALLTHROUGH;
288     case GK_GFX705:
289     case GK_GFX704:
290     case GK_GFX703:
291     case GK_GFX702:
292     case GK_GFX701:
293     case GK_GFX700:
294       Features["ci-insts"] = true;
295       Features["flat-address-space"] = true;
296       LLVM_FALLTHROUGH;
297     case GK_GFX602:
298     case GK_GFX601:
299     case GK_GFX600:
300       Features["s-memtime-inst"] = true;
301       break;
302     case GK_NONE:
303       break;
304     default:
305       llvm_unreachable("Unhandled GPU!");
306     }
307   } else {
308     if (CPU.empty())
309       CPU = "r600";
310 
311     switch (llvm::AMDGPU::parseArchR600(CPU)) {
312     case GK_CAYMAN:
313     case GK_CYPRESS:
314     case GK_RV770:
315     case GK_RV670:
316       // TODO: Add fp64 when implemented.
317       break;
318     case GK_TURKS:
319     case GK_CAICOS:
320     case GK_BARTS:
321     case GK_SUMO:
322     case GK_REDWOOD:
323     case GK_JUNIPER:
324     case GK_CEDAR:
325     case GK_RV730:
326     case GK_RV710:
327     case GK_RS880:
328     case GK_R630:
329     case GK_R600:
330       break;
331     default:
332       llvm_unreachable("Unhandled GPU!");
333     }
334   }
335 
336   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
337 }
338 
339 void AMDGPUTargetInfo::fillValidCPUList(
340     SmallVectorImpl<StringRef> &Values) const {
341   if (isAMDGCN(getTriple()))
342     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
343   else
344     llvm::AMDGPU::fillValidArchListR600(Values);
345 }
346 
347 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
348   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
349 }
350 
351 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
352                                    const TargetOptions &Opts)
353     : TargetInfo(Triple),
354       GPUKind(isAMDGCN(Triple) ?
355               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
356               llvm::AMDGPU::parseArchR600(Opts.CPU)),
357       GPUFeatures(isAMDGCN(Triple) ?
358                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
359                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
360   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
361                                         : DataLayoutStringR600);
362 
363   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
364                      !isAMDGCN(Triple));
365   UseAddrSpaceMapMangling = true;
366 
367   HasLegalHalfType = true;
368   HasFloat16 = true;
369   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
370   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
371 
372   // Set pointer width and alignment for target address space 0.
373   PointerWidth = PointerAlign = getPointerWidthV(Generic);
374   if (getMaxPointerWidth() == 64) {
375     LongWidth = LongAlign = 64;
376     SizeType = UnsignedLong;
377     PtrDiffType = SignedLong;
378     IntPtrType = SignedLong;
379   }
380 
381   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
382 }
383 
384 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
385   TargetInfo::adjust(Diags, Opts);
386   // ToDo: There are still a few places using default address space as private
387   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
388   // can be removed from the following line.
389   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
390                      !isAMDGCN(getTriple()));
391 }
392 
393 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
394   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
395                                              Builtin::FirstTSBuiltin);
396 }
397 
398 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
399                                         MacroBuilder &Builder) const {
400   Builder.defineMacro("__AMD__");
401   Builder.defineMacro("__AMDGPU__");
402 
403   if (isAMDGCN(getTriple()))
404     Builder.defineMacro("__AMDGCN__");
405   else
406     Builder.defineMacro("__R600__");
407 
408   if (GPUKind != llvm::AMDGPU::GK_NONE) {
409     StringRef CanonName = isAMDGCN(getTriple()) ?
410       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
411     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
412     // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
413     if (isAMDGCN(getTriple())) {
414       assert(CanonName.startswith("gfx") && "Invalid amdgcn canonical name");
415       Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
416                           Twine("__"));
417     }
418     if (isAMDGCN(getTriple())) {
419       Builder.defineMacro("__amdgcn_processor__",
420                           Twine("\"") + Twine(CanonName) + Twine("\""));
421       Builder.defineMacro("__amdgcn_target_id__",
422                           Twine("\"") + Twine(*getTargetID()) + Twine("\""));
423       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
424         auto Loc = OffloadArchFeatures.find(F);
425         if (Loc != OffloadArchFeatures.end()) {
426           std::string NewF = F.str();
427           std::replace(NewF.begin(), NewF.end(), '-', '_');
428           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
429                                   Twine("__"),
430                               Loc->second ? "1" : "0");
431         }
432       }
433     }
434   }
435 
436   if (AllowAMDGPUUnsafeFPAtomics)
437     Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
438 
439   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
440   // removed in the near future.
441   if (hasFMAF())
442     Builder.defineMacro("__HAS_FMAF__");
443   if (hasFastFMAF())
444     Builder.defineMacro("FP_FAST_FMAF");
445   if (hasLDEXPF())
446     Builder.defineMacro("__HAS_LDEXPF__");
447   if (hasFP64())
448     Builder.defineMacro("__HAS_FP64__");
449   if (hasFastFMA())
450     Builder.defineMacro("FP_FAST_FMA");
451 
452   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
453 }
454 
455 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
456   assert(HalfFormat == Aux->HalfFormat);
457   assert(FloatFormat == Aux->FloatFormat);
458   assert(DoubleFormat == Aux->DoubleFormat);
459 
460   // On x86_64 long double is 80-bit extended precision format, which is
461   // not supported by AMDGPU. 128-bit floating point format is also not
462   // supported by AMDGPU. Therefore keep its own format for these two types.
463   auto SaveLongDoubleFormat = LongDoubleFormat;
464   auto SaveFloat128Format = Float128Format;
465   auto SaveLongDoubleWidth = LongDoubleWidth;
466   auto SaveLongDoubleAlign = LongDoubleAlign;
467   copyAuxTarget(Aux);
468   LongDoubleFormat = SaveLongDoubleFormat;
469   Float128Format = SaveFloat128Format;
470   LongDoubleWidth = SaveLongDoubleWidth;
471   LongDoubleAlign = SaveLongDoubleAlign;
472   // For certain builtin types support on the host target, claim they are
473   // support to pass the compilation of the host code during the device-side
474   // compilation.
475   // FIXME: As the side effect, we also accept `__float128` uses in the device
476   // code. To rejct these builtin types supported in the host target but not in
477   // the device target, one approach would support `device_builtin` attribute
478   // so that we could tell the device builtin types from the host ones. The
479   // also solves the different representations of the same builtin type, such
480   // as `size_t` in the MSVC environment.
481   if (Aux->hasFloat128Type()) {
482     HasFloat128 = true;
483     Float128Format = DoubleFormat;
484   }
485 }
486