xref: /freebsd/contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
31 static const char *const DataLayoutStringR600 =
32     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
34 
// LLVM data layout description selected for amdgcn triples.
// The pieces are split by component; the concatenated string is what matters.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64" // little-endian, 64-bit default pointers
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" // per-addrspace
    "-i64:64"                                                // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256" // vector alignments
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1" // native ints, stack align, alloca/global addrspaces
    "-ni:7";            // non-integral pointers in address space 7
40 
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42     Generic,  // Default
43     Global,   // opencl_global
44     Local,    // opencl_local
45     Constant, // opencl_constant
46     Private,  // opencl_private
47     Generic,  // opencl_generic
48     Global,   // opencl_global_device
49     Global,   // opencl_global_host
50     Global,   // cuda_device
51     Constant, // cuda_constant
52     Local,    // cuda_shared
53     Global,   // sycl_global
54     Global,   // sycl_global_device
55     Global,   // sycl_global_host
56     Local,    // sycl_local
57     Private,  // sycl_private
58     Generic,  // ptr32_sptr
59     Generic,  // ptr32_uptr
60     Generic   // ptr64
61 };
62 
63 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
64     Private,  // Default
65     Global,   // opencl_global
66     Local,    // opencl_local
67     Constant, // opencl_constant
68     Private,  // opencl_private
69     Generic,  // opencl_generic
70     Global,   // opencl_global_device
71     Global,   // opencl_global_host
72     Global,   // cuda_device
73     Constant, // cuda_constant
74     Local,    // cuda_shared
75     // SYCL address space values for this map are dummy
76     Generic,  // sycl_global
77     Generic,  // sycl_global_device
78     Generic,  // sycl_global_host
79     Generic,  // sycl_local
80     Generic,  // sycl_private
81     Generic,  // ptr32_sptr
82     Generic,  // ptr32_uptr
83     Generic   // ptr64
84 
85 };
86 } // namespace targets
87 } // namespace clang
88 
89 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
90 #define BUILTIN(ID, TYPE, ATTRS)                                               \
91   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
92 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
93   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
94 #include "clang/Basic/BuiltinsAMDGPU.def"
95 };
96 
97 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
98   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
99   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
100   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
101   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
102   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
103   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
104   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
105   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
106   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
107   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
108   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
109   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
110   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
111   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
112   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
113   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
114   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
115   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
116   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
117   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
118   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
119   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
120   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
121   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
122   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
123   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
124   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
125   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
126   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
127   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
128   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
129   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
130   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
131   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
132   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
133   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
134   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
135   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
136   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
137   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
138   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
139   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
140   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
141   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
142   "flat_scratch_lo", "flat_scratch_hi",
143   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
144   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
145   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
146   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
147   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
148   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
149   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
150   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
151   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
152   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
153   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
154   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
155   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
156   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
157   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
158   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
159   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
160   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
161   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
162   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
163   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
164   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
165   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
166   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
167   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
168   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
169   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
170   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
171   "a252", "a253", "a254", "a255"
172 };
173 
174 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
175   return llvm::makeArrayRef(GCCRegNames);
176 }
177 
178 bool AMDGPUTargetInfo::initFeatureMap(
179     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
180     const std::vector<std::string> &FeatureVec) const {
181 
182   using namespace llvm::AMDGPU;
183 
184   // XXX - What does the member GPU mean if device name string passed here?
185   if (isAMDGCN(getTriple())) {
186     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
187     case GK_GFX1035:
188     case GK_GFX1034:
189     case GK_GFX1033:
190     case GK_GFX1032:
191     case GK_GFX1031:
192     case GK_GFX1030:
193       Features["ci-insts"] = true;
194       Features["dot1-insts"] = true;
195       Features["dot2-insts"] = true;
196       Features["dot5-insts"] = true;
197       Features["dot6-insts"] = true;
198       Features["dot7-insts"] = true;
199       Features["dl-insts"] = true;
200       Features["flat-address-space"] = true;
201       Features["16-bit-insts"] = true;
202       Features["dpp"] = true;
203       Features["gfx8-insts"] = true;
204       Features["gfx9-insts"] = true;
205       Features["gfx10-insts"] = true;
206       Features["gfx10-3-insts"] = true;
207       Features["s-memrealtime"] = true;
208       Features["s-memtime-inst"] = true;
209       break;
210     case GK_GFX1012:
211     case GK_GFX1011:
212       Features["dot1-insts"] = true;
213       Features["dot2-insts"] = true;
214       Features["dot5-insts"] = true;
215       Features["dot6-insts"] = true;
216       Features["dot7-insts"] = true;
217       LLVM_FALLTHROUGH;
218     case GK_GFX1013:
219     case GK_GFX1010:
220       Features["dl-insts"] = true;
221       Features["ci-insts"] = true;
222       Features["flat-address-space"] = true;
223       Features["16-bit-insts"] = true;
224       Features["dpp"] = true;
225       Features["gfx8-insts"] = true;
226       Features["gfx9-insts"] = true;
227       Features["gfx10-insts"] = true;
228       Features["s-memrealtime"] = true;
229       Features["s-memtime-inst"] = true;
230       break;
231     case GK_GFX90A:
232       Features["gfx90a-insts"] = true;
233       LLVM_FALLTHROUGH;
234     case GK_GFX908:
235       Features["dot3-insts"] = true;
236       Features["dot4-insts"] = true;
237       Features["dot5-insts"] = true;
238       Features["dot6-insts"] = true;
239       Features["mai-insts"] = true;
240       LLVM_FALLTHROUGH;
241     case GK_GFX906:
242       Features["dl-insts"] = true;
243       Features["dot1-insts"] = true;
244       Features["dot2-insts"] = true;
245       Features["dot7-insts"] = true;
246       LLVM_FALLTHROUGH;
247     case GK_GFX90C:
248     case GK_GFX909:
249     case GK_GFX904:
250     case GK_GFX902:
251     case GK_GFX900:
252       Features["gfx9-insts"] = true;
253       LLVM_FALLTHROUGH;
254     case GK_GFX810:
255     case GK_GFX805:
256     case GK_GFX803:
257     case GK_GFX802:
258     case GK_GFX801:
259       Features["gfx8-insts"] = true;
260       Features["16-bit-insts"] = true;
261       Features["dpp"] = true;
262       Features["s-memrealtime"] = true;
263       LLVM_FALLTHROUGH;
264     case GK_GFX705:
265     case GK_GFX704:
266     case GK_GFX703:
267     case GK_GFX702:
268     case GK_GFX701:
269     case GK_GFX700:
270       Features["ci-insts"] = true;
271       Features["flat-address-space"] = true;
272       LLVM_FALLTHROUGH;
273     case GK_GFX602:
274     case GK_GFX601:
275     case GK_GFX600:
276       Features["s-memtime-inst"] = true;
277       break;
278     case GK_NONE:
279       break;
280     default:
281       llvm_unreachable("Unhandled GPU!");
282     }
283   } else {
284     if (CPU.empty())
285       CPU = "r600";
286 
287     switch (llvm::AMDGPU::parseArchR600(CPU)) {
288     case GK_CAYMAN:
289     case GK_CYPRESS:
290     case GK_RV770:
291     case GK_RV670:
292       // TODO: Add fp64 when implemented.
293       break;
294     case GK_TURKS:
295     case GK_CAICOS:
296     case GK_BARTS:
297     case GK_SUMO:
298     case GK_REDWOOD:
299     case GK_JUNIPER:
300     case GK_CEDAR:
301     case GK_RV730:
302     case GK_RV710:
303     case GK_RS880:
304     case GK_R630:
305     case GK_R600:
306       break;
307     default:
308       llvm_unreachable("Unhandled GPU!");
309     }
310   }
311 
312   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
313 }
314 
315 void AMDGPUTargetInfo::fillValidCPUList(
316     SmallVectorImpl<StringRef> &Values) const {
317   if (isAMDGCN(getTriple()))
318     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
319   else
320     llvm::AMDGPU::fillValidArchListR600(Values);
321 }
322 
323 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
324   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
325 }
326 
327 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
328                                    const TargetOptions &Opts)
329     : TargetInfo(Triple),
330       GPUKind(isAMDGCN(Triple) ?
331               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
332               llvm::AMDGPU::parseArchR600(Opts.CPU)),
333       GPUFeatures(isAMDGCN(Triple) ?
334                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
335                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
336   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
337                                         : DataLayoutStringR600);
338   GridValues = llvm::omp::AMDGPUGpuGridValues;
339 
340   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
341                      !isAMDGCN(Triple));
342   UseAddrSpaceMapMangling = true;
343 
344   HasLegalHalfType = true;
345   HasFloat16 = true;
346   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
347   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
348 
349   // Set pointer width and alignment for target address space 0.
350   PointerWidth = PointerAlign = getPointerWidthV(Generic);
351   if (getMaxPointerWidth() == 64) {
352     LongWidth = LongAlign = 64;
353     SizeType = UnsignedLong;
354     PtrDiffType = SignedLong;
355     IntPtrType = SignedLong;
356   }
357 
358   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
359 }
360 
361 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
362   TargetInfo::adjust(Diags, Opts);
363   // ToDo: There are still a few places using default address space as private
364   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
365   // can be removed from the following line.
366   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
367                      !isAMDGCN(getTriple()));
368 }
369 
370 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
371   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
372                                              Builtin::FirstTSBuiltin);
373 }
374 
375 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
376                                         MacroBuilder &Builder) const {
377   Builder.defineMacro("__AMD__");
378   Builder.defineMacro("__AMDGPU__");
379 
380   if (isAMDGCN(getTriple()))
381     Builder.defineMacro("__AMDGCN__");
382   else
383     Builder.defineMacro("__R600__");
384 
385   if (GPUKind != llvm::AMDGPU::GK_NONE) {
386     StringRef CanonName = isAMDGCN(getTriple()) ?
387       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
388     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
389     if (isAMDGCN(getTriple())) {
390       Builder.defineMacro("__amdgcn_processor__",
391                           Twine("\"") + Twine(CanonName) + Twine("\""));
392       Builder.defineMacro("__amdgcn_target_id__",
393                           Twine("\"") + Twine(getTargetID().getValue()) +
394                               Twine("\""));
395       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
396         auto Loc = OffloadArchFeatures.find(F);
397         if (Loc != OffloadArchFeatures.end()) {
398           std::string NewF = F.str();
399           std::replace(NewF.begin(), NewF.end(), '-', '_');
400           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
401                                   Twine("__"),
402                               Loc->second ? "1" : "0");
403         }
404       }
405     }
406   }
407 
408   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
409   // removed in the near future.
410   if (hasFMAF())
411     Builder.defineMacro("__HAS_FMAF__");
412   if (hasFastFMAF())
413     Builder.defineMacro("FP_FAST_FMAF");
414   if (hasLDEXPF())
415     Builder.defineMacro("__HAS_LDEXPF__");
416   if (hasFP64())
417     Builder.defineMacro("__HAS_FP64__");
418   if (hasFastFMA())
419     Builder.defineMacro("FP_FAST_FMA");
420 
421   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
422 }
423 
424 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
425   assert(HalfFormat == Aux->HalfFormat);
426   assert(FloatFormat == Aux->FloatFormat);
427   assert(DoubleFormat == Aux->DoubleFormat);
428 
429   // On x86_64 long double is 80-bit extended precision format, which is
430   // not supported by AMDGPU. 128-bit floating point format is also not
431   // supported by AMDGPU. Therefore keep its own format for these two types.
432   auto SaveLongDoubleFormat = LongDoubleFormat;
433   auto SaveFloat128Format = Float128Format;
434   copyAuxTarget(Aux);
435   LongDoubleFormat = SaveLongDoubleFormat;
436   Float128Format = SaveFloat128Format;
437   // For certain builtin types support on the host target, claim they are
438   // support to pass the compilation of the host code during the device-side
439   // compilation.
440   // FIXME: As the side effect, we also accept `__float128` uses in the device
441   // code. To rejct these builtin types supported in the host target but not in
442   // the device target, one approach would support `device_builtin` attribute
443   // so that we could tell the device builtin types from the host ones. The
444   // also solves the different representations of the same builtin type, such
445   // as `size_t` in the MSVC environment.
446   if (Aux->hasFloat128Type()) {
447     HasFloat128 = true;
448     Float128Format = DoubleFormat;
449   }
450 }
451