xref: /freebsd/contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/MacroBuilder.h"
19 #include "clang/Basic/TargetBuiltins.h"
20 #include "llvm/ADT/SmallString.h"
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29 
// LLVM data layout description used for non-AMDGCN (R600) triples.
// The literal is split by component; the pieces concatenate to one string.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"                                     // default pointer spec
    "-i64:64"                                       // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256" // vector alignments
    "-v256:256-v512:512-v1024:1024-v2048:2048"      // vector alignments
    "-n32:64-S32-A5-G1";
33 
// LLVM data layout description used for AMDGCN triples.
// The literal is split by component; the pieces concatenate to one string.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"                                              // default pointers
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" // p1 .. p6
    "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"        // p7 .. p9
    "-i64:64"                                                // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"          // vectors
    "-v256:256-v512:512-v1024:1024-v2048:2048"               // vectors
    "-n32:64-S32-A5-G1"
    "-ni:7:8:9";
40 
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42     llvm::AMDGPUAS::FLAT_ADDRESS,     // Default
43     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
44     llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
45     llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
46     llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
47     llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
48     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
49     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
50     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
51     llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
52     llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
53     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global
54     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_device
55     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_host
56     llvm::AMDGPUAS::LOCAL_ADDRESS,    // sycl_local
57     llvm::AMDGPUAS::PRIVATE_ADDRESS,  // sycl_private
58     llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_sptr
59     llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_uptr
60     llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr64
61     llvm::AMDGPUAS::FLAT_ADDRESS,     // hlsl_groupshared
62 };
63 
64 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
65     llvm::AMDGPUAS::PRIVATE_ADDRESS,  // Default
66     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
67     llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
68     llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
69     llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
70     llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
71     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
72     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
73     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
74     llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
75     llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
76     // SYCL address space values for this map are dummy
77     llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global
78     llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device
79     llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host
80     llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local
81     llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private
82     llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
83     llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
84     llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
85     llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
86 
87 };
88 } // namespace targets
89 } // namespace clang
90 
91 static constexpr Builtin::Info BuiltinInfo[] = {
92 #define BUILTIN(ID, TYPE, ATTRS)                                               \
93   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
94 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
95   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
96 #include "clang/Basic/BuiltinsAMDGPU.def"
97 };
98 
99 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
100   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
101   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
102   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
103   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
104   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
105   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
106   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
107   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
108   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
109   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
110   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
111   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
112   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
113   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
114   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
115   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
116   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
117   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
118   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
119   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
120   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
121   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
122   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
123   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
124   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
125   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
126   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
127   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
128   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
129   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
130   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
131   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
132   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
133   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
134   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
135   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
136   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
137   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
138   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
139   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
140   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
141   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
142   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
143   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
144   "flat_scratch_lo", "flat_scratch_hi",
145   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
146   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
147   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
148   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
149   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
150   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
151   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
152   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
153   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
154   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
155   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
156   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
157   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
158   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
159   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
160   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
161   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
162   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
163   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
164   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
165   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
166   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
167   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
168   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
169   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
170   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
171   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
172   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
173   "a252", "a253", "a254", "a255"
174 };
175 
176 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
177   return llvm::ArrayRef(GCCRegNames);
178 }
179 
180 bool AMDGPUTargetInfo::initFeatureMap(
181     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
182     const std::vector<std::string> &FeatureVec) const {
183 
184   using namespace llvm::AMDGPU;
185   fillAMDGPUFeatureMap(CPU, getTriple(), Features);
186   if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
187     return false;
188 
189   // TODO: Should move this logic into TargetParser
190   auto HasError = insertWaveSizeFeature(CPU, getTriple(), Features);
191   switch (HasError.first) {
192   default:
193     break;
194   case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
195     Diags.Report(diag::err_invalid_feature_combination) << HasError.second;
196     return false;
197   case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
198     Diags.Report(diag::err_opt_not_valid_on_target) << HasError.second;
199     return false;
200   }
201 
202   return true;
203 }
204 
205 void AMDGPUTargetInfo::fillValidCPUList(
206     SmallVectorImpl<StringRef> &Values) const {
207   if (isAMDGCN(getTriple()))
208     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
209   else
210     llvm::AMDGPU::fillValidArchListR600(Values);
211 }
212 
213 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
214   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
215 }
216 
217 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
218                                    const TargetOptions &Opts)
219     : TargetInfo(Triple),
220       GPUKind(isAMDGCN(Triple) ?
221               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
222               llvm::AMDGPU::parseArchR600(Opts.CPU)),
223       GPUFeatures(isAMDGCN(Triple) ?
224                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
225                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
226   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
227                                         : DataLayoutStringR600);
228 
229   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
230                      !isAMDGCN(Triple));
231   UseAddrSpaceMapMangling = true;
232 
233   if (isAMDGCN(Triple)) {
234     // __bf16 is always available as a load/store only type on AMDGCN.
235     BFloat16Width = BFloat16Align = 16;
236     BFloat16Format = &llvm::APFloat::BFloat();
237   }
238 
239   HasLegalHalfType = true;
240   HasFloat16 = true;
241   WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64;
242   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
243 
244   // Set pointer width and alignment for the generic address space.
245   PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
246   if (getMaxPointerWidth() == 64) {
247     LongWidth = LongAlign = 64;
248     SizeType = UnsignedLong;
249     PtrDiffType = SignedLong;
250     IntPtrType = SignedLong;
251   }
252 
253   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
254   CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
255   for (auto F : {"image-insts", "gws"})
256     ReadOnlyFeatures.insert(F);
257   HalfArgsAndReturns = true;
258 }
259 
260 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
261   TargetInfo::adjust(Diags, Opts);
262   // ToDo: There are still a few places using default address space as private
263   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
264   // can be removed from the following line.
265   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
266                      !isAMDGCN(getTriple()));
267 }
268 
269 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
270   return llvm::ArrayRef(BuiltinInfo,
271                         clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
272 }
273 
274 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
275                                         MacroBuilder &Builder) const {
276   Builder.defineMacro("__AMD__");
277   Builder.defineMacro("__AMDGPU__");
278 
279   if (isAMDGCN(getTriple()))
280     Builder.defineMacro("__AMDGCN__");
281   else
282     Builder.defineMacro("__R600__");
283 
284   // Legacy HIP host code relies on these default attributes to be defined.
285   bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
286   if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
287     return;
288 
289   llvm::SmallString<16> CanonName =
290       (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
291                              : getArchNameR600(GPUKind));
292 
293   // Sanitize the name of generic targets.
294   // e.g. gfx10-1-generic -> gfx10_1_generic
295   if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
296       GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
297     std::replace(CanonName.begin(), CanonName.end(), '-', '_');
298   }
299 
300   Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
301   // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
302   if (isAMDGCN(getTriple()) && !IsHIPHost) {
303     assert(StringRef(CanonName).starts_with("gfx") &&
304            "Invalid amdgcn canonical name");
305     StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
306     Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
307                         Twine("__"));
308     Builder.defineMacro("__amdgcn_processor__",
309                         Twine("\"") + Twine(CanonName) + Twine("\""));
310     Builder.defineMacro("__amdgcn_target_id__",
311                         Twine("\"") + Twine(*getTargetID()) + Twine("\""));
312     for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
313       auto Loc = OffloadArchFeatures.find(F);
314       if (Loc != OffloadArchFeatures.end()) {
315         std::string NewF = F.str();
316         std::replace(NewF.begin(), NewF.end(), '-', '_');
317         Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
318                                 Twine("__"),
319                             Loc->second ? "1" : "0");
320       }
321     }
322   }
323 
324   if (AllowAMDGPUUnsafeFPAtomics)
325     Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
326 
327   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
328   // removed in the near future.
329   if (hasFMAF())
330     Builder.defineMacro("__HAS_FMAF__");
331   if (hasFastFMAF())
332     Builder.defineMacro("FP_FAST_FMAF");
333   if (hasLDEXPF())
334     Builder.defineMacro("__HAS_LDEXPF__");
335   if (hasFP64())
336     Builder.defineMacro("__HAS_FP64__");
337   if (hasFastFMA())
338     Builder.defineMacro("FP_FAST_FMA");
339 
340   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
341   // ToDo: deprecate this macro for naming consistency.
342   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
343   Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
344 }
345 
346 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
347   assert(HalfFormat == Aux->HalfFormat);
348   assert(FloatFormat == Aux->FloatFormat);
349   assert(DoubleFormat == Aux->DoubleFormat);
350 
351   // On x86_64 long double is 80-bit extended precision format, which is
352   // not supported by AMDGPU. 128-bit floating point format is also not
353   // supported by AMDGPU. Therefore keep its own format for these two types.
354   auto SaveLongDoubleFormat = LongDoubleFormat;
355   auto SaveFloat128Format = Float128Format;
356   auto SaveLongDoubleWidth = LongDoubleWidth;
357   auto SaveLongDoubleAlign = LongDoubleAlign;
358   copyAuxTarget(Aux);
359   LongDoubleFormat = SaveLongDoubleFormat;
360   Float128Format = SaveFloat128Format;
361   LongDoubleWidth = SaveLongDoubleWidth;
362   LongDoubleAlign = SaveLongDoubleAlign;
363   // For certain builtin types support on the host target, claim they are
364   // support to pass the compilation of the host code during the device-side
365   // compilation.
366   // FIXME: As the side effect, we also accept `__float128` uses in the device
367   // code. To rejct these builtin types supported in the host target but not in
368   // the device target, one approach would support `device_builtin` attribute
369   // so that we could tell the device builtin types from the host ones. The
370   // also solves the different representations of the same builtin type, such
371   // as `size_t` in the MSVC environment.
372   if (Aux->hasFloat128Type()) {
373     HasFloat128 = true;
374     Float128Format = DoubleFormat;
375   }
376 }
377