1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/MacroBuilder.h"
19 #include "clang/Basic/TargetBuiltins.h"
20 #include "llvm/ADT/SmallString.h"
21 using namespace clang;
22 using namespace clang::targets;
23
24 namespace clang {
25 namespace targets {
26
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29
// Data layout description handed to resetDataLayout() for non-AMDGCN (R600)
// triples. The adjacent literals concatenate into one string; they are split
// only at '-' field boundaries for readability.
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1";
33
// Data layout description handed to resetDataLayout() for AMDGCN triples.
// The adjacent literals concatenate into one string; they are split only at
// '-' field boundaries so that no field is broken across two literals.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1"
    "-ni:7:8:9";
40
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42 llvm::AMDGPUAS::FLAT_ADDRESS, // Default
43 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
44 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
45 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
46 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
47 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
48 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
49 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
50 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
51 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
52 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
53 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global
54 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device
55 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host
56 llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local
57 llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private
58 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
59 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
60 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
61 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
62 };
63
64 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
65 llvm::AMDGPUAS::PRIVATE_ADDRESS, // Default
66 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
67 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
68 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
69 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
70 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
71 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
72 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
73 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
74 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
75 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
76 // SYCL address space values for this map are dummy
77 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global
78 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device
79 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host
80 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local
81 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private
82 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
83 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
84 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
85 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
86
87 };
88 } // namespace targets
89 } // namespace clang
90
// Table of builtin descriptors returned by getTargetBuiltins(). It is
// generated by including BuiltinsAMDGPU.def with the two macros below defined:
// each BUILTIN / TARGET_BUILTIN use in that file expands to one
// Builtin::Info initializer whose first field is the stringified ID.
// BUILTIN passes nullptr in the slot where TARGET_BUILTIN passes FEATURE.
// NOTE(review): the macros are not #undef'd here; presumably the .def file
// does that itself -- confirm before adding another include after this one.
static constexpr Builtin::Info BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
98
// Register names exposed through getGCCRegNames(). The table lists, in order:
//   - v0 .. v255
//   - s0 .. s127
//   - exec, vcc, scc, m0, flat_scratch, followed by the _lo/_hi forms of
//     exec, vcc and flat_scratch
//   - a0 .. a255
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
    "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
    "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
    "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
    "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
    "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
    "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
    "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
    "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
    "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
    "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
    "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
    "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
    "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
    "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
    "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
    "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
    "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
    "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
    "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
    "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
    "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
    "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
    "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
    "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
    "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
    "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
    "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
    "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
    "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
    "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
    "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
    "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
    "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
    "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
    "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
    "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
    "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
    "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
    "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
    "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
    "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
    "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
    "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
    "flat_scratch_lo", "flat_scratch_hi",
    "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
    "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
    "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
    "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
    "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
    "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
    "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
    "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
    "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
    "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
    "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
    "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
    "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
    "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
    "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
    "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
    "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
    "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
    "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
    "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
    "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
    "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
    "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
    "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
    "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
    "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
    "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
    "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
    "a252", "a253", "a254", "a255"
};
175
getGCCRegNames() const176 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
177 return llvm::ArrayRef(GCCRegNames);
178 }
179
initFeatureMap(llvm::StringMap<bool> & Features,DiagnosticsEngine & Diags,StringRef CPU,const std::vector<std::string> & FeatureVec) const180 bool AMDGPUTargetInfo::initFeatureMap(
181 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
182 const std::vector<std::string> &FeatureVec) const {
183
184 using namespace llvm::AMDGPU;
185 fillAMDGPUFeatureMap(CPU, getTriple(), Features);
186 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
187 return false;
188
189 // TODO: Should move this logic into TargetParser
190 auto HasError = insertWaveSizeFeature(CPU, getTriple(), Features);
191 switch (HasError.first) {
192 default:
193 break;
194 case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
195 Diags.Report(diag::err_invalid_feature_combination) << HasError.second;
196 return false;
197 case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
198 Diags.Report(diag::err_opt_not_valid_on_target) << HasError.second;
199 return false;
200 }
201
202 return true;
203 }
204
fillValidCPUList(SmallVectorImpl<StringRef> & Values) const205 void AMDGPUTargetInfo::fillValidCPUList(
206 SmallVectorImpl<StringRef> &Values) const {
207 if (isAMDGCN(getTriple()))
208 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
209 else
210 llvm::AMDGPU::fillValidArchListR600(Values);
211 }
212
setAddressSpaceMap(bool DefaultIsPrivate)213 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
214 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
215 }
216
AMDGPUTargetInfo(const llvm::Triple & Triple,const TargetOptions & Opts)217 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
218 const TargetOptions &Opts)
219 : TargetInfo(Triple),
220 GPUKind(isAMDGCN(Triple) ?
221 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
222 llvm::AMDGPU::parseArchR600(Opts.CPU)),
223 GPUFeatures(isAMDGCN(Triple) ?
224 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
225 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
226 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
227 : DataLayoutStringR600);
228
229 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
230 !isAMDGCN(Triple));
231 UseAddrSpaceMapMangling = true;
232
233 if (isAMDGCN(Triple)) {
234 // __bf16 is always available as a load/store only type on AMDGCN.
235 BFloat16Width = BFloat16Align = 16;
236 BFloat16Format = &llvm::APFloat::BFloat();
237 }
238
239 HasLegalHalfType = true;
240 HasFloat16 = true;
241 WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64;
242 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
243
244 // Set pointer width and alignment for the generic address space.
245 PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
246 if (getMaxPointerWidth() == 64) {
247 LongWidth = LongAlign = 64;
248 SizeType = UnsignedLong;
249 PtrDiffType = SignedLong;
250 IntPtrType = SignedLong;
251 }
252
253 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
254 CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
255 for (auto F : {"image-insts", "gws"})
256 ReadOnlyFeatures.insert(F);
257 HalfArgsAndReturns = true;
258 }
259
adjust(DiagnosticsEngine & Diags,LangOptions & Opts)260 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
261 TargetInfo::adjust(Diags, Opts);
262 // ToDo: There are still a few places using default address space as private
263 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
264 // can be removed from the following line.
265 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
266 !isAMDGCN(getTriple()));
267 }
268
getTargetBuiltins() const269 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
270 return llvm::ArrayRef(BuiltinInfo,
271 clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
272 }
273
getTargetDefines(const LangOptions & Opts,MacroBuilder & Builder) const274 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
275 MacroBuilder &Builder) const {
276 Builder.defineMacro("__AMD__");
277 Builder.defineMacro("__AMDGPU__");
278
279 if (isAMDGCN(getTriple()))
280 Builder.defineMacro("__AMDGCN__");
281 else
282 Builder.defineMacro("__R600__");
283
284 // Legacy HIP host code relies on these default attributes to be defined.
285 bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
286 if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
287 return;
288
289 llvm::SmallString<16> CanonName =
290 (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
291 : getArchNameR600(GPUKind));
292
293 // Sanitize the name of generic targets.
294 // e.g. gfx10-1-generic -> gfx10_1_generic
295 if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
296 GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
297 std::replace(CanonName.begin(), CanonName.end(), '-', '_');
298 }
299
300 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
301 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
302 if (isAMDGCN(getTriple()) && !IsHIPHost) {
303 assert(StringRef(CanonName).starts_with("gfx") &&
304 "Invalid amdgcn canonical name");
305 StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
306 Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
307 Twine("__"));
308 Builder.defineMacro("__amdgcn_processor__",
309 Twine("\"") + Twine(CanonName) + Twine("\""));
310 Builder.defineMacro("__amdgcn_target_id__",
311 Twine("\"") + Twine(*getTargetID()) + Twine("\""));
312 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
313 auto Loc = OffloadArchFeatures.find(F);
314 if (Loc != OffloadArchFeatures.end()) {
315 std::string NewF = F.str();
316 std::replace(NewF.begin(), NewF.end(), '-', '_');
317 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
318 Twine("__"),
319 Loc->second ? "1" : "0");
320 }
321 }
322 }
323
324 if (AllowAMDGPUUnsafeFPAtomics)
325 Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
326
327 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
328 // removed in the near future.
329 if (hasFMAF())
330 Builder.defineMacro("__HAS_FMAF__");
331 if (hasFastFMAF())
332 Builder.defineMacro("FP_FAST_FMAF");
333 if (hasLDEXPF())
334 Builder.defineMacro("__HAS_LDEXPF__");
335 if (hasFP64())
336 Builder.defineMacro("__HAS_FP64__");
337 if (hasFastFMA())
338 Builder.defineMacro("FP_FAST_FMA");
339
340 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
341 // ToDo: deprecate this macro for naming consistency.
342 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
343 Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
344 }
345
setAuxTarget(const TargetInfo * Aux)346 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
347 assert(HalfFormat == Aux->HalfFormat);
348 assert(FloatFormat == Aux->FloatFormat);
349 assert(DoubleFormat == Aux->DoubleFormat);
350
351 // On x86_64 long double is 80-bit extended precision format, which is
352 // not supported by AMDGPU. 128-bit floating point format is also not
353 // supported by AMDGPU. Therefore keep its own format for these two types.
354 auto SaveLongDoubleFormat = LongDoubleFormat;
355 auto SaveFloat128Format = Float128Format;
356 auto SaveLongDoubleWidth = LongDoubleWidth;
357 auto SaveLongDoubleAlign = LongDoubleAlign;
358 copyAuxTarget(Aux);
359 LongDoubleFormat = SaveLongDoubleFormat;
360 Float128Format = SaveFloat128Format;
361 LongDoubleWidth = SaveLongDoubleWidth;
362 LongDoubleAlign = SaveLongDoubleAlign;
363 // For certain builtin types support on the host target, claim they are
364 // support to pass the compilation of the host code during the device-side
365 // compilation.
366 // FIXME: As the side effect, we also accept `__float128` uses in the device
367 // code. To rejct these builtin types supported in the host target but not in
368 // the device target, one approach would support `device_builtin` attribute
369 // so that we could tell the device builtin types from the host ones. The
370 // also solves the different representations of the same builtin type, such
371 // as `size_t` in the MSVC environment.
372 if (Aux->hasFloat128Type()) {
373 HasFloat128 = true;
374 Float128Format = DoubleFormat;
375 }
376 }
377