xref: /freebsd/contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp (revision 562894f0dc310f658284863ff329906e7737a0a0)
1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/IR/DataLayout.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
31 static const char *const DataLayoutStringR600 =
32     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
34 
35 static const char *const DataLayoutStringAMDGCN =
36     "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
37     "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
38     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
39     "-ni:7";
40 
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42     Generic,  // Default
43     Global,   // opencl_global
44     Local,    // opencl_local
45     Constant, // opencl_constant
46     Private,  // opencl_private
47     Generic,  // opencl_generic
48     Global,   // cuda_device
49     Constant, // cuda_constant
50     Local,    // cuda_shared
51     Generic,  // ptr32_sptr
52     Generic,  // ptr32_uptr
53     Generic   // ptr64
54 };
55 
56 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
57     Private,  // Default
58     Global,   // opencl_global
59     Local,    // opencl_local
60     Constant, // opencl_constant
61     Private,  // opencl_private
62     Generic,  // opencl_generic
63     Global,   // cuda_device
64     Constant, // cuda_constant
65     Local,    // cuda_shared
66     Generic,  // ptr32_sptr
67     Generic,  // ptr32_uptr
68     Generic   // ptr64
69 
70 };
71 } // namespace targets
72 } // namespace clang
73 
74 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
75 #define BUILTIN(ID, TYPE, ATTRS)                                               \
76   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
77 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
78   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
79 #include "clang/Basic/BuiltinsAMDGPU.def"
80 };
81 
82 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
83   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
84   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
85   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
86   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
87   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
88   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
89   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
90   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
91   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
92   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
93   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
94   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
95   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
96   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
97   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
98   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
99   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
100   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
101   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
102   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
103   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
104   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
105   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
106   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
107   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
108   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
109   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
110   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
111   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
112   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
113   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
114   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
115   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
116   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
117   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
118   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
119   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
120   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
121   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
122   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
123   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
124   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
125   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
126   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
127   "flat_scratch_lo", "flat_scratch_hi"
128 };
129 
130 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
131   return llvm::makeArrayRef(GCCRegNames);
132 }
133 
134 bool AMDGPUTargetInfo::initFeatureMap(
135     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
136     const std::vector<std::string> &FeatureVec) const {
137 
138   using namespace llvm::AMDGPU;
139 
140   // XXX - What does the member GPU mean if device name string passed here?
141   if (isAMDGCN(getTriple())) {
142     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
143     case GK_GFX1012:
144     case GK_GFX1011:
145       Features["dot1-insts"] = true;
146       Features["dot2-insts"] = true;
147       Features["dot5-insts"] = true;
148       Features["dot6-insts"] = true;
149       LLVM_FALLTHROUGH;
150     case GK_GFX1010:
151       Features["dl-insts"] = true;
152       Features["ci-insts"] = true;
153       Features["flat-address-space"] = true;
154       Features["16-bit-insts"] = true;
155       Features["dpp"] = true;
156       Features["gfx8-insts"] = true;
157       Features["gfx9-insts"] = true;
158       Features["gfx10-insts"] = true;
159       Features["s-memrealtime"] = true;
160       break;
161     case GK_GFX908:
162       Features["dot3-insts"] = true;
163       Features["dot4-insts"] = true;
164       Features["dot5-insts"] = true;
165       Features["dot6-insts"] = true;
166       LLVM_FALLTHROUGH;
167     case GK_GFX906:
168       Features["dl-insts"] = true;
169       Features["dot1-insts"] = true;
170       Features["dot2-insts"] = true;
171       LLVM_FALLTHROUGH;
172     case GK_GFX909:
173     case GK_GFX904:
174     case GK_GFX902:
175     case GK_GFX900:
176       Features["gfx9-insts"] = true;
177       LLVM_FALLTHROUGH;
178     case GK_GFX810:
179     case GK_GFX803:
180     case GK_GFX802:
181     case GK_GFX801:
182       Features["gfx8-insts"] = true;
183       Features["16-bit-insts"] = true;
184       Features["dpp"] = true;
185       Features["s-memrealtime"] = true;
186       LLVM_FALLTHROUGH;
187     case GK_GFX704:
188     case GK_GFX703:
189     case GK_GFX702:
190     case GK_GFX701:
191     case GK_GFX700:
192       Features["ci-insts"] = true;
193       Features["flat-address-space"] = true;
194       LLVM_FALLTHROUGH;
195     case GK_GFX601:
196     case GK_GFX600:
197       break;
198     case GK_NONE:
199       break;
200     default:
201       llvm_unreachable("Unhandled GPU!");
202     }
203   } else {
204     if (CPU.empty())
205       CPU = "r600";
206 
207     switch (llvm::AMDGPU::parseArchR600(CPU)) {
208     case GK_CAYMAN:
209     case GK_CYPRESS:
210     case GK_RV770:
211     case GK_RV670:
212       // TODO: Add fp64 when implemented.
213       break;
214     case GK_TURKS:
215     case GK_CAICOS:
216     case GK_BARTS:
217     case GK_SUMO:
218     case GK_REDWOOD:
219     case GK_JUNIPER:
220     case GK_CEDAR:
221     case GK_RV730:
222     case GK_RV710:
223     case GK_RS880:
224     case GK_R630:
225     case GK_R600:
226       break;
227     default:
228       llvm_unreachable("Unhandled GPU!");
229     }
230   }
231 
232   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
233 }
234 
235 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
236                                            TargetOptions &TargetOpts) const {
237   bool hasFP32Denormals = false;
238   bool hasFP64Denormals = false;
239 
240   for (auto &I : TargetOpts.FeaturesAsWritten) {
241     if (I == "+fp32-denormals" || I == "-fp32-denormals")
242       hasFP32Denormals = true;
243     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
244       hasFP64Denormals = true;
245   }
246   if (!hasFP32Denormals)
247     TargetOpts.Features.push_back(
248       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
249              ? '+' : '-') + Twine("fp32-denormals"))
250             .str());
251   // Always do not flush fp64 or fp16 denorms.
252   if (!hasFP64Denormals && hasFP64())
253     TargetOpts.Features.push_back("+fp64-fp16-denormals");
254 }
255 
256 void AMDGPUTargetInfo::fillValidCPUList(
257     SmallVectorImpl<StringRef> &Values) const {
258   if (isAMDGCN(getTriple()))
259     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
260   else
261     llvm::AMDGPU::fillValidArchListR600(Values);
262 }
263 
264 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
265   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
266 }
267 
268 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
269                                    const TargetOptions &Opts)
270     : TargetInfo(Triple),
271       GPUKind(isAMDGCN(Triple) ?
272               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
273               llvm::AMDGPU::parseArchR600(Opts.CPU)),
274       GPUFeatures(isAMDGCN(Triple) ?
275                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
276                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
277   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
278                                         : DataLayoutStringR600);
279   assert(DataLayout->getAllocaAddrSpace() == Private);
280 
281   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
282                      !isAMDGCN(Triple));
283   UseAddrSpaceMapMangling = true;
284 
285   HasLegalHalfType = true;
286   HasFloat16 = true;
287 
288   // Set pointer width and alignment for target address space 0.
289   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
290   if (getMaxPointerWidth() == 64) {
291     LongWidth = LongAlign = 64;
292     SizeType = UnsignedLong;
293     PtrDiffType = SignedLong;
294     IntPtrType = SignedLong;
295   }
296 
297   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
298 }
299 
300 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
301   TargetInfo::adjust(Opts);
302   // ToDo: There are still a few places using default address space as private
303   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
304   // can be removed from the following line.
305   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
306                      !isAMDGCN(getTriple()));
307 }
308 
309 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
310   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
311                                              Builtin::FirstTSBuiltin);
312 }
313 
314 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
315                                         MacroBuilder &Builder) const {
316   Builder.defineMacro("__AMD__");
317   Builder.defineMacro("__AMDGPU__");
318 
319   if (isAMDGCN(getTriple()))
320     Builder.defineMacro("__AMDGCN__");
321   else
322     Builder.defineMacro("__R600__");
323 
324   if (GPUKind != llvm::AMDGPU::GK_NONE) {
325     StringRef CanonName = isAMDGCN(getTriple()) ?
326       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
327     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
328   }
329 
330   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
331   // removed in the near future.
332   if (hasFMAF())
333     Builder.defineMacro("__HAS_FMAF__");
334   if (hasFastFMAF())
335     Builder.defineMacro("FP_FAST_FMAF");
336   if (hasLDEXPF())
337     Builder.defineMacro("__HAS_LDEXPF__");
338   if (hasFP64())
339     Builder.defineMacro("__HAS_FP64__");
340   if (hasFastFMA())
341     Builder.defineMacro("FP_FAST_FMA");
342 }
343 
344 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
345   assert(HalfFormat == Aux->HalfFormat);
346   assert(FloatFormat == Aux->FloatFormat);
347   assert(DoubleFormat == Aux->DoubleFormat);
348 
349   // On x86_64 long double is 80-bit extended precision format, which is
350   // not supported by AMDGPU. 128-bit floating point format is also not
351   // supported by AMDGPU. Therefore keep its own format for these two types.
352   auto SaveLongDoubleFormat = LongDoubleFormat;
353   auto SaveFloat128Format = Float128Format;
354   copyAuxTarget(Aux);
355   LongDoubleFormat = SaveLongDoubleFormat;
356   Float128Format = SaveFloat128Format;
357 }
358