xref: /freebsd/contrib/llvm-project/clang/lib/Basic/Targets/NVPTX.cpp (revision 9c77fb6aaa366cbabc80ee1b834bcfe4df135491)
1 //===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements NVPTX TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "NVPTX.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/MacroBuilder.h"
16 #include "clang/Basic/TargetBuiltins.h"
17 #include "llvm/ADT/StringSwitch.h"
18 
19 using namespace clang;
20 using namespace clang::targets;
21 
// Number of NVPTX-specific builtins: the size of the target-specific builtin
// ID range [Builtin::FirstTSBuiltin, NVPTX::LastTSBuiltin).
static constexpr int NumBuiltins =
    clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin;

// Pull in the generated string table for the NVPTX builtins (defines
// BuiltinStrings used by getTargetBuiltins() below).
#define GET_BUILTIN_STR_TABLE
#include "clang/Basic/BuiltinsNVPTX.inc"
#undef GET_BUILTIN_STR_TABLE

// Pull in the generated Builtin::Info records, one per NVPTX builtin.
static constexpr Builtin::Info BuiltinInfos[] = {
#define GET_BUILTIN_INFOS
#include "clang/Basic/BuiltinsNVPTX.inc"
#undef GET_BUILTIN_INFOS
};
// The generated table must cover exactly the NVPTX builtin ID range.
static_assert(std::size(BuiltinInfos) == NumBuiltins);
35 
36 const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};
37 
// Construct an NVPTX TargetInfo.
//
// \param Triple the nvptx/nvptx64 target triple.
// \param Opts target options; FeaturesAsWritten may select a PTX version
//        via a "+ptxNN" feature, and HostTriple (when not itself NVPTX)
//        selects a host target whose type layout is mirrored.
// \param TargetPointerWidth generic pointer width in bits; must be 32 or 64.
NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                                 const TargetOptions &Opts,
                                 unsigned TargetPointerWidth)
    : TargetInfo(Triple) {
  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
         "NVPTX only supports 32- and 64-bit modes.");

  // Default to PTX 3.2; if a "+ptxNN" feature was written, the last one seen
  // wins.
  PTXVersion = 32;
  for (const StringRef Feature : Opts.FeaturesAsWritten) {
    int PTXV;
    if (!Feature.starts_with("+ptx") ||
        Feature.drop_front(4).getAsInteger(10, PTXV))
      continue;
    PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?
  }

  TLSSupported = false;
  VLASupported = false;
  AddrSpaceMap = &NVPTXAddrSpaceMap;
  UseAddrSpaceMapMangling = true;
  // __bf16 is always available as a load/store only type.
  BFloat16Width = BFloat16Align = 16;
  BFloat16Format = &llvm::APFloat::BFloat();

  // Define available target features
  // These must be defined in sorted order!
  NoAsmVariants = true;
  GPU = OffloadArch::UNUSED;

  // PTX supports f16 as a fundamental type.
  HasLegalHalfType = true;
  HasFloat16 = true;

  // Pick the data layout: 32-bit generic pointers, 64-bit generic pointers
  // with 32-bit pointers in the shared/const/local/param address spaces
  // ("short pointers"), or plain 64-bit pointers.
  if (TargetPointerWidth == 32)
    resetDataLayout(
        "e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else if (Opts.NVPTXUseShortPointers)
    resetDataLayout(
        "e-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:"
        "16-v32:32-n16:32:64");
  else
    resetDataLayout("e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");

  // If possible, get a TargetInfo for our host triple, so we can match its
  // types.
  llvm::Triple HostTriple(Opts.HostTriple);
  if (!HostTriple.isNVPTX())
    HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts);

  // If no host target, make some guesses about the data layout and return.
  if (!HostTarget) {
    LongWidth = LongAlign = TargetPointerWidth;
    PointerWidth = PointerAlign = TargetPointerWidth;
    switch (TargetPointerWidth) {
    case 32:
      SizeType = TargetInfo::UnsignedInt;
      PtrDiffType = TargetInfo::SignedInt;
      IntPtrType = TargetInfo::SignedInt;
      break;
    case 64:
      SizeType = TargetInfo::UnsignedLong;
      PtrDiffType = TargetInfo::SignedLong;
      IntPtrType = TargetInfo::SignedLong;
      break;
    default:
      llvm_unreachable("TargetPointerWidth must be 32 or 64");
    }

    MaxAtomicInlineWidth = TargetPointerWidth;
    return;
  }

  // Copy properties from host target so host and device agree on type
  // widths, alignments, and the underlying types of size_t & friends.
  PointerWidth = HostTarget->getPointerWidth(LangAS::Default);
  PointerAlign = HostTarget->getPointerAlign(LangAS::Default);
  BoolWidth = HostTarget->getBoolWidth();
  BoolAlign = HostTarget->getBoolAlign();
  IntWidth = HostTarget->getIntWidth();
  IntAlign = HostTarget->getIntAlign();
  HalfWidth = HostTarget->getHalfWidth();
  HalfAlign = HostTarget->getHalfAlign();
  FloatWidth = HostTarget->getFloatWidth();
  FloatAlign = HostTarget->getFloatAlign();
  DoubleWidth = HostTarget->getDoubleWidth();
  DoubleAlign = HostTarget->getDoubleAlign();
  LongWidth = HostTarget->getLongWidth();
  LongAlign = HostTarget->getLongAlign();
  LongLongWidth = HostTarget->getLongLongWidth();
  LongLongAlign = HostTarget->getLongLongAlign();
  MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0,
                                                 /* HasNonWeakDef = */ true);
  NewAlign = HostTarget->getNewAlign();
  DefaultAlignForAttributeAligned =
      HostTarget->getDefaultAlignForAttributeAligned();
  SizeType = HostTarget->getSizeType();
  IntMaxType = HostTarget->getIntMaxType();
  PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);
  IntPtrType = HostTarget->getIntPtrType();
  WCharType = HostTarget->getWCharType();
  WIntType = HostTarget->getWIntType();
  Char16Type = HostTarget->getChar16Type();
  Char32Type = HostTarget->getChar32Type();
  Int64Type = HostTarget->getInt64Type();
  SigAtomicType = HostTarget->getSigAtomicType();
  ProcessIDType = HostTarget->getProcessIDType();

  UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
  UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
  UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
  ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();

  // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
  // we need those macros to be identical on host and device, because (among
  // other things) they affect which standard library classes are defined, and
  // we need all classes to be defined on both the host and device.
  MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();

  // Properties intentionally not copied from host:
  // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
  //   host/device boundary.
  // - SuitableAlign: Not visible across the host/device boundary, and may
  //   correctly be different on host/device, e.g. if host has wider vector
  //   types than device.
  // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
  //   as its double type, but that's not necessarily true on the host.
  //   TODO: nvcc emits a warning when using long double on device; we should
  //   do the same.
}
166 
167 ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
168   return llvm::ArrayRef(GCCRegNames);
169 }
170 
171 bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
172   return llvm::StringSwitch<bool>(Feature)
173       .Cases("ptx", "nvptx", true)
174       .Default(false);
175 }
176 
177 void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
178                                        MacroBuilder &Builder) const {
179   Builder.defineMacro("__PTX__");
180   Builder.defineMacro("__NVPTX__");
181 
182   // Skip setting architecture dependent macros if undefined.
183   if (GPU == OffloadArch::UNUSED && !HostTarget)
184     return;
185 
186   if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
187     // Set __CUDA_ARCH__ for the GPU specified.
188     llvm::StringRef CUDAArchCode = [this] {
189       switch (GPU) {
190       case OffloadArch::GFX600:
191       case OffloadArch::GFX601:
192       case OffloadArch::GFX602:
193       case OffloadArch::GFX700:
194       case OffloadArch::GFX701:
195       case OffloadArch::GFX702:
196       case OffloadArch::GFX703:
197       case OffloadArch::GFX704:
198       case OffloadArch::GFX705:
199       case OffloadArch::GFX801:
200       case OffloadArch::GFX802:
201       case OffloadArch::GFX803:
202       case OffloadArch::GFX805:
203       case OffloadArch::GFX810:
204       case OffloadArch::GFX9_GENERIC:
205       case OffloadArch::GFX900:
206       case OffloadArch::GFX902:
207       case OffloadArch::GFX904:
208       case OffloadArch::GFX906:
209       case OffloadArch::GFX908:
210       case OffloadArch::GFX909:
211       case OffloadArch::GFX90a:
212       case OffloadArch::GFX90c:
213       case OffloadArch::GFX9_4_GENERIC:
214       case OffloadArch::GFX942:
215       case OffloadArch::GFX950:
216       case OffloadArch::GFX10_1_GENERIC:
217       case OffloadArch::GFX1010:
218       case OffloadArch::GFX1011:
219       case OffloadArch::GFX1012:
220       case OffloadArch::GFX1013:
221       case OffloadArch::GFX10_3_GENERIC:
222       case OffloadArch::GFX1030:
223       case OffloadArch::GFX1031:
224       case OffloadArch::GFX1032:
225       case OffloadArch::GFX1033:
226       case OffloadArch::GFX1034:
227       case OffloadArch::GFX1035:
228       case OffloadArch::GFX1036:
229       case OffloadArch::GFX11_GENERIC:
230       case OffloadArch::GFX1100:
231       case OffloadArch::GFX1101:
232       case OffloadArch::GFX1102:
233       case OffloadArch::GFX1103:
234       case OffloadArch::GFX1150:
235       case OffloadArch::GFX1151:
236       case OffloadArch::GFX1152:
237       case OffloadArch::GFX1153:
238       case OffloadArch::GFX12_GENERIC:
239       case OffloadArch::GFX1200:
240       case OffloadArch::GFX1201:
241       case OffloadArch::GFX1250:
242       case OffloadArch::AMDGCNSPIRV:
243       case OffloadArch::Generic:
244       case OffloadArch::GRANITERAPIDS:
245       case OffloadArch::BMG_G21:
246       case OffloadArch::LAST:
247         break;
248       case OffloadArch::UNKNOWN:
249         assert(false && "No GPU arch when compiling CUDA device code.");
250         return "";
251       case OffloadArch::UNUSED:
252       case OffloadArch::SM_20:
253         return "200";
254       case OffloadArch::SM_21:
255         return "210";
256       case OffloadArch::SM_30:
257         return "300";
258       case OffloadArch::SM_32_:
259         return "320";
260       case OffloadArch::SM_35:
261         return "350";
262       case OffloadArch::SM_37:
263         return "370";
264       case OffloadArch::SM_50:
265         return "500";
266       case OffloadArch::SM_52:
267         return "520";
268       case OffloadArch::SM_53:
269         return "530";
270       case OffloadArch::SM_60:
271         return "600";
272       case OffloadArch::SM_61:
273         return "610";
274       case OffloadArch::SM_62:
275         return "620";
276       case OffloadArch::SM_70:
277         return "700";
278       case OffloadArch::SM_72:
279         return "720";
280       case OffloadArch::SM_75:
281         return "750";
282       case OffloadArch::SM_80:
283         return "800";
284       case OffloadArch::SM_86:
285         return "860";
286       case OffloadArch::SM_87:
287         return "870";
288       case OffloadArch::SM_89:
289         return "890";
290       case OffloadArch::SM_90:
291       case OffloadArch::SM_90a:
292         return "900";
293       case OffloadArch::SM_100:
294       case OffloadArch::SM_100a:
295         return "1000";
296       case OffloadArch::SM_101:
297       case OffloadArch::SM_101a:
298          return "1010";
299       case OffloadArch::SM_120:
300       case OffloadArch::SM_120a:
301          return "1200";
302       }
303       llvm_unreachable("unhandled OffloadArch");
304     }();
305     Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
306     switch(GPU) {
307       case OffloadArch::SM_90a:
308       case OffloadArch::SM_100a:
309       case OffloadArch::SM_101a:
310       case OffloadArch::SM_120a:
311         Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
312         break;
313       default:
314         // Do nothing if this is not an enhanced architecture.
315         break;
316     }
317   }
318 }
319 
320 llvm::SmallVector<Builtin::InfosShard>
321 NVPTXTargetInfo::getTargetBuiltins() const {
322   return {{&BuiltinStrings, BuiltinInfos}};
323 }
324