//===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements NVPTX TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;
using namespace clang::targets;

static constexpr int NumBuiltins =
    clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin;

#define GET_BUILTIN_STR_TABLE
#include "clang/Basic/BuiltinsNVPTX.inc"
#undef GET_BUILTIN_STR_TABLE

static constexpr Builtin::Info BuiltinInfos[] = {
#define GET_BUILTIN_INFOS
#include "clang/Basic/BuiltinsNVPTX.inc"
#undef GET_BUILTIN_INFOS
};
static_assert(std::size(BuiltinInfos) == NumBuiltins);

const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};

NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                                 const TargetOptions &Opts,
                                 unsigned TargetPointerWidth)
    : TargetInfo(Triple) {
  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
         "NVPTX only supports 32- and 64-bit modes.");

  PTXVersion = 32;
  for (const StringRef Feature : Opts.FeaturesAsWritten) {
    int PTXV;
    if (!Feature.starts_with("+ptx") ||
        Feature.drop_front(4).getAsInteger(10, PTXV))
      continue;
    PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?
  }

  TLSSupported = false;
  VLASupported = false;
  AddrSpaceMap = &NVPTXAddrSpaceMap;
  UseAddrSpaceMapMangling = true;
  // __bf16 is always available as a load/store only type.
  BFloat16Width = BFloat16Align = 16;
  BFloat16Format = &llvm::APFloat::BFloat();

  // Define available target features
  // These must be defined in sorted order!
  NoAsmVariants = true;
  GPU = OffloadArch::UNUSED;

  // PTX supports f16 as a fundamental type.
  HasLegalHalfType = true;
  HasFloat16 = true;

  if (TargetPointerWidth == 32)
    resetDataLayout(
        "e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else if (Opts.NVPTXUseShortPointers)
    resetDataLayout(
        "e-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:"
        "16-v32:32-n16:32:64");
  else
    resetDataLayout("e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");

  // If possible, get a TargetInfo for our host triple, so we can match its
  // types.
  llvm::Triple HostTriple(Opts.HostTriple);
  if (!HostTriple.isNVPTX())
    HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts);

  // If no host target, make some guesses about the data layout and return.
  if (!HostTarget) {
    LongWidth = LongAlign = TargetPointerWidth;
    PointerWidth = PointerAlign = TargetPointerWidth;
    switch (TargetPointerWidth) {
    case 32:
      SizeType = TargetInfo::UnsignedInt;
      PtrDiffType = TargetInfo::SignedInt;
      IntPtrType = TargetInfo::SignedInt;
      break;
    case 64:
      SizeType = TargetInfo::UnsignedLong;
      PtrDiffType = TargetInfo::SignedLong;
      IntPtrType = TargetInfo::SignedLong;
      break;
    default:
      llvm_unreachable("TargetPointerWidth must be 32 or 64");
    }

    MaxAtomicInlineWidth = TargetPointerWidth;
    return;
  }

  // Copy properties from host target.
  PointerWidth = HostTarget->getPointerWidth(LangAS::Default);
  PointerAlign = HostTarget->getPointerAlign(LangAS::Default);
  BoolWidth = HostTarget->getBoolWidth();
  BoolAlign = HostTarget->getBoolAlign();
  IntWidth = HostTarget->getIntWidth();
  IntAlign = HostTarget->getIntAlign();
  HalfWidth = HostTarget->getHalfWidth();
  HalfAlign = HostTarget->getHalfAlign();
  FloatWidth = HostTarget->getFloatWidth();
  FloatAlign = HostTarget->getFloatAlign();
  DoubleWidth = HostTarget->getDoubleWidth();
  DoubleAlign = HostTarget->getDoubleAlign();
  LongWidth = HostTarget->getLongWidth();
  LongAlign = HostTarget->getLongAlign();
  LongLongWidth = HostTarget->getLongLongWidth();
  LongLongAlign = HostTarget->getLongLongAlign();
  MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0,
                                                 /* HasNonWeakDef = */ true);
  NewAlign = HostTarget->getNewAlign();
  DefaultAlignForAttributeAligned =
      HostTarget->getDefaultAlignForAttributeAligned();
  SizeType = HostTarget->getSizeType();
  IntMaxType = HostTarget->getIntMaxType();
  PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);
  IntPtrType = HostTarget->getIntPtrType();
  WCharType = HostTarget->getWCharType();
  WIntType = HostTarget->getWIntType();
  Char16Type = HostTarget->getChar16Type();
  Char32Type = HostTarget->getChar32Type();
  Int64Type = HostTarget->getInt64Type();
  SigAtomicType = HostTarget->getSigAtomicType();
  ProcessIDType = HostTarget->getProcessIDType();

  UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
  UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
  UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
  ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();

  // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
  // we need those macros to be identical on host and device, because (among
  // other things) they affect which standard library classes are defined, and
  // we need all classes to be defined on both the host and device.
  MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();

  // Properties intentionally not copied from host:
  // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
  //   host/device boundary.
  // - SuitableAlign: Not visible across the host/device boundary, and may
  //   correctly be different on host/device, e.g. if host has wider vector
  //   types than device.
  // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
  //   as its double type, but that's not necessarily true on the host.
  //   TODO: nvcc emits a warning when using long double on device; we should
  //   do the same.
}

ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
  return llvm::ArrayRef(GCCRegNames);
}

bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
  return llvm::StringSwitch<bool>(Feature)
      .Cases("ptx", "nvptx", true)
      .Default(false);
}

void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
                                       MacroBuilder &Builder) const {
  Builder.defineMacro("__PTX__");
  Builder.defineMacro("__NVPTX__");

  // Skip setting architecture dependent macros if undefined.
  if (GPU == OffloadArch::UNUSED && !HostTarget)
    return;

  if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
    // Set __CUDA_ARCH__ for the GPU specified.
    llvm::StringRef CUDAArchCode = [this] {
      switch (GPU) {
      case OffloadArch::GFX600:
      case OffloadArch::GFX601:
      case OffloadArch::GFX602:
      case OffloadArch::GFX700:
      case OffloadArch::GFX701:
      case OffloadArch::GFX702:
      case OffloadArch::GFX703:
      case OffloadArch::GFX704:
      case OffloadArch::GFX705:
      case OffloadArch::GFX801:
      case OffloadArch::GFX802:
      case OffloadArch::GFX803:
      case OffloadArch::GFX805:
      case OffloadArch::GFX810:
      case OffloadArch::GFX9_GENERIC:
      case OffloadArch::GFX900:
      case OffloadArch::GFX902:
      case OffloadArch::GFX904:
      case OffloadArch::GFX906:
      case OffloadArch::GFX908:
      case OffloadArch::GFX909:
      case OffloadArch::GFX90a:
      case OffloadArch::GFX90c:
      case OffloadArch::GFX9_4_GENERIC:
      case OffloadArch::GFX942:
      case OffloadArch::GFX950:
      case OffloadArch::GFX10_1_GENERIC:
      case OffloadArch::GFX1010:
      case OffloadArch::GFX1011:
      case OffloadArch::GFX1012:
      case OffloadArch::GFX1013:
      case OffloadArch::GFX10_3_GENERIC:
      case OffloadArch::GFX1030:
      case OffloadArch::GFX1031:
      case OffloadArch::GFX1032:
      case OffloadArch::GFX1033:
      case OffloadArch::GFX1034:
      case OffloadArch::GFX1035:
      case OffloadArch::GFX1036:
      case OffloadArch::GFX11_GENERIC:
      case OffloadArch::GFX1100:
      case OffloadArch::GFX1101:
      case OffloadArch::GFX1102:
      case OffloadArch::GFX1103:
      case OffloadArch::GFX1150:
      case OffloadArch::GFX1151:
      case OffloadArch::GFX1152:
      case OffloadArch::GFX1153:
      case OffloadArch::GFX12_GENERIC:
      case OffloadArch::GFX1200:
      case OffloadArch::GFX1201:
      case OffloadArch::GFX1250:
      case OffloadArch::AMDGCNSPIRV:
      case OffloadArch::Generic:
      case OffloadArch::GRANITERAPIDS:
      case OffloadArch::BMG_G21:
      case OffloadArch::LAST:
        break;
      case OffloadArch::UNKNOWN:
        assert(false && "No GPU arch when compiling CUDA device code.");
        return "";
      case OffloadArch::UNUSED:
      case OffloadArch::SM_20:
        return "200";
      case OffloadArch::SM_21:
        return "210";
      case OffloadArch::SM_30:
        return "300";
      case OffloadArch::SM_32_:
        return "320";
      case OffloadArch::SM_35:
        return "350";
      case OffloadArch::SM_37:
        return "370";
      case OffloadArch::SM_50:
        return "500";
      case OffloadArch::SM_52:
        return "520";
      case OffloadArch::SM_53:
        return "530";
      case OffloadArch::SM_60:
        return "600";
      case OffloadArch::SM_61:
        return "610";
      case OffloadArch::SM_62:
        return "620";
      case OffloadArch::SM_70:
        return "700";
      case OffloadArch::SM_72:
        return "720";
      case OffloadArch::SM_75:
        return "750";
      case OffloadArch::SM_80:
        return "800";
      case OffloadArch::SM_86:
        return "860";
      case OffloadArch::SM_87:
        return "870";
      case OffloadArch::SM_89:
        return "890";
      case OffloadArch::SM_90:
      case OffloadArch::SM_90a:
        return "900";
      case OffloadArch::SM_100:
      case OffloadArch::SM_100a:
        return "1000";
      case OffloadArch::SM_101:
      case OffloadArch::SM_101a:
        return "1010";
      case OffloadArch::SM_120:
      case OffloadArch::SM_120a:
        return "1200";
      }
      llvm_unreachable("unhandled OffloadArch");
    }();
    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
    switch (GPU) {
    case OffloadArch::SM_90a:
    case OffloadArch::SM_100a:
    case OffloadArch::SM_101a:
    case OffloadArch::SM_120a:
      Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() +
                              "_ALL",
                          "1");
      break;
    default:
      // Do nothing if this is not an enhanced architecture.
      break;
    }
  }
}

llvm::SmallVector<Builtin::InfosShard>
NVPTXTargetInfo::getTargetBuiltins() const {
  return {{&BuiltinStrings, BuiltinInfos}};
}