1 //===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements NVPTX TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "NVPTX.h" 14 #include "Targets.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/MacroBuilder.h" 17 #include "clang/Basic/TargetBuiltins.h" 18 #include "llvm/ADT/StringSwitch.h" 19 20 using namespace clang; 21 using namespace clang::targets; 22 23 static constexpr Builtin::Info BuiltinInfo[] = { 24 #define BUILTIN(ID, TYPE, ATTRS) \ 25 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 26 #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ 27 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES}, 28 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 29 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 30 #include "clang/Basic/BuiltinsNVPTX.def" 31 }; 32 33 const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"}; 34 35 NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, 36 const TargetOptions &Opts, 37 unsigned TargetPointerWidth) 38 : TargetInfo(Triple) { 39 assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) && 40 "NVPTX only supports 32- and 64-bit modes."); 41 42 PTXVersion = 32; 43 for (const StringRef Feature : Opts.FeaturesAsWritten) { 44 int PTXV; 45 if (!Feature.starts_with("+ptx") || 46 Feature.drop_front(4).getAsInteger(10, PTXV)) 47 continue; 48 PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)? 49 } 50 51 TLSSupported = false; 52 VLASupported = false; 53 AddrSpaceMap = &NVPTXAddrSpaceMap; 54 UseAddrSpaceMapMangling = true; 55 // __bf16 is always available as a load/store only type. 56 BFloat16Width = BFloat16Align = 16; 57 BFloat16Format = &llvm::APFloat::BFloat(); 58 59 // Define available target features 60 // These must be defined in sorted order! 61 NoAsmVariants = true; 62 GPU = OffloadArch::UNUSED; 63 64 // PTX supports f16 as a fundamental type. 65 HasLegalHalfType = true; 66 HasFloat16 = true; 67 68 if (TargetPointerWidth == 32) 69 resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); 70 else if (Opts.NVPTXUseShortPointers) 71 resetDataLayout( 72 "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); 73 else 74 resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64"); 75 76 // If possible, get a TargetInfo for our host triple, so we can match its 77 // types. 78 llvm::Triple HostTriple(Opts.HostTriple); 79 if (!HostTriple.isNVPTX()) 80 HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts); 81 82 // If no host target, make some guesses about the data layout and return. 83 if (!HostTarget) { 84 LongWidth = LongAlign = TargetPointerWidth; 85 PointerWidth = PointerAlign = TargetPointerWidth; 86 switch (TargetPointerWidth) { 87 case 32: 88 SizeType = TargetInfo::UnsignedInt; 89 PtrDiffType = TargetInfo::SignedInt; 90 IntPtrType = TargetInfo::SignedInt; 91 break; 92 case 64: 93 SizeType = TargetInfo::UnsignedLong; 94 PtrDiffType = TargetInfo::SignedLong; 95 IntPtrType = TargetInfo::SignedLong; 96 break; 97 default: 98 llvm_unreachable("TargetPointerWidth must be 32 or 64"); 99 } 100 101 MaxAtomicInlineWidth = TargetPointerWidth; 102 return; 103 } 104 105 // Copy properties from host target. 106 PointerWidth = HostTarget->getPointerWidth(LangAS::Default); 107 PointerAlign = HostTarget->getPointerAlign(LangAS::Default); 108 BoolWidth = HostTarget->getBoolWidth(); 109 BoolAlign = HostTarget->getBoolAlign(); 110 IntWidth = HostTarget->getIntWidth(); 111 IntAlign = HostTarget->getIntAlign(); 112 HalfWidth = HostTarget->getHalfWidth(); 113 HalfAlign = HostTarget->getHalfAlign(); 114 FloatWidth = HostTarget->getFloatWidth(); 115 FloatAlign = HostTarget->getFloatAlign(); 116 DoubleWidth = HostTarget->getDoubleWidth(); 117 DoubleAlign = HostTarget->getDoubleAlign(); 118 LongWidth = HostTarget->getLongWidth(); 119 LongAlign = HostTarget->getLongAlign(); 120 LongLongWidth = HostTarget->getLongLongWidth(); 121 LongLongAlign = HostTarget->getLongLongAlign(); 122 MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0, 123 /* HasNonWeakDef = */ true); 124 NewAlign = HostTarget->getNewAlign(); 125 DefaultAlignForAttributeAligned = 126 HostTarget->getDefaultAlignForAttributeAligned(); 127 SizeType = HostTarget->getSizeType(); 128 IntMaxType = HostTarget->getIntMaxType(); 129 PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default); 130 IntPtrType = HostTarget->getIntPtrType(); 131 WCharType = HostTarget->getWCharType(); 132 WIntType = HostTarget->getWIntType(); 133 Char16Type = HostTarget->getChar16Type(); 134 Char32Type = HostTarget->getChar32Type(); 135 Int64Type = HostTarget->getInt64Type(); 136 SigAtomicType = HostTarget->getSigAtomicType(); 137 ProcessIDType = HostTarget->getProcessIDType(); 138 139 UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment(); 140 UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment(); 141 UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment(); 142 ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary(); 143 144 // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and 145 // we need those macros to be identical on host and device, because (among 146 // other things) they affect which standard library classes are defined, and 147 // we need all classes to be defined on both the host and device. 148 MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth(); 149 150 // Properties intentionally not copied from host: 151 // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the 152 // host/device boundary. 153 // - SuitableAlign: Not visible across the host/device boundary, and may 154 // correctly be different on host/device, e.g. if host has wider vector 155 // types than device. 156 // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same 157 // as its double type, but that's not necessarily true on the host. 158 // TODO: nvcc emits a warning when using long double on device; we should 159 // do the same. 160 } 161 162 ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const { 163 return llvm::ArrayRef(GCCRegNames); 164 } 165 166 bool NVPTXTargetInfo::hasFeature(StringRef Feature) const { 167 return llvm::StringSwitch<bool>(Feature) 168 .Cases("ptx", "nvptx", true) 169 .Default(false); 170 } 171 172 void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, 173 MacroBuilder &Builder) const { 174 Builder.defineMacro("__PTX__"); 175 Builder.defineMacro("__NVPTX__"); 176 177 // Skip setting architecture dependent macros if undefined. 178 if (GPU == OffloadArch::UNUSED && !HostTarget) 179 return; 180 181 if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) { 182 // Set __CUDA_ARCH__ for the GPU specified. 183 std::string CUDAArchCode = [this] { 184 switch (GPU) { 185 case OffloadArch::GFX600: 186 case OffloadArch::GFX601: 187 case OffloadArch::GFX602: 188 case OffloadArch::GFX700: 189 case OffloadArch::GFX701: 190 case OffloadArch::GFX702: 191 case OffloadArch::GFX703: 192 case OffloadArch::GFX704: 193 case OffloadArch::GFX705: 194 case OffloadArch::GFX801: 195 case OffloadArch::GFX802: 196 case OffloadArch::GFX803: 197 case OffloadArch::GFX805: 198 case OffloadArch::GFX810: 199 case OffloadArch::GFX9_GENERIC: 200 case OffloadArch::GFX900: 201 case OffloadArch::GFX902: 202 case OffloadArch::GFX904: 203 case OffloadArch::GFX906: 204 case OffloadArch::GFX908: 205 case OffloadArch::GFX909: 206 case OffloadArch::GFX90a: 207 case OffloadArch::GFX90c: 208 case OffloadArch::GFX940: 209 case OffloadArch::GFX941: 210 case OffloadArch::GFX942: 211 case OffloadArch::GFX10_1_GENERIC: 212 case OffloadArch::GFX1010: 213 case OffloadArch::GFX1011: 214 case OffloadArch::GFX1012: 215 case OffloadArch::GFX1013: 216 case OffloadArch::GFX10_3_GENERIC: 217 case OffloadArch::GFX1030: 218 case OffloadArch::GFX1031: 219 case OffloadArch::GFX1032: 220 case OffloadArch::GFX1033: 221 case OffloadArch::GFX1034: 222 case OffloadArch::GFX1035: 223 case OffloadArch::GFX1036: 224 case OffloadArch::GFX11_GENERIC: 225 case OffloadArch::GFX1100: 226 case OffloadArch::GFX1101: 227 case OffloadArch::GFX1102: 228 case OffloadArch::GFX1103: 229 case OffloadArch::GFX1150: 230 case OffloadArch::GFX1151: 231 case OffloadArch::GFX1152: 232 case OffloadArch::GFX12_GENERIC: 233 case OffloadArch::GFX1200: 234 case OffloadArch::GFX1201: 235 case OffloadArch::AMDGCNSPIRV: 236 case OffloadArch::Generic: 237 case OffloadArch::LAST: 238 break; 239 case OffloadArch::UNKNOWN: 240 assert(false && "No GPU arch when compiling CUDA device code."); 241 return ""; 242 case OffloadArch::UNUSED: 243 case OffloadArch::SM_20: 244 return "200"; 245 case OffloadArch::SM_21: 246 return "210"; 247 case OffloadArch::SM_30: 248 return "300"; 249 case OffloadArch::SM_32_: 250 return "320"; 251 case OffloadArch::SM_35: 252 return "350"; 253 case OffloadArch::SM_37: 254 return "370"; 255 case OffloadArch::SM_50: 256 return "500"; 257 case OffloadArch::SM_52: 258 return "520"; 259 case OffloadArch::SM_53: 260 return "530"; 261 case OffloadArch::SM_60: 262 return "600"; 263 case OffloadArch::SM_61: 264 return "610"; 265 case OffloadArch::SM_62: 266 return "620"; 267 case OffloadArch::SM_70: 268 return "700"; 269 case OffloadArch::SM_72: 270 return "720"; 271 case OffloadArch::SM_75: 272 return "750"; 273 case OffloadArch::SM_80: 274 return "800"; 275 case OffloadArch::SM_86: 276 return "860"; 277 case OffloadArch::SM_87: 278 return "870"; 279 case OffloadArch::SM_89: 280 return "890"; 281 case OffloadArch::SM_90: 282 case OffloadArch::SM_90a: 283 return "900"; 284 } 285 llvm_unreachable("unhandled OffloadArch"); 286 }(); 287 Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); 288 if (GPU == OffloadArch::SM_90a) 289 Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1"); 290 } 291 } 292 293 ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const { 294 return llvm::ArrayRef(BuiltinInfo, 295 clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin); 296 } 297