1 //===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements NVPTX TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "NVPTX.h" 14 #include "Targets.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/MacroBuilder.h" 17 #include "clang/Basic/TargetBuiltins.h" 18 #include "llvm/ADT/StringSwitch.h" 19 20 using namespace clang; 21 using namespace clang::targets; 22 23 static constexpr Builtin::Info BuiltinInfo[] = { 24 #define BUILTIN(ID, TYPE, ATTRS) \ 25 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 26 #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ 27 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES}, 28 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 29 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 30 #include "clang/Basic/BuiltinsNVPTX.def" 31 }; 32 33 const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"}; 34 35 NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, 36 const TargetOptions &Opts, 37 unsigned TargetPointerWidth) 38 : TargetInfo(Triple) { 39 assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) && 40 "NVPTX only supports 32- and 64-bit modes."); 41 42 PTXVersion = 32; 43 for (const StringRef Feature : Opts.FeaturesAsWritten) { 44 int PTXV; 45 if (!Feature.starts_with("+ptx") || 46 Feature.drop_front(4).getAsInteger(10, PTXV)) 47 continue; 48 PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)? 49 } 50 51 TLSSupported = false; 52 VLASupported = false; 53 AddrSpaceMap = &NVPTXAddrSpaceMap; 54 UseAddrSpaceMapMangling = true; 55 // __bf16 is always available as a load/store only type. 56 BFloat16Width = BFloat16Align = 16; 57 BFloat16Format = &llvm::APFloat::BFloat(); 58 59 // Define available target features 60 // These must be defined in sorted order! 61 NoAsmVariants = true; 62 GPU = CudaArch::SM_20; 63 64 if (TargetPointerWidth == 32) 65 resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); 66 else if (Opts.NVPTXUseShortPointers) 67 resetDataLayout( 68 "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); 69 else 70 resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64"); 71 72 // If possible, get a TargetInfo for our host triple, so we can match its 73 // types. 74 llvm::Triple HostTriple(Opts.HostTriple); 75 if (!HostTriple.isNVPTX()) 76 HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts); 77 78 // If no host target, make some guesses about the data layout and return. 79 if (!HostTarget) { 80 LongWidth = LongAlign = TargetPointerWidth; 81 PointerWidth = PointerAlign = TargetPointerWidth; 82 switch (TargetPointerWidth) { 83 case 32: 84 SizeType = TargetInfo::UnsignedInt; 85 PtrDiffType = TargetInfo::SignedInt; 86 IntPtrType = TargetInfo::SignedInt; 87 break; 88 case 64: 89 SizeType = TargetInfo::UnsignedLong; 90 PtrDiffType = TargetInfo::SignedLong; 91 IntPtrType = TargetInfo::SignedLong; 92 break; 93 default: 94 llvm_unreachable("TargetPointerWidth must be 32 or 64"); 95 } 96 97 MaxAtomicInlineWidth = TargetPointerWidth; 98 return; 99 } 100 101 // Copy properties from host target. 102 PointerWidth = HostTarget->getPointerWidth(LangAS::Default); 103 PointerAlign = HostTarget->getPointerAlign(LangAS::Default); 104 BoolWidth = HostTarget->getBoolWidth(); 105 BoolAlign = HostTarget->getBoolAlign(); 106 IntWidth = HostTarget->getIntWidth(); 107 IntAlign = HostTarget->getIntAlign(); 108 HalfWidth = HostTarget->getHalfWidth(); 109 HalfAlign = HostTarget->getHalfAlign(); 110 FloatWidth = HostTarget->getFloatWidth(); 111 FloatAlign = HostTarget->getFloatAlign(); 112 DoubleWidth = HostTarget->getDoubleWidth(); 113 DoubleAlign = HostTarget->getDoubleAlign(); 114 LongWidth = HostTarget->getLongWidth(); 115 LongAlign = HostTarget->getLongAlign(); 116 LongLongWidth = HostTarget->getLongLongWidth(); 117 LongLongAlign = HostTarget->getLongLongAlign(); 118 MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0); 119 NewAlign = HostTarget->getNewAlign(); 120 DefaultAlignForAttributeAligned = 121 HostTarget->getDefaultAlignForAttributeAligned(); 122 SizeType = HostTarget->getSizeType(); 123 IntMaxType = HostTarget->getIntMaxType(); 124 PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default); 125 IntPtrType = HostTarget->getIntPtrType(); 126 WCharType = HostTarget->getWCharType(); 127 WIntType = HostTarget->getWIntType(); 128 Char16Type = HostTarget->getChar16Type(); 129 Char32Type = HostTarget->getChar32Type(); 130 Int64Type = HostTarget->getInt64Type(); 131 SigAtomicType = HostTarget->getSigAtomicType(); 132 ProcessIDType = HostTarget->getProcessIDType(); 133 134 UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment(); 135 UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment(); 136 UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment(); 137 ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary(); 138 139 // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and 140 // we need those macros to be identical on host and device, because (among 141 // other things) they affect which standard library classes are defined, and 142 // we need all classes to be defined on both the host and device. 143 MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth(); 144 145 // Properties intentionally not copied from host: 146 // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the 147 // host/device boundary. 148 // - SuitableAlign: Not visible across the host/device boundary, and may 149 // correctly be different on host/device, e.g. if host has wider vector 150 // types than device. 151 // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same 152 // as its double type, but that's not necessarily true on the host. 153 // TODO: nvcc emits a warning when using long double on device; we should 154 // do the same. 155 } 156 157 ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const { 158 return llvm::ArrayRef(GCCRegNames); 159 } 160 161 bool NVPTXTargetInfo::hasFeature(StringRef Feature) const { 162 return llvm::StringSwitch<bool>(Feature) 163 .Cases("ptx", "nvptx", true) 164 .Default(false); 165 } 166 167 void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, 168 MacroBuilder &Builder) const { 169 Builder.defineMacro("__PTX__"); 170 Builder.defineMacro("__NVPTX__"); 171 if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) { 172 // Set __CUDA_ARCH__ for the GPU specified. 173 std::string CUDAArchCode = [this] { 174 switch (GPU) { 175 case CudaArch::GFX600: 176 case CudaArch::GFX601: 177 case CudaArch::GFX602: 178 case CudaArch::GFX700: 179 case CudaArch::GFX701: 180 case CudaArch::GFX702: 181 case CudaArch::GFX703: 182 case CudaArch::GFX704: 183 case CudaArch::GFX705: 184 case CudaArch::GFX801: 185 case CudaArch::GFX802: 186 case CudaArch::GFX803: 187 case CudaArch::GFX805: 188 case CudaArch::GFX810: 189 case CudaArch::GFX900: 190 case CudaArch::GFX902: 191 case CudaArch::GFX904: 192 case CudaArch::GFX906: 193 case CudaArch::GFX908: 194 case CudaArch::GFX909: 195 case CudaArch::GFX90a: 196 case CudaArch::GFX90c: 197 case CudaArch::GFX940: 198 case CudaArch::GFX941: 199 case CudaArch::GFX942: 200 case CudaArch::GFX1010: 201 case CudaArch::GFX1011: 202 case CudaArch::GFX1012: 203 case CudaArch::GFX1013: 204 case CudaArch::GFX1030: 205 case CudaArch::GFX1031: 206 case CudaArch::GFX1032: 207 case CudaArch::GFX1033: 208 case CudaArch::GFX1034: 209 case CudaArch::GFX1035: 210 case CudaArch::GFX1036: 211 case CudaArch::GFX1100: 212 case CudaArch::GFX1101: 213 case CudaArch::GFX1102: 214 case CudaArch::GFX1103: 215 case CudaArch::GFX1150: 216 case CudaArch::GFX1151: 217 case CudaArch::GFX1200: 218 case CudaArch::GFX1201: 219 case CudaArch::Generic: 220 case CudaArch::LAST: 221 break; 222 case CudaArch::UNUSED: 223 case CudaArch::UNKNOWN: 224 assert(false && "No GPU arch when compiling CUDA device code."); 225 return ""; 226 case CudaArch::SM_20: 227 return "200"; 228 case CudaArch::SM_21: 229 return "210"; 230 case CudaArch::SM_30: 231 return "300"; 232 case CudaArch::SM_32: 233 return "320"; 234 case CudaArch::SM_35: 235 return "350"; 236 case CudaArch::SM_37: 237 return "370"; 238 case CudaArch::SM_50: 239 return "500"; 240 case CudaArch::SM_52: 241 return "520"; 242 case CudaArch::SM_53: 243 return "530"; 244 case CudaArch::SM_60: 245 return "600"; 246 case CudaArch::SM_61: 247 return "610"; 248 case CudaArch::SM_62: 249 return "620"; 250 case CudaArch::SM_70: 251 return "700"; 252 case CudaArch::SM_72: 253 return "720"; 254 case CudaArch::SM_75: 255 return "750"; 256 case CudaArch::SM_80: 257 return "800"; 258 case CudaArch::SM_86: 259 return "860"; 260 case CudaArch::SM_87: 261 return "870"; 262 case CudaArch::SM_89: 263 return "890"; 264 case CudaArch::SM_90: 265 case CudaArch::SM_90a: 266 return "900"; 267 } 268 llvm_unreachable("unhandled CudaArch"); 269 }(); 270 Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); 271 if (GPU == CudaArch::SM_90a) 272 Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1"); 273 } 274 } 275 276 ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const { 277 return llvm::ArrayRef(BuiltinInfo, 278 clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin); 279 } 280