1 //===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements NVPTX TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "NVPTX.h" 14 #include "Targets.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/MacroBuilder.h" 17 #include "clang/Basic/TargetBuiltins.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/Frontend/OpenMP/OMPGridValues.h" 20 21 using namespace clang; 22 using namespace clang::targets; 23 24 const Builtin::Info NVPTXTargetInfo::BuiltinInfo[] = { 25 #define BUILTIN(ID, TYPE, ATTRS) \ 26 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 27 #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ 28 {#ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr}, 29 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 30 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 31 #include "clang/Basic/BuiltinsNVPTX.def" 32 }; 33 34 const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"}; 35 36 NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, 37 const TargetOptions &Opts, 38 unsigned TargetPointerWidth) 39 : TargetInfo(Triple) { 40 assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) && 41 "NVPTX only supports 32- and 64-bit modes."); 42 43 PTXVersion = 32; 44 for (const StringRef Feature : Opts.FeaturesAsWritten) { 45 if (!Feature.startswith("+ptx")) 46 continue; 47 PTXVersion = llvm::StringSwitch<unsigned>(Feature) 48 .Case("+ptx72", 72) 49 .Case("+ptx71", 71) 50 .Case("+ptx70", 70) 51 .Case("+ptx65", 65) 52 .Case("+ptx64", 64) 53 .Case("+ptx63", 63) 54 .Case("+ptx61", 61) 55 .Case("+ptx60", 60) 56 .Case("+ptx50", 
50) 57 .Case("+ptx43", 43) 58 .Case("+ptx42", 42) 59 .Case("+ptx41", 41) 60 .Case("+ptx40", 40) 61 .Case("+ptx32", 32) 62 .Default(32); 63 } 64 65 TLSSupported = false; 66 VLASupported = false; 67 AddrSpaceMap = &NVPTXAddrSpaceMap; 68 GridValues = llvm::omp::NVPTXGpuGridValues; 69 UseAddrSpaceMapMangling = true; 70 71 // Define available target features 72 // These must be defined in sorted order! 73 NoAsmVariants = true; 74 GPU = CudaArch::SM_20; 75 76 if (TargetPointerWidth == 32) 77 resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); 78 else if (Opts.NVPTXUseShortPointers) 79 resetDataLayout( 80 "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); 81 else 82 resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64"); 83 84 // If possible, get a TargetInfo for our host triple, so we can match its 85 // types. 86 llvm::Triple HostTriple(Opts.HostTriple); 87 if (!HostTriple.isNVPTX()) 88 HostTarget.reset(AllocateTarget(llvm::Triple(Opts.HostTriple), Opts)); 89 90 // If no host target, make some guesses about the data layout and return. 91 if (!HostTarget) { 92 LongWidth = LongAlign = TargetPointerWidth; 93 PointerWidth = PointerAlign = TargetPointerWidth; 94 switch (TargetPointerWidth) { 95 case 32: 96 SizeType = TargetInfo::UnsignedInt; 97 PtrDiffType = TargetInfo::SignedInt; 98 IntPtrType = TargetInfo::SignedInt; 99 break; 100 case 64: 101 SizeType = TargetInfo::UnsignedLong; 102 PtrDiffType = TargetInfo::SignedLong; 103 IntPtrType = TargetInfo::SignedLong; 104 break; 105 default: 106 llvm_unreachable("TargetPointerWidth must be 32 or 64"); 107 } 108 return; 109 } 110 111 // Copy properties from host target. 
112 PointerWidth = HostTarget->getPointerWidth(/* AddrSpace = */ 0); 113 PointerAlign = HostTarget->getPointerAlign(/* AddrSpace = */ 0); 114 BoolWidth = HostTarget->getBoolWidth(); 115 BoolAlign = HostTarget->getBoolAlign(); 116 IntWidth = HostTarget->getIntWidth(); 117 IntAlign = HostTarget->getIntAlign(); 118 HalfWidth = HostTarget->getHalfWidth(); 119 HalfAlign = HostTarget->getHalfAlign(); 120 FloatWidth = HostTarget->getFloatWidth(); 121 FloatAlign = HostTarget->getFloatAlign(); 122 DoubleWidth = HostTarget->getDoubleWidth(); 123 DoubleAlign = HostTarget->getDoubleAlign(); 124 LongWidth = HostTarget->getLongWidth(); 125 LongAlign = HostTarget->getLongAlign(); 126 LongLongWidth = HostTarget->getLongLongWidth(); 127 LongLongAlign = HostTarget->getLongLongAlign(); 128 MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0); 129 NewAlign = HostTarget->getNewAlign(); 130 DefaultAlignForAttributeAligned = 131 HostTarget->getDefaultAlignForAttributeAligned(); 132 SizeType = HostTarget->getSizeType(); 133 IntMaxType = HostTarget->getIntMaxType(); 134 PtrDiffType = HostTarget->getPtrDiffType(/* AddrSpace = */ 0); 135 IntPtrType = HostTarget->getIntPtrType(); 136 WCharType = HostTarget->getWCharType(); 137 WIntType = HostTarget->getWIntType(); 138 Char16Type = HostTarget->getChar16Type(); 139 Char32Type = HostTarget->getChar32Type(); 140 Int64Type = HostTarget->getInt64Type(); 141 SigAtomicType = HostTarget->getSigAtomicType(); 142 ProcessIDType = HostTarget->getProcessIDType(); 143 144 UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment(); 145 UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment(); 146 UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment(); 147 ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary(); 148 149 // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and 150 // we need those macros to be identical on host and device, because (among 151 // 
other things) they affect which standard library classes are defined, and 152 // we need all classes to be defined on both the host and device. 153 MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth(); 154 155 // Properties intentionally not copied from host: 156 // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the 157 // host/device boundary. 158 // - SuitableAlign: Not visible across the host/device boundary, and may 159 // correctly be different on host/device, e.g. if host has wider vector 160 // types than device. 161 // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same 162 // as its double type, but that's not necessarily true on the host. 163 // TODO: nvcc emits a warning when using long double on device; we should 164 // do the same. 165 } 166 167 ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const { 168 return llvm::makeArrayRef(GCCRegNames); 169 } 170 171 bool NVPTXTargetInfo::hasFeature(StringRef Feature) const { 172 return llvm::StringSwitch<bool>(Feature) 173 .Cases("ptx", "nvptx", true) 174 .Default(false); 175 } 176 177 void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, 178 MacroBuilder &Builder) const { 179 Builder.defineMacro("__PTX__"); 180 Builder.defineMacro("__NVPTX__"); 181 if (Opts.CUDAIsDevice) { 182 // Set __CUDA_ARCH__ for the GPU specified. 
183 std::string CUDAArchCode = [this] { 184 switch (GPU) { 185 case CudaArch::GFX600: 186 case CudaArch::GFX601: 187 case CudaArch::GFX602: 188 case CudaArch::GFX700: 189 case CudaArch::GFX701: 190 case CudaArch::GFX702: 191 case CudaArch::GFX703: 192 case CudaArch::GFX704: 193 case CudaArch::GFX705: 194 case CudaArch::GFX801: 195 case CudaArch::GFX802: 196 case CudaArch::GFX803: 197 case CudaArch::GFX805: 198 case CudaArch::GFX810: 199 case CudaArch::GFX900: 200 case CudaArch::GFX902: 201 case CudaArch::GFX904: 202 case CudaArch::GFX906: 203 case CudaArch::GFX908: 204 case CudaArch::GFX909: 205 case CudaArch::GFX90a: 206 case CudaArch::GFX90c: 207 case CudaArch::GFX1010: 208 case CudaArch::GFX1011: 209 case CudaArch::GFX1012: 210 case CudaArch::GFX1013: 211 case CudaArch::GFX1030: 212 case CudaArch::GFX1031: 213 case CudaArch::GFX1032: 214 case CudaArch::GFX1033: 215 case CudaArch::GFX1034: 216 case CudaArch::GFX1035: 217 case CudaArch::LAST: 218 break; 219 case CudaArch::UNUSED: 220 case CudaArch::UNKNOWN: 221 assert(false && "No GPU arch when compiling CUDA device code."); 222 return ""; 223 case CudaArch::SM_20: 224 return "200"; 225 case CudaArch::SM_21: 226 return "210"; 227 case CudaArch::SM_30: 228 return "300"; 229 case CudaArch::SM_32: 230 return "320"; 231 case CudaArch::SM_35: 232 return "350"; 233 case CudaArch::SM_37: 234 return "370"; 235 case CudaArch::SM_50: 236 return "500"; 237 case CudaArch::SM_52: 238 return "520"; 239 case CudaArch::SM_53: 240 return "530"; 241 case CudaArch::SM_60: 242 return "600"; 243 case CudaArch::SM_61: 244 return "610"; 245 case CudaArch::SM_62: 246 return "620"; 247 case CudaArch::SM_70: 248 return "700"; 249 case CudaArch::SM_72: 250 return "720"; 251 case CudaArch::SM_75: 252 return "750"; 253 case CudaArch::SM_80: 254 return "800"; 255 case CudaArch::SM_86: 256 return "860"; 257 } 258 llvm_unreachable("unhandled CudaArch"); 259 }(); 260 Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); 261 } 262 } 263 264 
/// Returns a view over the first (LastTSBuiltin - FirstTSBuiltin) entries of
/// the BuiltinInfo table.
ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {
  const size_t NumTargetBuiltins =
      clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin;
  return llvm::makeArrayRef(BuiltinInfo, NumTargetBuiltins);
}