//===- NVPTX.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class NVPTXTargetCodeGenInfo;

class NVPTXABIInfo : public ABIInfo {
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  bool isUnsupportedType(QualType T) const;
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};

class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  bool shouldEmitStaticExternCAliases() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, a surface reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, a texture reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  // Creates an MDNode with GV, Name, and Operand as operands and adds it to
  // the nvvm.annotations named metadata node.
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

private:
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
    // Look through an `addrspacecast` on the constant pointer, if any.
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the specific global variable using the
      // `nvvm.texsurf.handle.internal` intrinsic.
      Handle = CGF.EmitRuntimeCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               {GV->getType()}),
          {GV}, "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
    CGF.EmitStoreOfScalar(Handle, Dst);
  }
};
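
// Illustrative sketch (not emitted verbatim by this file): for a CUDA
// texture reference global such as `texture<float, 2> tex;` compiled for
// the device, emitBuiltinSurfTexDeviceCopy above loads the handle roughly as
//   %texsurf_handle = call i64 @llvm.nvvm.texsurf.handle.internal.p1(
//                         ptr addrspace(1) @tex)
// and stores the resulting i64 handle into the destination lvalue; the exact
// intrinsic name mangling and address space depend on the global's type.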

/// Checks whether the given type is not directly supported by the target.
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
  ASTContext &Context = getContext();
  if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
    return true;
  if (!Context.getTargetInfo().hasFloat128Type() &&
      (T->isFloat128Type() ||
       (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
    return true;
  if (const auto *EIT = T->getAs<BitIntType>())
    return EIT->getNumBits() >
           (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
  if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
      Context.getTypeSize(T) > 64U)
    return true;
  if (const auto *AT = T->getAsArrayTypeUnsafe())
    return isUnsupportedType(AT->getElementType());
  const auto *RT = T->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const CXXBaseSpecifier &I : CXXRD->bases())
      if (isUnsupportedType(I.getType()))
        return true;

  for (const FieldDecl *I : RD->fields())
    if (isUnsupportedType(I->getType()))
      return true;
  return false;
}

/// Coerce the given type into an integer array whose element size does not
/// exceed the given maximum (in bits).
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
                                                   unsigned MaxSize) const {
  // Alignment and Size are measured in bits.
  const uint64_t Size = getContext().getTypeSize(Ty);
  const uint64_t Alignment = getContext().getTypeAlign(Ty);
  const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
  llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
  const uint64_t NumElements = (Size + Div - 1) / Div;
  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}
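
// Illustrative sketch of the coercion above: with MaxSize = 64, a 128-bit
// type that is at least 64-bit aligned gets Div = min(64, Alignment) = 64
// and NumElements = (128 + 63) / 64 = 2, so it is passed or returned as
// [2 x i64].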

ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (getContext().getLangOpts().OpenMP &&
      getContext().getLangOpts().OpenMPIsTargetDevice &&
      isUnsupportedType(RetTy))
    return coerceToIntArrayWithLimit(RetTy, 64);

  // Note: This is different from the default ABI.
  if (!RetTy->isScalarType())
    return ABIArgInfo::getDirect();

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  // Return aggregate types as indirect by value.
  if (isAggregateTypeForABI(Ty)) {
    // Under CUDA device compilation, tex/surf builtin types are replaced with
    // object types and passed directly.
    if (getContext().getLangOpts().CUDAIsDevice) {
      if (Ty->isCUDADeviceBuiltinSurfaceType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
      if (Ty->isCUDADeviceBuiltinTextureType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinTextureDeviceType());
    }
    return getNaturalAlignIndirect(Ty, /* byval */ true);
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if ((EIT->getNumBits() > 128) ||
        (!getContext().getTargetInfo().hasInt128Type() &&
         EIT->getNumBits() > 64))
      return getNaturalAlignIndirect(Ty, /* byval */ true);
  }

  return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
                                            : ABIArgInfo::getDirect());
}

void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
    I.info = ArgumentsCount < FI.getNumRequiredArgs()
                 ? classifyArgumentType(I.type)
                 : ABIArgInfo::getDirect();

  // Always honor a user-specified calling convention.
  if (FI.getCallingConvention() != llvm::CallingConv::C)
    return;

  FI.setEffectiveCallingConvention(getRuntimeCC());
}

RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty, AggValueSlot Slot) const {
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(1),
                          /*AllowHigherAlign=*/true, Slot);
}
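
// Illustrative sketch of the argument classification above (assumed IR
// shapes, not emitted by this file verbatim): an ordinary struct argument is
// passed indirectly by value, roughly
//   define void @f(ptr byval(%struct.S) align 8 %s)
// whereas under CUDA device compilation a texture/surface builtin argument
// is passed directly as its i64 object handle.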

void NVPTXTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (GV->isDeclaration())
    return;
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD) {
    if (M.getLangOpts().CUDA) {
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
        addNVVMMetadata(GV, "surface", 1);
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
        addNVVMMetadata(GV, "texture", 1);
      return;
    }
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD) return;

  llvm::Function *F = cast<llvm::Function>(GV);

  // Perform special handling in OpenCL mode.
  if (M.getLangOpts().OpenCL) {
    // Use OpenCL function attributes to check for kernel functions.
    // By default, all functions are device functions.
    if (FD->hasAttr<OpenCLKernelAttr>()) {
      // OpenCL __kernel functions get kernel metadata.
      // Create !{<func-ref>, metadata !"kernel", i32 1} node.
      addNVVMMetadata(F, "kernel", 1);
      // And kernel functions are not subject to inlining.
      F->addFnAttr(llvm::Attribute::NoInline);
    }
  }

  // Perform special handling in CUDA mode.
  if (M.getLangOpts().CUDA) {
    // CUDA __global__ functions get a kernel metadata entry. Since
    // __global__ functions cannot be called from the device, we do not
    // need to set the noinline attribute.
    if (FD->hasAttr<CUDAGlobalAttr>()) {
      // Create !{<func-ref>, metadata !"kernel", i32 1} node.
      addNVVMMetadata(F, "kernel", 1);
    }
    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
      M.handleCUDALaunchBoundsAttr(F, Attr);
  }

  // Attach kernel metadata directly if compiling for NVPTX.
  if (FD->hasAttr<NVPTXKernelAttr>()) {
    addNVVMMetadata(F, "kernel", 1);
  }
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
                                             StringRef Name, int Operand) {
  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get the "nvvm.annotations" metadata node.
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  llvm::Metadata *MDVals[] = {
      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
  // Append metadata to nvvm.annotations.
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
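
// Illustrative sketch of the metadata produced by addNVVMMetadata for a
// kernel function @foo (module-level IR, assuming no other annotations):
//   !nvvm.annotations = !{!0}
//   !0 = !{ptr @foo, !"kernel", i32 1}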

bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

llvm::Constant *
NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
                                       llvm::PointerType *PT,
                                       QualType QT) const {
  auto &Ctx = CGM.getContext();
  if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
    return llvm::ConstantPointerNull::get(PT);

  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}
} // namespace

void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
                                               const CUDALaunchBoundsAttr *Attr,
                                               int32_t *MaxThreadsVal,
                                               int32_t *MinBlocksVal,
                                               int32_t *MaxClusterRankVal) {
  llvm::APSInt MaxThreads(32);
  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
  if (MaxThreads > 0) {
    if (MaxThreadsVal)
      *MaxThreadsVal = MaxThreads.getExtValue();
    if (F) {
      // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node.
      NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
                                              MaxThreads.getExtValue());
    }
  }

  // min and max blocks are optional arguments for CUDALaunchBoundsAttr. If
  // they were not specified in __launch_bounds__ or the user specified a 0
  // value, we don't have to add a PTX directive.
  if (Attr->getMinBlocks()) {
    llvm::APSInt MinBlocks(32);
    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
    if (MinBlocks > 0) {
      if (MinBlocksVal)
        *MinBlocksVal = MinBlocks.getExtValue();
      if (F) {
        // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node.
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
                                                MinBlocks.getExtValue());
      }
    }
  }
  if (Attr->getMaxBlocks()) {
    llvm::APSInt MaxBlocks(32);
    MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
    if (MaxBlocks > 0) {
      if (MaxClusterRankVal)
        *MaxClusterRankVal = MaxBlocks.getExtValue();
      if (F) {
        // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node.
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
                                                MaxBlocks.getExtValue());
      }
    }
  }
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
}
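
// Illustrative sketch of the launch-bounds lowering implemented in
// handleCUDALaunchBoundsAttr above: for an assumed example kernel declared as
//   __global__ void __launch_bounds__(256, 2) k();
// the annotations appended to !nvvm.annotations are roughly
//   !{ptr @k, !"maxntidx", i32 256}
//   !{ptr @k, !"minctasm", i32 2}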