//===- NVPTX.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class NVPTXTargetCodeGenInfo;

/// ABI classification for the NVPTX target. Holds a back-reference to the
/// owning NVPTXTargetCodeGenInfo so argument classification can reuse its
/// CUDA surface/texture handle types.
class NVPTXABIInfo : public ABIInfo {
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  bool isUnsupportedType(QualType T) const;
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};

/// Target-specific IR lowering hooks for NVPTX: kernel attributes/calling
/// convention, nvvm.annotations metadata, CUDA builtin surface/texture
/// handling, and address-space-aware null pointers.
class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  bool shouldEmitStaticExternCAliases() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, surface reference is represented as an object handle
    // in 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, texture reference is represented as an object handle
    // in 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  unsigned getDeviceKernelCallingConv() const override {
    return llvm::CallingConv::PTX_Kernel;
  }

  // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
  // resulting MDNode to the nvvm.annotations MDNode.
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

  // Appends a "grid_constant" entry for GV to nvvm.annotations listing the
  // (1-based) kernel parameter indices in GridConstantArgs.
  static void
  addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
                              const SmallVectorImpl<int> &GridConstantArgs);

private:
  // Copies a surface/texture object handle from Src to Dst. When the source
  // resolves to a global variable (possibly behind an addrspacecast), the
  // handle is materialized via the nvvm.texsurf.handle.internal intrinsic;
  // otherwise it is loaded as an ordinary scalar.
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
    // Lookup `addrspacecast` through the constant pointer if any.
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the specific global variable using
      // `nvvm.texsurf.handle.internal` intrinsic.
      Handle = CGF.EmitRuntimeCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               {GV->getType()}),
          {GV}, "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
    CGF.EmitStoreOfScalar(Handle, Dst);
  }
};

/// Checks if the type is unsupported directly by the current target.
/// Recurses through array element types, C++ base classes, and record fields,
/// so a record is unsupported if any transitively contained type is.
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
  ASTContext &Context = getContext();
  if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
    return true;
  // Also catch long double when it is a 128-bit floating type, not just
  // __float128 itself.
  if (!Context.getTargetInfo().hasFloat128Type() &&
      (T->isFloat128Type() ||
       (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
    return true;
  // _BitInt: the supported bit-width limit depends on __int128 availability.
  if (const auto *EIT = T->getAs<BitIntType>())
    return EIT->getNumBits() >
           (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
  if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
      Context.getTypeSize(T) > 64U)
    return true;
  if (const auto *AT = T->getAsArrayTypeUnsafe())
    return isUnsupportedType(AT->getElementType());
  const auto *RT = T->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const CXXBaseSpecifier &I : CXXRD->bases())
      if (isUnsupportedType(I.getType()))
        return true;

  for (const FieldDecl *I : RD->fields())
    if (isUnsupportedType(I->getType()))
      return true;
  return false;
}

/// Coerce the given type into an array with maximum allowed size of elements.
/// The element width is min(MaxSize, type alignment); the element count is
/// rounded up so the array covers the full type size. Alignment and Size are
/// measured in bits.
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
                                                   unsigned MaxSize) const {
  const uint64_t Size = getContext().getTypeSize(Ty);
  const uint64_t Alignment = getContext().getTypeAlign(Ty);
  const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
  llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
  // Round up so partial trailing bits still get an element.
  const uint64_t NumElements = (Size + Div - 1) / Div;
  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}

/// Classify how a function's return value is passed back on NVPTX.
ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  // On the OpenMP target device, types the target cannot represent directly
  // are returned as an integer array coerced to at most 64-bit elements.
  if (getContext().getLangOpts().OpenMP &&
      getContext().getLangOpts().OpenMPIsTargetDevice &&
      isUnsupportedType(RetTy))
    return coerceToIntArrayWithLimit(RetTy, 64);

  // note: this is different from default ABI
  if (!RetTy->isScalarType())
    return ABIArgInfo::getDirect();

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

/// Classify how a single function argument is passed on NVPTX.
ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  // Return aggregates type as indirect by value
  if (isAggregateTypeForABI(Ty)) {
    // Under CUDA device compilation, tex/surf builtin types are replaced with
    // object types and passed directly.
    if (getContext().getLangOpts().CUDAIsDevice) {
      if (Ty->isCUDADeviceBuiltinSurfaceType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
      if (Ty->isCUDADeviceBuiltinTextureType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinTextureDeviceType());
    }
    return getNaturalAlignIndirect(
        Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
        /* byval */ true);
  }

  // _BitInt wider than the target can handle is also passed indirectly.
  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if ((EIT->getNumBits() > 128) ||
        (!getContext().getTargetInfo().hasInt128Type() &&
         EIT->getNumBits() > 64))
      return getNaturalAlignIndirect(
          Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
          /* byval */ true);
  }

  return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
                                            : ABIArgInfo::getDirect());
}

/// Compute ABI info for all arguments and the return value of FI.
void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  // Arguments beyond the required ones (i.e. variadic trailing arguments)
  // are passed directly rather than classified.
  for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
    I.info = ArgumentsCount < FI.getNumRequiredArgs()
                 ? classifyArgumentType(I.type)
                 : ABIArgInfo::getDirect();

  // Always honor user-specified calling convention.
  if (FI.getCallingConvention() != llvm::CallingConv::C)
    return;

  FI.setEffectiveCallingConvention(getRuntimeCC());
}

/// Emit a va_arg access: a void*-based read using the type's size/alignment,
/// with a minimum slot alignment of one byte and higher alignment allowed.
RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty, AggValueSlot Slot) const {
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(1),
                          /*AllowHigherAlign=*/true, Slot);
}

/// Attach NVPTX-specific attributes/metadata to a freshly-defined global:
/// surface/texture annotations for CUDA variables, and kernel calling
/// convention, noinline, launch bounds and grid_constant metadata for kernels.
void NVPTXTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  // Declarations get their attributes from their definitions.
  if (GV->isDeclaration())
    return;
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD) {
    if (M.getLangOpts().CUDA) {
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
        addNVVMMetadata(GV, "surface", 1);
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
        addNVVMMetadata(GV, "texture", 1);
      return;
    }
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD)
    return;

  llvm::Function *F = cast<llvm::Function>(GV);

  // Perform special handling in OpenCL/CUDA mode
  if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) {
    // Use function attributes to check for kernel functions
    // By default, all functions are device functions
    if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
      // OpenCL/CUDA kernel functions get kernel metadata
      // Create !{<func-ref>, metadata !"kernel", i32 1} node
      // And kernel functions are not subject to inlining
      F->addFnAttr(llvm::Attribute::NoInline);
      if (FD->hasAttr<CUDAGlobalAttr>()) {
        SmallVector<int, 10> GCI;
        for (auto IV : llvm::enumerate(FD->parameters()))
          if (IV.value()->hasAttr<CUDAGridConstantAttr>())
            // For some reason arg indices are 1-based in NVVM
            GCI.push_back(IV.index() + 1);
        // Create !{<func-ref>, metadata !"kernel", i32 1} node
        F->setCallingConv(llvm::CallingConv::PTX_Kernel);
        addGridConstantNVVMMetadata(F, GCI);
      }
      if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
        M.handleCUDALaunchBoundsAttr(F, Attr);
    }
  }
  // Attach kernel metadata directly if compiling for NVPTX.
  if (FD->hasAttr<DeviceKernelAttr>()) {
    F->setCallingConv(llvm::CallingConv::PTX_Kernel);
  }
}

/// Append a !{GV, !"Name", i32 Operand} entry to the module-level
/// nvvm.annotations named metadata.
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
                                             StringRef Name, int Operand) {
  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get "nvvm.annotations" metadata node
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  SmallVector<llvm::Metadata *, 5> MDVals = {
      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};

  // Append metadata to nvvm.annotations
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

/// Append a !{GV, !"grid_constant", !{i32 idx, ...}} entry to
/// nvvm.annotations; the grid_constant operands are skipped entirely when
/// GridConstantArgs is empty.
void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
    llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {

  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get "nvvm.annotations" metadata node
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
  if (!GridConstantArgs.empty()) {
    SmallVector<llvm::Metadata *, 10> GCM;
    for (int I : GridConstantArgs)
      GCM.push_back(llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
    MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
                   llvm::MDNode::get(Ctx, GCM)});
  }

  // Append metadata to nvvm.annotations
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

/// Return the null pointer constant for the given pointer type. For the
/// OpenCL local address space the null value is the generic-address-space
/// null addrspacecast into the local space, rather than a plain zero pointer.
llvm::Constant *
NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
                                       llvm::PointerType *PT,
                                       QualType QT) const {
  auto &Ctx = CGM.getContext();
  if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
    return llvm::ConstantPointerNull::get(PT);

  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}
} // namespace

/// Translate __launch_bounds__ values into nvvm.* function attributes on F
/// and, optionally, report them back through the out-parameters. Any of F and
/// the out-parameters may be null; values <= 0 are ignored.
void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
                                               const CUDALaunchBoundsAttr *Attr,
                                               int32_t *MaxThreadsVal,
                                               int32_t *MinBlocksVal,
                                               int32_t *MaxClusterRankVal) {
  llvm::APSInt MaxThreads(32);
  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
  if (MaxThreads > 0) {
    if (MaxThreadsVal)
      *MaxThreadsVal = MaxThreads.getExtValue();
    if (F)
      F->addFnAttr("nvvm.maxntid", llvm::utostr(MaxThreads.getExtValue()));
  }

  // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
  // was not specified in __launch_bounds__ or if the user specified a 0 value,
  // we don't have to add a PTX directive.
  if (Attr->getMinBlocks()) {
    llvm::APSInt MinBlocks(32);
    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
    if (MinBlocks > 0) {
      if (MinBlocksVal)
        *MinBlocksVal = MinBlocks.getExtValue();
      if (F)
        F->addFnAttr("nvvm.minctasm", llvm::utostr(MinBlocks.getExtValue()));
    }
  }
  if (Attr->getMaxBlocks()) {
    llvm::APSInt MaxBlocks(32);
    MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
    if (MaxBlocks > 0) {
      if (MaxClusterRankVal)
        *MaxClusterRankVal = MaxBlocks.getExtValue();
      if (F)
        F->addFnAttr("nvvm.maxclusterrank",
                     llvm::utostr(MaxBlocks.getExtValue()));
    }
  }
}

/// Factory entry point used by the target-info dispatch in clang CodeGen.
std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
}