//===- NVPTX.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class NVPTXTargetCodeGenInfo;

class NVPTXABIInfo : public ABIInfo {
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  bool isUnsupportedType(QualType T) const;
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};

class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  bool shouldEmitStaticExternCAliases() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, a surface reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, a texture reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
  // resulting MDNode to the nvvm.annotations MDNode.
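  // For example, for a kernel function @foo (a hypothetical name used for
  // illustration), addNVVMMetadata(F, "kernel", 1) appends an entry of the
  // form !{ptr @foo, !"kernel", i32 1} to the !nvvm.annotations named
  // metadata node.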
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

private:
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
    // Look through an `addrspacecast` on the constant pointer, if any.
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the specific global variable using the
      // `nvvm.texsurf.handle.internal` intrinsic.
      Handle = CGF.EmitRuntimeCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               {GV->getType()}),
          {GV}, "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
    CGF.EmitStoreOfScalar(Handle, Dst);
  }
};

/// Checks if the type is unsupported directly by the current target.
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
  ASTContext &Context = getContext();
  if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
    return true;
  if (!Context.getTargetInfo().hasFloat128Type() &&
      (T->isFloat128Type() ||
       (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
    return true;
  if (const auto *EIT = T->getAs<BitIntType>())
    return EIT->getNumBits() >
           (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
  if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
      Context.getTypeSize(T) > 64U)
    return true;
  if (const auto *AT = T->getAsArrayTypeUnsafe())
    return isUnsupportedType(AT->getElementType());
  const auto *RT = T->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const CXXBaseSpecifier &I : CXXRD->bases())
      if (isUnsupportedType(I.getType()))
        return true;

  for (const FieldDecl *I : RD->fields())
    if (isUnsupportedType(I->getType()))
      return true;
  return false;
}

/// Coerce the given type into an array of integers whose element size is at
/// most MaxSize bits.
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
                                                   unsigned MaxSize) const {
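  // For example, with MaxSize = 64, a 16-byte struct aligned to 8 bytes
  // (Size = 128 bits, Alignment = 64 bits) is coerced to [2 x i64].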
  // Alignment and Size are measured in bits.
  const uint64_t Size = getContext().getTypeSize(Ty);
  const uint64_t Alignment = getContext().getTypeAlign(Ty);
  const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
  llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
  const uint64_t NumElements = (Size + Div - 1) / Div;
  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}

ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (getContext().getLangOpts().OpenMP &&
      getContext().getLangOpts().OpenMPIsTargetDevice &&
      isUnsupportedType(RetTy))
    return coerceToIntArrayWithLimit(RetTy, 64);

  // Note: this is different from the default ABI; non-scalar return types are
  // returned directly rather than indirectly.
  if (!RetTy->isScalarType())
    return ABIArgInfo::getDirect();

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  // Pass aggregate types indirectly, by value.
  if (isAggregateTypeForABI(Ty)) {
    // Under CUDA device compilation, tex/surf builtin types are replaced with
    // object types and passed directly.
    if (getContext().getLangOpts().CUDAIsDevice) {
      if (Ty->isCUDADeviceBuiltinSurfaceType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
      if (Ty->isCUDADeviceBuiltinTextureType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinTextureDeviceType());
    }
    return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if ((EIT->getNumBits() > 128) ||
        (!getContext().getTargetInfo().hasInt128Type() &&
         EIT->getNumBits() > 64))
      return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
  }

  return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
                                            : ABIArgInfo::getDirect());
}

void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

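  // Only the required arguments are classified; variadic arguments beyond
  // them are passed directly.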
  for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
    I.info = ArgumentsCount < FI.getNumRequiredArgs()
                 ? classifyArgumentType(I.type)
                 : ABIArgInfo::getDirect();

  // Always honor user-specified calling convention.
  if (FI.getCallingConvention() != llvm::CallingConv::C)
    return;

  FI.setEffectiveCallingConvention(getRuntimeCC());
}

RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty, AggValueSlot Slot) const {
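  // Values are read from the va_list at their natural alignment; the minimum
  // slot size and alignment is a single byte.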
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(1),
                          /*AllowHigherAlign=*/true, Slot);
}

void NVPTXTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (GV->isDeclaration())
    return;
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD) {
    if (M.getLangOpts().CUDA) {
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
        addNVVMMetadata(GV, "surface", 1);
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
        addNVVMMetadata(GV, "texture", 1);
      return;
    }
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD)
    return;

  llvm::Function *F = cast<llvm::Function>(GV);

  // Perform special handling in OpenCL mode.
  if (M.getLangOpts().OpenCL) {
    // Use OpenCL function attributes to check for kernel functions; by
    // default, all functions are device functions.
    if (FD->hasAttr<OpenCLKernelAttr>()) {
      // OpenCL __kernel functions get kernel metadata.
      // Create !{<func-ref>, metadata !"kernel", i32 1} node.
      addNVVMMetadata(F, "kernel", 1);
      // And kernel functions are not subject to inlining.
      F->addFnAttr(llvm::Attribute::NoInline);
    }
  }

  // Perform special handling in CUDA mode.
  if (M.getLangOpts().CUDA) {
    // CUDA __global__ functions get a kernel metadata entry. Since
    // __global__ functions cannot be called from the device, we do not
    // need to set the noinline attribute.
    if (FD->hasAttr<CUDAGlobalAttr>()) {
      // Create !{<func-ref>, metadata !"kernel", i32 1} node.
      addNVVMMetadata(F, "kernel", 1);
    }
    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
      M.handleCUDALaunchBoundsAttr(F, Attr);
  }

  // Attach kernel metadata directly if compiling for NVPTX.
  if (FD->hasAttr<NVPTXKernelAttr>())
    addNVVMMetadata(F, "kernel", 1);
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
                                             StringRef Name, int Operand) {
  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get the "nvvm.annotations" metadata node.
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  llvm::Metadata *MDVals[] = {
      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
  // Append metadata to nvvm.annotations.
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

llvm::Constant *
NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
                                       llvm::PointerType *PT,
                                       QualType QT) const {
  auto &Ctx = CGM.getContext();
  if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
    return llvm::ConstantPointerNull::get(PT);

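  // A null pointer in the local address space is emitted as the generic null
  // pointer cast into the local address space, e.g.
  //   addrspacecast (ptr null to ptr addrspace(3))
  // rather than as a zero bit-pattern in that address space.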
  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}
} // namespace

void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
                                               const CUDALaunchBoundsAttr *Attr,
                                               int32_t *MaxThreadsVal,
                                               int32_t *MinBlocksVal,
                                               int32_t *MaxClusterRankVal) {
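  // For example, __launch_bounds__(256, 4) yields a "maxntidx" annotation of
  // 256 and a "minctasm" annotation of 4 on the kernel.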
  llvm::APSInt MaxThreads(32);
  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
  if (MaxThreads > 0) {
    if (MaxThreadsVal)
      *MaxThreadsVal = MaxThreads.getExtValue();
    if (F) {
      // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node.
      NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
                                              MaxThreads.getExtValue());
    }
  }

  // min and max blocks are optional arguments for CUDALaunchBoundsAttr. If
  // they were not specified in __launch_bounds__ or the user specified a 0
  // value, we don't have to add a PTX directive.
  if (Attr->getMinBlocks()) {
    llvm::APSInt MinBlocks(32);
    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
    if (MinBlocks > 0) {
      if (MinBlocksVal)
        *MinBlocksVal = MinBlocks.getExtValue();
      if (F) {
        // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node.
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
                                                MinBlocks.getExtValue());
      }
    }
  }
  if (Attr->getMaxBlocks()) {
    llvm::APSInt MaxBlocks(32);
    MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
    if (MaxBlocks > 0) {
      if (MaxClusterRankVal)
        *MaxClusterRankVal = MaxBlocks.getExtValue();
      if (F) {
        // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node.
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
                                                MaxBlocks.getExtValue());
      }
    }
  }
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
}