1 //===- NVPTX.cpp ----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "ABIInfoImpl.h"
10 #include "TargetInfo.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringExtras.h"
13 #include "llvm/IR/CallingConv.h"
14 #include "llvm/IR/IntrinsicsNVPTX.h"
15
16 using namespace clang;
17 using namespace clang::CodeGen;
18
19 //===----------------------------------------------------------------------===//
20 // NVPTX ABI Implementation
21 //===----------------------------------------------------------------------===//
22
23 namespace {
24
25 class NVPTXTargetCodeGenInfo;
26
27 class NVPTXABIInfo : public ABIInfo {
28 NVPTXTargetCodeGenInfo &CGInfo;
29
30 public:
NVPTXABIInfo(CodeGenTypes & CGT,NVPTXTargetCodeGenInfo & Info)31 NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
32 : ABIInfo(CGT), CGInfo(Info) {}
33
34 ABIArgInfo classifyReturnType(QualType RetTy) const;
35 ABIArgInfo classifyArgumentType(QualType Ty) const;
36
37 void computeInfo(CGFunctionInfo &FI) const override;
38 RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
39 AggValueSlot Slot) const override;
40 bool isUnsupportedType(QualType T) const;
41 ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
42 };
43
44 class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
45 public:
NVPTXTargetCodeGenInfo(CodeGenTypes & CGT)46 NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
47 : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
48
49 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
50 CodeGen::CodeGenModule &M) const override;
51 bool shouldEmitStaticExternCAliases() const override;
52
53 llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
54 llvm::PointerType *T,
55 QualType QT) const override;
56
getCUDADeviceBuiltinSurfaceDeviceType() const57 llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
58 // On the device side, surface reference is represented as an object handle
59 // in 64-bit integer.
60 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
61 }
62
getCUDADeviceBuiltinTextureDeviceType() const63 llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
64 // On the device side, texture reference is represented as an object handle
65 // in 64-bit integer.
66 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
67 }
68
emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction & CGF,LValue Dst,LValue Src) const69 bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
70 LValue Src) const override {
71 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
72 return true;
73 }
74
emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction & CGF,LValue Dst,LValue Src) const75 bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
76 LValue Src) const override {
77 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
78 return true;
79 }
80
getDeviceKernelCallingConv() const81 unsigned getDeviceKernelCallingConv() const override {
82 return llvm::CallingConv::PTX_Kernel;
83 }
84
85 // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
86 // resulting MDNode to the nvvm.annotations MDNode.
87 static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
88 int Operand);
89
90 static void
91 addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
92 const SmallVectorImpl<int> &GridConstantArgs);
93
94 private:
emitBuiltinSurfTexDeviceCopy(CodeGenFunction & CGF,LValue Dst,LValue Src)95 static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
96 LValue Src) {
97 llvm::Value *Handle = nullptr;
98 llvm::Constant *C =
99 llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
100 // Lookup `addrspacecast` through the constant pointer if any.
101 if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
102 C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
103 if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
104 // Load the handle from the specific global variable using
105 // `nvvm.texsurf.handle.internal` intrinsic.
106 Handle = CGF.EmitRuntimeCall(
107 CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
108 {GV->getType()}),
109 {GV}, "texsurf_handle");
110 } else
111 Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
112 CGF.EmitStoreOfScalar(Handle, Dst);
113 }
114 };
115
116 /// Checks if the type is unsupported directly by the current target.
isUnsupportedType(QualType T) const117 bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
118 ASTContext &Context = getContext();
119 if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
120 return true;
121 if (!Context.getTargetInfo().hasFloat128Type() &&
122 (T->isFloat128Type() ||
123 (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
124 return true;
125 if (const auto *EIT = T->getAs<BitIntType>())
126 return EIT->getNumBits() >
127 (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
128 if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
129 Context.getTypeSize(T) > 64U)
130 return true;
131 if (const auto *AT = T->getAsArrayTypeUnsafe())
132 return isUnsupportedType(AT->getElementType());
133 const auto *RT = T->getAs<RecordType>();
134 if (!RT)
135 return false;
136 const RecordDecl *RD = RT->getDecl();
137
138 // If this is a C++ record, check the bases first.
139 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
140 for (const CXXBaseSpecifier &I : CXXRD->bases())
141 if (isUnsupportedType(I.getType()))
142 return true;
143
144 for (const FieldDecl *I : RD->fields())
145 if (isUnsupportedType(I->getType()))
146 return true;
147 return false;
148 }
149
150 /// Coerce the given type into an array with maximum allowed size of elements.
coerceToIntArrayWithLimit(QualType Ty,unsigned MaxSize) const151 ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
152 unsigned MaxSize) const {
153 // Alignment and Size are measured in bits.
154 const uint64_t Size = getContext().getTypeSize(Ty);
155 const uint64_t Alignment = getContext().getTypeAlign(Ty);
156 const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
157 llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
158 const uint64_t NumElements = (Size + Div - 1) / Div;
159 return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
160 }
161
classifyReturnType(QualType RetTy) const162 ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
163 if (RetTy->isVoidType())
164 return ABIArgInfo::getIgnore();
165
166 if (getContext().getLangOpts().OpenMP &&
167 getContext().getLangOpts().OpenMPIsTargetDevice &&
168 isUnsupportedType(RetTy))
169 return coerceToIntArrayWithLimit(RetTy, 64);
170
171 // note: this is different from default ABI
172 if (!RetTy->isScalarType())
173 return ABIArgInfo::getDirect();
174
175 // Treat an enum type as its underlying type.
176 if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
177 RetTy = EnumTy->getDecl()->getIntegerType();
178
179 return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
180 : ABIArgInfo::getDirect());
181 }
182
classifyArgumentType(QualType Ty) const183 ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
184 // Treat an enum type as its underlying type.
185 if (const EnumType *EnumTy = Ty->getAs<EnumType>())
186 Ty = EnumTy->getDecl()->getIntegerType();
187
188 // Return aggregates type as indirect by value
189 if (isAggregateTypeForABI(Ty)) {
190 // Under CUDA device compilation, tex/surf builtin types are replaced with
191 // object types and passed directly.
192 if (getContext().getLangOpts().CUDAIsDevice) {
193 if (Ty->isCUDADeviceBuiltinSurfaceType())
194 return ABIArgInfo::getDirect(
195 CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
196 if (Ty->isCUDADeviceBuiltinTextureType())
197 return ABIArgInfo::getDirect(
198 CGInfo.getCUDADeviceBuiltinTextureDeviceType());
199 }
200 return getNaturalAlignIndirect(
201 Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
202 /* byval */ true);
203 }
204
205 if (const auto *EIT = Ty->getAs<BitIntType>()) {
206 if ((EIT->getNumBits() > 128) ||
207 (!getContext().getTargetInfo().hasInt128Type() &&
208 EIT->getNumBits() > 64))
209 return getNaturalAlignIndirect(
210 Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
211 /* byval */ true);
212 }
213
214 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
215 : ABIArgInfo::getDirect());
216 }
217
computeInfo(CGFunctionInfo & FI) const218 void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
219 if (!getCXXABI().classifyReturnType(FI))
220 FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
221
222 for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
223 I.info = ArgumentsCount < FI.getNumRequiredArgs()
224 ? classifyArgumentType(I.type)
225 : ABIArgInfo::getDirect();
226
227 // Always honor user-specified calling convention.
228 if (FI.getCallingConvention() != llvm::CallingConv::C)
229 return;
230
231 FI.setEffectiveCallingConvention(getRuntimeCC());
232 }
233
EmitVAArg(CodeGenFunction & CGF,Address VAListAddr,QualType Ty,AggValueSlot Slot) const234 RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
235 QualType Ty, AggValueSlot Slot) const {
236 return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
237 getContext().getTypeInfoInChars(Ty),
238 CharUnits::fromQuantity(1),
239 /*AllowHigherAlign=*/true, Slot);
240 }
241
setTargetAttributes(const Decl * D,llvm::GlobalValue * GV,CodeGen::CodeGenModule & M) const242 void NVPTXTargetCodeGenInfo::setTargetAttributes(
243 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
244 if (GV->isDeclaration())
245 return;
246 const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
247 if (VD) {
248 if (M.getLangOpts().CUDA) {
249 if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
250 addNVVMMetadata(GV, "surface", 1);
251 else if (VD->getType()->isCUDADeviceBuiltinTextureType())
252 addNVVMMetadata(GV, "texture", 1);
253 return;
254 }
255 }
256
257 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
258 if (!FD)
259 return;
260
261 llvm::Function *F = cast<llvm::Function>(GV);
262
263 // Perform special handling in OpenCL/CUDA mode
264 if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) {
265 // Use function attributes to check for kernel functions
266 // By default, all functions are device functions
267 if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
268 // OpenCL/CUDA kernel functions get kernel metadata
269 // Create !{<func-ref>, metadata !"kernel", i32 1} node
270 // And kernel functions are not subject to inlining
271 F->addFnAttr(llvm::Attribute::NoInline);
272 if (FD->hasAttr<CUDAGlobalAttr>()) {
273 SmallVector<int, 10> GCI;
274 for (auto IV : llvm::enumerate(FD->parameters()))
275 if (IV.value()->hasAttr<CUDAGridConstantAttr>())
276 // For some reason arg indices are 1-based in NVVM
277 GCI.push_back(IV.index() + 1);
278 // Create !{<func-ref>, metadata !"kernel", i32 1} node
279 F->setCallingConv(llvm::CallingConv::PTX_Kernel);
280 addGridConstantNVVMMetadata(F, GCI);
281 }
282 if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
283 M.handleCUDALaunchBoundsAttr(F, Attr);
284 }
285 }
286 // Attach kernel metadata directly if compiling for NVPTX.
287 if (FD->hasAttr<DeviceKernelAttr>()) {
288 F->setCallingConv(llvm::CallingConv::PTX_Kernel);
289 }
290 }
291
addNVVMMetadata(llvm::GlobalValue * GV,StringRef Name,int Operand)292 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
293 StringRef Name, int Operand) {
294 llvm::Module *M = GV->getParent();
295 llvm::LLVMContext &Ctx = M->getContext();
296
297 // Get "nvvm.annotations" metadata node
298 llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
299
300 SmallVector<llvm::Metadata *, 5> MDVals = {
301 llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
302 llvm::ConstantAsMetadata::get(
303 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
304
305 // Append metadata to nvvm.annotations
306 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
307 }
308
addGridConstantNVVMMetadata(llvm::GlobalValue * GV,const SmallVectorImpl<int> & GridConstantArgs)309 void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
310 llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {
311
312 llvm::Module *M = GV->getParent();
313 llvm::LLVMContext &Ctx = M->getContext();
314
315 // Get "nvvm.annotations" metadata node
316 llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
317
318 SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
319 if (!GridConstantArgs.empty()) {
320 SmallVector<llvm::Metadata *, 10> GCM;
321 for (int I : GridConstantArgs)
322 GCM.push_back(llvm::ConstantAsMetadata::get(
323 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
324 MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
325 llvm::MDNode::get(Ctx, GCM)});
326 }
327
328 // Append metadata to nvvm.annotations
329 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
330 }
331
shouldEmitStaticExternCAliases() const332 bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
333 return false;
334 }
335
336 llvm::Constant *
getNullPointer(const CodeGen::CodeGenModule & CGM,llvm::PointerType * PT,QualType QT) const337 NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
338 llvm::PointerType *PT,
339 QualType QT) const {
340 auto &Ctx = CGM.getContext();
341 if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
342 return llvm::ConstantPointerNull::get(PT);
343
344 auto NPT = llvm::PointerType::get(
345 PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
346 return llvm::ConstantExpr::getAddrSpaceCast(
347 llvm::ConstantPointerNull::get(NPT), PT);
348 }
349 } // namespace
350
handleCUDALaunchBoundsAttr(llvm::Function * F,const CUDALaunchBoundsAttr * Attr,int32_t * MaxThreadsVal,int32_t * MinBlocksVal,int32_t * MaxClusterRankVal)351 void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
352 const CUDALaunchBoundsAttr *Attr,
353 int32_t *MaxThreadsVal,
354 int32_t *MinBlocksVal,
355 int32_t *MaxClusterRankVal) {
356 llvm::APSInt MaxThreads(32);
357 MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
358 if (MaxThreads > 0) {
359 if (MaxThreadsVal)
360 *MaxThreadsVal = MaxThreads.getExtValue();
361 if (F)
362 F->addFnAttr("nvvm.maxntid", llvm::utostr(MaxThreads.getExtValue()));
363 }
364
365 // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
366 // was not specified in __launch_bounds__ or if the user specified a 0 value,
367 // we don't have to add a PTX directive.
368 if (Attr->getMinBlocks()) {
369 llvm::APSInt MinBlocks(32);
370 MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
371 if (MinBlocks > 0) {
372 if (MinBlocksVal)
373 *MinBlocksVal = MinBlocks.getExtValue();
374 if (F)
375 F->addFnAttr("nvvm.minctasm", llvm::utostr(MinBlocks.getExtValue()));
376 }
377 }
378 if (Attr->getMaxBlocks()) {
379 llvm::APSInt MaxBlocks(32);
380 MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
381 if (MaxBlocks > 0) {
382 if (MaxClusterRankVal)
383 *MaxClusterRankVal = MaxBlocks.getExtValue();
384 if (F)
385 F->addFnAttr("nvvm.maxclusterrank",
386 llvm::utostr(MaxBlocks.getExtValue()));
387 }
388 }
389 }
390
391 std::unique_ptr<TargetCodeGenInfo>
createNVPTXTargetCodeGenInfo(CodeGenModule & CGM)392 CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
393 return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
394 }
395