xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/Targets/NVPTX.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===- NVPTX.cpp ----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ABIInfoImpl.h"
10 #include "TargetInfo.h"
11 #include "llvm/IR/IntrinsicsNVPTX.h"
12 
13 using namespace clang;
14 using namespace clang::CodeGen;
15 
16 //===----------------------------------------------------------------------===//
17 // NVPTX ABI Implementation
18 //===----------------------------------------------------------------------===//
19 
20 namespace {
21 
22 class NVPTXTargetCodeGenInfo;
23 
24 class NVPTXABIInfo : public ABIInfo {
25   NVPTXTargetCodeGenInfo &CGInfo;
26 
27 public:
28   NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
29       : ABIInfo(CGT), CGInfo(Info) {}
30 
31   ABIArgInfo classifyReturnType(QualType RetTy) const;
32   ABIArgInfo classifyArgumentType(QualType Ty) const;
33 
34   void computeInfo(CGFunctionInfo &FI) const override;
35   RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
36                    AggValueSlot Slot) const override;
37   bool isUnsupportedType(QualType T) const;
38   ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
39 };
40 
41 class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
42 public:
43   NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
44       : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
45 
46   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
47                            CodeGen::CodeGenModule &M) const override;
48   bool shouldEmitStaticExternCAliases() const override;
49 
50   llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
51                                  llvm::PointerType *T,
52                                  QualType QT) const override;
53 
54   llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
55     // On the device side, surface reference is represented as an object handle
56     // in 64-bit integer.
57     return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
58   }
59 
60   llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
61     // On the device side, texture reference is represented as an object handle
62     // in 64-bit integer.
63     return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
64   }
65 
66   bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
67                                               LValue Src) const override {
68     emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
69     return true;
70   }
71 
72   bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
73                                               LValue Src) const override {
74     emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
75     return true;
76   }
77 
78   // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
79   // resulting MDNode to the nvvm.annotations MDNode.
80   static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
81                               int Operand);
82 
83 private:
84   static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
85                                            LValue Src) {
86     llvm::Value *Handle = nullptr;
87     llvm::Constant *C =
88         llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
89     // Lookup `addrspacecast` through the constant pointer if any.
90     if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
91       C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
92     if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
93       // Load the handle from the specific global variable using
94       // `nvvm.texsurf.handle.internal` intrinsic.
95       Handle = CGF.EmitRuntimeCall(
96           CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
97                                {GV->getType()}),
98           {GV}, "texsurf_handle");
99     } else
100       Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
101     CGF.EmitStoreOfScalar(Handle, Dst);
102   }
103 };
104 
105 /// Checks if the type is unsupported directly by the current target.
106 bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
107   ASTContext &Context = getContext();
108   if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
109     return true;
110   if (!Context.getTargetInfo().hasFloat128Type() &&
111       (T->isFloat128Type() ||
112        (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
113     return true;
114   if (const auto *EIT = T->getAs<BitIntType>())
115     return EIT->getNumBits() >
116            (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
117   if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
118       Context.getTypeSize(T) > 64U)
119     return true;
120   if (const auto *AT = T->getAsArrayTypeUnsafe())
121     return isUnsupportedType(AT->getElementType());
122   const auto *RT = T->getAs<RecordType>();
123   if (!RT)
124     return false;
125   const RecordDecl *RD = RT->getDecl();
126 
127   // If this is a C++ record, check the bases first.
128   if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
129     for (const CXXBaseSpecifier &I : CXXRD->bases())
130       if (isUnsupportedType(I.getType()))
131         return true;
132 
133   for (const FieldDecl *I : RD->fields())
134     if (isUnsupportedType(I->getType()))
135       return true;
136   return false;
137 }
138 
139 /// Coerce the given type into an array with maximum allowed size of elements.
140 ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
141                                                    unsigned MaxSize) const {
142   // Alignment and Size are measured in bits.
143   const uint64_t Size = getContext().getTypeSize(Ty);
144   const uint64_t Alignment = getContext().getTypeAlign(Ty);
145   const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
146   llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
147   const uint64_t NumElements = (Size + Div - 1) / Div;
148   return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
149 }
150 
151 ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
152   if (RetTy->isVoidType())
153     return ABIArgInfo::getIgnore();
154 
155   if (getContext().getLangOpts().OpenMP &&
156       getContext().getLangOpts().OpenMPIsTargetDevice &&
157       isUnsupportedType(RetTy))
158     return coerceToIntArrayWithLimit(RetTy, 64);
159 
160   // note: this is different from default ABI
161   if (!RetTy->isScalarType())
162     return ABIArgInfo::getDirect();
163 
164   // Treat an enum type as its underlying type.
165   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
166     RetTy = EnumTy->getDecl()->getIntegerType();
167 
168   return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
169                                                : ABIArgInfo::getDirect());
170 }
171 
172 ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
173   // Treat an enum type as its underlying type.
174   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
175     Ty = EnumTy->getDecl()->getIntegerType();
176 
177   // Return aggregates type as indirect by value
178   if (isAggregateTypeForABI(Ty)) {
179     // Under CUDA device compilation, tex/surf builtin types are replaced with
180     // object types and passed directly.
181     if (getContext().getLangOpts().CUDAIsDevice) {
182       if (Ty->isCUDADeviceBuiltinSurfaceType())
183         return ABIArgInfo::getDirect(
184             CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
185       if (Ty->isCUDADeviceBuiltinTextureType())
186         return ABIArgInfo::getDirect(
187             CGInfo.getCUDADeviceBuiltinTextureDeviceType());
188     }
189     return getNaturalAlignIndirect(Ty, /* byval */ true);
190   }
191 
192   if (const auto *EIT = Ty->getAs<BitIntType>()) {
193     if ((EIT->getNumBits() > 128) ||
194         (!getContext().getTargetInfo().hasInt128Type() &&
195          EIT->getNumBits() > 64))
196       return getNaturalAlignIndirect(Ty, /* byval */ true);
197   }
198 
199   return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
200                                             : ABIArgInfo::getDirect());
201 }
202 
203 void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
204   if (!getCXXABI().classifyReturnType(FI))
205     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
206 
207   for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
208     I.info = ArgumentsCount < FI.getNumRequiredArgs()
209                  ? classifyArgumentType(I.type)
210                  : ABIArgInfo::getDirect();
211 
212   // Always honor user-specified calling convention.
213   if (FI.getCallingConvention() != llvm::CallingConv::C)
214     return;
215 
216   FI.setEffectiveCallingConvention(getRuntimeCC());
217 }
218 
219 RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
220                                QualType Ty, AggValueSlot Slot) const {
221   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
222                           getContext().getTypeInfoInChars(Ty),
223                           CharUnits::fromQuantity(1),
224                           /*AllowHigherAlign=*/true, Slot);
225 }
226 
227 void NVPTXTargetCodeGenInfo::setTargetAttributes(
228     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
229   if (GV->isDeclaration())
230     return;
231   const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
232   if (VD) {
233     if (M.getLangOpts().CUDA) {
234       if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
235         addNVVMMetadata(GV, "surface", 1);
236       else if (VD->getType()->isCUDADeviceBuiltinTextureType())
237         addNVVMMetadata(GV, "texture", 1);
238       return;
239     }
240   }
241 
242   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
243   if (!FD) return;
244 
245   llvm::Function *F = cast<llvm::Function>(GV);
246 
247   // Perform special handling in OpenCL mode
248   if (M.getLangOpts().OpenCL) {
249     // Use OpenCL function attributes to check for kernel functions
250     // By default, all functions are device functions
251     if (FD->hasAttr<OpenCLKernelAttr>()) {
252       // OpenCL __kernel functions get kernel metadata
253       // Create !{<func-ref>, metadata !"kernel", i32 1} node
254       addNVVMMetadata(F, "kernel", 1);
255       // And kernel functions are not subject to inlining
256       F->addFnAttr(llvm::Attribute::NoInline);
257     }
258   }
259 
260   // Perform special handling in CUDA mode.
261   if (M.getLangOpts().CUDA) {
262     // CUDA __global__ functions get a kernel metadata entry.  Since
263     // __global__ functions cannot be called from the device, we do not
264     // need to set the noinline attribute.
265     if (FD->hasAttr<CUDAGlobalAttr>()) {
266       // Create !{<func-ref>, metadata !"kernel", i32 1} node
267       addNVVMMetadata(F, "kernel", 1);
268     }
269     if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
270       M.handleCUDALaunchBoundsAttr(F, Attr);
271   }
272 
273   // Attach kernel metadata directly if compiling for NVPTX.
274   if (FD->hasAttr<NVPTXKernelAttr>()) {
275     addNVVMMetadata(F, "kernel", 1);
276   }
277 }
278 
279 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
280                                              StringRef Name, int Operand) {
281   llvm::Module *M = GV->getParent();
282   llvm::LLVMContext &Ctx = M->getContext();
283 
284   // Get "nvvm.annotations" metadata node
285   llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
286 
287   llvm::Metadata *MDVals[] = {
288       llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
289       llvm::ConstantAsMetadata::get(
290           llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
291   // Append metadata to nvvm.annotations
292   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
293 }
294 
295 bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
296   return false;
297 }
298 
299 llvm::Constant *
300 NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
301                                        llvm::PointerType *PT,
302                                        QualType QT) const {
303   auto &Ctx = CGM.getContext();
304   if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
305     return llvm::ConstantPointerNull::get(PT);
306 
307   auto NPT = llvm::PointerType::get(
308       PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
309   return llvm::ConstantExpr::getAddrSpaceCast(
310       llvm::ConstantPointerNull::get(NPT), PT);
311 }
312 }
313 
314 void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
315                                                const CUDALaunchBoundsAttr *Attr,
316                                                int32_t *MaxThreadsVal,
317                                                int32_t *MinBlocksVal,
318                                                int32_t *MaxClusterRankVal) {
319   // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
320   llvm::APSInt MaxThreads(32);
321   MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
322   if (MaxThreads > 0) {
323     if (MaxThreadsVal)
324       *MaxThreadsVal = MaxThreads.getExtValue();
325     if (F) {
326       // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
327       NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
328                                               MaxThreads.getExtValue());
329     }
330   }
331 
332   // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
333   // was not specified in __launch_bounds__ or if the user specified a 0 value,
334   // we don't have to add a PTX directive.
335   if (Attr->getMinBlocks()) {
336     llvm::APSInt MinBlocks(32);
337     MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
338     if (MinBlocks > 0) {
339       if (MinBlocksVal)
340         *MinBlocksVal = MinBlocks.getExtValue();
341       if (F) {
342         // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
343         NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
344                                                 MinBlocks.getExtValue());
345       }
346     }
347   }
348   if (Attr->getMaxBlocks()) {
349     llvm::APSInt MaxBlocks(32);
350     MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
351     if (MaxBlocks > 0) {
352       if (MaxClusterRankVal)
353         *MaxClusterRankVal = MaxBlocks.getExtValue();
354       if (F) {
355         // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
356         NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
357                                                 MaxBlocks.getExtValue());
358       }
359     }
360   }
361 }
362 
363 std::unique_ptr<TargetCodeGenInfo>
364 CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
365   return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
366 }
367