xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/Targets/NVPTX.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===- NVPTX.cpp ----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ABIInfoImpl.h"
10 #include "TargetInfo.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringExtras.h"
13 #include "llvm/IR/CallingConv.h"
14 #include "llvm/IR/IntrinsicsNVPTX.h"
15 
16 using namespace clang;
17 using namespace clang::CodeGen;
18 
19 //===----------------------------------------------------------------------===//
20 // NVPTX ABI Implementation
21 //===----------------------------------------------------------------------===//
22 
23 namespace {
24 
25 class NVPTXTargetCodeGenInfo;
26 
// ABI lowering for NVPTX. Keeps a back-reference to the owning
// NVPTXTargetCodeGenInfo so argument classification can reuse the 64-bit
// CUDA surface/texture handle types that the codegen-info object defines.
class NVPTXABIInfo : public ABIInfo {
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  // Classify how a return value / argument is passed under the NVPTX ABI.
  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  // Returns true if T (or a type nested inside it) is not directly
  // representable on the current target (see definition below).
  bool isUnsupportedType(QualType T) const;
  // Coerce Ty into an array of integers each at most MaxSize bits wide.
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};
43 
// Target-specific codegen hooks for NVPTX: kernel calling convention,
// nvvm.annotations metadata, CUDA surface/texture lowering, and the
// address-space-aware null pointer.
class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  bool shouldEmitStaticExternCAliases() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, surface reference is represented as an object handle
    // in 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, texture reference is represented as an object handle
    // in 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  // Surface and texture copies share one lowering; returning true tells the
  // caller the copy has been fully emitted here.
  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  unsigned getDeviceKernelCallingConv() const override {
    return llvm::CallingConv::PTX_Kernel;
  }

  // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
  // resulting MDNode to the nvvm.annotations MDNode.
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

  // Records the (1-based) indices of __grid_constant__ kernel parameters in
  // nvvm.annotations; emits a plain annotation when the list is empty.
  static void
  addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
                              const SmallVectorImpl<int> &GridConstantArgs);

private:
  // Copies a surface/texture handle from Src to Dst. When Src is (possibly
  // behind an addrspacecast) a global variable, the handle is materialized
  // via the nvvm.texsurf.handle.internal intrinsic; otherwise it is loaded
  // as an ordinary scalar.
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
    // Lookup `addrspacecast` through the constant pointer if any.
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the specific global variable using
      // `nvvm.texsurf.handle.internal` intrinsic.
      Handle = CGF.EmitRuntimeCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               {GV->getType()}),
          {GV}, "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
    CGF.EmitStoreOfScalar(Handle, Dst);
  }
};
115 
116 /// Checks if the type is unsupported directly by the current target.
117 bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
118   ASTContext &Context = getContext();
119   if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
120     return true;
121   if (!Context.getTargetInfo().hasFloat128Type() &&
122       (T->isFloat128Type() ||
123        (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
124     return true;
125   if (const auto *EIT = T->getAs<BitIntType>())
126     return EIT->getNumBits() >
127            (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
128   if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
129       Context.getTypeSize(T) > 64U)
130     return true;
131   if (const auto *AT = T->getAsArrayTypeUnsafe())
132     return isUnsupportedType(AT->getElementType());
133   const auto *RT = T->getAs<RecordType>();
134   if (!RT)
135     return false;
136   const RecordDecl *RD = RT->getDecl();
137 
138   // If this is a C++ record, check the bases first.
139   if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
140     for (const CXXBaseSpecifier &I : CXXRD->bases())
141       if (isUnsupportedType(I.getType()))
142         return true;
143 
144   for (const FieldDecl *I : RD->fields())
145     if (isUnsupportedType(I->getType()))
146       return true;
147   return false;
148 }
149 
150 /// Coerce the given type into an array with maximum allowed size of elements.
151 ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
152                                                    unsigned MaxSize) const {
153   // Alignment and Size are measured in bits.
154   const uint64_t Size = getContext().getTypeSize(Ty);
155   const uint64_t Alignment = getContext().getTypeAlign(Ty);
156   const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
157   llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
158   const uint64_t NumElements = (Size + Div - 1) / Div;
159   return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
160 }
161 
162 ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
163   if (RetTy->isVoidType())
164     return ABIArgInfo::getIgnore();
165 
166   if (getContext().getLangOpts().OpenMP &&
167       getContext().getLangOpts().OpenMPIsTargetDevice &&
168       isUnsupportedType(RetTy))
169     return coerceToIntArrayWithLimit(RetTy, 64);
170 
171   // note: this is different from default ABI
172   if (!RetTy->isScalarType())
173     return ABIArgInfo::getDirect();
174 
175   // Treat an enum type as its underlying type.
176   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
177     RetTy = EnumTy->getDecl()->getIntegerType();
178 
179   return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
180                                                : ABIArgInfo::getDirect());
181 }
182 
183 ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
184   // Treat an enum type as its underlying type.
185   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
186     Ty = EnumTy->getDecl()->getIntegerType();
187 
188   // Return aggregates type as indirect by value
189   if (isAggregateTypeForABI(Ty)) {
190     // Under CUDA device compilation, tex/surf builtin types are replaced with
191     // object types and passed directly.
192     if (getContext().getLangOpts().CUDAIsDevice) {
193       if (Ty->isCUDADeviceBuiltinSurfaceType())
194         return ABIArgInfo::getDirect(
195             CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
196       if (Ty->isCUDADeviceBuiltinTextureType())
197         return ABIArgInfo::getDirect(
198             CGInfo.getCUDADeviceBuiltinTextureDeviceType());
199     }
200     return getNaturalAlignIndirect(
201         Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
202         /* byval */ true);
203   }
204 
205   if (const auto *EIT = Ty->getAs<BitIntType>()) {
206     if ((EIT->getNumBits() > 128) ||
207         (!getContext().getTargetInfo().hasInt128Type() &&
208          EIT->getNumBits() > 64))
209       return getNaturalAlignIndirect(
210           Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
211           /* byval */ true);
212   }
213 
214   return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
215                                             : ABIArgInfo::getDirect());
216 }
217 
218 void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
219   if (!getCXXABI().classifyReturnType(FI))
220     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
221 
222   for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
223     I.info = ArgumentsCount < FI.getNumRequiredArgs()
224                  ? classifyArgumentType(I.type)
225                  : ABIArgInfo::getDirect();
226 
227   // Always honor user-specified calling convention.
228   if (FI.getCallingConvention() != llvm::CallingConv::C)
229     return;
230 
231   FI.setEffectiveCallingConvention(getRuntimeCC());
232 }
233 
234 RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
235                                QualType Ty, AggValueSlot Slot) const {
236   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
237                           getContext().getTypeInfoInChars(Ty),
238                           CharUnits::fromQuantity(1),
239                           /*AllowHigherAlign=*/true, Slot);
240 }
241 
// Attaches NVPTX-specific attributes/metadata to a just-emitted definition:
// surface/texture annotations for CUDA globals, and kernel calling
// convention, noinline, grid_constant metadata, and launch bounds for
// kernel functions.
void NVPTXTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  // Only definitions are annotated.
  if (GV->isDeclaration())
    return;
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD) {
    if (M.getLangOpts().CUDA) {
      // CUDA surface/texture globals are tagged in nvvm.annotations.
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
        addNVVMMetadata(GV, "surface", 1);
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
        addNVVMMetadata(GV, "texture", 1);
      return;
    }
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD)
    return;

  llvm::Function *F = cast<llvm::Function>(GV);

  // Perform special handling in OpenCL/CUDA mode
  if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) {
    // Use function attributes to check for kernel functions
    // By default, all functions are device functions
    if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
      // OpenCL/CUDA kernel functions get kernel metadata
      // Create !{<func-ref>, metadata !"kernel", i32 1} node
      // And kernel functions are not subject to inlining
      F->addFnAttr(llvm::Attribute::NoInline);
      if (FD->hasAttr<CUDAGlobalAttr>()) {
        // Collect indices of __grid_constant__ parameters.
        SmallVector<int, 10> GCI;
        for (auto IV : llvm::enumerate(FD->parameters()))
          if (IV.value()->hasAttr<CUDAGridConstantAttr>())
            // For some reason arg indices are 1-based in NVVM
            GCI.push_back(IV.index() + 1);
        // Create !{<func-ref>, metadata !"kernel", i32 1} node
        F->setCallingConv(llvm::CallingConv::PTX_Kernel);
        addGridConstantNVVMMetadata(F, GCI);
      }
      if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
        M.handleCUDALaunchBoundsAttr(F, Attr);
    }
  }
  // Attach kernel metadata directly if compiling for NVPTX.
  // (This also covers kernels outside OpenCL/CUDA mode, e.g. SYCL/OpenMP.)
  if (FD->hasAttr<DeviceKernelAttr>()) {
    F->setCallingConv(llvm::CallingConv::PTX_Kernel);
  }
}
291 
292 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
293                                              StringRef Name, int Operand) {
294   llvm::Module *M = GV->getParent();
295   llvm::LLVMContext &Ctx = M->getContext();
296 
297   // Get "nvvm.annotations" metadata node
298   llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
299 
300   SmallVector<llvm::Metadata *, 5> MDVals = {
301       llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
302       llvm::ConstantAsMetadata::get(
303           llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
304 
305   // Append metadata to nvvm.annotations
306   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
307 }
308 
309 void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
310     llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {
311 
312   llvm::Module *M = GV->getParent();
313   llvm::LLVMContext &Ctx = M->getContext();
314 
315   // Get "nvvm.annotations" metadata node
316   llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
317 
318   SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
319   if (!GridConstantArgs.empty()) {
320     SmallVector<llvm::Metadata *, 10> GCM;
321     for (int I : GridConstantArgs)
322       GCM.push_back(llvm::ConstantAsMetadata::get(
323           llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
324     MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
325                    llvm::MDNode::get(Ctx, GCM)});
326   }
327 
328   // Append metadata to nvvm.annotations
329   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
330 }
331 
// NVPTX does not emit aliases for static extern "C" functions.
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}
335 
336 llvm::Constant *
337 NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
338                                        llvm::PointerType *PT,
339                                        QualType QT) const {
340   auto &Ctx = CGM.getContext();
341   if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
342     return llvm::ConstantPointerNull::get(PT);
343 
344   auto NPT = llvm::PointerType::get(
345       PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
346   return llvm::ConstantExpr::getAddrSpaceCast(
347       llvm::ConstantPointerNull::get(NPT), PT);
348 }
349 } // namespace
350 
351 void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
352                                                const CUDALaunchBoundsAttr *Attr,
353                                                int32_t *MaxThreadsVal,
354                                                int32_t *MinBlocksVal,
355                                                int32_t *MaxClusterRankVal) {
356   llvm::APSInt MaxThreads(32);
357   MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
358   if (MaxThreads > 0) {
359     if (MaxThreadsVal)
360       *MaxThreadsVal = MaxThreads.getExtValue();
361     if (F)
362       F->addFnAttr("nvvm.maxntid", llvm::utostr(MaxThreads.getExtValue()));
363   }
364 
365   // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
366   // was not specified in __launch_bounds__ or if the user specified a 0 value,
367   // we don't have to add a PTX directive.
368   if (Attr->getMinBlocks()) {
369     llvm::APSInt MinBlocks(32);
370     MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
371     if (MinBlocks > 0) {
372       if (MinBlocksVal)
373         *MinBlocksVal = MinBlocks.getExtValue();
374       if (F)
375         F->addFnAttr("nvvm.minctasm", llvm::utostr(MinBlocks.getExtValue()));
376     }
377   }
378   if (Attr->getMaxBlocks()) {
379     llvm::APSInt MaxBlocks(32);
380     MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
381     if (MaxBlocks > 0) {
382       if (MaxClusterRankVal)
383         *MaxClusterRankVal = MaxBlocks.getExtValue();
384       if (F)
385         F->addFnAttr("nvvm.maxclusterrank",
386                      llvm::utostr(MaxBlocks.getExtValue()));
387     }
388   }
389 }
390 
391 std::unique_ptr<TargetCodeGenInfo>
392 CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
393   return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
394 }
395