//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/Sema/SemaCUDA.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Overload.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/SmallVector.h"
#include <optional>
using namespace clang;

SemaCUDA::SemaCUDA(Sema &S) : SemaBase(S) {}

template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return !A->isImplicit();
  return false;
}

void SemaCUDA::PushForceHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceHostDeviceDepth++;
}

bool SemaCUDA::PopForceHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceHostDeviceDepth == 0)
    return false;
  ForceHostDeviceDepth--;
  return true;
}

ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr(
      getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  SemaRef.MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                               /*IsExecConfig=*/true);
}
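
// For illustration, a sketch of what this models: a (hypothetical) kernel
// launch such as
//   kern<<<dim3(16), dim3(256)>>>(args);
// is lowered into a call to the configuration function named by
// getConfigureFuncName() (built here with IsExecConfig=true), followed by the
// actual kernel call.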

CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CUDAFunctionTarget::InvalidTarget;

  if (HasGlobalAttr)
    return CUDAFunctionTarget::Global;

  if (HasHostAttr && HasDeviceAttr)
    return CUDAFunctionTarget::HostDevice;

  if (HasDeviceAttr)
    return CUDAFunctionTarget::Device;

  return CUDAFunctionTarget::Host;
}
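
// For example, a sketch of the mapping above (declarations are hypothetical):
//   __global__ void k();           // -> CUDAFunctionTarget::Global
//   __host__ __device__ int hd();  // -> CUDAFunctionTarget::HostDevice
//   __device__ int d();            // -> CUDAFunctionTarget::Device
//   int f();                       // no attributes -> CUDAFunctionTarget::Host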

template <typename A>
static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
    SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
    : S(S_) {
  SavedCtx = S.CurCUDATargetCtx;
  assert(K == SemaCUDA::CTCK_InitGlobalVar);
  auto *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
    auto Target = CUDAFunctionTarget::Host;
    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
      Target = CUDAFunctionTarget::Device;
    S.CurCUDATargetCtx = {Target, K, VD};
  }
}

/// IdentifyTarget - Determine the CUDA compilation target for this function.
CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
                                            bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function gets the target from CurCUDATargetCtx.
  if (D == nullptr)
    return CurCUDATargetCtx.Target;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CUDAFunctionTarget::InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CUDAFunctionTarget::Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CUDAFunctionTarget::HostDevice;
    return CUDAFunctionTarget::Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CUDAFunctionTarget::Host;
  } else if ((D->isImplicit() || !D->isUserProvided()) &&
             !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CUDAFunctionTarget::HostDevice;
  }

  return CUDAFunctionTarget::Host;
}

/// IdentifyTarget - Determine the CUDA compilation target for this variable.
SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
  if (Var->hasAttr<HIPManagedAttr>())
    return CVT_Unified;
  // Only constexpr and const variables with an implicit constant attribute
  // are emitted on both sides. Such variables are promoted to the device side
  // only if they have static constant initializers on the device side.
  if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
      Var->hasAttr<CUDAConstantAttr>() &&
      !hasExplicitAttr<CUDAConstantAttr>(Var))
    return CVT_Both;
  if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
      Var->hasAttr<CUDASharedAttr>() ||
      Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
      Var->getType()->isCUDADeviceBuiltinTextureType())
    return CVT_Device;
  // Function-scope static variables without an explicit device or constant
  // attribute are emitted
  //  - on both sides in host device functions
  //  - on device side in device or global functions
  if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
    switch (IdentifyTarget(FD)) {
    case CUDAFunctionTarget::HostDevice:
      return CVT_Both;
    case CUDAFunctionTarget::Device:
    case CUDAFunctionTarget::Global:
      return CVT_Device;
    default:
      return CVT_Host;
    }
  }
  return CVT_Host;
}
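
// For example, a sketch of the variable categories (names are hypothetical):
//   __managed__ int m;       // CVT_Unified (HIP)
//   constexpr int ce = 1;    // CVT_Both if only implicitly __constant__
//   __device__ int d;        // CVT_Device
//   int h;                   // CVT_Host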

// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |
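//
// For example, given the (hypothetical) declarations
//   __host__ void h();
//   __host__ __device__ void hd() { h(); }
// the call to h() gets CFP_SameSide when compiling for host and CFP_WrongSide
// when compiling for device (row "hd -> h" above).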

SemaCUDA::CUDAFunctionPreference
SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");

  // Treat ctor/dtor as host device function in device var initializer to allow
  // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
  // will be diagnosed by checkAllowedInitializer.
  if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
      CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
    return CFP_HostDevice;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyTarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CUDAFunctionTarget::InvalidTarget ||
      CalleeTarget == CUDAFunctionTarget::InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CUDAFunctionTarget::Global &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CUDAFunctionTarget::HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Global) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Device))
    return CFP_Native;

  // HipStdPar mode is special: assessing the legality of a device-side call to
  // a host target is deferred to a subsequent pass and cannot unambiguously be
  // adjudicated in the AST, hence we optimistically allow such calls here.
  if (getLangOpts().HIPStdPar &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device ||
       CallerTarget == CUDAFunctionTarget::HostDevice) &&
      CalleeTarget == CUDAFunctionTarget::Host)
    return CFP_HostDevice;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CUDAFunctionTarget::HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice &&
         CalleeTarget == CUDAFunctionTarget::Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CUDAFunctionTarget::Host ||
          CalleeTarget == CUDAFunctionTarget::Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened.  TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Device) ||
      (CallerTarget == CUDAFunctionTarget::Device &&
       CalleeTarget == CUDAFunctionTarget::Host) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void SemaCUDA::EraseUnwantedMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP =
      GetCFP(*llvm::max_element(Matches, [&](const Pair &M1, const Pair &M2) {
        return GetCFP(M1) < GetCFP(M2);
      }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
                                CUDAFunctionTarget Target2,
                                CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != CUDAFunctionTarget::Global);
  assert(Target2 != CUDAFunctionTarget::Global);

  if (Target1 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMemberKind CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If MemberDecl is the virtual destructor of an explicit template class
  // instantiation, it must be emitted, therefore its target needs to be
  // inferred conservatively by ignoring the implicit host/device attrs of the
  // member and parent dtors called by it. Also, it needs to be checked by the
  // deferred diag visitor.
  bool IsExpVDtor = false;
  if (isa<CXXDestructorDecl>(MemberDecl) && MemberDecl->isVirtual()) {
    if (auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(ClassDecl)) {
      TemplateSpecializationKind TSK = Spec->getTemplateSpecializationKind();
      IsExpVDtor = TSK == TSK_ExplicitInstantiationDeclaration ||
                   TSK == TSK_ExplicitInstantiationDefinition;
    }
  }
  if (IsExpVDtor)
    SemaRef.DeclsToCheckForDeferredDiags.insert(MemberDecl);

  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  std::optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  Sema::ContextRAII MethodContext(SemaRef, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(BaseClassDecl, CSM,
                                    /* ConstArg */ ConstRHS,
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget =
        IdentifyTarget(SMOR.getMethod(), IsExpVDtor);

    if (!InferredTarget) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, BaseMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << *InferredTarget << BaseMethodTarget;
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        getASTContext().getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(FieldRecDecl, CSM,
                                    /* ConstArg */ ConstRHS && !F->isMutable(),
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyTarget(SMOR.getMethod(), IsExpVDtor);

    if (!InferredTarget) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, FieldMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << *InferredTarget << FieldMethodTarget;
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget) {
    if (*InferredTarget == CUDAFunctionTarget::Device)
      NeedsH = false;
    else if (*InferredTarget == CUDAFunctionTarget::Host)
      NeedsD = false;
  }

  // We are either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));

  return false;
}

bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}
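
// For illustration, a sketch of the E.2.3.1 rule (types are hypothetical):
//   struct Empty { Empty() {} };                // empty: no params, empty body
//   struct NonEmpty { int x; NonEmpty() { x = 1; } };  // not empty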

bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined,
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // A union does not have base classes, and a union dtor does not call the
  // dtors of its data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

namespace {
enum CUDAInitializerCheckKind {
  CICK_DeviceOrConstant, // Check initializer for device/constant variable
  CICK_Shared,           // Check initializer for shared variable
};

bool IsDependentVar(VarDecl *VD) {
  if (VD->getType()->isDependentType())
    return true;
  if (const auto *Init = VD->getInit())
    return Init->isValueDependent();
  return false;
}

// Check whether a variable has an allowed initializer for a CUDA device side
// variable with global storage. \p VD may be a host variable to be checked for
// potential promotion to device side variable.
//
// CUDA/HIP allows only empty constructors as initializers for global
// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
// __shared__ variables whether they are local or not (they all are implicitly
// static in CUDA). One exception is that CUDA allows constant initializers
// for __constant__ and __device__ variables.
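//
// For example, a sketch of the rule (declarations are hypothetical):
//   __device__ int a;          // OK: no initializer
//   __device__ int b = 42;     // OK: constant initializer
//   __shared__ int c = 42;     // rejected: __shared__ must not be initialized
//   __device__ int d = rand(); // rejected: dynamic initializer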
bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD,
                                           CUDAInitializerCheckKind CheckKind) {
  assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
  assert(!IsDependentVar(VD) && "do not check dependent var");
  const Expr *Init = VD->getInit();
  auto IsEmptyInit = [&](const Expr *Init) {
    if (!Init)
      return true;
    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
      return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor());
    }
    return false;
  };
  auto IsConstantInit = [&](const Expr *Init) {
    assert(Init);
    ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(),
                                                    /*NoWrongSidedVars=*/true);
    return Init->isConstantInitializer(S.getASTContext(),
                                       VD->getType()->isReferenceType());
  };
  auto HasEmptyDtor = [&](VarDecl *VD) {
    if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
      return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor());
    return true;
  };
  if (CheckKind == CICK_Shared)
    return IsEmptyInit(Init) && HasEmptyDtor(VD);
  return S.getLangOpts().GPUAllowDeviceInit ||
         ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
}
} // namespace

void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
  // Return early if VD is inside a non-instantiated template function since
  // the implicit constructor is not defined yet.
  if (const FunctionDecl *FD =
          dyn_cast_or_null<FunctionDecl>(VD->getDeclContext());
      FD && FD->isDependentContext())
    return;

  bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
  bool IsDeviceOrConstantVar =
      !IsSharedVar &&
      (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
  if ((IsSharedVar || IsDeviceOrConstantVar) &&
      VD->getType().getQualifiers().getAddressSpace() != LangAS::Default) {
    Diag(VD->getLocation(), diag::err_cuda_address_space_gpuvar);
    VD->setInvalidDecl();
    return;
  }
  // Do not check dependent variables since the ctor/dtor/initializer are not
  // determined. Do it after instantiation.
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
      IsDependentVar(VD))
    return;
  const Expr *Init = VD->getInit();
  if (IsDeviceOrConstantVar || IsSharedVar) {
    if (HasAllowedCUDADeviceStaticInitializer(
            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
      return;
    Diag(VD->getLocation(),
         IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
        << Init->getSourceRange();
    VD->setInvalidDecl();
  } else {
    // This is a host-side global variable.  Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyTarget(InitFn);
      if (InitFnTarget != CUDAFunctionTarget::Host &&
          InitFnTarget != CUDAFunctionTarget::HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
    const FunctionDecl *Callee) {
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  if (!isImplicitHostDeviceFunction(Callee))
    return;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);

  // Record whether an implicit host device function is used on device side.
  if (CallerTarget != CUDAFunctionTarget::Device &&
      CallerTarget != CUDAFunctionTarget::Global &&
      (CallerTarget != CUDAFunctionTarget::HostDevice ||
       (isImplicitHostDeviceFunction(Caller) &&
        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
    return;

  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
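//
// For example, a usage sketch of the pragma:
//   #pragma clang force_cuda_host_device_begin
//   int block_size() { return 256; }  // treated as __host__ __device__
//   #pragma clang force_cuda_host_device_end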
void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  // If a template function has no host/device/global attributes,
  // make it an implicit host device function.
  if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
      !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
      !NewD->hasAttr<CUDAGlobalAttr>() &&
      (NewD->getDescribedFunctionTemplate() ||
       NewD->isFunctionTemplateSpecialization())) {
    NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !SemaRef.IsOverload(NewD, OldD,
                               /* UseMemberUsingDeclRules = */ false,
                               /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs).  This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!SemaRef.getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
}

// TODO: `__constant__` memory may be a limited resource for certain targets.
// A safeguard may be needed at the end of compilation pipeline if
// `__constant__` memory usage goes beyond limit.
void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) {
  // Do not promote dependent variables since the ctor/dtor/initializer are
  // not determined. Do it after instantiation.
  if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
      !VD->hasAttr<CUDASharedAttr>() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
      !IsDependentVar(VD) &&
      ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
       HasAllowedCUDADeviceStaticInitializer(*this, VD,
                                             CICK_DeviceOrConstant))) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}
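
// For example, when compiling for the device, a file-scope variable like the
// (hypothetical)
//   const int Table[] = {1, 2, 3};
// receives an implicit __constant__ attribute from the function above.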

SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
                                                           unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Global:
    case CUDAFunctionTarget::Device:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device.  Defer any errors in device
      // mode until the function is known-emitted.
      if (!getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc,
                                                     unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Host:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device.  Defer any errors in host
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here?  Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) ==
                            Sema::FunctionEmissionStatus::Emitted;
  SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                          CallerKnownEmitted] {
    switch (IdentifyPreference(Caller, Callee)) {
    case CFP_Never:
    case CFP_WrongSide:
      assert(Caller && "Never/wrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error.  Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();

  if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
    // For -fgpu-rdc, keep track of external kernels used by host functions.
    if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
        (!Caller || (!Caller->getDescribedFunctionTemplate() &&
                     getASTContext().GetGVALinkageForFunction(Caller) ==
                         GVA_StrongExternal)))
      getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
    return true;
  }

  // Avoid emitting this error twice for the same location.  Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller,
                        SemaRef)
      << IdentifyTarget(Callee) << /*function*/ 0 << Callee
      << IdentifyTarget(Caller);
  if (!Callee->getBuiltinID())
    SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                          diag::note_previous_decl, Caller, SemaRef)
        << Callee;
  return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
         DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}

// Check the wrong-sided reference capture of lambda for CUDA/HIP.
// A lambda function may capture a stack variable by reference when it is
// defined and use the capture by reference when the lambda is called. When
// the capture and use happen on different sides, the capture is invalid and
// should be diagnosed.
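//
// For example, a sketch of the invalid pattern (kernel and lambda are
// hypothetical):
//   void host_fn() {
//     int x = 0;
//     auto l = [&x] __device__ () { return x; };  // x lives on the host stack
//     kernel<<<1, 1>>>(l);  // device-side use of a host reference capture
//   }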
void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on the
  // host side. In such lambda functions, a reference capture is invalid only
  // if the lambda structure is populated by a device function or kernel and
  // then passed to and called by a host function. However, that is impossible:
  // a device function or kernel can only call a device function, and a kernel
  // cannot pass a lambda back to a host function, since we cannot define a
  // kernel argument type that can hold the lambda before the lambda itself is
  // defined.
  if (!getLangOpts().CUDAIsDevice)
    return;

  // File-scope lambda can only do init captures for global variables, which
  // results in passing by value for these global variables.
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on device side. For such lambdas, a reference capture is invalid
  // only if the lambda structure is populated by a host function then passed
  // to and called in a device function or kernel.
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
  if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, SemaRef)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    // Capture of this pointer is allowed since this pointer may be pointing to
    // managed memory which is accessible on both device and host sides. It only
    // results in invalid memory access if this pointer points to memory not
    // accessible on device side.
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::warn_maybe_capture_bad_target_this_ptr, Callee,
                          SemaRef);
  }
}

void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
}

void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyTarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyTarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature.  We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device, so
    // should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        !SemaRef.IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                            /* ConsiderCudaAttrs = */ false)) {
      if ((NewTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(NewFD) &&
             OldTarget == CUDAFunctionTarget::Device)) ||
          (OldTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(OldFD) &&
             NewTarget == CUDAFunctionTarget::Device)) ||
          (NewTarget == CUDAFunctionTarget::Global) ||
          (OldTarget == CUDAFunctionTarget::Global)) {
        Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
            << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
        NewFD->setInvalidDecl();
        break;
      }
      if ((NewTarget == CUDAFunctionTarget::Host &&
           OldTarget == CUDAFunctionTarget::Device) ||
          (NewTarget == CUDAFunctionTarget::Device &&
           OldTarget == CUDAFunctionTarget::Host)) {
        Diag(NewFD->getLocation(), diag::warn_offload_incompatible_redeclare)
            << NewTarget << OldTarget;
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
      }
    }
  }
}
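
// For example, the following (hypothetical) pair is rejected, since an HD
// function may not overload a same-signature host function:
//   __host__ __device__ int f(int);
//   int f(int);  // error: err_cuda_ovl_target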

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD);
}

std::string SemaCUDA::getConfigureFuncName() const {
  if (getLangOpts().OffloadViaLLVM)
    return "__llvmPushCallConfiguration";

  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call.
  return "cudaConfigureCall";
}

// Record any local constexpr variables that are passed one way on the host
// and another on the device.
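//
// For example, a sketch of the pattern being recorded (overloads are
// hypothetical):
//   __host__ void f(int x);           // takes the argument by value
//   __device__ void f(const int &x);  // takes the argument by reference
// A local constexpr variable passed to f() inside a lambda is recorded here,
// since the device overload would ODR-use it.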
void SemaCUDA::recordPotentialODRUsedVariable(
    MultiExprArg Arguments, OverloadCandidateSet &Candidates) {
  sema::LambdaScopeInfo *LambdaInfo = SemaRef.getCurLambda();
  if (!LambdaInfo)
    return;

  for (unsigned I = 0; I < Arguments.size(); ++I) {
    auto *DeclRef = dyn_cast<DeclRefExpr>(Arguments[I]);
    if (!DeclRef)
      continue;
    auto *Variable = dyn_cast<VarDecl>(DeclRef->getDecl());
    if (!Variable || !Variable->isLocalVarDecl() || !Variable->isConstexpr())
      continue;

    bool HostByValue = false, HostByRef = false;
    bool DeviceByValue = false, DeviceByRef = false;

    for (OverloadCandidate &Candidate : Candidates) {
      FunctionDecl *Callee = Candidate.Function;
      if (!Callee || I >= Callee->getNumParams())
        continue;

      CUDAFunctionTarget Target = IdentifyTarget(Callee);
      if (Target == CUDAFunctionTarget::InvalidTarget ||
          Target == CUDAFunctionTarget::Global)
        continue;

      bool CoversHost = (Target == CUDAFunctionTarget::Host ||
                         Target == CUDAFunctionTarget::HostDevice);
      bool CoversDevice = (Target == CUDAFunctionTarget::Device ||
                           Target == CUDAFunctionTarget::HostDevice);

      bool IsRef = Callee->getParamDecl(I)->getType()->isReferenceType();
      HostByValue |= CoversHost && !IsRef;
      HostByRef |= CoversHost && IsRef;
      DeviceByValue |= CoversDevice && !IsRef;
      DeviceByRef |= CoversDevice && IsRef;
    }

    if ((HostByValue && DeviceByRef) || (HostByRef && DeviceByValue))
      LambdaInfo->CUDAPotentialODRUsedVars.insert(Variable);
  }
}