xref: /freebsd/contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements semantic analysis for CUDA constructs.
10 ///
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Sema/SemaCUDA.h"
14 #include "clang/AST/ASTContext.h"
15 #include "clang/AST/Decl.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/Basic/Cuda.h"
18 #include "clang/Basic/TargetInfo.h"
19 #include "clang/Lex/Preprocessor.h"
20 #include "clang/Sema/Lookup.h"
21 #include "clang/Sema/ScopeInfo.h"
22 #include "clang/Sema/Sema.h"
23 #include "clang/Sema/SemaDiagnostic.h"
24 #include "clang/Sema/SemaInternal.h"
25 #include "clang/Sema/Template.h"
26 #include "llvm/ADT/STLForwardCompat.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include <optional>
29 using namespace clang;
30 
31 SemaCUDA::SemaCUDA(Sema &S) : SemaBase(S) {}
32 
33 template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
34   if (!D)
35     return false;
36   if (auto *A = D->getAttr<AttrT>())
37     return !A->isImplicit();
38   return false;
39 }
40 
41 void SemaCUDA::PushForceHostDevice() {
42   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
43   ForceHostDeviceDepth++;
44 }
45 
46 bool SemaCUDA::PopForceHostDevice() {
47   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
48   if (ForceHostDeviceDepth == 0)
49     return false;
50   ForceHostDeviceDepth--;
51   return true;
52 }
53 
54 ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
55                                          MultiExprArg ExecConfig,
56                                          SourceLocation GGGLoc) {
57   FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl();
58   if (!ConfigDecl)
59     return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
60                      << getConfigureFuncName());
61   QualType ConfigQTy = ConfigDecl->getType();
62 
63   DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr(
64       getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
65   SemaRef.MarkFunctionReferenced(LLLLoc, ConfigDecl);
66 
67   return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
68                                /*IsExecConfig=*/true);
69 }
70 
71 CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
72   bool HasHostAttr = false;
73   bool HasDeviceAttr = false;
74   bool HasGlobalAttr = false;
75   bool HasInvalidTargetAttr = false;
76   for (const ParsedAttr &AL : Attrs) {
77     switch (AL.getKind()) {
78     case ParsedAttr::AT_CUDAGlobal:
79       HasGlobalAttr = true;
80       break;
81     case ParsedAttr::AT_CUDAHost:
82       HasHostAttr = true;
83       break;
84     case ParsedAttr::AT_CUDADevice:
85       HasDeviceAttr = true;
86       break;
87     case ParsedAttr::AT_CUDAInvalidTarget:
88       HasInvalidTargetAttr = true;
89       break;
90     default:
91       break;
92     }
93   }
94 
95   if (HasInvalidTargetAttr)
96     return CUDAFunctionTarget::InvalidTarget;
97 
98   if (HasGlobalAttr)
99     return CUDAFunctionTarget::Global;
100 
101   if (HasHostAttr && HasDeviceAttr)
102     return CUDAFunctionTarget::HostDevice;
103 
104   if (HasDeviceAttr)
105     return CUDAFunctionTarget::Device;
106 
107   return CUDAFunctionTarget::Host;
108 }
109 
110 template <typename A>
111 static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
112   return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
113            return isa<A>(Attribute) &&
114                   !(IgnoreImplicitAttr && Attribute->isImplicit());
115          });
116 }
117 
118 SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
119     SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
120     : S(S_) {
121   SavedCtx = S.CurCUDATargetCtx;
122   assert(K == SemaCUDA::CTCK_InitGlobalVar);
123   auto *VD = dyn_cast_or_null<VarDecl>(D);
124   if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
125     auto Target = CUDAFunctionTarget::Host;
126     if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
127          !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
128         hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
129         hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
130       Target = CUDAFunctionTarget::Device;
131     S.CurCUDATargetCtx = {Target, K, VD};
132   }
133 }
134 
135 /// IdentifyTarget - Determine the CUDA compilation target for this function
136 CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
137                                             bool IgnoreImplicitHDAttr) {
138   // Code that lives outside a function gets the target from CurCUDATargetCtx.
139   if (D == nullptr)
140     return CurCUDATargetCtx.Target;
141 
142   if (D->hasAttr<CUDAInvalidTargetAttr>())
143     return CUDAFunctionTarget::InvalidTarget;
144 
145   if (D->hasAttr<CUDAGlobalAttr>())
146     return CUDAFunctionTarget::Global;
147 
148   if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
149     if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
150       return CUDAFunctionTarget::HostDevice;
151     return CUDAFunctionTarget::Device;
152   } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
153     return CUDAFunctionTarget::Host;
154   } else if ((D->isImplicit() || !D->isUserProvided()) &&
155              !IgnoreImplicitHDAttr) {
156     // Some implicit declarations (like intrinsic functions) are not marked.
157     // Set the most lenient target on them for maximal flexibility.
158     return CUDAFunctionTarget::HostDevice;
159   }
160 
161   return CUDAFunctionTarget::Host;
162 }
163 
164 /// IdentifyTarget - Determine the CUDA compilation target for this variable.
165 SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
166   if (Var->hasAttr<HIPManagedAttr>())
167     return CVT_Unified;
168   // Only constexpr and const variabless with implicit constant attribute
169   // are emitted on both sides. Such variables are promoted to device side
170   // only if they have static constant intializers on device side.
171   if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
172       Var->hasAttr<CUDAConstantAttr>() &&
173       !hasExplicitAttr<CUDAConstantAttr>(Var))
174     return CVT_Both;
175   if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
176       Var->hasAttr<CUDASharedAttr>() ||
177       Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
178       Var->getType()->isCUDADeviceBuiltinTextureType())
179     return CVT_Device;
180   // Function-scope static variable without explicit device or constant
181   // attribute are emitted
182   //  - on both sides in host device functions
183   //  - on device side in device or global functions
184   if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
185     switch (IdentifyTarget(FD)) {
186     case CUDAFunctionTarget::HostDevice:
187       return CVT_Both;
188     case CUDAFunctionTarget::Device:
189     case CUDAFunctionTarget::Global:
190       return CVT_Device;
191     default:
192       return CVT_Host;
193     }
194   }
195   return CVT_Host;
196 }
197 
198 // * CUDA Call preference table
199 //
200 // F - from,
201 // T - to
202 // Ph - preference in host mode
203 // Pd - preference in device mode
204 // H  - handled in (x)
205 // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
206 //
207 // | F  | T  | Ph  | Pd  |  H  |
208 // |----+----+-----+-----+-----+
209 // | d  | d  | N   | N   | (c) |
210 // | d  | g  | --  | --  | (a) |
211 // | d  | h  | --  | --  | (e) |
212 // | d  | hd | HD  | HD  | (b) |
213 // | g  | d  | N   | N   | (c) |
214 // | g  | g  | --  | --  | (a) |
215 // | g  | h  | --  | --  | (e) |
216 // | g  | hd | HD  | HD  | (b) |
217 // | h  | d  | --  | --  | (e) |
218 // | h  | g  | N   | N   | (c) |
219 // | h  | h  | N   | N   | (c) |
220 // | h  | hd | HD  | HD  | (b) |
221 // | hd | d  | WS  | SS  | (d) |
222 // | hd | g  | SS  | --  |(d/a)|
223 // | hd | h  | SS  | WS  | (d) |
224 // | hd | hd | HD  | HD  | (b) |
225 
226 SemaCUDA::CUDAFunctionPreference
227 SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
228                              const FunctionDecl *Callee) {
229   assert(Callee && "Callee must be valid.");
230 
231   // Treat ctor/dtor as host device function in device var initializer to allow
232   // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
233   // will be diagnosed by checkAllowedInitializer.
234   if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
235       CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
236       (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
237     return CFP_HostDevice;
238 
239   CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
240   CUDAFunctionTarget CalleeTarget = IdentifyTarget(Callee);
241 
242   // If one of the targets is invalid, the check always fails, no matter what
243   // the other target is.
244   if (CallerTarget == CUDAFunctionTarget::InvalidTarget ||
245       CalleeTarget == CUDAFunctionTarget::InvalidTarget)
246     return CFP_Never;
247 
248   // (a) Can't call global from some contexts until we support CUDA's
249   // dynamic parallelism.
250   if (CalleeTarget == CUDAFunctionTarget::Global &&
251       (CallerTarget == CUDAFunctionTarget::Global ||
252        CallerTarget == CUDAFunctionTarget::Device))
253     return CFP_Never;
254 
255   // (b) Calling HostDevice is OK for everyone.
256   if (CalleeTarget == CUDAFunctionTarget::HostDevice)
257     return CFP_HostDevice;
258 
259   // (c) Best case scenarios
260   if (CalleeTarget == CallerTarget ||
261       (CallerTarget == CUDAFunctionTarget::Host &&
262        CalleeTarget == CUDAFunctionTarget::Global) ||
263       (CallerTarget == CUDAFunctionTarget::Global &&
264        CalleeTarget == CUDAFunctionTarget::Device))
265     return CFP_Native;
266 
267   // HipStdPar mode is special, in that assessing whether a device side call to
268   // a host target is deferred to a subsequent pass, and cannot unambiguously be
269   // adjudicated in the AST, hence we optimistically allow them to pass here.
270   if (getLangOpts().HIPStdPar &&
271       (CallerTarget == CUDAFunctionTarget::Global ||
272        CallerTarget == CUDAFunctionTarget::Device ||
273        CallerTarget == CUDAFunctionTarget::HostDevice) &&
274       CalleeTarget == CUDAFunctionTarget::Host)
275     return CFP_HostDevice;
276 
277   // (d) HostDevice behavior depends on compilation mode.
278   if (CallerTarget == CUDAFunctionTarget::HostDevice) {
279     // It's OK to call a compilation-mode matching function from an HD one.
280     if ((getLangOpts().CUDAIsDevice &&
281          CalleeTarget == CUDAFunctionTarget::Device) ||
282         (!getLangOpts().CUDAIsDevice &&
283          (CalleeTarget == CUDAFunctionTarget::Host ||
284           CalleeTarget == CUDAFunctionTarget::Global)))
285       return CFP_SameSide;
286 
287     // Calls from HD to non-mode-matching functions (i.e., to host functions
288     // when compiling in device mode or to device functions when compiling in
289     // host mode) are allowed at the sema level, but eventually rejected if
290     // they're ever codegened.  TODO: Reject said calls earlier.
291     return CFP_WrongSide;
292   }
293 
294   // (e) Calling across device/host boundary is not something you should do.
295   if ((CallerTarget == CUDAFunctionTarget::Host &&
296        CalleeTarget == CUDAFunctionTarget::Device) ||
297       (CallerTarget == CUDAFunctionTarget::Device &&
298        CalleeTarget == CUDAFunctionTarget::Host) ||
299       (CallerTarget == CUDAFunctionTarget::Global &&
300        CalleeTarget == CUDAFunctionTarget::Host))
301     return CFP_Never;
302 
303   llvm_unreachable("All cases should've been handled by now.");
304 }
305 
306 template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
307   if (!D)
308     return false;
309   if (auto *A = D->getAttr<AttrT>())
310     return A->isImplicit();
311   return D->isImplicit();
312 }
313 
314 bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) {
315   bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
316   bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
317   return IsImplicitDevAttr && IsImplicitHostAttr;
318 }
319 
320 void SemaCUDA::EraseUnwantedMatches(
321     const FunctionDecl *Caller,
322     SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
323   if (Matches.size() <= 1)
324     return;
325 
326   using Pair = std::pair<DeclAccessPair, FunctionDecl*>;
327 
328   // Gets the CUDA function preference for a call from Caller to Match.
329   auto GetCFP = [&](const Pair &Match) {
330     return IdentifyPreference(Caller, Match.second);
331   };
332 
333   // Find the best call preference among the functions in Matches.
334   CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
335       Matches.begin(), Matches.end(),
336       [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));
337 
338   // Erase all functions with lower priority.
339   llvm::erase_if(Matches,
340                  [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
341 }
342 
343 /// When an implicitly-declared special member has to invoke more than one
344 /// base/field special member, conflicts may occur in the targets of these
345 /// members. For example, if one base's member __host__ and another's is
346 /// __device__, it's a conflict.
347 /// This function figures out if the given targets \param Target1 and
348 /// \param Target2 conflict, and if they do not it fills in
349 /// \param ResolvedTarget with a target that resolves for both calls.
350 /// \return true if there's a conflict, false otherwise.
351 static bool
352 resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
353                                 CUDAFunctionTarget Target2,
354                                 CUDAFunctionTarget *ResolvedTarget) {
355   // Only free functions and static member functions may be global.
356   assert(Target1 != CUDAFunctionTarget::Global);
357   assert(Target2 != CUDAFunctionTarget::Global);
358 
359   if (Target1 == CUDAFunctionTarget::HostDevice) {
360     *ResolvedTarget = Target2;
361   } else if (Target2 == CUDAFunctionTarget::HostDevice) {
362     *ResolvedTarget = Target1;
363   } else if (Target1 != Target2) {
364     return true;
365   } else {
366     *ResolvedTarget = Target1;
367   }
368 
369   return false;
370 }
371 
372 bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
373                                                    CXXSpecialMemberKind CSM,
374                                                    CXXMethodDecl *MemberDecl,
375                                                    bool ConstRHS,
376                                                    bool Diagnose) {
377   // If the defaulted special member is defined lexically outside of its
378   // owning class, or the special member already has explicit device or host
379   // attributes, do not infer.
380   bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
381   bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
382   bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
383   bool HasExplicitAttr =
384       (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
385       (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
386   if (!InClass || HasExplicitAttr)
387     return false;
388 
389   std::optional<CUDAFunctionTarget> InferredTarget;
390 
391   // We're going to invoke special member lookup; mark that these special
392   // members are called from this one, and not from its caller.
393   Sema::ContextRAII MethodContext(SemaRef, MemberDecl);
394 
395   // Look for special members in base classes that should be invoked from here.
396   // Infer the target of this member base on the ones it should call.
397   // Skip direct and indirect virtual bases for abstract classes.
398   llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
399   for (const auto &B : ClassDecl->bases()) {
400     if (!B.isVirtual()) {
401       Bases.push_back(&B);
402     }
403   }
404 
405   if (!ClassDecl->isAbstract()) {
406     llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
407   }
408 
409   for (const auto *B : Bases) {
410     const RecordType *BaseType = B->getType()->getAs<RecordType>();
411     if (!BaseType) {
412       continue;
413     }
414 
415     CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
416     Sema::SpecialMemberOverloadResult SMOR =
417         SemaRef.LookupSpecialMember(BaseClassDecl, CSM,
418                                     /* ConstArg */ ConstRHS,
419                                     /* VolatileArg */ false,
420                                     /* RValueThis */ false,
421                                     /* ConstThis */ false,
422                                     /* VolatileThis */ false);
423 
424     if (!SMOR.getMethod())
425       continue;
426 
427     CUDAFunctionTarget BaseMethodTarget = IdentifyTarget(SMOR.getMethod());
428     if (!InferredTarget) {
429       InferredTarget = BaseMethodTarget;
430     } else {
431       bool ResolutionError = resolveCalleeCUDATargetConflict(
432           *InferredTarget, BaseMethodTarget, &*InferredTarget);
433       if (ResolutionError) {
434         if (Diagnose) {
435           Diag(ClassDecl->getLocation(),
436                diag::note_implicit_member_target_infer_collision)
437               << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
438               << llvm::to_underlying(BaseMethodTarget);
439         }
440         MemberDecl->addAttr(
441             CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
442         return true;
443       }
444     }
445   }
446 
447   // Same as for bases, but now for special members of fields.
448   for (const auto *F : ClassDecl->fields()) {
449     if (F->isInvalidDecl()) {
450       continue;
451     }
452 
453     const RecordType *FieldType =
454         getASTContext().getBaseElementType(F->getType())->getAs<RecordType>();
455     if (!FieldType) {
456       continue;
457     }
458 
459     CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
460     Sema::SpecialMemberOverloadResult SMOR =
461         SemaRef.LookupSpecialMember(FieldRecDecl, CSM,
462                                     /* ConstArg */ ConstRHS && !F->isMutable(),
463                                     /* VolatileArg */ false,
464                                     /* RValueThis */ false,
465                                     /* ConstThis */ false,
466                                     /* VolatileThis */ false);
467 
468     if (!SMOR.getMethod())
469       continue;
470 
471     CUDAFunctionTarget FieldMethodTarget = IdentifyTarget(SMOR.getMethod());
472     if (!InferredTarget) {
473       InferredTarget = FieldMethodTarget;
474     } else {
475       bool ResolutionError = resolveCalleeCUDATargetConflict(
476           *InferredTarget, FieldMethodTarget, &*InferredTarget);
477       if (ResolutionError) {
478         if (Diagnose) {
479           Diag(ClassDecl->getLocation(),
480                diag::note_implicit_member_target_infer_collision)
481               << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
482               << llvm::to_underlying(FieldMethodTarget);
483         }
484         MemberDecl->addAttr(
485             CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
486         return true;
487       }
488     }
489   }
490 
491 
492   // If no target was inferred, mark this member as __host__ __device__;
493   // it's the least restrictive option that can be invoked from any target.
494   bool NeedsH = true, NeedsD = true;
495   if (InferredTarget) {
496     if (*InferredTarget == CUDAFunctionTarget::Device)
497       NeedsH = false;
498     else if (*InferredTarget == CUDAFunctionTarget::Host)
499       NeedsD = false;
500   }
501 
502   // We either setting attributes first time, or the inferred ones must match
503   // previously set ones.
504   if (NeedsD && !HasD)
505     MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
506   if (NeedsH && !HasH)
507     MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
508 
509   return false;
510 }
511 
512 bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
513   if (!CD->isDefined() && CD->isTemplateInstantiation())
514     SemaRef.InstantiateFunctionDefinition(Loc, CD->getFirstDecl());
515 
516   // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
517   // empty at a point in the translation unit, if it is either a
518   // trivial constructor
519   if (CD->isTrivial())
520     return true;
521 
522   // ... or it satisfies all of the following conditions:
523   // The constructor function has been defined.
524   // The constructor function has no parameters,
525   // and the function body is an empty compound statement.
526   if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
527     return false;
528 
529   // Its class has no virtual functions and no virtual base classes.
530   if (CD->getParent()->isDynamicClass())
531     return false;
532 
533   // Union ctor does not call ctors of its data members.
534   if (CD->getParent()->isUnion())
535     return true;
536 
537   // The only form of initializer allowed is an empty constructor.
538   // This will recursively check all base classes and member initializers
539   if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
540         if (const CXXConstructExpr *CE =
541                 dyn_cast<CXXConstructExpr>(CI->getInit()))
542           return isEmptyConstructor(Loc, CE->getConstructor());
543         return false;
544       }))
545     return false;
546 
547   return true;
548 }
549 
550 bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
551   // No destructor -> no problem.
552   if (!DD)
553     return true;
554 
555   if (!DD->isDefined() && DD->isTemplateInstantiation())
556     SemaRef.InstantiateFunctionDefinition(Loc, DD->getFirstDecl());
557 
558   // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
559   // empty at a point in the translation unit, if it is either a
560   // trivial constructor
561   if (DD->isTrivial())
562     return true;
563 
564   // ... or it satisfies all of the following conditions:
565   // The destructor function has been defined.
566   // and the function body is an empty compound statement.
567   if (!DD->hasTrivialBody())
568     return false;
569 
570   const CXXRecordDecl *ClassDecl = DD->getParent();
571 
572   // Its class has no virtual functions and no virtual base classes.
573   if (ClassDecl->isDynamicClass())
574     return false;
575 
576   // Union does not have base class and union dtor does not call dtors of its
577   // data members.
578   if (DD->getParent()->isUnion())
579     return true;
580 
581   // Only empty destructors are allowed. This will recursively check
582   // destructors for all base classes...
583   if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
584         if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
585           return isEmptyDestructor(Loc, RD->getDestructor());
586         return true;
587       }))
588     return false;
589 
590   // ... and member fields.
591   if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
592         if (CXXRecordDecl *RD = Field->getType()
593                                     ->getBaseElementTypeUnsafe()
594                                     ->getAsCXXRecordDecl())
595           return isEmptyDestructor(Loc, RD->getDestructor());
596         return true;
597       }))
598     return false;
599 
600   return true;
601 }
602 
603 namespace {
604 enum CUDAInitializerCheckKind {
605   CICK_DeviceOrConstant, // Check initializer for device/constant variable
606   CICK_Shared,           // Check initializer for shared variable
607 };
608 
609 bool IsDependentVar(VarDecl *VD) {
610   if (VD->getType()->isDependentType())
611     return true;
612   if (const auto *Init = VD->getInit())
613     return Init->isValueDependent();
614   return false;
615 }
616 
617 // Check whether a variable has an allowed initializer for a CUDA device side
618 // variable with global storage. \p VD may be a host variable to be checked for
619 // potential promotion to device side variable.
620 //
621 // CUDA/HIP allows only empty constructors as initializers for global
622 // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
623 // __shared__ variables whether they are local or not (they all are implicitly
624 // static in CUDA). One exception is that CUDA allows constant initializers
625 // for __constant__ and __device__ variables.
626 bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD,
627                                            CUDAInitializerCheckKind CheckKind) {
628   assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
629   assert(!IsDependentVar(VD) && "do not check dependent var");
630   const Expr *Init = VD->getInit();
631   auto IsEmptyInit = [&](const Expr *Init) {
632     if (!Init)
633       return true;
634     if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
635       return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor());
636     }
637     return false;
638   };
639   auto IsConstantInit = [&](const Expr *Init) {
640     assert(Init);
641     ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(),
642                                                     /*NoWronSidedVars=*/true);
643     return Init->isConstantInitializer(S.getASTContext(),
644                                        VD->getType()->isReferenceType());
645   };
646   auto HasEmptyDtor = [&](VarDecl *VD) {
647     if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
648       return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor());
649     return true;
650   };
651   if (CheckKind == CICK_Shared)
652     return IsEmptyInit(Init) && HasEmptyDtor(VD);
653   return S.getLangOpts().GPUAllowDeviceInit ||
654          ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
655 }
656 } // namespace
657 
658 void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
659   // Return early if VD is inside a non-instantiated template function since
660   // the implicit constructor is not defined yet.
661   if (const FunctionDecl *FD =
662           dyn_cast_or_null<FunctionDecl>(VD->getDeclContext()))
663     if (FD->isDependentContext())
664       return;
665 
666   // Do not check dependent variables since the ctor/dtor/initializer are not
667   // determined. Do it after instantiation.
668   if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
669       IsDependentVar(VD))
670     return;
671   const Expr *Init = VD->getInit();
672   bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
673   bool IsDeviceOrConstantVar =
674       !IsSharedVar &&
675       (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
676   if (IsDeviceOrConstantVar || IsSharedVar) {
677     if (HasAllowedCUDADeviceStaticInitializer(
678             *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
679       return;
680     Diag(VD->getLocation(),
681          IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
682         << Init->getSourceRange();
683     VD->setInvalidDecl();
684   } else {
685     // This is a host-side global variable.  Check that the initializer is
686     // callable from the host side.
687     const FunctionDecl *InitFn = nullptr;
688     if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
689       InitFn = CE->getConstructor();
690     } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
691       InitFn = CE->getDirectCallee();
692     }
693     if (InitFn) {
694       CUDAFunctionTarget InitFnTarget = IdentifyTarget(InitFn);
695       if (InitFnTarget != CUDAFunctionTarget::Host &&
696           InitFnTarget != CUDAFunctionTarget::HostDevice) {
697         Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
698             << llvm::to_underlying(InitFnTarget) << InitFn;
699         Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
700         VD->setInvalidDecl();
701       }
702     }
703   }
704 }
705 
706 void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
707     const FunctionDecl *Callee) {
708   FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
709   if (!Caller)
710     return;
711 
712   if (!isImplicitHostDeviceFunction(Callee))
713     return;
714 
715   CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
716 
717   // Record whether an implicit host device function is used on device side.
718   if (CallerTarget != CUDAFunctionTarget::Device &&
719       CallerTarget != CUDAFunctionTarget::Global &&
720       (CallerTarget != CUDAFunctionTarget::HostDevice ||
721        (isImplicitHostDeviceFunction(Caller) &&
722         !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
723     return;
724 
725   getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
726 }
727 
728 // With -fcuda-host-device-constexpr, an unattributed constexpr function is
729 // treated as implicitly __host__ __device__, unless:
730 //  * it is a variadic function (device-side variadic functions are not
731 //    allowed), or
732 //  * a __device__ function with this signature was already declared, in which
733 //    case in which case we output an error, unless the __device__ decl is in a
734 //    system header, in which case we leave the constexpr function unattributed.
735 //
736 // In addition, all function decls are treated as __host__ __device__ when
737 // ForceHostDeviceDepth > 0 (corresponding to code within a
738 //   #pragma clang force_cuda_host_device_begin/end
739 // pair).
740 void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD,
741                                        const LookupResult &Previous) {
742   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
743 
744   if (ForceHostDeviceDepth > 0) {
745     if (!NewD->hasAttr<CUDAHostAttr>())
746       NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
747     if (!NewD->hasAttr<CUDADeviceAttr>())
748       NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
749     return;
750   }
751 
752   // If a template function has no host/device/global attributes,
753   // make it implicitly host device function.
754   if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
755       !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
756       !NewD->hasAttr<CUDAGlobalAttr>() &&
757       (NewD->getDescribedFunctionTemplate() ||
758        NewD->isFunctionTemplateSpecialization())) {
759     NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
760     NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
761     return;
762   }
763 
764   if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
765       NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
766       NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
767     return;
768 
769   // Is D a __device__ function with the same signature as NewD, ignoring CUDA
770   // attributes?
771   auto IsMatchingDeviceFn = [&](NamedDecl *D) {
772     if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
773       D = Using->getTargetDecl();
774     FunctionDecl *OldD = D->getAsFunction();
775     return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
776            !OldD->hasAttr<CUDAHostAttr>() &&
777            !SemaRef.IsOverload(NewD, OldD,
778                                /* UseMemberUsingDeclRules = */ false,
779                                /* ConsiderCudaAttrs = */ false);
780   };
781   auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
782   if (It != Previous.end()) {
783     // We found a __device__ function with the same name and signature as NewD
784     // (ignoring CUDA attrs).  This is an error unless that function is defined
785     // in a system header, in which case we simply return without making NewD
786     // host+device.
787     NamedDecl *Match = *It;
788     if (!SemaRef.getSourceManager().isInSystemHeader(Match->getLocation())) {
789       Diag(NewD->getLocation(),
790            diag::err_cuda_unattributed_constexpr_cannot_overload_device)
791           << NewD;
792       Diag(Match->getLocation(),
793            diag::note_cuda_conflicting_device_function_declared_here);
794     }
795     return;
796   }
797 
798   NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
799   NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
800 }
801 
802 // TODO: `__constant__` memory may be a limited resource for certain targets.
803 // A safeguard may be needed at the end of compilation pipeline if
804 // `__constant__` memory usage goes beyond limit.
805 void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) {
806   // Do not promote dependent variables since the cotr/dtor/initializer are
807   // not determined. Do it after instantiation.
808   if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
809       !VD->hasAttr<CUDASharedAttr>() &&
810       (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
811       !IsDependentVar(VD) &&
812       ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
813        HasAllowedCUDADeviceStaticInitializer(*this, VD,
814                                              CICK_DeviceOrConstant))) {
815     VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
816   }
817 }
818 
819 SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
820                                                            unsigned DiagID) {
821   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
822   FunctionDecl *CurFunContext =
823       SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
824   SemaDiagnosticBuilder::Kind DiagKind = [&] {
825     if (!CurFunContext)
826       return SemaDiagnosticBuilder::K_Nop;
827     switch (CurrentTarget()) {
828     case CUDAFunctionTarget::Global:
829     case CUDAFunctionTarget::Device:
830       return SemaDiagnosticBuilder::K_Immediate;
831     case CUDAFunctionTarget::HostDevice:
832       // An HD function counts as host code if we're compiling for host, and
833       // device code if we're compiling for device.  Defer any errors in device
834       // mode until the function is known-emitted.
835       if (!getLangOpts().CUDAIsDevice)
836         return SemaDiagnosticBuilder::K_Nop;
837       if (SemaRef.IsLastErrorImmediate &&
838           getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID))
839         return SemaDiagnosticBuilder::K_Immediate;
840       return (SemaRef.getEmissionStatus(CurFunContext) ==
841               Sema::FunctionEmissionStatus::Emitted)
842                  ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
843                  : SemaDiagnosticBuilder::K_Deferred;
844     default:
845       return SemaDiagnosticBuilder::K_Nop;
846     }
847   }();
848   return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
849 }
850 
851 Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc,
852                                                      unsigned DiagID) {
853   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
854   FunctionDecl *CurFunContext =
855       SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
856   SemaDiagnosticBuilder::Kind DiagKind = [&] {
857     if (!CurFunContext)
858       return SemaDiagnosticBuilder::K_Nop;
859     switch (CurrentTarget()) {
860     case CUDAFunctionTarget::Host:
861       return SemaDiagnosticBuilder::K_Immediate;
862     case CUDAFunctionTarget::HostDevice:
863       // An HD function counts as host code if we're compiling for host, and
864       // device code if we're compiling for device.  Defer any errors in device
865       // mode until the function is known-emitted.
866       if (getLangOpts().CUDAIsDevice)
867         return SemaDiagnosticBuilder::K_Nop;
868       if (SemaRef.IsLastErrorImmediate &&
869           getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID))
870         return SemaDiagnosticBuilder::K_Immediate;
871       return (SemaRef.getEmissionStatus(CurFunContext) ==
872               Sema::FunctionEmissionStatus::Emitted)
873                  ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
874                  : SemaDiagnosticBuilder::K_Deferred;
875     default:
876       return SemaDiagnosticBuilder::K_Nop;
877     }
878   }();
879   return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
880 }
881 
882 bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
883   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
884   assert(Callee && "Callee may not be null.");
885 
886   const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
887   if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
888     return true;
889 
890   // FIXME: Is bailing out early correct here?  Should we instead assume that
891   // the caller is a global initializer?
892   FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
893   if (!Caller)
894     return true;
895 
896   // If the caller is known-emitted, mark the callee as known-emitted.
897   // Otherwise, mark the call in our call graph so we can traverse it later.
898   bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) ==
899                             Sema::FunctionEmissionStatus::Emitted;
900   SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
901                                           CallerKnownEmitted] {
902     switch (IdentifyPreference(Caller, Callee)) {
903     case CFP_Never:
904     case CFP_WrongSide:
905       assert(Caller && "Never/wrongSide calls require a non-null caller");
906       // If we know the caller will be emitted, we know this wrong-side call
907       // will be emitted, so it's an immediate error.  Otherwise, defer the
908       // error until we know the caller is emitted.
909       return CallerKnownEmitted
910                  ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
911                  : SemaDiagnosticBuilder::K_Deferred;
912     default:
913       return SemaDiagnosticBuilder::K_Nop;
914     }
915   }();
916 
917   if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
918     // For -fgpu-rdc, keep track of external kernels used by host functions.
919     if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
920         Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
921         (!Caller || (!Caller->getDescribedFunctionTemplate() &&
922                      getASTContext().GetGVALinkageForFunction(Caller) ==
923                          GVA_StrongExternal)))
924       getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
925     return true;
926   }
927 
928   // Avoid emitting this error twice for the same location.  Using a hashtable
929   // like this is unfortunate, but because we must continue parsing as normal
930   // after encountering a deferred error, it's otherwise very tricky for us to
931   // ensure that we only emit this deferred error once.
932   if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
933     return true;
934 
935   SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller,
936                         SemaRef)
937       << llvm::to_underlying(IdentifyTarget(Callee)) << /*function*/ 0 << Callee
938       << llvm::to_underlying(IdentifyTarget(Caller));
939   if (!Callee->getBuiltinID())
940     SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
941                           diag::note_previous_decl, Caller, SemaRef)
942         << Callee;
943   return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
944          DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
945 }
946 
947 // Check the wrong-sided reference capture of lambda for CUDA/HIP.
948 // A lambda function may capture a stack variable by reference when it is
949 // defined and uses the capture by reference when the lambda is called. When
950 // the capture and use happen on different sides, the capture is invalid and
951 // should be diagnosed.
952 void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee,
953                                   const sema::Capture &Capture) {
954   // In host compilation we only need to check lambda functions emitted on host
955   // side. In such lambda functions, a reference capture is invalid only
956   // if the lambda structure is populated by a device function or kernel then
957   // is passed to and called by a host function. However that is impossible,
958   // since a device function or kernel can only call a device function, also a
959   // kernel cannot pass a lambda back to a host function since we cannot
960   // define a kernel argument type which can hold the lambda before the lambda
961   // itself is defined.
962   if (!getLangOpts().CUDAIsDevice)
963     return;
964 
965   // File-scope lambda can only do init captures for global variables, which
966   // results in passing by value for these global variables.
967   FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
968   if (!Caller)
969     return;
970 
971   // In device compilation, we only need to check lambda functions which are
972   // emitted on device side. For such lambdas, a reference capture is invalid
973   // only if the lambda structure is populated by a host function then passed
974   // to and called in a device function or kernel.
975   bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
976   bool CallerIsHost =
977       !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
978   bool ShouldCheck = CalleeIsDevice && CallerIsHost;
979   if (!ShouldCheck || !Capture.isReferenceCapture())
980     return;
981   auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
982   if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
983     SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
984                           diag::err_capture_bad_target, Callee, SemaRef)
985         << Capture.getVariable();
986   } else if (Capture.isThisCapture()) {
987     // Capture of this pointer is allowed since this pointer may be pointing to
988     // managed memory which is accessible on both device and host sides. It only
989     // results in invalid memory access if this pointer points to memory not
990     // accessible on device side.
991     SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
992                           diag::warn_maybe_capture_bad_target_this_ptr, Callee,
993                           SemaRef);
994   }
995 }
996 
997 void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) {
998   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
999   if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
1000     return;
1001   Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
1002   Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
1003 }
1004 
1005 void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD,
1006                                    const LookupResult &Previous) {
1007   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
1008   CUDAFunctionTarget NewTarget = IdentifyTarget(NewFD);
1009   for (NamedDecl *OldND : Previous) {
1010     FunctionDecl *OldFD = OldND->getAsFunction();
1011     if (!OldFD)
1012       continue;
1013 
1014     CUDAFunctionTarget OldTarget = IdentifyTarget(OldFD);
1015     // Don't allow HD and global functions to overload other functions with the
1016     // same signature.  We allow overloading based on CUDA attributes so that
1017     // functions can have different implementations on the host and device, but
1018     // HD/global functions "exist" in some sense on both the host and device, so
1019     // should have the same implementation on both sides.
1020     if (NewTarget != OldTarget &&
1021         !SemaRef.IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
1022                             /* ConsiderCudaAttrs = */ false)) {
1023       if ((NewTarget == CUDAFunctionTarget::HostDevice &&
1024            !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
1025              isImplicitHostDeviceFunction(NewFD) &&
1026              OldTarget == CUDAFunctionTarget::Device)) ||
1027           (OldTarget == CUDAFunctionTarget::HostDevice &&
1028            !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
1029              isImplicitHostDeviceFunction(OldFD) &&
1030              NewTarget == CUDAFunctionTarget::Device)) ||
1031           (NewTarget == CUDAFunctionTarget::Global) ||
1032           (OldTarget == CUDAFunctionTarget::Global)) {
1033         Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
1034             << llvm::to_underlying(NewTarget) << NewFD->getDeclName()
1035             << llvm::to_underlying(OldTarget) << OldFD;
1036         Diag(OldFD->getLocation(), diag::note_previous_declaration);
1037         NewFD->setInvalidDecl();
1038         break;
1039       }
1040       if ((NewTarget == CUDAFunctionTarget::Host &&
1041            OldTarget == CUDAFunctionTarget::Device) ||
1042           (NewTarget == CUDAFunctionTarget::Device &&
1043            OldTarget == CUDAFunctionTarget::Host)) {
1044         Diag(NewFD->getLocation(), diag::warn_offload_incompatible_redeclare)
1045             << llvm::to_underlying(NewTarget) << llvm::to_underlying(OldTarget);
1046         Diag(OldFD->getLocation(), diag::note_previous_declaration);
1047       }
1048     }
1049   }
1050 }
1051 
1052 template <typename AttrTy>
1053 static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
1054                               const FunctionDecl &TemplateFD) {
1055   if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
1056     AttrTy *Clone = Attribute->clone(S.Context);
1057     Clone->setInherited(true);
1058     FD->addAttr(Clone);
1059   }
1060 }
1061 
1062 void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD,
1063                                   const FunctionTemplateDecl &TD) {
1064   const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
1065   copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD);
1066   copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD);
1067   copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD);
1068 }
1069 
1070 std::string SemaCUDA::getConfigureFuncName() const {
1071   if (getLangOpts().HIP)
1072     return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
1073                                             : "hipConfigureCall";
1074 
1075   // New CUDA kernel launch sequence.
1076   if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(),
1077                          CudaFeature::CUDA_USES_NEW_LAUNCH))
1078     return "__cudaPushCallConfiguration";
1079 
1080   // Legacy CUDA kernel configuration call
1081   return "cudaConfigureCall";
1082 }
1083