//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/Sema/SemaCUDA.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/SmallVector.h"
#include <optional>
using namespace clang;

SemaCUDA::SemaCUDA(Sema &S) : SemaBase(S) {}

template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return !A->isImplicit();
  return false;
}

void SemaCUDA::PushForceHostDevice() {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  ForceHostDeviceDepth++;
}

bool SemaCUDA::PopForceHostDevice() {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  if (ForceHostDeviceDepth == 0)
    return false;
  ForceHostDeviceDepth--;
  return true;
}

ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr(
      getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  SemaRef.MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                               /*IsExecConfig=*/true);
}

CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CUDAFunctionTarget::InvalidTarget;

  if (HasGlobalAttr)
    return CUDAFunctionTarget::Global;

  if (HasHostAttr && HasDeviceAttr)
    return CUDAFunctionTarget::HostDevice;

  if (HasDeviceAttr)
    return CUDAFunctionTarget::Device;

  return CUDAFunctionTarget::Host;
}

template <typename A>
static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() &&
         llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
    SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
    : S(S_) {
  SavedCtx = S.CurCUDATargetCtx;
  assert(K == SemaCUDA::CTCK_InitGlobalVar);
  auto *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
    auto Target = CUDAFunctionTarget::Host;
    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
      Target = CUDAFunctionTarget::Device;
    S.CurCUDATargetCtx = {Target, K, VD};
  }
}

/// IdentifyTarget - Determine the CUDA compilation target for this function.
CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
                                            bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function gets the target from CurCUDATargetCtx.
  if (D == nullptr)
    return CurCUDATargetCtx.Target;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CUDAFunctionTarget::InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CUDAFunctionTarget::Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CUDAFunctionTarget::HostDevice;
    return CUDAFunctionTarget::Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CUDAFunctionTarget::Host;
  } else if ((D->isImplicit() || !D->isUserProvided()) &&
             !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CUDAFunctionTarget::HostDevice;
  }

  return CUDAFunctionTarget::Host;
}

/// IdentifyTarget - Determine the CUDA compilation target for this variable.
SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
  if (Var->hasAttr<HIPManagedAttr>())
    return CVT_Unified;
  // Only constexpr and const variables with implicit constant attribute
  // are emitted on both sides. Such variables are promoted to device side
  // only if they have static constant initializers on device side.
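  // For illustration (hypothetical CUDA snippet; in device compilation with
  // the default options):
  //   constexpr int N = 16;    // implicit __constant__ attribute -> CVT_Both
  //   __constant__ int M = 16; // explicit attribute              -> CVT_Device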
  if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
      Var->hasAttr<CUDAConstantAttr>() &&
      !hasExplicitAttr<CUDAConstantAttr>(Var))
    return CVT_Both;
  if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
      Var->hasAttr<CUDASharedAttr>() ||
      Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
      Var->getType()->isCUDADeviceBuiltinTextureType())
    return CVT_Device;
  // Function-scope static variables without explicit device or constant
  // attribute are emitted
  //  - on both sides in host device functions
  //  - on device side in device or global functions
  if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
    switch (IdentifyTarget(FD)) {
    case CUDAFunctionTarget::HostDevice:
      return CVT_Both;
    case CUDAFunctionTarget::Device:
    case CUDAFunctionTarget::Global:
      return CVT_Device;
    default:
      return CVT_Host;
    }
  }
  return CVT_Host;
}

// * CUDA Call preference table
//
// F  - from,
// T  - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

SemaCUDA::CUDAFunctionPreference
SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");

  // Treat ctor/dtor as host device function in device var initializer to allow
  // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
  // will be diagnosed by checkAllowedInitializer.
  if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
      CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
    return CFP_HostDevice;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyTarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CUDAFunctionTarget::InvalidTarget ||
      CalleeTarget == CUDAFunctionTarget::InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CUDAFunctionTarget::Global &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
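  // For illustration (hypothetical CUDA snippet): given
  //   __host__ __device__ int hd();
  // calls to hd() from a __global__ kernel, a __device__ function, or a plain
  // host function all take this branch and get CFP_HostDevice.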
  if (CalleeTarget == CUDAFunctionTarget::HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Global) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Device))
    return CFP_Native;

  // HipStdPar mode is special, in that assessing whether a device-side call to
  // a host target is valid is deferred to a subsequent pass and cannot be
  // unambiguously adjudicated in the AST; hence we optimistically allow such
  // calls to pass here.
  if (getLangOpts().HIPStdPar &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device ||
       CallerTarget == CUDAFunctionTarget::HostDevice) &&
      CalleeTarget == CUDAFunctionTarget::Host)
    return CFP_HostDevice;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CUDAFunctionTarget::HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice &&
         CalleeTarget == CUDAFunctionTarget::Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CUDAFunctionTarget::Host ||
          CalleeTarget == CUDAFunctionTarget::Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened. TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Device) ||
      (CallerTarget == CUDAFunctionTarget::Device &&
       CalleeTarget == CUDAFunctionTarget::Host) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void SemaCUDA::EraseUnwantedMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
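  // For illustration (hypothetical overload set): if a __host__ caller sees
  //   __device__ void f(int);   // CFP_Never from host
  //   __host__   void f(int);   // CFP_Native from host
  // then BestCFP is CFP_Native and the __device__ candidate is erased below.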
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
                                CUDAFunctionTarget Target2,
                                CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != CUDAFunctionTarget::Global);
  assert(Target2 != CUDAFunctionTarget::Global);

  if (Target1 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMemberKind CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  std::optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  Sema::ContextRAII MethodContext(SemaRef, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
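  // For illustration (hypothetical types): for
  //   struct A { __host__ A(); };
  //   struct B { __device__ B(); };
  //   struct C : A, B {};
  // the implicit C::C() must call a __host__ base ctor and a __device__ base
  // ctor; the scan below detects the collision and marks it CUDAInvalidTarget.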
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(BaseClassDecl, CSM,
                                    /* ConstArg */ ConstRHS,
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyTarget(SMOR.getMethod());
    if (!InferredTarget) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, BaseMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
              << llvm::to_underlying(BaseMethodTarget);
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        getASTContext().getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(FieldRecDecl, CSM,
                                    /* ConstArg */ ConstRHS && !F->isMutable(),
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget = IdentifyTarget(SMOR.getMethod());
    if (!InferredTarget) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, FieldMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
              << llvm::to_underlying(FieldMethodTarget);
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget) {
    if (*InferredTarget == CUDAFunctionTarget::Device)
      NeedsH = false;
    else if (*InferredTarget == CUDAFunctionTarget::Host)
      NeedsD = false;
  }

  // We are either setting attributes for the first time, or the inferred ones
  // must match previously set ones.
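  // For illustration (hypothetical type): for
  //   struct Empty {};
  // nothing is inferred from bases or fields, so the implicit special member
  // receives both attributes below and is usable from host and device code.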
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));

  return false;
}

bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Union does not have base class and union dtor does not call dtors of its
  // data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

namespace {
enum CUDAInitializerCheckKind {
  CICK_DeviceOrConstant, // Check initializer for device/constant variable
  CICK_Shared,           // Check initializer for shared variable
};

bool IsDependentVar(VarDecl *VD) {
  if (VD->getType()->isDependentType())
    return true;
  if (const auto *Init = VD->getInit())
    return Init->isValueDependent();
  return false;
}

// Check whether a variable has an allowed initializer for a CUDA device side
// variable with global storage. \p VD may be a host variable to be checked for
// potential promotion to device side variable.
//
// CUDA/HIP allows only empty constructors as initializers for global
// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
// __shared__ variables whether they are local or not (they all are implicitly
// static in CUDA). One exception is that CUDA allows constant initializers
// for __constant__ and __device__ variables.
bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD,
                                           CUDAInitializerCheckKind CheckKind) {
  assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
  assert(!IsDependentVar(VD) && "do not check dependent var");
  const Expr *Init = VD->getInit();
  auto IsEmptyInit = [&](const Expr *Init) {
    if (!Init)
      return true;
    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
      return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor());
    }
    return false;
  };
  auto IsConstantInit = [&](const Expr *Init) {
    assert(Init);
    ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(),
                                                    /*NoWronSidedVars=*/true);
    return Init->isConstantInitializer(S.getASTContext(),
                                       VD->getType()->isReferenceType());
  };
  auto HasEmptyDtor = [&](VarDecl *VD) {
    if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
      return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor());
    return true;
  };
  if (CheckKind == CICK_Shared)
    return IsEmptyInit(Init) && HasEmptyDtor(VD);
  return S.getLangOpts().GPUAllowDeviceInit ||
         ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
}
} // namespace

void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
  // Return early if VD is inside a non-instantiated template function since
  // the implicit constructor is not defined yet.
  if (const FunctionDecl *FD =
          dyn_cast_or_null<FunctionDecl>(VD->getDeclContext()))
    if (FD->isDependentContext())
      return;

  // Do not check dependent variables since the ctor/dtor/initializer are not
  // determined. Do it after instantiation.
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
      IsDependentVar(VD))
    return;
  const Expr *Init = VD->getInit();
  bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
  bool IsDeviceOrConstantVar =
      !IsSharedVar &&
      (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
  if (IsDeviceOrConstantVar || IsSharedVar) {
    if (HasAllowedCUDADeviceStaticInitializer(
            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
      return;
    Diag(VD->getLocation(),
         IsSharedVar ?
                     diag::err_shared_var_init : diag::err_dynamic_var_init)
        << Init->getSourceRange();
    VD->setInvalidDecl();
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyTarget(InitFn);
      if (InitFnTarget != CUDAFunctionTarget::Host &&
          InitFnTarget != CUDAFunctionTarget::HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << llvm::to_underlying(InitFnTarget) << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
    const FunctionDecl *Callee) {
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  if (!isImplicitHostDeviceFunction(Callee))
    return;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);

  // Record whether an implicit host device function is used on device side.
  if (CallerTarget != CUDAFunctionTarget::Device &&
      CallerTarget != CUDAFunctionTarget::Global &&
      (CallerTarget != CUDAFunctionTarget::HostDevice ||
       (isImplicitHostDeviceFunction(Caller) &&
        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
    return;

  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");

  if (ForceHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  // If a template function has no host/device/global attributes,
  // make it an implicit host device function.
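  // For illustration (hypothetical snippet): under
  // -foffload-implicit-host-device-templates,
  //   template <class T> T twice(T x) { return x + x; }
  // is treated as if it were declared __host__ __device__ and may be
  // instantiated from either side.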
  if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
      !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
      !NewD->hasAttr<CUDAGlobalAttr>() &&
      (NewD->getDescribedFunctionTemplate() ||
       NewD->isFunctionTemplateSpecialization())) {
    NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !SemaRef.IsOverload(NewD, OldD,
                               /* UseMemberUsingDeclRules = */ false,
                               /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!SemaRef.getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
}

// TODO: `__constant__` memory may be a limited resource for certain targets.
// A safeguard may be needed at the end of compilation pipeline if
// `__constant__` memory usage goes beyond limit.
void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) {
  // Do not promote dependent variables since the ctor/dtor/initializer are
  // not determined. Do it after instantiation.
  if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
      !VD->hasAttr<CUDASharedAttr>() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
      !IsDependentVar(VD) &&
      ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
       HasAllowedCUDADeviceStaticInitializer(*this, VD,
                                             CICK_DeviceOrConstant))) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}

SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
                                                           unsigned DiagID) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Global:
    case CUDAFunctionTarget::Device:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device.
      // Defer any errors in device mode until the function is known-emitted.
      if (!getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc,
                                                     unsigned DiagID) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Host:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) ==
                            Sema::FunctionEmissionStatus::Emitted;
  SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                          CallerKnownEmitted] {
    switch (IdentifyPreference(Caller, Callee)) {
    case CFP_Never:
    case CFP_WrongSide:
      assert(Caller && "Never/wrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted ?
                 SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();

  if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
    // For -fgpu-rdc, keep track of external kernels used by host functions.
    if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
        (!Caller || (!Caller->getDescribedFunctionTemplate() &&
                     getASTContext().GetGVALinkageForFunction(Caller) ==
                         GVA_StrongExternal)))
      getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
    return true;
  }

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller,
                        SemaRef)
      << llvm::to_underlying(IdentifyTarget(Callee)) << /*function*/ 0 << Callee
      << llvm::to_underlying(IdentifyTarget(Caller));
  if (!Callee->getBuiltinID())
    SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                          diag::note_previous_decl, Caller, SemaRef)
        << Callee;
  return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
         DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}

// Check the wrong-sided reference capture of lambda for CUDA/HIP.
// A lambda function may capture a stack variable by reference when it is
// defined and uses the capture by reference when the lambda is called. When
// the capture and use happen on different sides, the capture is invalid and
// should be diagnosed.
void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on host
  // side. In such lambda functions, a reference capture is invalid only if the
  // lambda structure is populated by a device function or kernel and then
  // passed to and called by a host function. However, that is impossible: a
  // device function or kernel can only call a device function, and a kernel
  // cannot pass a lambda back to a host function, since we cannot define a
  // kernel argument type which can hold the lambda before the lambda itself is
  // defined.
  if (!getLangOpts().CUDAIsDevice)
    return;

  // File-scope lambda can only do init captures for global variables, which
  // results in passing by value for these global variables.
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on device side. For such lambdas, a reference capture is invalid
  // only if the lambda structure is populated by a host function and then
  // passed to and called in a device function or kernel.
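  // For illustration (hypothetical CUDA/HIP snippet; launch() is a made-up
  // helper that launches a kernel taking the lambda by value):
  //   void host_fn(int n) {
  //     auto l = [&n] __device__ () { return n; }; // host stack captured by ref
  //     launch(l);  // dereferencing the capture on the device is invalid
  //   }
  // The checks below diagnose exactly this host-populated, device-called case.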
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
  if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, SemaRef)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    // Capture of this pointer is allowed since this pointer may be pointing to
    // managed memory which is accessible on both device and host sides. It
    // only results in invalid memory access if this pointer points to memory
    // not accessible on device side.
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::warn_maybe_capture_bad_target_this_ptr, Callee,
                          SemaRef);
  }
}

void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
}

void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyTarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyTarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device,
    // so should have the same implementation on both sides.
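    // For illustration (hypothetical declarations):
    //   __host__ int f(int);
    //   __device__ int f(int);            // OK: overloading on target
    //   __host__ __device__ int f(int);   // error: clashes with both above
    // __global__ kernels are rejected by the same check below.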
    if (NewTarget != OldTarget &&
        !SemaRef.IsOverload(NewFD, OldFD,
                            /* UseMemberUsingDeclRules = */ false,
                            /* ConsiderCudaAttrs = */ false)) {
      if ((NewTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(NewFD) &&
             OldTarget == CUDAFunctionTarget::Device)) ||
          (OldTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(OldFD) &&
             NewTarget == CUDAFunctionTarget::Device)) ||
          (NewTarget == CUDAFunctionTarget::Global) ||
          (OldTarget == CUDAFunctionTarget::Global)) {
        Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
            << llvm::to_underlying(NewTarget) << NewFD->getDeclName()
            << llvm::to_underlying(OldTarget) << OldFD;
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
        NewFD->setInvalidDecl();
        break;
      }
      if ((NewTarget == CUDAFunctionTarget::Host &&
           OldTarget == CUDAFunctionTarget::Device) ||
          (NewTarget == CUDAFunctionTarget::Device &&
           OldTarget == CUDAFunctionTarget::Host)) {
        Diag(NewFD->getLocation(), diag::warn_offload_incompatible_redeclare)
            << llvm::to_underlying(NewTarget) << llvm::to_underlying(OldTarget);
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
      }
    }
  }
}

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD);
}

std::string SemaCUDA::getConfigureFuncName() const {
  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call.
  return "cudaConfigureCall";
}